diff options
Diffstat (limited to 'tools/lib')
201 files changed, 28282 insertions, 27943 deletions
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index a13e9c7f1fc5..8665c799e0fa 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -15,17 +15,23 @@ LD ?= $(CROSS_COMPILE)ld MAKEFLAGS += --no-print-directory +INSTALL = install + + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -fPIC ifeq ($(DEBUG),0) -ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 -else - CFLAGS += -O6 -endif endif ifeq ($(DEBUG),0) @@ -45,10 +51,23 @@ RM = rm -f API_IN := $(OUTPUT)libapi-in.o +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include +include $(srctree)/tools/scripts/Makefile.include all: fixdep $(LIBFILE) @@ -58,9 +77,52 @@ $(API_IN): FORCE $(LIBFILE): $(API_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +HDRS := cpu.h debug.h io.h io_dir.h +FD_HDRS := fd/array.h +FS_HDRS := fs/fs.h fs/tracing_path.h +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) +INSTALL_FD_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FD_HDRS)) +INSTALL_FS_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FS_HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +$(INSTALL_FD_HDRS): $(INSTALL_HDRS_PFX)/fd/%.h: fd/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/fd/,644) + +$(INSTALL_FS_HDRS): $(INSTALL_HDRS_PFX)/fs/%.h: fs/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/fs/,644) + +install_headers: $(INSTALL_HDRS) $(INSTALL_FD_HDRS) $(INSTALL_FS_HDRS) + $(call QUIET_INSTALL, libapi_headers) + +install: install_lib install_headers + clean: $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ - find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) FORCE: diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c index 5e6cb9debe37..f0f195207fca 100644 --- a/tools/lib/api/fd/array.c +++ b/tools/lib/api/fd/array.c @@ -88,6 +88,23 @@ int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags return pos; } +int fdarray__dup_entry_from(struct fdarray *fda, int pos, struct fdarray *from) +{ + struct pollfd *entry; + int npos; + + if (pos >= from->nr) + return -EINVAL; + + entry = &from->entries[pos]; + + npos = fdarray__add(fda, entry->fd, entry->events, from->priv[pos].flags); + if (npos >= 0) + fda->priv[npos] = from->priv[pos]; + + return npos; +} + int fdarray__filter(struct fdarray *fda, short revents, void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), void *arg) diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h index 7fcf21a33c0c..5c01f7b05dfb 100644 --- a/tools/lib/api/fd/array.h +++ b/tools/lib/api/fd/array.h @@ -31,8 +31,9 @@ struct fdarray { }; enum fdarray_flags { - fdarray_flag__default = 0x00000000, - fdarray_flag__nonfilterable = 0x00000001 + fdarray_flag__default = 0x00000000, + fdarray_flag__nonfilterable = 0x00000001, + fdarray_flag__non_perf_event = 0x00000002, }; void fdarray__init(struct fdarray *fda, int nr_autogrow); @@ -42,6 +43,7 @@ struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow); void fdarray__delete(struct fdarray *fda); int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags); +int fdarray__dup_entry_from(struct fdarray *fda, int pos, struct fdarray *from); int fdarray__poll(struct fdarray *fda, int timeout); int fdarray__filter(struct fdarray *fda, short revents, void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c index 1573dae4259d..250629a09423 100644 --- a/tools/lib/api/fs/cgroup.c +++ b/tools/lib/api/fs/cgroup.c @@ -14,7 +14,7 @@ struct cgroupfs_cache_entry { }; /* just cache last used one */ -static struct cgroupfs_cache_entry cached; +static struct cgroupfs_cache_entry *cached; int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys) { @@ -24,9 +24,9 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys) char *p, *path; char mountpoint[PATH_MAX]; - if (!strcmp(cached.subsys, subsys)) { - if (strlen(cached.mountpoint) < maxlen) { - strcpy(buf, cached.mountpoint); + if (cached && !strcmp(cached->subsys, subsys)) { + if (strlen(cached->mountpoint) < maxlen) { + strcpy(buf, cached->mountpoint); return 0; } return -1; @@ -91,8 +91,13 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys) free(line); fclose(fp); - strncpy(cached.subsys, subsys, sizeof(cached.subsys) - 1); - strcpy(cached.mountpoint, mountpoint); + if (!cached) + cached = calloc(1, sizeof(*cached)); + + if (cached) { + strncpy(cached->subsys, subsys, sizeof(cached->subsys) - 1); + strcpy(cached->mountpoint, mountpoint); + } if (mountpoint[0] && strlen(mountpoint) < maxlen) { strcpy(buf, mountpoint); diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 82f53d81a7a7..edec23406dbc 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <assert.h> #include <ctype.h> #include <errno.h> #include <limits.h> @@ -10,10 +11,12 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <pthread.h> #include <unistd.h> #include <sys/mount.h> #include "fs.h" +#include "../io.h" #include "debug-internal.h" #define _STR(x) #x @@ -43,7 +46,7 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -static const char * const sysfs__fs_known_mountpoints[] = { +static const char * const sysfs__known_mountpoints[] = { "/sys", 0, }; @@ -86,87 +89,89 @@ static const char * const bpf_fs__known_mountpoints[] = { }; struct fs { - const char *name; - const char * const *mounts; - char path[PATH_MAX]; - bool found; - bool checked; - long magic; -}; - -enum { - FS__SYSFS = 0, - FS__PROCFS = 1, - FS__DEBUGFS = 2, - FS__TRACEFS = 3, - FS__HUGETLBFS = 4, - FS__BPF_FS = 5, + const char * const name; + const char * const * const mounts; + char *path; + pthread_mutex_t mount_mutex; + const long magic; }; #ifndef TRACEFS_MAGIC #define TRACEFS_MAGIC 0x74726163 #endif -static struct fs fs__entries[] = { - [FS__SYSFS] = { - .name = "sysfs", - .mounts = sysfs__fs_known_mountpoints, - .magic = SYSFS_MAGIC, - .checked = false, - }, - [FS__PROCFS] = { - .name = "proc", - .mounts = procfs__known_mountpoints, - .magic = PROC_SUPER_MAGIC, - .checked = false, - }, - [FS__DEBUGFS] = { - .name = "debugfs", - .mounts = debugfs__known_mountpoints, - .magic = DEBUGFS_MAGIC, - .checked = false, - }, - [FS__TRACEFS] = { - .name = "tracefs", - .mounts = tracefs__known_mountpoints, - .magic = TRACEFS_MAGIC, - .checked = false, - }, - [FS__HUGETLBFS] = { - .name = "hugetlbfs", - .mounts = hugetlbfs__known_mountpoints, - .magic = HUGETLBFS_MAGIC, - .checked = false, - }, - [FS__BPF_FS] = { - .name = "bpf", - .mounts = bpf_fs__known_mountpoints, - .magic = BPF_FS_MAGIC, - .checked = false, - }, -}; +static void fs__init_once(struct fs *fs); +static const char *fs__mountpoint(const struct fs *fs); +static const char *fs__mount(struct fs *fs); + +#define FS(lower_name, fs_name, upper_name) \ +static struct fs fs__##lower_name = { \ + .name = #fs_name, \ + .mounts = lower_name##__known_mountpoints, \ + .magic = upper_name##_MAGIC, \ + .mount_mutex = PTHREAD_MUTEX_INITIALIZER, \ +}; \ + \ +static void lower_name##_init_once(void) \ +{ \ + struct fs *fs = &fs__##lower_name; \ + \ + fs__init_once(fs); \ +} \ + \ +const char *lower_name##__mountpoint(void) \ +{ \ + static pthread_once_t init_once = PTHREAD_ONCE_INIT; \ + struct fs *fs = &fs__##lower_name; \ + \ + pthread_once(&init_once, lower_name##_init_once); \ + return fs__mountpoint(fs); \ +} \ + \ +const char *lower_name##__mount(void) \ +{ \ + const char *mountpoint = lower_name##__mountpoint(); \ + struct fs *fs = &fs__##lower_name; \ + \ + if (mountpoint) \ + return mountpoint; \ + \ + return fs__mount(fs); \ +} \ + \ +bool lower_name##__configured(void) \ +{ \ + return lower_name##__mountpoint() != NULL; \ +} + +FS(sysfs, sysfs, SYSFS); +FS(procfs, procfs, PROC_SUPER); +FS(debugfs, debugfs, DEBUGFS); +FS(tracefs, tracefs, TRACEFS); +FS(hugetlbfs, hugetlbfs, HUGETLBFS); +FS(bpf_fs, bpf, BPF_FS); static bool fs__read_mounts(struct fs *fs) { - bool found = false; char type[100]; FILE *fp; + char path[PATH_MAX + 1]; fp = fopen("/proc/mounts", "r"); if (fp == NULL) - return NULL; + return false; - while (!found && - fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", - fs->path, type) == 2) { + while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", + path, type) == 2) { - if (strcmp(type, fs->name) == 0) - found = true; + if (strcmp(type, fs->name) == 0) { + fs->path = strdup(path); + fclose(fp); + return fs->path != NULL; + } } - fclose(fp); - fs->checked = true; - return fs->found = found; + return false; } static int fs__valid_mount(const char *fs, long magic) @@ -188,8 +193,9 @@ static bool fs__check_mounts(struct fs *fs) ptr = fs->mounts; while (*ptr) { if (fs__valid_mount(*ptr, fs->magic) == 0) { - fs->found = true; - strcpy(fs->path, *ptr); + fs->path = strdup(*ptr); + if (!fs->path) + return false; return true; } ptr++; @@ -227,43 +233,26 @@ static bool fs__env_override(struct fs *fs) if (!override_path) return false; - fs->found = true; - fs->checked = true; - strncpy(fs->path, override_path, sizeof(fs->path) - 1); - fs->path[sizeof(fs->path) - 1] = '\0'; + fs->path = strdup(override_path); + if (!fs->path) + return false; return true; } -static const char *fs__get_mountpoint(struct fs *fs) +static void fs__init_once(struct fs *fs) { - if (fs__env_override(fs)) - return fs->path; - - if (fs__check_mounts(fs)) - return fs->path; - - if (fs__read_mounts(fs)) - return fs->path; - - return NULL; + if (!fs__env_override(fs) && + !fs__check_mounts(fs) && + !fs__read_mounts(fs)) { + assert(!fs->path); + } else { + assert(fs->path); + } } -static const char *fs__mountpoint(int idx) +static const char *fs__mountpoint(const struct fs *fs) { - struct fs *fs = &fs__entries[idx]; - - if (fs->found) - return (const char *)fs->path; - - /* the mount point was already checked for the mount point - * but and did not exist, so return NULL to avoid scanning again. - * This makes the found and not found paths cost equivalent - * in case of multiple calls. - */ - if (fs->checked) - return NULL; - - return fs__get_mountpoint(fs); + return fs->path; } static const char *mount_overload(struct fs *fs) @@ -278,52 +267,36 @@ static const char *mount_overload(struct fs *fs) return getenv(upper_name) ?: *fs->mounts; } -static const char *fs__mount(int idx) +static const char *fs__mount(struct fs *fs) { - struct fs *fs = &fs__entries[idx]; const char *mountpoint; - if (fs__mountpoint(idx)) - return (const char *)fs->path; - - mountpoint = mount_overload(fs); + pthread_mutex_lock(&fs->mount_mutex); - if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0) - return NULL; + /* Check if path found inside the mutex to avoid races with other callers of mount. */ + mountpoint = fs__mountpoint(fs); + if (mountpoint) + goto out; - return fs__check_mounts(fs) ? fs->path : NULL; -} + mountpoint = mount_overload(fs); -#define FS(name, idx) \ -const char *name##__mountpoint(void) \ -{ \ - return fs__mountpoint(idx); \ -} \ - \ -const char *name##__mount(void) \ -{ \ - return fs__mount(idx); \ -} \ - \ -bool name##__configured(void) \ -{ \ - return name##__mountpoint() != NULL; \ + if (mount(NULL, mountpoint, fs->name, 0, NULL) == 0 && + fs__valid_mount(mountpoint, fs->magic) == 0) { + fs->path = strdup(mountpoint); + mountpoint = fs->path; + } +out: + pthread_mutex_unlock(&fs->mount_mutex); + return mountpoint; } -FS(sysfs, FS__SYSFS); -FS(procfs, FS__PROCFS); -FS(debugfs, FS__DEBUGFS); -FS(tracefs, FS__TRACEFS); -FS(hugetlbfs, FS__HUGETLBFS); -FS(bpf_fs, FS__BPF_FS); - int filename__read_int(const char *filename, int *value) { char line[64]; int fd = open(filename, O_RDONLY), err = -1; if (fd < 0) - return -1; + return -errno; if (read(fd, line, sizeof(line)) > 0) { *value = atoi(line); @@ -341,7 +314,7 @@ static int filename__read_ull_base(const char *filename, int fd = open(filename, O_RDONLY), err = -1; if (fd < 0) - return -1; + return -errno; if (read(fd, line, sizeof(line)) > 0) { *value = strtoull(line, NULL, base); @@ -372,53 +345,24 @@ int filename__read_ull(const char *filename, unsigned long long *value) return filename__read_ull_base(filename, value, 0); } -#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ - int filename__read_str(const char *filename, char **buf, size_t *sizep) { - size_t size = 0, alloc_size = 0; - void *bf = NULL, *nbf; - int fd, n, err = 0; - char sbuf[STRERR_BUFSIZE]; + struct io io; + char bf[128]; + int err; - fd = open(filename, O_RDONLY); - if (fd < 0) + io.fd = open(filename, O_RDONLY); + if (io.fd < 0) return -errno; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (!nbf) { - err = -ENOMEM; - break; - } - - bf = nbf; - } - - n = read(fd, bf + size, alloc_size - size); - if (n < 0) { - if (size) { - pr_warn("read failed %d: %s\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); - err = 0; - } else - err = -errno; - - break; - } - - size += n; - } while (n > 0); - - if (!err) { - *sizep = size; - *buf = bf; + io__init(&io, io.fd, bf, sizeof(bf)); + *buf = NULL; + err = io__getdelim(&io, buf, sizep, /*delim=*/-1); + if (err < 0) { + free(*buf); + *buf = NULL; } else - free(bf); - - close(fd); + err = 0; + close(io.fd); return err; } @@ -428,7 +372,7 @@ int filename__write_int(const char *filename, int value) char buf[64]; if (fd < 0) - return err; + return -errno; sprintf(buf, "%d", value); if (write(fd, buf, sizeof(buf)) == sizeof(buf)) @@ -503,15 +447,22 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep) int sysfs__read_bool(const char *entry, bool *value) { - char *buf; - size_t size; - int ret; + struct io io; + char bf[16]; + int ret = 0; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); - ret = sysfs__read_str(entry, &buf, &size); - if (ret < 0) - return ret; + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + io.fd = open(path, O_RDONLY); + if (io.fd < 0) + return -errno; - switch (buf[0]) { + io__init(&io, io.fd, bf, sizeof(bf)); + switch (io__get_char(&io)) { case '1': case 'y': case 'Y': @@ -525,8 +476,7 @@ int sysfs__read_bool(const char *entry, bool *value) default: ret = -1; } - - free(buf); + close(io.fd); return ret; } diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 5afb11b30fca..834fd64c7130 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -13,17 +13,12 @@ #include "tracing_path.h" -static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug"; -static char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing"; -static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events"; +static char tracing_path[PATH_MAX] = "/sys/kernel/tracing"; static void __tracing_path_set(const char *tracing, const char *mountpoint) { - snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint); snprintf(tracing_path, sizeof(tracing_path), "%s/%s", mountpoint, tracing); - snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", - mountpoint, tracing, "events"); } static const char *tracing_path_tracefs_mount(void) @@ -74,7 +69,7 @@ char *get_tracing_file(const char *name) { char *file; - if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0) + if (asprintf(&file, "%s%s", tracing_path_mount(), name) < 0) return NULL; return file; @@ -113,6 +108,22 @@ DIR *tracing_events__opendir(void) return dir; } +int tracing_events__scandir_alphasort(struct dirent ***namelist) +{ + char *path = get_tracing_file("events"); + int ret; + + if (!path) { + *namelist = NULL; + return 0; + } + + ret = scandir(path, namelist, NULL, alphasort); + put_events_file(path); + + return ret; +} + int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) { @@ -133,15 +144,15 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size, /* sdt markers */ if (!strncmp(filename, "sdt_", 4)) { snprintf(buf, size, - "Error:\tFile %s/%s not found.\n" + "Error:\tFile %s/events/%s not found.\n" "Hint:\tSDT event cannot be directly recorded on.\n" "\tPlease first use 'perf probe %s:%s' before recording it.\n", - tracing_events_path, filename, sys, name); + tracing_path, filename, sys, name); } else { snprintf(buf, size, - "Error:\tFile %s/%s not found.\n" + "Error:\tFile %s/events/%s not found.\n" "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n", - tracing_events_path, filename); + tracing_path, filename); } break; } @@ -153,9 +164,9 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size, break; case EACCES: { snprintf(buf, size, - "Error:\tNo permissions to read %s/%s\n" + "Error:\tNo permissions to read %s/events/%s\n" "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - tracing_events_path, filename, tracing_path_mount()); + tracing_path, filename, tracing_path_mount()); } break; default: diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h index a19136b086dc..fc6347c11deb 100644 --- a/tools/lib/api/fs/tracing_path.h +++ b/tools/lib/api/fs/tracing_path.h @@ -6,6 +6,7 @@ #include <dirent.h> DIR *tracing_events__opendir(void); +int tracing_events__scandir_alphasort(struct dirent ***namelist); void tracing_path_set(const char *mountpoint); const char *tracing_path_mount(void); diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index 777c20f6b604..1731996b2c32 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -7,8 +7,12 @@ #ifndef __API_IO__ #define __API_IO__ +#include <errno.h> +#include <poll.h> #include <stdlib.h> +#include <string.h> #include <unistd.h> +#include <linux/types.h> struct io { /* File descriptor being read/ */ @@ -21,6 +25,8 @@ struct io { char *end; /* Currently accessed data pointer. */ char *data; + /* Read timeout, 0 implies no timeout. */ + int timeout_ms; /* Set true on when the end of file on read error. */ bool eof; }; @@ -33,29 +39,59 @@ static inline void io__init(struct io *io, int fd, io->buf = buf; io->end = buf; io->data = buf; + io->timeout_ms = 0; io->eof = false; } -/* Reads one character from the "io" file with similar semantics to fgetc. */ -static inline int io__get_char(struct io *io) +/* Read from fd filling the buffer. Called when io->data == io->end. */ +static inline int io__fill_buffer(struct io *io) { - char *ptr = io->data; + ssize_t n; if (io->eof) return -1; - if (ptr == io->end) { - ssize_t n = read(io->fd, io->buf, io->buf_len); + if (io->timeout_ms != 0) { + struct pollfd pfds[] = { + { + .fd = io->fd, + .events = POLLIN, + }, + }; + n = poll(pfds, 1, io->timeout_ms); + if (n == 0) + errno = ETIMEDOUT; + if (n > 0 && !(pfds[0].revents & POLLIN)) { + errno = EIO; + n = -1; + } if (n <= 0) { io->eof = true; return -1; } - ptr = &io->buf[0]; - io->end = &io->buf[n]; } - io->data = ptr + 1; - return *ptr; + n = read(io->fd, io->buf, io->buf_len); + + if (n <= 0) { + io->eof = true; + return -1; + } + io->data = &io->buf[0]; + io->end = &io->buf[n]; + return 0; +} + +/* Reads one character from the "io" file with similar semantics to fgetc. */ +static inline int io__get_char(struct io *io) +{ + if (io->data == io->end) { + int ret = io__fill_buffer(io); + + if (ret) + return ret; + } + return *io->data++; } /* Read a hexadecimal value with no 0x prefix into the out argument hex. If the @@ -112,4 +148,54 @@ static inline int io__get_dec(struct io *io, __u64 *dec) } } +/* Read up to and including the first delim. */ +static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_len_out, int delim) +{ + char buf[128]; + int buf_pos = 0; + char *line = NULL, *temp; + size_t line_len = 0; + int ch = 0; + + /* TODO: reuse previously allocated memory. */ + free(*line_out); + while (ch != delim) { + ch = io__get_char(io); + + if (ch < 0) + break; + + if (buf_pos == sizeof(buf)) { + temp = realloc(line, line_len + sizeof(buf)); + if (!temp) + goto err_out; + line = temp; + memcpy(&line[line_len], buf, sizeof(buf)); + line_len += sizeof(buf); + buf_pos = 0; + } + buf[buf_pos++] = (char)ch; + } + temp = realloc(line, line_len + buf_pos + 1); + if (!temp) + goto err_out; + line = temp; + memcpy(&line[line_len], buf, buf_pos); + line[line_len + buf_pos] = '\0'; + line_len += buf_pos; + *line_out = line; + *line_len_out = line_len; + return line_len; +err_out: + free(line); + *line_out = NULL; + *line_len_out = 0; + return -ENOMEM; +} + +static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out) +{ + return io__getdelim(io, line_out, line_len_out, /*delim=*/'\n'); +} + #endif /* __API_IO__ */ diff --git a/tools/lib/api/io_dir.h b/tools/lib/api/io_dir.h new file mode 100644 index 000000000000..ef83e967e48c --- /dev/null +++ b/tools/lib/api/io_dir.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* + * Lightweight directory reading library. + */ +#ifndef __API_IO_DIR__ +#define __API_IO_DIR__ + +#include <dirent.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <linux/limits.h> + +#if !defined(SYS_getdents64) +#if defined(__x86_64__) || defined(__arm__) + #define SYS_getdents64 217 +#elif defined(__i386__) || defined(__s390x__) || defined(__sh__) + #define SYS_getdents64 220 +#elif defined(__alpha__) + #define SYS_getdents64 377 +#elif defined(__mips__) + #define SYS_getdents64 308 +#elif defined(__powerpc64__) || defined(__powerpc__) + #define SYS_getdents64 202 +#elif defined(__sparc64__) || defined(__sparc__) + #define SYS_getdents64 154 +#elif defined(__xtensa__) + #define SYS_getdents64 60 +#else + #define SYS_getdents64 61 +#endif +#endif /* !defined(SYS_getdents64) */ + +static inline ssize_t perf_getdents64(int fd, void *dirp, size_t count) +{ +#ifdef MEMORY_SANITIZER + memset(dirp, 0, count); +#endif + return syscall(SYS_getdents64, fd, dirp, count); +} + +struct io_dirent64 { + ino64_t d_ino; /* 64-bit inode number */ + off64_t d_off; /* 64-bit offset to next structure */ + unsigned short d_reclen; /* Size of this dirent */ + unsigned char d_type; /* File type */ + char d_name[NAME_MAX + 1]; /* Filename (null-terminated) */ +}; + +struct io_dir { + int dirfd; + ssize_t available_bytes; + struct io_dirent64 *next; + struct io_dirent64 buff[4]; +}; + +static inline void io_dir__init(struct io_dir *iod, int dirfd) +{ + iod->dirfd = dirfd; + iod->available_bytes = 0; +} + +static inline void io_dir__rewinddir(struct io_dir *iod) +{ + lseek(iod->dirfd, 0, SEEK_SET); + iod->available_bytes = 0; +} + +static inline struct io_dirent64 *io_dir__readdir(struct io_dir *iod) +{ + struct io_dirent64 *entry; + + if (iod->available_bytes <= 0) { + ssize_t rc = perf_getdents64(iod->dirfd, iod->buff, sizeof(iod->buff)); + + if (rc <= 0) + return NULL; + iod->available_bytes = rc; + iod->next = iod->buff; + } + entry = iod->next; + iod->next = (struct io_dirent64 *)((char *)entry + entry->d_reclen); + iod->available_bytes -= entry->d_reclen; + return entry; +} + +static inline bool io_dir__is_dir(const struct io_dir *iod, struct io_dirent64 *dent) +{ + if (dent->d_type == DT_UNKNOWN) { + struct stat st; + + if (fstatat(iod->dirfd, dent->d_name, &st, /*flags=*/0)) + return false; + + if (S_ISDIR(st.st_mode)) { + dent->d_type = DT_DIR; + return true; + } + } + return dent->d_type == DT_DIR; +} + +#endif /* __API_IO_DIR__ */ diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index db466ef7be9d..51255c69754d 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -5,9 +5,9 @@ */ #include <linux/bitmap.h> -int __bitmap_weight(const unsigned long *bitmap, int bits) +unsigned int __bitmap_weight(const unsigned long *bitmap, int bits) { - int k, w = 0, lim = bits/BITS_PER_LONG; + unsigned int k, w = 0, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; k++) w += hweight_long(bitmap[k]); @@ -57,7 +57,7 @@ size_t bitmap_scnprintf(unsigned long *bitmap, unsigned int nbits, return ret; } -int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k; @@ -72,31 +72,71 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, return result != 0; } -int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int bits) +bool __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] != bitmap2[k]) - return 0; + return false; if (bits % BITS_PER_LONG) if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) - return 0; + return false; - return 1; + return true; } -int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int bits) +bool __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & bitmap2[k]) - return 1; + return true; if (bits % BITS_PER_LONG) if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) - return 1; - return 0; + return true; + return false; +} + +void __bitmap_set(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_set >= 0) { + *p |= mask_to_set; + len -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (len) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + *p |= mask_to_set; + } +} + +void __bitmap_clear(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } } diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 0da84cb9e66d..f02725b123b3 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -5,3 +5,4 @@ TAGS tags cscope.* /bpf_helper_defs.h +fixdep diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 94f0a146bb7b..c80204bb72a2 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,4 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ - netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ - btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_utils.o \ + netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ + btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ + usdt.o zip.o elf.o features.o btf_iter.o btf_relocate.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index f947b61b2107..168140f8e646 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -2,7 +2,7 @@ # Most of this file is copied from tools/lib/traceevent/Makefile RM ?= rm -srctree = $(abs_srctree) +srctree := $(realpath $(srctree)) VERSION_SCRIPT := libbpf.map LIBBPF_VERSION := $(shell \ @@ -53,15 +53,9 @@ include $(srctree)/tools/scripts/Makefile.include # copy a bit from Linux kbuild -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -INCLUDES = -I$(if $(OUTPUT),$(OUTPUT),.) \ - -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi +INCLUDES = -I$(or $(OUTPUT),.) \ + -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \ + -I$(srctree)/tools/arch/$(SRCARCH)/include export prefix libdir src obj @@ -95,12 +89,6 @@ override CFLAGS += $(CLANG_CROSS_FLAGS) # flags specific for shared library SHLIB_FLAGS := -DSHARED -fPIC -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. @@ -108,6 +96,8 @@ MAKEOVERRIDES= all: +OUTPUT ?= ./ +OUTPUT := $(abspath $(OUTPUT))/ export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include @@ -127,11 +117,11 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) @@ -141,7 +131,10 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force $(BPF_GENERATED) +$(SHARED_OBJDIR) $(STATIC_OBJDIR): + $(Q)mkdir -p $@ + +$(BPF_IN_SHARED): force $(BPF_GENERATED) | $(SHARED_OBJDIR) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -151,9 +144,11 @@ $(BPF_IN_SHARED): force $(BPF_GENERATED) @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true + $(SILENT_MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= OUTPUT=$(SHARED_OBJDIR) $(SHARED_OBJDIR)fixdep $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force $(BPF_GENERATED) +$(BPF_IN_STATIC): force $(BPF_GENERATED) | $(STATIC_OBJDIR) + $(SILENT_MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= OUTPUT=$(STATIC_OBJDIR) $(STATIC_OBJDIR)fixdep $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h @@ -194,7 +189,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ diff -u $(OUTPUT)libbpf_global_syms.tmp \ @@ -237,9 +232,9 @@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ +SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h \ bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ - skel_internal.h libbpf_version.h + skel_internal.h libbpf_version.h usdt.bpf.h GEN_HDRS := $(BPF_GENERATED) INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf @@ -255,6 +250,7 @@ $(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h $(call do_install,$<,$(prefix)/include/bpf,644) install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS) + $(call QUIET_INSTALL, libbpf_headers) install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ @@ -262,7 +258,7 @@ install_pkgconfig: $(PC_FILE) install: install_lib install_pkgconfig install_headers -clean: +clean: fixdep-clean $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_GENERATED) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ @@ -286,3 +282,20 @@ tags: # Delete partially updated (corrupted) files on error .DELETE_ON_ERROR: + +help: + @echo 'libbpf common targets:' + @echo ' HINT: use "V=1" to enable verbose build' + @echo ' all - build libraries and pkgconfig' + @echo ' clean - remove all generated files' + @echo ' check - check ABI and version info' + @echo '' + @echo 'libbpf install targets:' + @echo ' HINT: use "prefix"(defaults to "/usr/local") or "DESTDIR" (defaults to "/")' + @echo ' to adjust target destination, e.g. "make prefix=/usr/local install"' + @echo ' install - build and install all headers, libraries and pkgconfig' + @echo ' install_headers - install only headers to include/bpf' + @echo '' + @echo 'libbpf make targets:' + @echo ' tags - use ctags to make tag information for source code browsing' + @echo ' cscope - use cscope to make interactive source code browsing database' diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 550b4cbb6c99..b66f5fbfbbb2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -29,6 +29,7 @@ #include <errno.h> #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/kernel.h> #include <limits.h> #include <sys/resource.h> #include "bpf.h" @@ -83,9 +84,7 @@ static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, return ensure_good_fd(fd); } -#define PROG_LOAD_ATTEMPTS 5 - -static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) +int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) { int fd; @@ -104,25 +103,28 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(void) +int probe_memcg_account(int token_fd) { - const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); struct bpf_insn insns[] = { BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), BPF_EXIT_INSN(), }; - size_t insn_cnt = sizeof(insns) / sizeof(insns[0]); + size_t insn_cnt = ARRAY_SIZE(insns); union bpf_attr attr; int prog_fd; /* attempt loading freplace trying to use custom BTF */ - memset(&attr, 0, prog_load_attr_sz); + memset(&attr, 0, attr_sz); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); + attr.prog_token_fd = token_fd; + if (token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; - prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); + prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { close(prog_fd); return 1; @@ -146,17 +148,13 @@ int bump_rlimit_memlock(void) { struct rlimit rlim; - /* this the default in libbpf 1.0, but for now user has to opt-in explicitly */ - if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK)) - return 0; - /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; - /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ + /* zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */ if (memlock_rlim == 0) return 0; @@ -174,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size); union bpf_attr attr; int fd; @@ -186,7 +184,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name) + if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -196,6 +194,7 @@ int bpf_map_create(enum bpf_map_type map_type, attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); + attr.value_type_btf_obj_fd = OPTS_GET(opts, value_type_btf_obj_fd, 0); attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); attr.map_flags = OPTS_GET(opts, map_flags, 0); @@ -203,90 +202,14 @@ int bpf_map_create(enum bpf_map_type map_type, attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + attr.map_token_fd = OPTS_GET(opts, token_fd, 0); + attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL)); + attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) -{ - LIBBPF_OPTS(bpf_map_create_opts, p); - - p.map_flags = create_attr->map_flags; - p.numa_node = create_attr->numa_node; - p.btf_fd = create_attr->btf_fd; - p.btf_key_type_id = create_attr->btf_key_type_id; - p.btf_value_type_id = create_attr->btf_value_type_id; - p.map_ifindex = create_attr->map_ifindex; - if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS) - p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id; - else - p.inner_map_fd = create_attr->inner_map_fd; - - return bpf_map_create(create_attr->map_type, create_attr->name, - create_attr->key_size, create_attr->value_size, - create_attr->max_entries, &p); -} - -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags, int node) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts); - - opts.map_flags = map_flags; - if (node >= 0) { - opts.numa_node = node; - opts.map_flags |= BPF_F_NUMA_NODE; - } - - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - - return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags, int node) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts); - - opts.inner_map_fd = inner_map_fd; - opts.map_flags = map_flags; - if (node >= 0) { - opts.map_flags |= BPF_F_NUMA_NODE; - opts.numa_node = node; - } - - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); -} - -int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, - .inner_map_fd = inner_map_fd, - .map_flags = map_flags, - ); - - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); -} - static void * alloc_zero_tailing_info(const void *orecord, __u32 cnt, __u32 actual_rec_size, __u32 expected_rec_size) @@ -312,12 +235,12 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt, return info; } -DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0) -int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, - const char *prog_name, const char *license, - const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts) +int bpf_prog_load(enum bpf_prog_type prog_type, + const char *prog_name, const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + struct bpf_prog_load_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, keyring_id); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -337,7 +260,7 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, if (attempts == 0) attempts = PROG_LOAD_ATTEMPTS; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.prog_type = prog_type; attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); @@ -346,8 +269,9 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, attr.prog_flags = OPTS_GET(opts, prog_flags, 0); attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); + attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name) + if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); @@ -375,10 +299,6 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, if (!!log_buf != !!log_size) return libbpf_err(-EINVAL); - if (log_level > (4 | 2 | 1)) - return libbpf_err(-EINVAL); - if (log_level && !log_buf) - return libbpf_err(-EINVAL); func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); func_info = OPTS_GET(opts, func_info, NULL); @@ -393,6 +313,7 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); + attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0); if (log_level) { attr.log_buf = ptr_to_u64(log_buf); @@ -400,7 +321,8 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, attr.log_level = log_level; } - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, attr_sz, attempts); + OPTS_SET(opts, log_true_size, attr.log_true_size); if (fd >= 0) return fd; @@ -440,7 +362,8 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, break; } - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, attr_sz, attempts); + OPTS_SET(opts, log_true_size, attr.log_true_size); if (fd >= 0) goto done; } @@ -454,7 +377,8 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, attr.log_size = log_size; attr.log_level = 1; - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, attr_sz, attempts); + OPTS_SET(opts, log_true_size, attr.log_true_size); } done: /* free() doesn't affect errno, so we don't need to restore it */ @@ -463,204 +387,139 @@ done: return libbpf_err_errno(fd); } -__attribute__((alias("bpf_load_program_xattr2"))) -int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); - -static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz) -{ - LIBBPF_OPTS(bpf_prog_load_opts, p); - - if (!load_attr || !log_buf != !log_buf_sz) - return libbpf_err(-EINVAL); - - p.expected_attach_type = load_attr->expected_attach_type; - switch (load_attr->prog_type) { - case BPF_PROG_TYPE_STRUCT_OPS: - case BPF_PROG_TYPE_LSM: - p.attach_btf_id = load_attr->attach_btf_id; - break; - case BPF_PROG_TYPE_TRACING: - case BPF_PROG_TYPE_EXT: - p.attach_btf_id = load_attr->attach_btf_id; - p.attach_prog_fd = load_attr->attach_prog_fd; - break; - default: - p.prog_ifindex = load_attr->prog_ifindex; - p.kern_version = load_attr->kern_version; - } - p.log_level = load_attr->log_level; - p.log_buf = log_buf; - p.log_size = log_buf_sz; - p.prog_btf_fd = load_attr->prog_btf_fd; - p.func_info_rec_size = load_attr->func_info_rec_size; - p.func_info_cnt = load_attr->func_info_cnt; - p.func_info = load_attr->func_info; - p.line_info_rec_size = load_attr->line_info_rec_size; - p.line_info_cnt = load_attr->line_info_cnt; - p.line_info = load_attr->line_info; - p.prog_flags = load_attr->prog_flags; - - return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license, - load_attr->insns, load_attr->insns_cnt, &p); -} - -int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, - size_t log_buf_sz) -{ - struct bpf_load_program_attr load_attr; - - memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = type; - load_attr.expected_attach_type = 0; - load_attr.name = NULL; - load_attr.insns = insns; - load_attr.insns_cnt = insns_cnt; - load_attr.license = license; - load_attr.kern_version = kern_version; - - return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz); -} - -int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, __u32 prog_flags, const char *license, - __u32 kern_version, char *log_buf, size_t log_buf_sz, - int log_level) -{ - union bpf_attr attr; - int fd; - - bump_rlimit_memlock(); - - memset(&attr, 0, sizeof(attr)); - attr.prog_type = type; - attr.insn_cnt = (__u32)insns_cnt; - attr.insns = ptr_to_u64(insns); - attr.license = ptr_to_u64(license); - attr.log_buf = ptr_to_u64(log_buf); - attr.log_size = log_buf_sz; - attr.log_level = log_level; - log_buf[0] = 0; - attr.kern_version = kern_version; - attr.prog_flags = prog_flags; - - fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); - return libbpf_err_errno(fd); -} - int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); attr.flags = flags; - ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_elem(int fd, const void *key, void *value) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); - ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); attr.flags = flags; - ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); - ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); attr.flags = flags; - ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_delete_elem(int fd, const void *key) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz); + return libbpf_err_errno(ret); +} + +int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags) +{ + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); + attr.flags = flags; - ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_get_next_key(int fd, const void *key, void *next_key) { + const size_t attr_sz = offsetofend(union bpf_attr, next_key); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.next_key = ptr_to_u64(next_key); - ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_freeze(int fd) { + const size_t attr_sz = offsetofend(union bpf_attr, map_fd); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; - ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz); return libbpf_err_errno(ret); } @@ -669,13 +528,14 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, __u32 *count, const struct bpf_map_batch_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, batch); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_map_batch_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.batch.map_fd = fd; attr.batch.in_batch = ptr_to_u64(in_batch); attr.batch.out_batch = ptr_to_u64(out_batch); @@ -685,7 +545,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0); attr.batch.flags = OPTS_GET(opts, flags, 0); - ret = sys_bpf(cmd, &attr, sizeof(attr)); + ret = sys_bpf(cmd, &attr, attr_sz); *count = attr.batch.count; return libbpf_err_errno(ret); @@ -722,28 +582,50 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co (void *)keys, (void *)values, count, opts); } -int bpf_obj_pin(int fd, const char *pathname) +int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + if (!OPTS_VALID(opts, bpf_obj_pin_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.path_fd = OPTS_GET(opts, path_fd, 0); attr.pathname = ptr_to_u64((void *)pathname); + attr.file_flags = OPTS_GET(opts, file_flags, 0); attr.bpf_fd = fd; - ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); + ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz); return libbpf_err_errno(ret); } +int bpf_obj_pin(int fd, const char *pathname) +{ + return bpf_obj_pin_opts(fd, pathname, NULL); +} + int bpf_obj_get(const char *pathname) { + return bpf_obj_get_opts(pathname, NULL); +} + +int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + if (!OPTS_VALID(opts, bpf_obj_get_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.path_fd = OPTS_GET(opts, path_fd, 0); attr.pathname = ptr_to_u64((void *)pathname); + attr.file_flags = OPTS_GET(opts, file_flags, 0); - fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_OBJ_GET, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -754,64 +636,102 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, .flags = flags, ); - return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts); + return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts); } -int bpf_prog_attach_xattr(int prog_fd, int target_fd, - enum bpf_attach_type type, - const struct bpf_prog_attach_opts *opts) +int bpf_prog_attach_opts(int prog_fd, int target, enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, expected_revision); + __u32 relative_id, flags; + int ret, relative_fd; union bpf_attr attr; - int ret; if (!OPTS_VALID(opts, bpf_prog_attach_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); - attr.target_fd = target_fd; - attr.attach_bpf_fd = prog_fd; - attr.attach_type = type; - attr.attach_flags = OPTS_GET(opts, flags, 0); - attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0); + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + flags = OPTS_GET(opts, flags, 0); - ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); + /* validate we don't have unexpected combinations of non-zero fields */ + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.target_fd = target; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + attr.replace_bpf_fd = OPTS_GET(opts, replace_fd, 0); + attr.expected_revision = OPTS_GET(opts, expected_revision, 0); + + if (relative_id) { + attr.attach_flags = flags | BPF_F_ID; + attr.relative_id = relative_id; + } else { + attr.attach_flags = flags; + attr.relative_fd = relative_fd; + } + + ret = sys_bpf(BPF_PROG_ATTACH, &attr, attr_sz); return libbpf_err_errno(ret); } -int bpf_prog_detach(int target_fd, enum bpf_attach_type type) +int bpf_prog_detach_opts(int prog_fd, int target, enum bpf_attach_type type, + const struct bpf_prog_detach_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, expected_revision); + __u32 relative_id, flags; + int ret, relative_fd; union bpf_attr attr; - int ret; - memset(&attr, 0, sizeof(attr)); - attr.target_fd = target_fd; - attr.attach_type = type; + if (!OPTS_VALID(opts, bpf_prog_detach_opts)) + return libbpf_err(-EINVAL); - ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + flags = OPTS_GET(opts, flags, 0); + + /* validate we don't have unexpected combinations of non-zero fields */ + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.target_fd = target; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + attr.expected_revision = OPTS_GET(opts, expected_revision, 0); + + if (relative_id) { + attr.attach_flags = flags | BPF_F_ID; + attr.relative_id = relative_id; + } else { + attr.attach_flags = flags; + attr.relative_fd = relative_fd; + } + + ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz); return libbpf_err_errno(ret); } -int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) +int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { - union bpf_attr attr; - int ret; - - memset(&attr, 0, sizeof(attr)); - attr.target_fd = target_fd; - attr.attach_bpf_fd = prog_fd; - attr.attach_type = type; + return bpf_prog_detach_opts(0, target_fd, type, NULL); +} - ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); - return libbpf_err_errno(ret); +int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + return bpf_prog_detach_opts(prog_fd, target_fd, type, NULL); } int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, const struct bpf_link_create_opts *opts) { - __u32 target_btf_id, iter_info_len; + const size_t attr_sz = offsetofend(union bpf_attr, link_create); + __u32 target_btf_id, iter_info_len, relative_id; + int fd, err, relative_fd; union bpf_attr attr; - int fd; if (!OPTS_VALID(opts, bpf_link_create_opts)) return libbpf_err(-EINVAL); @@ -827,7 +747,7 @@ int bpf_link_create(int prog_fd, int target_fd, return libbpf_err(-EINVAL); } - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_create.prog_fd = prog_fd; attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; @@ -848,147 +768,273 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, perf_event)) return libbpf_err(-EINVAL); break; + case BPF_TRACE_KPROBE_MULTI: + case BPF_TRACE_KPROBE_SESSION: + attr.link_create.kprobe_multi.flags = OPTS_GET(opts, kprobe_multi.flags, 0); + attr.link_create.kprobe_multi.cnt = OPTS_GET(opts, kprobe_multi.cnt, 0); + attr.link_create.kprobe_multi.syms = ptr_to_u64(OPTS_GET(opts, kprobe_multi.syms, 0)); + attr.link_create.kprobe_multi.addrs = ptr_to_u64(OPTS_GET(opts, kprobe_multi.addrs, 0)); + attr.link_create.kprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, kprobe_multi.cookies, 0)); + if (!OPTS_ZEROED(opts, kprobe_multi)) + return libbpf_err(-EINVAL); + break; + case BPF_TRACE_UPROBE_MULTI: + case BPF_TRACE_UPROBE_SESSION: + attr.link_create.uprobe_multi.flags = OPTS_GET(opts, uprobe_multi.flags, 0); + attr.link_create.uprobe_multi.cnt = OPTS_GET(opts, uprobe_multi.cnt, 0); + attr.link_create.uprobe_multi.path = ptr_to_u64(OPTS_GET(opts, uprobe_multi.path, 0)); + attr.link_create.uprobe_multi.offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.offsets, 0)); + attr.link_create.uprobe_multi.ref_ctr_offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.ref_ctr_offsets, 0)); + attr.link_create.uprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, uprobe_multi.cookies, 0)); + attr.link_create.uprobe_multi.pid = OPTS_GET(opts, uprobe_multi.pid, 0); + if (!OPTS_ZEROED(opts, uprobe_multi)) + return libbpf_err(-EINVAL); + break; + case BPF_TRACE_RAW_TP: + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + case BPF_LSM_MAC: + attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0); + if (!OPTS_ZEROED(opts, tracing)) + return libbpf_err(-EINVAL); + break; + case BPF_NETFILTER: + attr.link_create.netfilter.pf = OPTS_GET(opts, netfilter.pf, 0); + attr.link_create.netfilter.hooknum = OPTS_GET(opts, netfilter.hooknum, 0); + attr.link_create.netfilter.priority = OPTS_GET(opts, netfilter.priority, 0); + attr.link_create.netfilter.flags = OPTS_GET(opts, netfilter.flags, 0); + if (!OPTS_ZEROED(opts, netfilter)) + return libbpf_err(-EINVAL); + break; + case BPF_TCX_INGRESS: + case BPF_TCX_EGRESS: + relative_fd = OPTS_GET(opts, tcx.relative_fd, 0); + relative_id = OPTS_GET(opts, tcx.relative_id, 0); + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + if (relative_id) { + attr.link_create.tcx.relative_id = relative_id; + attr.link_create.flags |= BPF_F_ID; + } else { + attr.link_create.tcx.relative_fd = relative_fd; + } + attr.link_create.tcx.expected_revision = OPTS_GET(opts, tcx.expected_revision, 0); + if (!OPTS_ZEROED(opts, tcx)) + return libbpf_err(-EINVAL); + break; + case BPF_NETKIT_PRIMARY: + case BPF_NETKIT_PEER: + relative_fd = OPTS_GET(opts, netkit.relative_fd, 0); + relative_id = OPTS_GET(opts, netkit.relative_id, 0); + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + if (relative_id) { + attr.link_create.netkit.relative_id = relative_id; + attr.link_create.flags |= BPF_F_ID; + } else { + attr.link_create.netkit.relative_fd = relative_fd; + } + attr.link_create.netkit.expected_revision = OPTS_GET(opts, netkit.expected_revision, 0); + if (!OPTS_ZEROED(opts, netkit)) + return libbpf_err(-EINVAL); + break; + case BPF_CGROUP_INET_INGRESS: + case BPF_CGROUP_INET_EGRESS: + case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_INET_SOCK_RELEASE: + case BPF_CGROUP_INET4_BIND: + case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET4_POST_BIND: + case BPF_CGROUP_INET6_POST_BIND: + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: + case BPF_CGROUP_INET4_GETPEERNAME: + case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: + case BPF_CGROUP_INET4_GETSOCKNAME: + case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: + case BPF_CGROUP_SOCK_OPS: + case BPF_CGROUP_DEVICE: + case BPF_CGROUP_SYSCTL: + case BPF_CGROUP_GETSOCKOPT: + case BPF_CGROUP_SETSOCKOPT: + case BPF_LSM_CGROUP: + relative_fd = OPTS_GET(opts, cgroup.relative_fd, 0); + relative_id = OPTS_GET(opts, cgroup.relative_id, 0); + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + if (relative_id) { + attr.link_create.cgroup.relative_id = relative_id; + attr.link_create.flags |= BPF_F_ID; + } else { + attr.link_create.cgroup.relative_fd = relative_fd; + } + attr.link_create.cgroup.expected_revision = + OPTS_GET(opts, cgroup.expected_revision, 0); + if (!OPTS_ZEROED(opts, cgroup)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); break; } proceed: - fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); - return libbpf_err_errno(fd); + fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, attr_sz); + if (fd >= 0) + return fd; + /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry + * and other similar programs + */ + err = -errno; + if (err != -EINVAL) + return libbpf_err(err); + + /* if user used features not supported by + * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately + */ + if (attr.link_create.target_fd || attr.link_create.target_btf_id) + return libbpf_err(err); + if (!OPTS_ZEROED(opts, sz)) + return libbpf_err(err); + + /* otherwise, for few select kinds of programs that can be + * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as + * a fallback for older kernels + */ + switch (attach_type) { + case BPF_TRACE_RAW_TP: + case BPF_LSM_MAC: + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + return bpf_raw_tracepoint_open(NULL, prog_fd); + default: + return libbpf_err(err); + } } int bpf_link_detach(int link_fd) { + const size_t attr_sz = offsetofend(union bpf_attr, link_detach); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_detach.link_fd = link_fd; - ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_LINK_DETACH, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_link_update(int link_fd, int new_prog_fd, const struct bpf_link_update_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, link_update); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_link_update_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + if (OPTS_GET(opts, old_prog_fd, 0) && OPTS_GET(opts, old_map_fd, 0)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); attr.link_update.link_fd = link_fd; attr.link_update.new_prog_fd = new_prog_fd; attr.link_update.flags = OPTS_GET(opts, flags, 0); - attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + if (OPTS_GET(opts, old_prog_fd, 0)) + attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + else if (OPTS_GET(opts, old_map_fd, 0)) + attr.link_update.old_map_fd = OPTS_GET(opts, old_map_fd, 0); - ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr)); + ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_iter_create(int link_fd) { + const size_t attr_sz = offsetofend(union bpf_attr, iter_create); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.iter_create.link_fd = link_fd; - fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } -int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, - __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) +int bpf_prog_query_opts(int target, enum bpf_attach_type type, + struct bpf_prog_query_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, query); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); - attr.query.target_fd = target_fd; - attr.query.attach_type = type; - attr.query.query_flags = query_flags; - attr.query.prog_cnt = *prog_cnt; - attr.query.prog_ids = ptr_to_u64(prog_ids); - - ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); - - if (attach_flags) - *attach_flags = attr.query.attach_flags; - *prog_cnt = attr.query.prog_cnt; - - return libbpf_err_errno(ret); -} - -int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, - void *data_out, __u32 *size_out, __u32 *retval, - __u32 *duration) -{ - union bpf_attr attr; - int ret; + if (!OPTS_VALID(opts, bpf_prog_query_opts)) + return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); - attr.test.prog_fd = prog_fd; - attr.test.data_in = ptr_to_u64(data); - attr.test.data_out = ptr_to_u64(data_out); - attr.test.data_size_in = size; - attr.test.repeat = repeat; + memset(&attr, 0, attr_sz); + attr.query.target_fd = target; + attr.query.attach_type = type; + attr.query.query_flags = OPTS_GET(opts, query_flags, 0); + attr.query.count = OPTS_GET(opts, count, 0); + attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL)); + attr.query.link_ids = ptr_to_u64(OPTS_GET(opts, link_ids, NULL)); + attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL)); + attr.query.link_attach_flags = ptr_to_u64(OPTS_GET(opts, link_attach_flags, NULL)); - ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_QUERY, &attr, attr_sz); - if (size_out) - *size_out = attr.test.data_size_out; - if (retval) - *retval = attr.test.retval; - if (duration) - *duration = attr.test.duration; + OPTS_SET(opts, attach_flags, attr.query.attach_flags); + OPTS_SET(opts, revision, attr.query.revision); + OPTS_SET(opts, count, attr.query.count); return libbpf_err_errno(ret); } -int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) { - union bpf_attr attr; + LIBBPF_OPTS(bpf_prog_query_opts, opts); int ret; - if (!test_attr->data_out && test_attr->data_size_out > 0) - return libbpf_err(-EINVAL); + opts.query_flags = query_flags; + opts.prog_ids = prog_ids; + opts.prog_cnt = *prog_cnt; + + ret = bpf_prog_query_opts(target_fd, type, &opts); - memset(&attr, 0, sizeof(attr)); - attr.test.prog_fd = test_attr->prog_fd; - attr.test.data_in = ptr_to_u64(test_attr->data_in); - attr.test.data_out = ptr_to_u64(test_attr->data_out); - attr.test.data_size_in = test_attr->data_size_in; - attr.test.data_size_out = test_attr->data_size_out; - attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in); - attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out); - attr.test.ctx_size_in = test_attr->ctx_size_in; - attr.test.ctx_size_out = test_attr->ctx_size_out; - attr.test.repeat = test_attr->repeat; - - ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); - - test_attr->data_size_out = attr.test.data_size_out; - test_attr->ctx_size_out = attr.test.ctx_size_out; - test_attr->retval = attr.test.retval; - test_attr->duration = attr.test.duration; + if (attach_flags) + *attach_flags = opts.attach_flags; + *prog_cnt = opts.prog_cnt; return libbpf_err_errno(ret); } int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, test); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_test_run_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.test.prog_fd = prog_fd; + attr.test.batch_size = OPTS_GET(opts, batch_size, 0); attr.test.cpu = OPTS_GET(opts, cpu, 0); attr.test.flags = OPTS_GET(opts, flags, 0); attr.test.repeat = OPTS_GET(opts, repeat, 0); @@ -1002,7 +1048,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL)); attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL)); - ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, attr_sz); OPTS_SET(opts, data_size_out, attr.test.data_size_out); OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out); @@ -1014,13 +1060,14 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int err; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.start_id = start_id; - err = sys_bpf(cmd, &attr, sizeof(attr)); + err = sys_bpf(cmd, &attr, attr_sz); if (!err) *next_id = attr.next_id; @@ -1047,88 +1094,164 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id) return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID); } -int bpf_prog_get_fd_by_id(__u32 id) +int bpf_prog_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); attr.prog_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); - fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } -int bpf_map_get_fd_by_id(__u32 id) +int bpf_prog_get_fd_by_id(__u32 id) +{ + return bpf_prog_get_fd_by_id_opts(id, NULL); +} + +int bpf_map_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); attr.map_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); - fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } -int bpf_btf_get_fd_by_id(__u32 id) +int bpf_map_get_fd_by_id(__u32 id) { + return bpf_map_get_fd_by_id_opts(id, NULL); +} + +int bpf_btf_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, fd_by_id_token_fd); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); attr.btf_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); + attr.fd_by_id_token_fd = OPTS_GET(opts, token_fd, 0); - fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } -int bpf_link_get_fd_by_id(__u32 id) +int bpf_btf_get_fd_by_id(__u32 id) +{ + return bpf_btf_get_fd_by_id_opts(id, NULL); +} + +int bpf_link_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); attr.link_id = id; + attr.open_flags = OPTS_GET(opts, open_flags, 0); - fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } +int bpf_link_get_fd_by_id(__u32 id) +{ + return bpf_link_get_fd_by_id_opts(id, NULL); +} + int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) { + const size_t attr_sz = offsetofend(union bpf_attr, info); union bpf_attr attr; int err; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.info.bpf_fd = bpf_fd; attr.info.info_len = *info_len; attr.info.info = ptr_to_u64(info); - err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); - + err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz); if (!err) *info_len = attr.info.info_len; - return libbpf_err_errno(err); } -int bpf_raw_tracepoint_open(const char *name, int prog_fd) +int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, __u32 *info_len) +{ + return bpf_obj_get_info_by_fd(prog_fd, info, info_len); +} + +int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len) +{ + return bpf_obj_get_info_by_fd(map_fd, info, info_len); +} + +int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u32 *info_len) { + return bpf_obj_get_info_by_fd(btf_fd, info, info_len); +} + +int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info_len) +{ + return bpf_obj_get_info_by_fd(link_fd, info, info_len); +} + +int bpf_raw_tracepoint_open_opts(int prog_fd, struct bpf_raw_tp_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); - attr.raw_tracepoint.name = ptr_to_u64(name); + if (!OPTS_VALID(opts, bpf_raw_tp_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); attr.raw_tracepoint.prog_fd = prog_fd; + attr.raw_tracepoint.name = ptr_to_u64(OPTS_GET(opts, tp_name, NULL)); + attr.raw_tracepoint.cookie = OPTS_GET(opts, cookie, 0); - fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz); return libbpf_err_errno(fd); } -int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts) +int bpf_raw_tracepoint_open(const char *name, int prog_fd) +{ + LIBBPF_OPTS(bpf_raw_tp_opts, opts, .tp_name = name); + + return bpf_raw_tracepoint_open_opts(prog_fd, &opts); +} + +int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level); + const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1153,6 +1276,10 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_loa attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; + + attr.btf_flags = OPTS_GET(opts, btf_flags, 0); + attr.btf_token_fd = OPTS_GET(opts, token_fd, 0); + /* log_level == 0 and log_buf != NULL means "try loading without * log_buf, but retry with log_buf and log_level=1 on error", which is * consistent across low-level and high-level BTF and program loading @@ -1171,27 +1298,8 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_loa attr.btf_log_level = 1; fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); } - return libbpf_err_errno(fd); -} - -int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) -{ - LIBBPF_OPTS(bpf_btf_load_opts, opts); - int fd; - -retry: - if (do_log && log_buf && log_buf_size) { - opts.log_buf = log_buf; - opts.log_size = log_buf_size; - opts.log_level = 1; - } - - fd = bpf_btf_load(btf, btf_size, &opts); - if (fd < 0 && !do_log && log_buf && log_buf_size) { - do_log = true; - goto retry; - } + OPTS_SET(opts, log_true_size, attr.btf_log_true_size); return libbpf_err_errno(fd); } @@ -1199,16 +1307,18 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr) { - union bpf_attr attr = {}; + const size_t attr_sz = offsetofend(union bpf_attr, task_fd_query); + union bpf_attr attr; int err; + memset(&attr, 0, attr_sz); attr.task_fd_query.pid = pid; attr.task_fd_query.fd = fd; attr.task_fd_query.flags = flags; attr.task_fd_query.buf = ptr_to_u64(buf); attr.task_fd_query.buf_len = *buf_len; - err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); + err = sys_bpf(BPF_TASK_FD_QUERY, &attr, attr_sz); *buf_len = attr.task_fd_query.buf_len; *prog_id = attr.task_fd_query.prog_id; @@ -1221,30 +1331,69 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, int bpf_enable_stats(enum bpf_stats_type type) { + const size_t attr_sz = offsetofend(union bpf_attr, enable_stats); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.enable_stats.type = type; - fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_prog_bind_map(int prog_fd, int map_fd, const struct bpf_prog_bind_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, prog_bind_map); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_prog_bind_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.prog_bind_map.prog_fd = prog_fd; attr.prog_bind_map.map_fd = map_fd; attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0); - ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } + +int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, token_create); + union bpf_attr attr; + int fd; + + if (!OPTS_VALID(opts, bpf_token_create_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.token_create.bpffs_fd = bpffs_fd; + attr.token_create.flags = OPTS_GET(opts, flags, 0); + + fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); + return libbpf_err_errno(fd); +} + +int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, + struct bpf_prog_stream_read_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_stream_read); + union bpf_attr attr; + int err; + + if (!OPTS_VALID(opts, bpf_prog_stream_read_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.prog_stream_read.stream_buf = ptr_to_u64(buf); + attr.prog_stream_read.stream_buf_len = buf_len; + attr.prog_stream_read.stream_id = stream_id; + attr.prog_stream_read.prog_fd = prog_fd; + + err = sys_bpf(BPF_PROG_STREAM_READ_BY_FD, &attr, attr_sz); + return libbpf_err_errno(err); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 14e0d97ad2cf..e983a3e40d61 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* - * common eBPF ELF operations. + * Common BPF ELF operations. * * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> @@ -35,7 +35,7 @@ extern "C" { #endif -int libbpf_set_memlock_rlim(size_t memlock_bytes); +LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes); struct bpf_map_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -51,8 +51,15 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; + __s32 value_type_btf_obj_fd; + + __u32 token_fd; + + const void *excl_prog_hash; + __u32 excl_prog_hash_size; + size_t :0; }; -#define bpf_map_create_opts__last_field map_ifindex +#define bpf_map_create_opts__last_field excl_prog_hash_size LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, @@ -61,48 +68,6 @@ LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts); -struct bpf_create_map_attr { - const char *name; - enum bpf_map_type map_type; - __u32 map_flags; - __u32 key_size; - __u32 value_size; - __u32 max_entries; - __u32 numa_node; - __u32 btf_fd; - __u32 btf_key_type_id; - __u32 btf_value_type_id; - __u32 map_ifindex; - union { - __u32 inner_map_fd; - __u32 btf_vmlinux_value_type_id; - }; -}; - -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags); - struct bpf_prog_load_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -138,61 +103,24 @@ struct bpf_prog_load_opts { __u32 log_level; __u32 log_size; char *log_buf; + /* output: actual total log contents size (including terminating zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + * If kernel doesn't support this feature, log_size is left unchanged. + */ + __u32 log_true_size; + __u32 token_fd; + + /* if set, provides the length of fd_array */ + __u32 fd_array_cnt; + size_t :0; }; -#define bpf_prog_load_opts__last_field log_buf +#define bpf_prog_load_opts__last_field fd_array_cnt LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts); -/* this "specialization" should go away in libbpf 1.0 */ -LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, - const char *prog_name, const char *license, - const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts); - -/* This is an elaborate way to not conflict with deprecated bpf_prog_load() - * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone. - * With this approach, if someone is calling bpf_prog_load() with - * 4 arguments, they will use the deprecated API, which keeps backwards - * compatibility (both source code and binary). If bpf_prog_load() is called - * with 6 arguments, though, it gets redirected to __bpf_prog_load. - * So looking forward to libbpf 1.0 when this hack will be gone and - * __bpf_prog_load() will be called just bpf_prog_load(). - */ -#ifndef bpf_prog_load -#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__) -#define ___bpf_prog_load4(file, type, pobj, prog_fd) \ - bpf_prog_load_deprecated(file, type, pobj, prog_fd) -#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \ - bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts) -#endif /* bpf_prog_load */ - -struct bpf_load_program_attr { - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - const char *name; - const struct bpf_insn *insns; - size_t insns_cnt; - const char *license; - union { - __u32 kern_version; - __u32 attach_prog_fd; - }; - union { - __u32 prog_ifindex; - __u32 attach_btf_id; - }; - __u32 prog_btf_fd; - __u32 func_info_rec_size; - const void *func_info; - __u32 func_info_cnt; - __u32 line_info_rec_size; - const void *line_info; - __u32 line_info_cnt; - __u32 log_level; - __u32 prog_flags; -}; + struct bpf_prog_load_opts *opts); /* Flags to direct loading requirements */ #define MAPS_RELAX_COMPAT 0x01 @@ -200,22 +128,6 @@ struct bpf_load_program_attr { /* Recommended log buffer size */ #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") -LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") -LIBBPF_API int bpf_load_program(enum bpf_prog_type type, - const struct bpf_insn *insns, size_t insns_cnt, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") -LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, - const struct bpf_insn *insns, - size_t insns_cnt, __u32 prog_flags, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, - int log_level); - struct bpf_btf_load_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -223,15 +135,21 @@ struct bpf_btf_load_opts { char *log_buf; __u32 log_level; __u32 log_size; + /* output: actual total log contents size (including terminating zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + * If kernel doesn't support this feature, log_size is left unchanged. + */ + __u32 log_true_size; + + __u32 btf_flags; + __u32 token_fd; + size_t :0; }; -#define bpf_btf_load_opts__last_field log_size +#define bpf_btf_load_opts__last_field token_fd LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, - const struct bpf_btf_load_opts *opts); - -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead") -LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, - __u32 log_buf_size, bool do_log); + struct bpf_btf_load_opts *opts); LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); @@ -244,6 +162,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); +LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); LIBBPF_API int bpf_map_freeze(int fd); @@ -277,10 +196,14 @@ LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys, /** * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. * - * The parameter *in_batch* is the address of the first element in the batch to read. - * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent - * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate - * that the batched lookup starts from the beginning of the map. + * The parameter *in_batch* is the address of the first element in the batch to + * read. *out_batch* is an output parameter that should be passed as *in_batch* + * to subsequent calls to **bpf_map_lookup_batch()**. NULL can be passed for + * *in_batch* to indicate that the batched lookup starts from the beginning of + * the map. Both *in_batch* and *out_batch* must point to memory large enough to + * hold a single key, except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which the memory size must be at + * least 4 bytes wide regardless of key size. * * The *keys* and *values* are output parameters which must point to memory large enough to * hold *count* items based on the key and value size of the map *map_fd*. The *keys* @@ -313,7 +236,10 @@ LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, * * @param fd BPF map file descriptor * @param in_batch address of the first element in batch to read, can pass NULL to - * get address of the first element in *out_batch* + * get address of the first element in *out_batch*. If not NULL, must be large + * enough to hold a key. For **BPF_MAP_TYPE_{HASH, PERCPU_HASH, LRU_HASH, + * LRU_PERCPU_HASH}**, the memory size must be at least 4 bytes wide regardless + * of key size. * @param out_batch output parameter that should be passed to next call as *in_batch* * @param keys pointer to an array of *count* keys * @param values pointer to an array large enough for *count* values @@ -379,25 +305,96 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values __u32 *count, const struct bpf_map_batch_opts *opts); +struct bpf_obj_pin_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + __u32 file_flags; + int path_fd; + + size_t :0; +}; +#define bpf_obj_pin_opts__last_field path_fd + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); -LIBBPF_API int bpf_obj_get(const char *pathname); +LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname, + const struct bpf_obj_pin_opts *opts); -struct bpf_prog_attach_opts { +struct bpf_obj_get_opts { size_t sz; /* size of this struct for forward/backward compatibility */ - unsigned int flags; - int replace_prog_fd; + + __u32 file_flags; + int path_fd; + + size_t :0; }; -#define bpf_prog_attach_opts__last_field replace_prog_fd +#define bpf_obj_get_opts__last_field path_fd + +LIBBPF_API int bpf_obj_get(const char *pathname); +LIBBPF_API int bpf_obj_get_opts(const char *pathname, + const struct bpf_obj_get_opts *opts); LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); -LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd, - enum bpf_attach_type type, - const struct bpf_prog_attach_opts *opts); LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); +struct bpf_prog_attach_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; + union { + int replace_prog_fd; + int replace_fd; + }; + int relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_prog_attach_opts__last_field expected_revision + +struct bpf_prog_detach_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; + int relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_prog_detach_opts__last_field expected_revision + +/** + * @brief **bpf_prog_attach_opts()** attaches the BPF program corresponding to + * *prog_fd* to a *target* which can represent a file descriptor or netdevice + * ifindex. + * + * @param prog_fd BPF program file descriptor + * @param target attach location file descriptor or ifindex + * @param type attach type for the BPF program + * @param opts options for configuring the attachment + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int target, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts); + +/** + * @brief **bpf_prog_detach_opts()** detaches the BPF program corresponding to + * *prog_fd* from a *target* which can represent a file descriptor or netdevice + * ifindex. + * + * @param prog_fd BPF program file descriptor + * @param target detach location file descriptor or ifindex + * @param type detach type for the BPF program + * @param opts options for configuring the detachment + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_prog_detach_opts(int prog_fd, int target, + enum bpf_attach_type type, + const struct bpf_prog_detach_opts *opts); + union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */ struct bpf_link_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -409,10 +406,50 @@ struct bpf_link_create_opts { struct { __u64 bpf_cookie; } perf_event; + struct { + __u32 flags; + __u32 cnt; + const char **syms; + const unsigned long *addrs; + const __u64 *cookies; + } kprobe_multi; + struct { + __u32 flags; + __u32 cnt; + const char *path; + const unsigned long *offsets; + const unsigned long *ref_ctr_offsets; + const __u64 *cookies; + __u32 pid; + } uprobe_multi; + struct { + __u64 cookie; + } tracing; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; + struct { + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + } tcx; + struct { + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + } netkit; + struct { + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + } cgroup; }; size_t :0; }; -#define bpf_link_create_opts__last_field perf_event +#define bpf_link_create_opts__last_field uprobe_multi.pid LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, @@ -424,8 +461,9 @@ struct bpf_link_update_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 flags; /* extra flags */ __u32 old_prog_fd; /* expected old program FD */ + __u32 old_map_fd; /* expected old map FD */ }; -#define bpf_link_update_opts__last_field old_prog_fd +#define bpf_link_update_opts__last_field old_map_fd LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd, const struct bpf_link_update_opts *opts); @@ -449,34 +487,171 @@ struct bpf_prog_test_run_attr { * out: length of cxt_out */ }; -LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); - -/* - * bpf_prog_test_run does not check that data_out is large enough. Consider - * using bpf_prog_test_run_xattr instead. - */ -LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, - __u32 size, void *data_out, __u32 *size_out, - __u32 *retval, __u32 *duration); LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id); + +struct bpf_get_fd_by_id_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 open_flags; /* permissions requested for the operation on fd */ + __u32 token_fd; + size_t :0; +}; +#define bpf_get_fd_by_id_opts__last_field token_fd + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_map_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_btf_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_link_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_link_get_fd_by_id_opts(__u32 id, + const struct bpf_get_fd_by_id_opts *opts); LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); + +/** + * @brief **bpf_prog_get_info_by_fd()** obtains information about the BPF + * program corresponding to *prog_fd*. + * + * Populates up to *info_len* bytes of *info* and updates *info_len* with the + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. + * + * @param prog_fd BPF program file descriptor + * @param info pointer to **struct bpf_prog_info** that will be populated with + * BPF program information + * @param info_len pointer to the size of *info*; on success updated with the + * number of bytes written to *info* + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, __u32 *info_len); + +/** + * @brief **bpf_map_get_info_by_fd()** obtains information about the BPF + * map corresponding to *map_fd*. + * + * Populates up to *info_len* bytes of *info* and updates *info_len* with the + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. + * + * @param map_fd BPF map file descriptor + * @param info pointer to **struct bpf_map_info** that will be populated with + * BPF map information + * @param info_len pointer to the size of *info*; on success updated with the + * number of bytes written to *info* + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len); + +/** + * @brief **bpf_btf_get_info_by_fd()** obtains information about the + * BTF object corresponding to *btf_fd*. + * + * Populates up to *info_len* bytes of *info* and updates *info_len* with the + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. + * + * @param btf_fd BTF object file descriptor + * @param info pointer to **struct bpf_btf_info** that will be populated with + * BTF object information + * @param info_len pointer to the size of *info*; on success updated with the + * number of bytes written to *info* + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u32 *info_len); + +/** + * @brief **bpf_btf_get_info_by_fd()** obtains information about the BPF + * link corresponding to *link_fd*. + * + * Populates up to *info_len* bytes of *info* and updates *info_len* with the + * actual number of bytes written to *info*. Note that *info* should be + * zero-initialized or initialized as expected by the requested *info* + * type. Failing to (zero-)initialize *info* under certain circumstances can + * result in this helper returning an error. + * + * @param link_fd BPF link file descriptor + * @param info pointer to **struct bpf_link_info** that will be populated with + * BPF link information + * @param info_len pointer to the size of *info*; on success updated with the + * number of bytes written to *info* + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info_len); + +struct bpf_prog_query_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 query_flags; + __u32 attach_flags; /* output argument */ + __u32 *prog_ids; + union { + /* input+output argument */ + __u32 prog_cnt; + __u32 count; + }; + __u32 *prog_attach_flags; + __u32 *link_ids; + __u32 *link_attach_flags; + __u64 revision; + size_t :0; +}; +#define bpf_prog_query_opts__last_field revision + +/** + * @brief **bpf_prog_query_opts()** queries the BPF programs and BPF links + * which are attached to *target* which can represent a file descriptor or + * netdevice ifindex. + * + * @param target query location file descriptor or ifindex + * @param type attach type for the BPF program + * @param opts options for configuring the query + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_prog_query_opts(int target, enum bpf_attach_type type, + struct bpf_prog_query_opts *opts); LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); + +struct bpf_raw_tp_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + const char *tp_name; + __u64 cookie; + size_t :0; +}; +#define bpf_raw_tp_opts__last_field cookie + +LIBBPF_API int bpf_raw_tracepoint_open_opts(int prog_fd, struct bpf_raw_tp_opts *opts); LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); +#ifdef __cplusplus +/* forward-declaring enums in C++ isn't compatible with pure C enums, so + * instead define bpf_enable_stats() as accepting int as an input + */ +LIBBPF_API int bpf_enable_stats(int type); +#else enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */ LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); +#endif struct bpf_prog_bind_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -506,12 +681,58 @@ struct bpf_test_run_opts { __u32 duration; /* out: average per repetition in ns */ __u32 flags; __u32 cpu; + __u32 batch_size; }; -#define bpf_test_run_opts__last_field cpu +#define bpf_test_run_opts__last_field batch_size LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts); +struct bpf_token_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; + size_t :0; +}; +#define bpf_token_create_opts__last_field flags + +/** + * @brief **bpf_token_create()** creates a new instance of BPF token derived + * from specified BPF FS mount point. + * + * BPF token created with this API can be passed to bpf() syscall for + * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc. + * + * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token + * instance. + * @param opts optional BPF token creation options, can be NULL + * + * @return BPF token FD > 0, on success; negative error code, otherwise (errno + * is also set to the error code) + */ +LIBBPF_API int bpf_token_create(int bpffs_fd, + struct bpf_token_create_opts *opts); + +struct bpf_prog_stream_read_opts { + size_t sz; + size_t :0; +}; +#define bpf_prog_stream_read_opts__last_field sz +/** + * @brief **bpf_prog_stream_read** reads data from the BPF stream of a given BPF + * program. + * + * @param prog_fd FD for the BPF program whose BPF stream is to be read. + * @param stream_id ID of the BPF stream to be read. + * @param buf Buffer to read data into from the BPF stream. + * @param buf_len Maximum number of bytes to read from the BPF stream. + * @param opts optional options, can be NULL + * + * @return The number of bytes read, on success; negative error code, otherwise + * (errno is also set to the error code) + */ +LIBBPF_API int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, + struct bpf_prog_stream_read_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index e4aa9996a550..b997c68bd945 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -2,6 +2,8 @@ #ifndef __BPF_CORE_READ_H__ #define __BPF_CORE_READ_H__ +#include "bpf_helpers.h" + /* * enum bpf_field_info_kind is passed as a second argument into * __builtin_preserve_field_info() built-in to get a specific aspect of @@ -29,6 +31,7 @@ enum bpf_type_id_kind { enum bpf_type_info_kind { BPF_TYPE_EXISTS = 0, /* type existence in target kernel */ BPF_TYPE_SIZE = 1, /* type size in target kernel */ + BPF_TYPE_MATCHES = 2, /* type match in target kernel */ }; /* second argument to __builtin_preserve_enum_value() built-in */ @@ -43,7 +46,7 @@ enum bpf_enum_value_kind { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read_kernel( \ - (void *)dst, \ + (void *)dst, \ __CORE_RELO(src, fld, BYTE_SIZE), \ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) #else @@ -101,6 +104,7 @@ enum bpf_enum_value_kind { case 2: val = *(const unsigned short *)p; break; \ case 4: val = *(const unsigned int *)p; break; \ case 8: val = *(const unsigned long long *)p; break; \ + default: val = 0; break; \ } \ val <<= __CORE_RELO(s, field, LSHIFT_U64); \ if (__CORE_RELO(s, field, SIGNED)) \ @@ -111,20 +115,102 @@ enum bpf_enum_value_kind { }) /* + * Write to a bitfield, identified by s->field. + * This is the inverse of BPF_CORE_WRITE_BITFIELD(). + */ +#define BPF_CORE_WRITE_BITFIELD(s, field, new_val) ({ \ + void *p = (void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ + unsigned int byte_size = __CORE_RELO(s, field, BYTE_SIZE); \ + unsigned int lshift = __CORE_RELO(s, field, LSHIFT_U64); \ + unsigned int rshift = __CORE_RELO(s, field, RSHIFT_U64); \ + unsigned long long mask, val, nval = new_val; \ + unsigned int rpad = rshift - lshift; \ + \ + asm volatile("" : "+r"(p)); \ + \ + switch (byte_size) { \ + case 1: val = *(unsigned char *)p; break; \ + case 2: val = *(unsigned short *)p; break; \ + case 4: val = *(unsigned int *)p; break; \ + case 8: val = *(unsigned long long *)p; break; \ + } \ + \ + mask = (~0ULL << rshift) >> lshift; \ + val = (val & ~mask) | ((nval << rpad) & mask); \ + \ + switch (byte_size) { \ + case 1: *(unsigned char *)p = val; break; \ + case 2: *(unsigned short *)p = val; break; \ + case 4: *(unsigned int *)p = val; break; \ + case 8: *(unsigned long long *)p = val; break; \ + } \ +}) + +/* Differentiator between compilers builtin implementations. This is a + * requirement due to the compiler parsing differences where GCC optimizes + * early in parsing those constructs of type pointers to the builtin specific + * type, resulting in not being possible to collect the required type + * information in the builtin expansion. + */ +#ifdef __clang__ +#define ___bpf_typeof(type) ((typeof(type) *) 0) +#else +#define ___bpf_typeof1(type, NR) ({ \ + extern typeof(type) *___concat(bpf_type_tmp_, NR); \ + ___concat(bpf_type_tmp_, NR); \ +}) +#define ___bpf_typeof(type) ___bpf_typeof1(type, __COUNTER__) +#endif + +#ifdef __clang__ +#define ___bpf_field_ref1(field) (field) +#define ___bpf_field_ref2(type, field) (___bpf_typeof(type)->field) +#else +#define ___bpf_field_ref1(field) (&(field)) +#define ___bpf_field_ref2(type, field) (&(___bpf_typeof(type)->field)) +#endif +#define ___bpf_field_ref(args...) \ + ___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args) + +/* * Convenience macro to check that field actually exists in target kernel's. * Returns: * 1, if matching field is present in target kernel; * 0, if no matching field found. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_exists(p->my_field); + * - field reference through type and field names: + * bpf_core_field_exists(struct my_type, my_field). */ -#define bpf_core_field_exists(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) +#define bpf_core_field_exists(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS) /* * Convenience macro to get the byte size of a field. Works for integers, * struct/unions, pointers, arrays, and enums. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_size(p->my_field); + * - field reference through type and field names: + * bpf_core_field_size(struct my_type, my_field). + */ +#define bpf_core_field_size(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE) + +/* + * Convenience macro to get field's byte offset. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_offset(p->my_field); + * - field reference through type and field names: + * bpf_core_field_offset(struct my_type, my_field). */ -#define bpf_core_field_size(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) +#define bpf_core_field_offset(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET) /* * Convenience macro to get BTF type ID of a specified type, using a local BTF @@ -132,7 +218,7 @@ enum bpf_enum_value_kind { * BTF. Always succeeds. */ #define bpf_core_type_id_local(type) \ - __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL) + __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_LOCAL) /* * Convenience macro to get BTF type ID of a target kernel's type that matches @@ -142,7 +228,7 @@ enum bpf_enum_value_kind { * - 0, if no matching type was found in a target kernel BTF. */ #define bpf_core_type_id_kernel(type) \ - __builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET) + __builtin_btf_type_id(*___bpf_typeof(type), BPF_TYPE_ID_TARGET) /* * Convenience macro to check that provided named type @@ -152,7 +238,17 @@ enum bpf_enum_value_kind { * 0, if no matching type is found. */ #define bpf_core_type_exists(type) \ - __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_EXISTS) + +/* + * Convenience macro to check that provided named type + * (struct/union/enum/typedef) "matches" that in a target kernel. + * Returns: + * 1, if the type matches in the target kernel's BTF; + * 0, if the type does not match any in the target kernel + */ +#define bpf_core_type_matches(type) \ + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_MATCHES) /* * Convenience macro to get the byte size of a provided named type @@ -162,7 +258,7 @@ enum bpf_enum_value_kind { * 0, if no matching type is found. */ #define bpf_core_type_size(type) \ - __builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) + __builtin_preserve_type_info(*___bpf_typeof(type), BPF_TYPE_SIZE) /* * Convenience macro to check that provided enumerator value is defined in @@ -172,8 +268,13 @@ enum bpf_enum_value_kind { * kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. */ +#ifdef __clang__ #define bpf_core_enum_value_exists(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS) +#else +#define bpf_core_enum_value_exists(enum_type, enum_value) \ + __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_EXISTS) +#endif /* * Convenience macro to get the integer value of an enumerator value in @@ -183,8 +284,13 @@ enum bpf_enum_value_kind { * present in target kernel's BTF; * 0, if no matching enum and/or enum value within that enum is found. */ +#ifdef __clang__ #define bpf_core_enum_value(enum_type, enum_value) \ __builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE) +#else +#define bpf_core_enum_value(enum_type, enum_value) \ + __builtin_preserve_enum_value(___bpf_typeof(enum_type), enum_value, BPF_ENUMVAL_VALUE) +#endif /* * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures @@ -196,7 +302,7 @@ enum bpf_enum_value_kind { * a relocation, which records BTF type ID describing root struct/union and an * accessor string which describes exact embedded field that was used to take * an address. See detailed description of this relocation format and - * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. + * semantics in comments to struct bpf_core_relo in include/uapi/linux/bpf.h. * * This relocation allows libbpf to adjust BPF instruction to use correct * actual field offset, based on target kernel BTF type that matches original @@ -220,6 +326,17 @@ enum bpf_enum_value_kind { #define bpf_core_read_user_str(dst, sz, src) \ bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) +extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; + +/* + * Cast provided pointer *ptr* into a pointer to a specified *type* in such + * a way that BPF verifier will become aware of associated kernel-side BTF + * type. This allows to access members of kernel types directly without the + * need to use BPF_CORE_READ() macros. + */ +#define bpf_core_cast(ptr, type) \ + ((typeof(type) *)bpf_rdonly_cast((ptr), bpf_core_type_id_kernel(type))) + #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) #define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N @@ -271,7 +388,13 @@ enum bpf_enum_value_kind { #define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) +#if defined(__clang__) && (__clang_major__ >= 19) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#elif defined(__GNUC__) && (__GNUC__ >= 14) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#else #define ___type(...) typeof(___arrow(__VA_ARGS__)) +#endif #define ___read(read_fn, dst, src_type, src, accessor) \ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) @@ -324,7 +447,7 @@ enum bpf_enum_value_kind { /* Non-CO-RE variant of BPF_CORE_READ_INTO() */ #define BPF_PROBE_READ_INTO(dst, src, a, ...) ({ \ - ___core_read(bpf_probe_read, bpf_probe_read, \ + ___core_read(bpf_probe_read_kernel, bpf_probe_read_kernel, \ dst, (src), a, ##__VA_ARGS__) \ }) @@ -360,7 +483,7 @@ enum bpf_enum_value_kind { /* Non-CO-RE variant of BPF_CORE_READ_STR_INTO() */ #define BPF_PROBE_READ_STR_INTO(dst, src, a, ...) ({ \ - ___core_read(bpf_probe_read_str, bpf_probe_read, \ + ___core_read(bpf_probe_read_kernel_str, bpf_probe_read_kernel, \ dst, (src), a, ##__VA_ARGS__) \ }) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 223308931d55..49af4260b8e6 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -4,6 +4,7 @@ #define __BPF_GEN_INTERNAL_H #include "bpf.h" +#include "libbpf_internal.h" struct ksym_relo_desc { const char *name; @@ -11,6 +12,7 @@ struct ksym_relo_desc { int insn_idx; bool is_weak; bool is_typeless; + bool is_ld64; }; struct ksym_desc { @@ -24,6 +26,7 @@ struct ksym_desc { bool typeless; }; int insn; + bool is_ld64; }; struct bpf_gen { @@ -32,6 +35,7 @@ struct bpf_gen { void *data_cur; void *insn_start; void *insn_cur; + bool swapped_endian; ssize_t cleanup_label; __u32 nr_progs; __u32 nr_maps; @@ -47,6 +51,7 @@ struct bpf_gen { __u32 nr_ksyms; int fd_array; int nr_fd_array; + int hash_insn_offset[SHA256_DWORD_SIZE]; }; void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps); @@ -65,7 +70,7 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, - bool is_typeless, int kind, int insn_idx); + bool is_typeless, bool is_ld64, int kind, int insn_idx); void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo); void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx); diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 963b1060d944..d4e4e388e625 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -13,6 +13,15 @@ #define __uint(name, val) int (*name)[val] #define __type(name, val) typeof(val) *name #define __array(name, val) typeof(val) *name[] +#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name + +#ifndef likely +#define likely(x) (__builtin_expect(!!(x), 1)) +#endif + +#ifndef unlikely +#define unlikely(x) (__builtin_expect(!!(x), 0)) +#endif /* * Helper macro to place programs, maps, license in @@ -22,12 +31,25 @@ * To allow use of SEC() with externs (e.g., for extern .maps declarations), * make sure __attribute__((unused)) doesn't trigger compilation warning. */ +#if __GNUC__ && !__clang__ + +/* + * Pragma macros are broken on GCC + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400 + */ +#define SEC(name) __attribute__((section(name), used)) + +#else + #define SEC(name) \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \ __attribute__((section(name), used)) \ _Pragma("GCC diagnostic pop") \ +#endif + /* Avoid 'linux/stddef.h' definition of '__always_inline'. */ #undef __always_inline #define __always_inline inline __attribute__((always_inline)) @@ -64,15 +86,44 @@ /* * Helper macros to manipulate data structures */ -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER) -#endif -#ifndef container_of + +/* offsetof() definition that uses __builtin_offset() might not preserve field + * offset CO-RE relocation properly, so force-redefine offsetof() using + * old-school approach which works with CO-RE correctly + */ +#undef offsetof +#define offsetof(type, member) ((unsigned long)&((type *)0)->member) + +/* redefined container_of() to ensure we use the above offsetof() macro */ +#undef container_of #define container_of(ptr, type, member) \ ({ \ void *__mptr = (void *)(ptr); \ ((type *)(__mptr - offsetof(type, member))); \ }) + +/* + * Compiler (optimization) barrier. + */ +#ifndef barrier +#define barrier() asm volatile("" ::: "memory") +#endif + +/* Variable-specific compiler (optimization) barrier. It's a no-op which makes + * compiler believe that there is some black box modification of a given + * variable and thus prevents compiler from making extra assumption about its + * value and potential simplifications and optimizations on this variable. + * + * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of + * a variable, making some code patterns unverifiable. Putting barrier_var() + * in place will ensure that cast is performed before the barrier_var() + * invocation, because compiler has to pessimistically assume that embedded + * asm section might perform some extra operations on that variable. + * + * This is a variable-specific variant of more global barrier(). + */ +#ifndef barrier_var +#define barrier_var(var) asm volatile("" : "+r"(var)) #endif /* @@ -94,7 +145,8 @@ /* * Helper function to perform a tail call with a constant/immediate map slot. */ -#if __clang_major__ >= 8 && defined(__bpf__) +#if (defined(__clang__) && __clang_major__ >= 8) || (!defined(__clang__) && __GNUC__ > 12) +#if defined(__bpf__) static __always_inline void bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) { @@ -122,18 +174,7 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) : "r0", "r1", "r2", "r3", "r4", "r5"); } #endif - -/* - * Helper structure used by eBPF C program - * to describe BPF map attributes to libbpf loader - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; -}; +#endif enum libbpf_pin_type { LIBBPF_PIN_NONE, @@ -149,6 +190,33 @@ enum libbpf_tristate { #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) +#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) +#define __kptr __attribute__((btf_type_tag("kptr"))) +#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) +#define __uptr __attribute__((btf_type_tag("uptr"))) + +#if defined (__clang__) +#define bpf_ksym_exists(sym) ({ \ + _Static_assert(!__builtin_constant_p(!!sym), \ + #sym " should be marked as __weak"); \ + !!sym; \ +}) +#elif __GNUC__ > 8 +#define bpf_ksym_exists(sym) ({ \ + _Static_assert(__builtin_has_attribute (*sym, __weak__), \ + #sym " should be marked as __weak"); \ + !!sym; \ +}) +#else +#define bpf_ksym_exists(sym) !!sym +#endif + +#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx"))) +#define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) +#define __arg_nullable __attribute((btf_decl_tag("arg:nullable"))) +#define __arg_trusted __attribute((btf_decl_tag("arg:trusted"))) +#define __arg_untrusted __attribute((btf_decl_tag("arg:untrusted"))) +#define __arg_arena __attribute((btf_decl_tag("arg:arena"))) #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b @@ -247,6 +315,22 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) +extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args, + __u32 len__sz, void *aux__prog) __weak __ksym; + +#define bpf_stream_printk(stream_id, fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_stream_vprintk_impl(stream_id, ___fmt, ___param, sizeof(___param), NULL); \ +}) + /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args * Otherwise use __bpf_vprintk */ @@ -259,4 +343,107 @@ enum libbpf_tristate { /* Helper macro to print out debug messages */ #define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args) +struct bpf_iter_num; + +extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __weak __ksym; +extern int *bpf_iter_num_next(struct bpf_iter_num *it) __weak __ksym; +extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym; + +#ifndef bpf_for_each +/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for + * using BPF open-coded iterators without having to write mundane explicit + * low-level loop logic. Instead, it provides for()-like generic construct + * that can be used pretty naturally. E.g., for some hypothetical cgroup + * iterator, you'd write: + * + * struct cgroup *cg, *parent_cg = <...>; + * + * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) { + * bpf_printk("Child cgroup id = %d", cg->cgroup_id); + * if (cg->cgroup_id == 123) + * break; + * } + * + * I.e., it looks almost like high-level for each loop in other languages, + * supports continue/break, and is verifiable by BPF verifier. + * + * For iterating integers, the difference between bpf_for_each(num, i, N, M) + * and bpf_for(i, N, M) is in that bpf_for() provides additional proof to + * verifier that i is in [N, M) range, and in bpf_for_each() case i is `int + * *`, not just `int`. So for integers bpf_for() is more convenient. + * + * Note: this macro relies on C99 feature of allowing to declare variables + * inside for() loop, bound to for() loop lifetime. It also utilizes GCC + * extension: __attribute__((cleanup(<func>))), supported by both GCC and + * Clang. + */ +#define bpf_for_each(type, cur, args...) for ( \ + /* initialize and define destructor */ \ + struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */, \ + cleanup(bpf_iter_##type##_destroy))), \ + /* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */ \ + *___p __attribute__((unused)) = ( \ + bpf_iter_##type##_new(&___it, ##args), \ + /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ + /* for bpf_iter_##type##_destroy() when used from cleanup() attribute */ \ + (void)bpf_iter_##type##_destroy, (void *)0); \ + /* iteration and termination check */ \ + (((cur) = bpf_iter_##type##_next(&___it))); \ +) +#endif /* bpf_for_each */ + +#ifndef bpf_for +/* bpf_for(i, start, end) implements a for()-like looping construct that sets + * provided integer variable *i* to values starting from *start* through, + * but not including, *end*. It also proves to BPF verifier that *i* belongs + * to range [start, end), so this can be used for accessing arrays without + * extra checks. + * + * Note: *start* and *end* are assumed to be expressions with no side effects + * and whose values do not change throughout bpf_for() loop execution. They do + * not have to be statically known or constant, though. + * + * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for() + * loop bound variables and cleanup attribute, supported by GCC and Clang. + */ +#define bpf_for(i, start, end) for ( \ + /* initialize and define destructor */ \ + struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ + cleanup(bpf_iter_num_destroy))), \ + /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \ + *___p __attribute__((unused)) = ( \ + bpf_iter_num_new(&___it, (start), (end)), \ + /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ + /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ + (void)bpf_iter_num_destroy, (void *)0); \ + ({ \ + /* iteration step */ \ + int *___t = bpf_iter_num_next(&___it); \ + /* termination and bounds check */ \ + (___t && ((i) = *___t, (i) >= (start) && (i) < (end))); \ + }); \ +) +#endif /* bpf_for */ + +#ifndef bpf_repeat +/* bpf_repeat(N) performs N iterations without exposing iteration number + * + * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for() + * loop bound variables and cleanup attribute, supported by GCC and Clang. + */ +#define bpf_repeat(N) for ( \ + /* initialize and define destructor */ \ + struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */ \ + cleanup(bpf_iter_num_destroy))), \ + /* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */ \ + *___p __attribute__((unused)) = ( \ + bpf_iter_num_new(&___it, 0, (N)), \ + /* this is a workaround for Clang bug: it currently doesn't emit BTF */ \ + /* for bpf_iter_num_destroy() when used from cleanup() attribute */ \ + (void)bpf_iter_num_destroy, (void *)0); \ + bpf_iter_num_next(&___it); \ + /* nothing here */ \ +) +#endif /* bpf_repeat */ + #endif diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 90f56b0f585f..dbe32a5d02cd 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -2,6 +2,8 @@ #ifndef __BPF_TRACING_H__ #define __BPF_TRACING_H__ +#include "bpf_helpers.h" + /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ #if defined(__TARGET_ARCH_x86) #define bpf_target_x86 @@ -27,6 +29,12 @@ #elif defined(__TARGET_ARCH_riscv) #define bpf_target_riscv #define bpf_target_defined +#elif defined(__TARGET_ARCH_arc) + #define bpf_target_arc + #define bpf_target_defined +#elif defined(__TARGET_ARCH_loongarch) + #define bpf_target_loongarch + #define bpf_target_defined #else /* Fall back to what the compiler says */ @@ -54,6 +62,12 @@ #elif defined(__riscv) && __riscv_xlen == 64 #define bpf_target_riscv #define bpf_target_defined +#elif defined(__arc__) + #define bpf_target_arc + #define bpf_target_defined +#elif defined(__loongarch__) + #define bpf_target_loongarch + #define bpf_target_defined #endif /* no compiler target */ #endif @@ -64,6 +78,10 @@ #if defined(bpf_target_x86) +/* + * https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI + */ + #if defined(__KERNEL__) || defined(__VMLINUX_H__) #define __PT_PARM1_REG di @@ -71,6 +89,18 @@ #define __PT_PARM3_REG dx #define __PT_PARM4_REG cx #define __PT_PARM5_REG r8 +#define __PT_PARM6_REG r9 +/* + * Syscall uses r10 for PARM4. See arch/x86/entry/entry_64.S:entry_SYSCALL_64 + * comments in Linux sources. And refer to syscall(2) manpage. + */ +#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG r10 +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG +#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG + #define __PT_RET_REG sp #define __PT_FP_REG bp #define __PT_RC_REG ax @@ -81,12 +111,18 @@ #ifdef __i386__ +/* i386 kernel is built with -mregparm=3 */ #define __PT_PARM1_REG eax #define __PT_PARM2_REG edx #define __PT_PARM3_REG ecx -/* i386 kernel is built with -mregparm=3 */ -#define __PT_PARM4_REG __unsupported__ -#define __PT_PARM5_REG __unsupported__ +/* i386 syscall ABI is very different, refer to syscall(2) manpage */ +#define __PT_PARM1_SYSCALL_REG ebx +#define __PT_PARM2_SYSCALL_REG ecx +#define __PT_PARM3_SYSCALL_REG edx +#define __PT_PARM4_SYSCALL_REG esi +#define __PT_PARM5_SYSCALL_REG edi +#define __PT_PARM6_SYSCALL_REG ebp + #define __PT_RET_REG esp #define __PT_FP_REG ebp #define __PT_RC_REG eax @@ -100,6 +136,15 @@ #define __PT_PARM3_REG rdx #define __PT_PARM4_REG rcx #define __PT_PARM5_REG r8 +#define __PT_PARM6_REG r9 + +#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG r10 +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG +#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG + #define __PT_RET_REG rsp #define __PT_FP_REG rbp #define __PT_RC_REG rax @@ -112,6 +157,14 @@ #elif defined(bpf_target_s390) +/* + * https://github.com/IBM/s390x-abi/releases/download/v1.6/lzsabi_s390x.pdf + */ + +struct pt_regs___s390 { + unsigned long orig_gpr2; +} __attribute__((preserve_access_index)); + /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) #define __PT_PARM1_REG gprs[2] @@ -119,7 +172,18 @@ #define __PT_PARM3_REG gprs[4] #define __PT_PARM4_REG gprs[5] #define __PT_PARM5_REG gprs[6] -#define __PT_RET_REG grps[14] + +#define __PT_PARM1_SYSCALL_REG orig_gpr2 +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG +#define __PT_PARM6_SYSCALL_REG gprs[7] +#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___s390 *)(x))->__PT_PARM1_SYSCALL_REG) +#define PT_REGS_PARM1_CORE_SYSCALL(x) \ + BPF_CORE_READ((const struct pt_regs___s390 *)(x), __PT_PARM1_SYSCALL_REG) + +#define __PT_RET_REG gprs[14] #define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */ #define __PT_RC_REG gprs[2] #define __PT_SP_REG gprs[15] @@ -127,11 +191,23 @@ #elif defined(bpf_target_arm) +/* + * https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#machine-registers + */ + #define __PT_PARM1_REG uregs[0] #define __PT_PARM2_REG uregs[1] #define __PT_PARM3_REG uregs[2] #define __PT_PARM4_REG uregs[3] -#define __PT_PARM5_REG uregs[4] + +#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG uregs[4] +#define __PT_PARM6_SYSCALL_REG uregs[5] +#define __PT_PARM7_SYSCALL_REG uregs[6] + #define __PT_RET_REG uregs[14] #define __PT_FP_REG uregs[11] /* Works only with CONFIG_FRAME_POINTER */ #define __PT_RC_REG uregs[0] @@ -140,6 +216,14 @@ #elif defined(bpf_target_arm64) +/* + * https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#machine-registers + */ + +struct pt_regs___arm64 { + unsigned long orig_x0; +} __attribute__((preserve_access_index)); + /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) #define __PT_PARM1_REG regs[0] @@ -147,6 +231,20 @@ #define __PT_PARM3_REG regs[2] #define __PT_PARM4_REG regs[3] #define __PT_PARM5_REG regs[4] +#define __PT_PARM6_REG regs[5] +#define __PT_PARM7_REG regs[6] +#define __PT_PARM8_REG regs[7] + +#define __PT_PARM1_SYSCALL_REG orig_x0 +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG +#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG +#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___arm64 *)(x))->__PT_PARM1_SYSCALL_REG) +#define PT_REGS_PARM1_CORE_SYSCALL(x) \ + BPF_CORE_READ((const struct pt_regs___arm64 *)(x), __PT_PARM1_SYSCALL_REG) + #define __PT_RET_REG regs[30] #define __PT_FP_REG regs[29] /* Works only with CONFIG_FRAME_POINTER */ #define __PT_RC_REG regs[0] @@ -155,11 +253,27 @@ #elif defined(bpf_target_mips) +/* + * N64 ABI is assumed right now. + * https://en.wikipedia.org/wiki/MIPS_architecture#Calling_conventions + */ + #define __PT_PARM1_REG regs[4] #define __PT_PARM2_REG regs[5] #define __PT_PARM3_REG regs[6] #define __PT_PARM4_REG regs[7] #define __PT_PARM5_REG regs[8] +#define __PT_PARM6_REG regs[9] +#define __PT_PARM7_REG regs[10] +#define __PT_PARM8_REG regs[11] + +#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG /* only N32/N64 */ +#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG /* only N32/N64 */ + #define __PT_RET_REG regs[31] #define __PT_FP_REG regs[30] /* Works only with CONFIG_FRAME_POINTER */ #define __PT_RC_REG regs[2] @@ -168,24 +282,58 @@ #elif defined(bpf_target_powerpc) +/* + * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf (page 3-14, + * section "Function Calling Sequence") + */ + #define __PT_PARM1_REG gpr[3] #define __PT_PARM2_REG gpr[4] #define __PT_PARM3_REG gpr[5] #define __PT_PARM4_REG gpr[6] #define __PT_PARM5_REG gpr[7] +#define __PT_PARM6_REG gpr[8] +#define __PT_PARM7_REG gpr[9] +#define __PT_PARM8_REG gpr[10] + +/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx +#define __PT_PARM1_SYSCALL_REG orig_gpr3 +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG +#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG +#if !defined(__arch64__) +#define __PT_PARM7_SYSCALL_REG __PT_PARM7_REG /* only powerpc (not powerpc64) */ +#endif + #define __PT_RET_REG regs[31] #define __PT_FP_REG __unsupported__ #define __PT_RC_REG gpr[3] -#define __PT_SP_REG sp +#define __PT_SP_REG gpr[1] #define __PT_IP_REG nip #elif defined(bpf_target_sparc) +/* + * https://en.wikipedia.org/wiki/Calling_convention#SPARC + */ + #define __PT_PARM1_REG u_regs[UREG_I0] #define __PT_PARM2_REG u_regs[UREG_I1] #define __PT_PARM3_REG u_regs[UREG_I2] #define __PT_PARM4_REG u_regs[UREG_I3] #define __PT_PARM5_REG u_regs[UREG_I4] +#define __PT_PARM6_REG u_regs[UREG_I5] + +#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG +#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG + #define __PT_RET_REG u_regs[UREG_I7] #define __PT_FP_REG __unsupported__ #define __PT_RC_REG u_regs[UREG_I0] @@ -199,17 +347,106 @@ #elif defined(bpf_target_riscv) +/* + * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions + */ + +struct pt_regs___riscv { + unsigned long orig_a0; +} __attribute__((preserve_access_index)); + +/* riscv provides struct user_regs_struct instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) #define __PT_PARM1_REG a0 #define __PT_PARM2_REG a1 #define __PT_PARM3_REG a2 #define __PT_PARM4_REG a3 #define __PT_PARM5_REG a4 +#define __PT_PARM6_REG a5 +#define __PT_PARM7_REG a6 +#define __PT_PARM8_REG a7 + +#define __PT_PARM1_SYSCALL_REG orig_a0 +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG +#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG +#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___riscv *)(x))->__PT_PARM1_SYSCALL_REG) +#define PT_REGS_PARM1_CORE_SYSCALL(x) \ + BPF_CORE_READ((const struct pt_regs___riscv *)(x), __PT_PARM1_SYSCALL_REG) + #define __PT_RET_REG ra -#define __PT_FP_REG fp -#define __PT_RC_REG a5 +#define __PT_FP_REG s0 +#define __PT_RC_REG a0 #define __PT_SP_REG sp -#define __PT_IP_REG epc +#define __PT_IP_REG pc + +#elif defined(bpf_target_arc) + +/* + * Section "Function Calling Sequence" (page 24): + * https://raw.githubusercontent.com/wiki/foss-for-synopsys-dwc-arc-processors/toolchain/files/ARCv2_ABI.pdf + */ + +/* arc provides struct user_regs_struct instead of struct pt_regs to userspace */ +#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) +#define __PT_PARM1_REG scratch.r0 +#define __PT_PARM2_REG scratch.r1 +#define __PT_PARM3_REG scratch.r2 +#define __PT_PARM4_REG scratch.r3 +#define __PT_PARM5_REG scratch.r4 +#define __PT_PARM6_REG scratch.r5 +#define __PT_PARM7_REG scratch.r6 +#define __PT_PARM8_REG scratch.r7 + +/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx +#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG +#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG + +#define __PT_RET_REG scratch.blink +#define __PT_FP_REG scratch.fp +#define __PT_RC_REG scratch.r0 +#define __PT_SP_REG scratch.sp +#define __PT_IP_REG scratch.ret + +#elif defined(bpf_target_loongarch) + +/* + * https://docs.kernel.org/loongarch/introduction.html + * https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html + */ + +/* loongarch provides struct user_pt_regs instead of struct pt_regs to userspace */ +#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) +#define __PT_PARM1_REG regs[4] +#define __PT_PARM2_REG regs[5] +#define __PT_PARM3_REG regs[6] +#define __PT_PARM4_REG regs[7] +#define __PT_PARM5_REG regs[8] +#define __PT_PARM6_REG regs[9] +#define __PT_PARM7_REG regs[10] +#define __PT_PARM8_REG regs[11] + +/* loongarch does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx +#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG +#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG +#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG +#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG +#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG +#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG + +#define __PT_RET_REG regs[1] +#define __PT_FP_REG regs[22] +#define __PT_RC_REG regs[4] +#define __PT_SP_REG regs[3] +#define __PT_IP_REG csr_era #endif @@ -217,16 +454,49 @@ struct pt_regs; -/* allow some architecutres to override `struct pt_regs` */ +/* allow some architectures to override `struct pt_regs` */ #ifndef __PT_REGS_CAST #define __PT_REGS_CAST(x) (x) #endif +/* + * Different architectures support different number of arguments passed + * through registers. i386 supports just 3, some arches support up to 8. + */ +#ifndef __PT_PARM4_REG +#define __PT_PARM4_REG __unsupported__ +#endif +#ifndef __PT_PARM5_REG +#define __PT_PARM5_REG __unsupported__ +#endif +#ifndef __PT_PARM6_REG +#define __PT_PARM6_REG __unsupported__ +#endif +#ifndef __PT_PARM7_REG +#define __PT_PARM7_REG __unsupported__ +#endif +#ifndef __PT_PARM8_REG +#define __PT_PARM8_REG __unsupported__ +#endif +/* + * Similarly, syscall-specific conventions might differ between function call + * conventions within each architecture. All supported architectures pass + * either 6 or 7 syscall arguments in registers. + * + * See syscall(2) manpage for succinct table with information on each arch. + */ +#ifndef __PT_PARM7_SYSCALL_REG +#define __PT_PARM7_SYSCALL_REG __unsupported__ +#endif + #define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG) #define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG) #define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG) #define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG) #define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG) +#define PT_REGS_PARM6(x) (__PT_REGS_CAST(x)->__PT_PARM6_REG) +#define PT_REGS_PARM7(x) (__PT_REGS_CAST(x)->__PT_PARM7_REG) +#define PT_REGS_PARM8(x) (__PT_REGS_CAST(x)->__PT_PARM8_REG) #define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG) #define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG) #define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG) @@ -238,6 +508,9 @@ struct pt_regs; #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG) +#define PT_REGS_PARM6_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM6_REG) +#define PT_REGS_PARM7_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM7_REG) +#define PT_REGS_PARM8_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM8_REG) #define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG) #define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG) #define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG) @@ -249,7 +522,7 @@ struct pt_regs; #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(bpf_target_sparc) +#elif defined(bpf_target_sparc) || defined(bpf_target_arm64) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP @@ -263,6 +536,35 @@ struct pt_regs; #endif +#ifndef PT_REGS_PARM1_SYSCALL +#define PT_REGS_PARM1_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM1_SYSCALL_REG) +#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_SYSCALL_REG) +#endif +#ifndef PT_REGS_PARM2_SYSCALL +#define PT_REGS_PARM2_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM2_SYSCALL_REG) +#define PT_REGS_PARM2_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_SYSCALL_REG) +#endif +#ifndef PT_REGS_PARM3_SYSCALL +#define PT_REGS_PARM3_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM3_SYSCALL_REG) +#define PT_REGS_PARM3_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_SYSCALL_REG) +#endif +#ifndef PT_REGS_PARM4_SYSCALL +#define PT_REGS_PARM4_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM4_SYSCALL_REG) +#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_SYSCALL_REG) +#endif +#ifndef PT_REGS_PARM5_SYSCALL +#define PT_REGS_PARM5_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM5_SYSCALL_REG) +#define PT_REGS_PARM5_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_SYSCALL_REG) +#endif +#ifndef PT_REGS_PARM6_SYSCALL +#define PT_REGS_PARM6_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM6_SYSCALL_REG) +#define PT_REGS_PARM6_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM6_SYSCALL_REG) +#endif +#ifndef PT_REGS_PARM7_SYSCALL +#define PT_REGS_PARM7_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM7_SYSCALL_REG) +#define PT_REGS_PARM7_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM7_SYSCALL_REG) +#endif + #else /* defined(bpf_target_defined) */ #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) @@ -270,6 +572,9 @@ struct pt_regs; #define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM6(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM7(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM8(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) @@ -281,6 +586,9 @@ struct pt_regs; #define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM6_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM7_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM8_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) @@ -290,8 +598,34 @@ struct pt_regs; #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM2_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM6_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM7_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + +#define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM6_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM7_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + #endif /* defined(bpf_target_defined) */ +/* + * When invoked from a syscall handler kprobe, returns a pointer to a + * struct pt_regs containing syscall arguments and suitable for passing to + * PT_REGS_PARMn_SYSCALL() and PT_REGS_PARMn_CORE_SYSCALL(). + */ +#ifndef PT_REGS_SYSCALL_REGS +/* By default, assume that the arch selects ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ((struct pt_regs *)PT_REGS_PARM1(ctx)) +#endif + #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif @@ -306,25 +640,25 @@ struct pt_regs; #endif #define ___bpf_ctx_cast0() ctx -#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] -#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] -#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] -#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] -#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] -#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] -#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] -#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] -#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] -#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] -#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] -#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), ctx[0] +#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), ctx[8] +#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), ctx[9] +#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), ctx[10] +#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), ctx[11] #define ___bpf_ctx_cast(args...) ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) /* * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and * similar kinds of BPF programs, that accept input arguments as a single * pointer to untyped u64 array, where each u64 can actually be a typed - * pointer or integer of different size. Instead of requring user to write + * pointer or integer of different size. Instead of requiring user to write * manual casts and work with array elements by index, BPF_PROG macro * allows user to declare a list of named and typed input arguments in the * same syntax as for normal C function. All the casting is hidden and @@ -337,7 +671,7 @@ struct pt_regs; */ #define BPF_PROG(name, args...) \ name(unsigned long long *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(unsigned long long *ctx, ##args); \ typeof(name(0)) name(unsigned long long *ctx) \ { \ @@ -346,17 +680,127 @@ typeof(name(0)) name(unsigned long long *ctx) \ return ____##name(___bpf_ctx_cast(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(unsigned long long *ctx, ##args) +#ifndef ___bpf_nth2 +#define ___bpf_nth2(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \ + _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N +#endif +#ifndef ___bpf_narg2 +#define ___bpf_narg2(...) \ + ___bpf_nth2(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, \ + 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0) +#endif + +#define ___bpf_treg_cnt(t) \ + __builtin_choose_expr(sizeof(t) == 1, 1, \ + __builtin_choose_expr(sizeof(t) == 2, 1, \ + __builtin_choose_expr(sizeof(t) == 4, 1, \ + __builtin_choose_expr(sizeof(t) == 8, 1, \ + __builtin_choose_expr(sizeof(t) == 16, 2, \ + (void)0))))) + +#define ___bpf_reg_cnt0() (0) +#define ___bpf_reg_cnt1(t, x) (___bpf_reg_cnt0() + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt2(t, x, args...) (___bpf_reg_cnt1(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt3(t, x, args...) (___bpf_reg_cnt2(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt4(t, x, args...) (___bpf_reg_cnt3(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt5(t, x, args...) (___bpf_reg_cnt4(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt6(t, x, args...) (___bpf_reg_cnt5(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt7(t, x, args...) (___bpf_reg_cnt6(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt8(t, x, args...) (___bpf_reg_cnt7(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt9(t, x, args...) (___bpf_reg_cnt8(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt10(t, x, args...) (___bpf_reg_cnt9(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt11(t, x, args...) (___bpf_reg_cnt10(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt12(t, x, args...) (___bpf_reg_cnt11(args) + ___bpf_treg_cnt(t)) +#define ___bpf_reg_cnt(args...) ___bpf_apply(___bpf_reg_cnt, ___bpf_narg2(args))(args) + +#define ___bpf_union_arg(t, x, n) \ + __builtin_choose_expr(sizeof(t) == 1, ({ union { __u8 z[1]; t x; } ___t = { .z = {ctx[n]}}; ___t.x; }), \ + __builtin_choose_expr(sizeof(t) == 2, ({ union { __u16 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \ + __builtin_choose_expr(sizeof(t) == 4, ({ union { __u32 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \ + __builtin_choose_expr(sizeof(t) == 8, ({ union { __u64 z[1]; t x; } ___t = {.z = {ctx[n]} }; ___t.x; }), \ + __builtin_choose_expr(sizeof(t) == 16, ({ union { __u64 z[2]; t x; } ___t = {.z = {ctx[n], ctx[n + 1]} }; ___t.x; }), \ + (void)0))))) + +#define ___bpf_ctx_arg0(n, args...) +#define ___bpf_ctx_arg1(n, t, x) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt1(t, x)) +#define ___bpf_ctx_arg2(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt2(t, x, args)) ___bpf_ctx_arg1(n, args) +#define ___bpf_ctx_arg3(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt3(t, x, args)) ___bpf_ctx_arg2(n, args) +#define ___bpf_ctx_arg4(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt4(t, x, args)) ___bpf_ctx_arg3(n, args) +#define ___bpf_ctx_arg5(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt5(t, x, args)) ___bpf_ctx_arg4(n, args) +#define ___bpf_ctx_arg6(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt6(t, x, args)) ___bpf_ctx_arg5(n, args) +#define ___bpf_ctx_arg7(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt7(t, x, args)) ___bpf_ctx_arg6(n, args) +#define ___bpf_ctx_arg8(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt8(t, x, args)) ___bpf_ctx_arg7(n, args) +#define ___bpf_ctx_arg9(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt9(t, x, args)) ___bpf_ctx_arg8(n, args) +#define ___bpf_ctx_arg10(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt10(t, x, args)) ___bpf_ctx_arg9(n, args) +#define ___bpf_ctx_arg11(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt11(t, x, args)) ___bpf_ctx_arg10(n, args) +#define ___bpf_ctx_arg12(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt12(t, x, args)) ___bpf_ctx_arg11(n, args) +#define ___bpf_ctx_arg(args...) ___bpf_apply(___bpf_ctx_arg, ___bpf_narg2(args))(___bpf_reg_cnt(args), args) + +#define ___bpf_ctx_decl0() +#define ___bpf_ctx_decl1(t, x) , t x +#define ___bpf_ctx_decl2(t, x, args...) , t x ___bpf_ctx_decl1(args) +#define ___bpf_ctx_decl3(t, x, args...) , t x ___bpf_ctx_decl2(args) +#define ___bpf_ctx_decl4(t, x, args...) , t x ___bpf_ctx_decl3(args) +#define ___bpf_ctx_decl5(t, x, args...) , t x ___bpf_ctx_decl4(args) +#define ___bpf_ctx_decl6(t, x, args...) , t x ___bpf_ctx_decl5(args) +#define ___bpf_ctx_decl7(t, x, args...) , t x ___bpf_ctx_decl6(args) +#define ___bpf_ctx_decl8(t, x, args...) , t x ___bpf_ctx_decl7(args) +#define ___bpf_ctx_decl9(t, x, args...) , t x ___bpf_ctx_decl8(args) +#define ___bpf_ctx_decl10(t, x, args...) , t x ___bpf_ctx_decl9(args) +#define ___bpf_ctx_decl11(t, x, args...) , t x ___bpf_ctx_decl10(args) +#define ___bpf_ctx_decl12(t, x, args...) , t x ___bpf_ctx_decl11(args) +#define ___bpf_ctx_decl(args...) ___bpf_apply(___bpf_ctx_decl, ___bpf_narg2(args))(args) + +/* + * BPF_PROG2 is an enhanced version of BPF_PROG in order to handle struct + * arguments. Since each struct argument might take one or two u64 values + * in the trampoline stack, argument type size is needed to place proper number + * of u64 values for each argument. Therefore, BPF_PROG2 has different + * syntax from BPF_PROG. For example, for the following BPF_PROG syntax: + * + * int BPF_PROG(test2, int a, int b) { ... } + * + * the corresponding BPF_PROG2 syntax is: + * + * int BPF_PROG2(test2, int, a, int, b) { ... } + * + * where type and the corresponding argument name are separated by comma. + * + * Use BPF_PROG2 macro if one of the arguments might be a struct/union larger + * than 8 bytes: + * + * int BPF_PROG2(test_struct_arg, struct bpf_testmod_struct_arg_1, a, int, b, + * int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret) + * { + * // access a, b, c, d, e, and ret directly + * ... + * } + */ +#define BPF_PROG2(name, args...) \ +name(unsigned long long *ctx); \ +static __always_inline typeof(name(0)) \ +____##name(unsigned long long *ctx ___bpf_ctx_decl(args)); \ +typeof(name(0)) name(unsigned long long *ctx) \ +{ \ + return ____##name(ctx ___bpf_ctx_arg(args)); \ +} \ +static __always_inline typeof(name(0)) \ +____##name(unsigned long long *ctx ___bpf_ctx_decl(args)) + struct pt_regs; #define ___bpf_kprobe_args0() ctx -#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) -#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) -#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) -#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) -#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (unsigned long long)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (unsigned long long)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (unsigned long long)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (unsigned long long)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (unsigned long long)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args6(x, args...) ___bpf_kprobe_args5(args), (unsigned long long)PT_REGS_PARM6(ctx) +#define ___bpf_kprobe_args7(x, args...) ___bpf_kprobe_args6(args), (unsigned long long)PT_REGS_PARM7(ctx) +#define ___bpf_kprobe_args8(x, args...) ___bpf_kprobe_args7(args), (unsigned long long)PT_REGS_PARM8(ctx) #define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) /* @@ -364,14 +808,14 @@ struct pt_regs; * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific * low-level way of getting kprobe input arguments from struct pt_regs, and * provides a familiar typed and named function arguments syntax and - * semantics of accessing kprobe input paremeters. + * semantics of accessing kprobe input parameters. * * Original struct pt_regs* context is preserved as 'ctx' argument. This might * be necessary when using BPF helpers like bpf_perf_event_output(). */ #define BPF_KPROBE(name, args...) \ name(struct pt_regs *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -380,11 +824,11 @@ typeof(name(0)) name(struct pt_regs *ctx) \ return ____##name(___bpf_kprobe_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) #define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) +#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (unsigned long long)PT_REGS_RC(ctx) #define ___bpf_kretprobe_args(args...) ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) /* @@ -395,7 +839,7 @@ ____##name(struct pt_regs *ctx, ##args) */ #define BPF_KRETPROBE(name, args...) \ name(struct pt_regs *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -406,4 +850,80 @@ typeof(name(0)) name(struct pt_regs *ctx) \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) +/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */ +#define ___bpf_syscall_args0() ctx +#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (unsigned long long)PT_REGS_PARM1_SYSCALL(regs) +#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (unsigned long long)PT_REGS_PARM2_SYSCALL(regs) +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (unsigned long long)PT_REGS_PARM3_SYSCALL(regs) +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (unsigned long long)PT_REGS_PARM4_SYSCALL(regs) +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (unsigned long long)PT_REGS_PARM5_SYSCALL(regs) +#define ___bpf_syscall_args6(x, args...) ___bpf_syscall_args5(args), (unsigned long long)PT_REGS_PARM6_SYSCALL(regs) +#define ___bpf_syscall_args7(x, args...) ___bpf_syscall_args6(args), (unsigned long long)PT_REGS_PARM7_SYSCALL(regs) +#define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args) + +/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */ +#define ___bpf_syswrap_args0() ctx +#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (unsigned long long)PT_REGS_PARM1_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (unsigned long long)PT_REGS_PARM2_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (unsigned long long)PT_REGS_PARM3_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (unsigned long long)PT_REGS_PARM4_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (unsigned long long)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args6(x, args...) ___bpf_syswrap_args5(args), (unsigned long long)PT_REGS_PARM6_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args7(x, args...) ___bpf_syswrap_args6(args), (unsigned long long)PT_REGS_PARM7_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args) + +/* + * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for + * tracing syscall functions, like __x64_sys_close. It hides the underlying + * platform-specific low-level way of getting syscall input arguments from + * struct pt_regs, and provides a familiar typed and named function arguments + * syntax and semantics of accessing syscall input parameters. + * + * Original struct pt_regs * context is preserved as 'ctx' argument. This might + * be necessary when using BPF helpers like bpf_perf_event_output(). + * + * At the moment BPF_KSYSCALL does not transparently handle all the calling + * convention quirks for the following syscalls: + * + * - mmap(): __ARCH_WANT_SYS_OLD_MMAP. + * - clone(): CONFIG_CLONE_BACKWARDS, CONFIG_CLONE_BACKWARDS2 and + * CONFIG_CLONE_BACKWARDS3. + * - socket-related syscalls: __ARCH_WANT_SYS_SOCKETCALL. + * - compat syscalls. + * + * This may or may not change in the future. User needs to take extra measures + * to handle such quirks explicitly, if necessary. + * + * This macro relies on BPF CO-RE support and virtual __kconfig externs. + */ +#define BPF_KSYSCALL(name, args...) \ +name(struct pt_regs *ctx); \ +extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \ +static __always_inline typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \ + ? (struct pt_regs *)PT_REGS_PARM1(ctx) \ + : ctx; \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + if (LINUX_HAS_SYSCALL_WRAPPER) \ + return ____##name(___bpf_syswrap_args(args)); \ + else \ + return ____##name(___bpf_syscall_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + +#define BPF_KPROBE_SYSCALL BPF_KSYSCALL + +/* BPF_UPROBE and BPF_URETPROBE are identical to BPF_KPROBE and BPF_KRETPROBE, + * but are named way less confusingly for SEC("uprobe") and SEC("uretprobe") + * use cases. + */ +#define BPF_UPROBE(name, args...) BPF_KPROBE(name, ##args) +#define BPF_URETPROBE(name, args...) BPF_KRETPROBE(name, ##args) + #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 9aa19c89f758..84a4b0abc8be 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -12,6 +12,7 @@ #include <sys/utsname.h> #include <sys/param.h> #include <sys/stat.h> +#include <sys/mman.h> #include <linux/kernel.h> #include <linux/err.h> #include <linux/btf.h> @@ -116,6 +117,12 @@ struct btf { /* whether strings are already deduplicated */ bool strs_deduped; + /* whether base_btf should be freed in btf_free for this instance */ + bool owns_base; + + /* whether raw_data is a (read-only) mmap */ + bool raw_data_is_mmap; + /* BTF object FD, if loaded into kernel */ int fd; @@ -130,7 +137,7 @@ static inline __u64 ptr_to_u64(const void *ptr) /* Ensure given dynamically allocated memory region pointed to by *data* with * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough - * memory to accomodate *add_cnt* new elements, assuming *cur_cnt* elements + * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements * are already used. At most *max_cnt* elements can be ever allocated. * If necessary, memory is reallocated and all existing data is copied over, * new pointer to the memory region is stored at *data, new memory region @@ -279,7 +286,7 @@ static int btf_parse_str_sec(struct btf *btf) return -EINVAL; } if (!btf->base_btf && start[0]) { - pr_debug("Invalid BTF string section\n"); + pr_debug("Malformed BTF string section, did you forget to provide base BTF?\n"); return -EINVAL; } return 0; @@ -305,6 +312,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(__u32); case BTF_KIND_ENUM: return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ENUM64: + return base_size + vlen * sizeof(struct btf_enum64); case BTF_KIND_ARRAY: return base_size + sizeof(struct btf_array); case BTF_KIND_STRUCT: @@ -334,6 +343,7 @@ static void btf_bswap_type_base(struct btf_type *t) static int btf_bswap_type_rest(struct btf_type *t) { struct btf_var_secinfo *v; + struct btf_enum64 *e64; struct btf_member *m; struct btf_array *a; struct btf_param *p; @@ -361,6 +371,13 @@ static int btf_bswap_type_rest(struct btf_type *t) e->val = bswap_32(e->val); } return 0; + case BTF_KIND_ENUM64: + for (i = 0, e64 = btf_enum64(t); i < vlen; i++, e64++) { + e64->name_off = bswap_32(e64->name_off); + e64->val_lo32 = bswap_32(e64->val_lo32); + e64->val_hi32 = bswap_32(e64->val_hi32); + } + return 0; case BTF_KIND_ARRAY: a = btf_array(t); a->type = bswap_32(a->type); @@ -438,9 +455,163 @@ static int btf_parse_type_sec(struct btf *btf) return 0; } -__u32 btf__get_nr_types(const struct btf *btf) +static int btf_validate_str(const struct btf *btf, __u32 str_off, const char *what, __u32 type_id) { - return btf->start_id + btf->nr_types - 1; + const char *s; + + s = btf__str_by_offset(btf, str_off); + if (!s) { + pr_warn("btf: type [%u]: invalid %s (string offset %u)\n", type_id, what, str_off); + return -EINVAL; + } + + return 0; +} + +static int btf_validate_id(const struct btf *btf, __u32 id, __u32 ctx_id) +{ + const struct btf_type *t; + + t = btf__type_by_id(btf, id); + if (!t) { + pr_warn("btf: type [%u]: invalid referenced type ID %u\n", ctx_id, id); + return -EINVAL; + } + + return 0; +} + +static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __u32 id) +{ + __u32 kind = btf_kind(t); + int err, i, n; + + err = btf_validate_str(btf, t->name_off, "type name", id); + if (err) + return err; + + switch (kind) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FWD: + case BTF_KIND_FLOAT: + break; + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + err = btf_validate_id(btf, t->type, id); + if (err) + return err; + break; + case BTF_KIND_ARRAY: { + const struct btf_array *a = btf_array(t); + + err = btf_validate_id(btf, a->type, id); + err = err ?: btf_validate_id(btf, a->index_type, id); + if (err) + return err; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "field name", id); + err = err ?: btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + case BTF_KIND_ENUM: { + const struct btf_enum *m = btf_enum(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "enum name", id); + if (err) + return err; + } + break; + } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *m = btf_enum64(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "enum name", id); + if (err) + return err; + } + break; + } + case BTF_KIND_FUNC: { + const struct btf_type *ft; + + err = btf_validate_id(btf, t->type, id); + if (err) + return err; + ft = btf__type_by_id(btf, t->type); + if (btf_kind(ft) != BTF_KIND_FUNC_PROTO) { + pr_warn("btf: type [%u]: referenced type [%u] is not FUNC_PROTO\n", id, t->type); + return -EINVAL; + } + break; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *m = btf_params(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "param name", id); + err = err ?: btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *m = btf_var_secinfos(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + default: + pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind); + return -EINVAL; + } + return 0; +} + +/* Validate basic sanity of BTF. It's intentionally less thorough than + * kernel's validation and validates only properties of BTF that libbpf relies + * on to be correct (e.g., valid type IDs, valid string offsets, etc) + */ +static int btf_sanity_check(const struct btf *btf) +{ + const struct btf_type *t; + __u32 i, n = btf__type_cnt(btf); + int err; + + for (i = btf->start_id; i < n; i++) { + t = btf_type_by_id(btf, i); + err = btf_validate_type(btf, t, i); + if (err) + return err; + } + return 0; } __u32 btf__type_cnt(const struct btf *btf) @@ -472,9 +643,22 @@ const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) static int determine_ptr_size(const struct btf *btf) { + static const char * const long_aliases[] = { + "long", + "long int", + "int long", + "unsigned long", + "long unsigned", + "unsigned long int", + "unsigned int long", + "long unsigned int", + "long int unsigned", + "int unsigned long", + "int long unsigned", + }; const struct btf_type *t; const char *name; - int i, n; + int i, j, n; if (btf->base_btf && btf->base_btf->ptr_sz > 0) return btf->base_btf->ptr_sz; @@ -485,15 +669,16 @@ static int determine_ptr_size(const struct btf *btf) if (!btf_is_int(t)) continue; + if (t->size != 4 && t->size != 8) + continue; + name = btf__name_by_offset(btf, t->name_off); if (!name) continue; - if (strcmp(name, "long int") == 0 || - strcmp(name, "long unsigned int") == 0) { - if (t->size != 4 && t->size != 8) - continue; - return t->size; + for (j = 0; j < ARRAY_SIZE(long_aliases); j++) { + if (strcmp(name, long_aliases[j]) == 0) + return t->size; } } @@ -597,6 +782,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_DATASEC: case BTF_KIND_FLOAT: size = t->size; @@ -644,6 +830,7 @@ int btf__align_of(const struct btf *btf, __u32 id) switch (kind) { case BTF_KIND_INT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FLOAT: return min(btf_ptr_sz(btf), (size_t)t->size); case BTF_KIND_PTR: @@ -667,8 +854,21 @@ int btf__align_of(const struct btf *btf, __u32 id) if (align <= 0) return libbpf_err(align); max_align = max(max_align, align); + + /* if field offset isn't aligned according to field + * type's alignment, then struct must be packed + */ + if (btf_member_bitfield_size(t, i) == 0 && + (m->offset % (8 * align)) != 0) + return 1; } + /* if struct/union size isn't a multiple of its alignment, + * then struct must be packed + */ + if ((t->size % max_align) != 0) + return 1; + return max_align; } default: @@ -754,6 +954,17 @@ static bool btf_is_modifiable(const struct btf *btf) return (void *)btf->hdr != btf->raw_data; } +static void btf_free_raw_data(struct btf *btf) +{ + if (btf->raw_data_is_mmap) { + munmap(btf->raw_data, btf->raw_size); + btf->raw_data_is_mmap = false; + } else { + free(btf->raw_data); + } + btf->raw_data = NULL; +} + void btf__free(struct btf *btf) { if (IS_ERR_OR_NULL(btf)) @@ -773,9 +984,11 @@ void btf__free(struct btf *btf) free(btf->types_data); strset__free(btf->strs_set); } - free(btf->raw_data); + btf_free_raw_data(btf); free(btf->raw_data_swapped); free(btf->type_offs); + if (btf->owns_base) + btf__free(btf->base_btf); free(btf); } @@ -797,7 +1010,8 @@ static struct btf *btf_new_empty(struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; + btf->swapped_endian = base_btf->swapped_endian; } /* +1 for empty string at offset 0 */ @@ -830,7 +1044,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf) return libbpf_ptr(btf_new_empty(base_btf)); } -static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) +static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap) { struct btf *btf; int err; @@ -847,15 +1061,21 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; } - btf->raw_data = malloc(size); - if (!btf->raw_data) { - err = -ENOMEM; - goto done; + if (is_mmap) { + btf->raw_data = (void *)data; + btf->raw_data_is_mmap = true; + } else { + btf->raw_data = malloc(size); + if (!btf->raw_data) { + err = -ENOMEM; + goto done; + } + memcpy(btf->raw_data, data, size); } - memcpy(btf->raw_data, data, size); + btf->raw_size = size; btf->hdr = btf->raw_data; @@ -868,6 +1088,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) err = btf_parse_str_sec(btf); err = err ?: btf_parse_type_sec(btf); + err = err ?: btf_sanity_check(btf); if (err) goto done; @@ -882,56 +1103,46 @@ done: struct btf *btf__new(const void *data, __u32 size) { - return libbpf_ptr(btf_new(data, size, NULL)); + return libbpf_ptr(btf_new(data, size, NULL, false)); } -static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, - struct btf_ext **btf_ext) +struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf) +{ + return libbpf_ptr(btf_new(data, size, base_btf, false)); +} + +struct btf_elf_secs { + Elf_Data *btf_data; + Elf_Data *btf_ext_data; + Elf_Data *btf_base_data; +}; + +static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs *secs) { - Elf_Data *btf_data = NULL, *btf_ext_data = NULL; - int err = 0, fd = -1, idx = 0; - struct btf *btf = NULL; Elf_Scn *scn = NULL; - Elf *elf = NULL; + Elf_Data *data; GElf_Ehdr ehdr; size_t shstrndx; + int idx = 0; - if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warn("failed to init libelf for %s\n", path); - return ERR_PTR(-LIBBPF_ERRNO__LIBELF); - } - - fd = open(path, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - err = -errno; - pr_warn("failed to open %s: %s\n", path, strerror(errno)); - return ERR_PTR(err); - } - - err = -LIBBPF_ERRNO__FORMAT; - - elf = elf_begin(fd, ELF_C_READ, NULL); - if (!elf) { - pr_warn("failed to open %s as ELF file\n", path); - goto done; - } if (!gelf_getehdr(elf, &ehdr)) { pr_warn("failed to get EHDR from %s\n", path); - goto done; + goto err; } if (elf_getshdrstrndx(elf, &shstrndx)) { pr_warn("failed to get section names section index for %s\n", path); - goto done; + goto err; } if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) { pr_warn("failed to get e_shstrndx from %s\n", path); - goto done; + goto err; } while ((scn = elf_nextscn(elf, scn)) != NULL) { + Elf_Data **field; GElf_Shdr sh; char *name; @@ -939,44 +1150,110 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, if (gelf_getshdr(scn, &sh) != &sh) { pr_warn("failed to get section(%d) header from %s\n", idx, path); - goto done; + goto err; } name = elf_strptr(elf, shstrndx, sh.sh_name); if (!name) { pr_warn("failed to get section(%d) name from %s\n", idx, path); - goto done; + goto err; } - if (strcmp(name, BTF_ELF_SEC) == 0) { - btf_data = elf_getdata(scn, 0); - if (!btf_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } - continue; - } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { - btf_ext_data = elf_getdata(scn, 0); - if (!btf_ext_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } + + if (strcmp(name, BTF_ELF_SEC) == 0) + field = &secs->btf_data; + else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) + field = &secs->btf_ext_data; + else if (strcmp(name, BTF_BASE_ELF_SEC) == 0) + field = &secs->btf_base_data; + else continue; + + if (sh.sh_type != SHT_PROGBITS) { + pr_warn("unexpected section type (%d) of section(%d, %s) from %s\n", + sh.sh_type, idx, name, path); + goto err; } + + data = elf_getdata(scn, 0); + if (!data) { + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); + goto err; + } + *field = data; } - err = 0; + return 0; + +err: + return -LIBBPF_ERRNO__FORMAT; +} - if (!btf_data) { - err = -ENOENT; +static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, + struct btf_ext **btf_ext) +{ + struct btf_elf_secs secs = {}; + struct btf *dist_base_btf = NULL; + struct btf *btf = NULL; + int err = 0, fd = -1; + Elf *elf = NULL; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn("failed to init libelf for %s\n", path); + return ERR_PTR(-LIBBPF_ERRNO__LIBELF); + } + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = -errno; + pr_warn("failed to open %s: %s\n", path, errstr(err)); + return ERR_PTR(err); + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (!elf) { + err = -LIBBPF_ERRNO__FORMAT; + pr_warn("failed to open %s as ELF file\n", path); goto done; } - btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); - err = libbpf_get_error(btf); + + err = btf_find_elf_sections(elf, path, &secs); if (err) goto done; + if (!secs.btf_data) { + pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path); + err = -ENODATA; + goto done; + } + + if (secs.btf_base_data) { + dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size, + NULL, false); + if (IS_ERR(dist_base_btf)) { + err = PTR_ERR(dist_base_btf); + dist_base_btf = NULL; + goto done; + } + } + + btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size, + dist_base_btf ?: base_btf, false); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); + goto done; + } + if (dist_base_btf && base_btf) { + err = btf__relocate(btf, base_btf); + if (err) + goto done; + btf__free(dist_base_btf); + dist_base_btf = NULL; + } + + if (dist_base_btf) + btf->owns_base = true; + switch (gelf_getclass(elf)) { case ELFCLASS32: btf__set_pointer_size(btf, 4); @@ -989,11 +1266,12 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, break; } - if (btf_ext && btf_ext_data) { - *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); - err = libbpf_get_error(*btf_ext); - if (err) + if (btf_ext && secs.btf_ext_data) { + *btf_ext = btf_ext__new(secs.btf_ext_data->d_buf, secs.btf_ext_data->d_size); + if (IS_ERR(*btf_ext)) { + err = PTR_ERR(*btf_ext); goto done; + } } else if (btf_ext) { *btf_ext = NULL; } @@ -1007,6 +1285,7 @@ done: if (btf_ext) btf_ext__free(*btf_ext); + btf__free(dist_base_btf); btf__free(btf); return ERR_PTR(err); @@ -1031,7 +1310,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) int err = 0; long sz; - f = fopen(path, "rb"); + f = fopen(path, "rbe"); if (!f) { err = -errno; goto err_out; @@ -1076,7 +1355,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) } /* finally parse BTF data */ - btf = btf_new(data, sz, base_btf); + btf = btf_new(data, sz, base_btf, false); err_out: free(data); @@ -1095,6 +1374,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf) return libbpf_ptr(btf_parse_raw(path, base_btf)); } +static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) +{ + struct stat st; + void *data; + struct btf *btf; + int fd, err; + + fd = open(path, O_RDONLY); + if (fd < 0) + return ERR_PTR(-errno); + + if (fstat(fd, &st) < 0) { + err = -errno; + close(fd); + return ERR_PTR(err); + } + + data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + err = -errno; + close(fd); + + if (data == MAP_FAILED) + return ERR_PTR(err); + + btf = btf_new(data, st.st_size, base_btf, true); + if (IS_ERR(btf)) + munmap(data, st.st_size); + + return btf; +} + static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) { struct btf *btf; @@ -1124,7 +1434,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1174,6 +1486,10 @@ retry_load: opts.log_level = log_level; } + opts.token_fd = token_fd; + if (token_fd) + opts.btf_flags |= BPF_F_TOKEN_FD; + btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1188,7 +1504,7 @@ retry_load: goto retry_load; err = -errno; - pr_warn("BTF loading error: %d\n", err); + pr_warn("BTF loading error: %s\n", errstr(err)); /* don't print out contents of custom log_buf */ if (!log_buf && buf[0]) pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf); @@ -1201,11 +1517,9 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0, 0); } -int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); - int btf__fd(const struct btf *btf) { return btf->fd; @@ -1317,9 +1631,9 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) void *ptr; int err; - /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so + /* we won't know btf_size until we call bpf_btf_get_info_by_fd(). so * let's start with a sane default - 4KiB here - and resize it only if - * bpf_obj_get_info_by_fd() needs a bigger buffer. + * bpf_btf_get_info_by_fd() needs a bigger buffer. */ last_size = 4096; ptr = malloc(last_size); @@ -1329,7 +1643,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) memset(&btf_info, 0, sizeof(btf_info)); btf_info.btf = ptr_to_u64(ptr); btf_info.btf_size = last_size; - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + err = bpf_btf_get_info_by_fd(btf_fd, &btf_info, &len); if (!err && btf_info.btf_size > last_size) { void *temp_ptr; @@ -1347,7 +1661,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) btf_info.btf = ptr_to_u64(ptr); btf_info.btf_size = last_size; - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + err = bpf_btf_get_info_by_fd(btf_fd, &btf_info, &len); } if (err || btf_info.btf_size > last_size) { @@ -1355,19 +1669,25 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) goto exit_free; } - btf = btf_new(ptr, btf_info.btf_size, base_btf); + btf = btf_new(ptr, btf_info.btf_size, base_btf, false); exit_free: free(ptr); return btf; } -struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) +struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd) { struct btf *btf; int btf_fd; + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); - btf_fd = bpf_btf_get_fd_by_id(id); + if (token_fd) { + opts.open_flags |= BPF_F_TOKEN_FD; + opts.token_fd = token_fd; + } + + btf_fd = bpf_btf_get_fd_by_id_opts(id, &opts); if (btf_fd < 0) return libbpf_err_ptr(-errno); @@ -1377,103 +1697,20 @@ struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) return libbpf_ptr(btf); } -struct btf *btf__load_from_kernel_by_id(__u32 id) +struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) { - return btf__load_from_kernel_by_id_split(id, NULL); + return btf_load_from_kernel(id, base_btf, 0); } -int btf__get_from_id(__u32 id, struct btf **btf) -{ - struct btf *res; - int err; - - *btf = NULL; - res = btf__load_from_kernel_by_id(id); - err = libbpf_get_error(res); - - if (err) - return libbpf_err(err); - - *btf = res; - return 0; -} - -int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, - __u32 expected_key_size, __u32 expected_value_size, - __u32 *key_type_id, __u32 *value_type_id) +struct btf *btf__load_from_kernel_by_id(__u32 id) { - const struct btf_type *container_type; - const struct btf_member *key, *value; - const size_t max_name = 256; - char container_name[max_name]; - __s64 key_size, value_size; - __s32 container_id; - - if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) { - pr_warn("map:%s length of '____btf_map_%s' is too long\n", - map_name, map_name); - return libbpf_err(-EINVAL); - } - - container_id = btf__find_by_name(btf, container_name); - if (container_id < 0) { - pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", - map_name, container_name); - return libbpf_err(container_id); - } - - container_type = btf__type_by_id(btf, container_id); - if (!container_type) { - pr_warn("map:%s cannot find BTF type for container_id:%u\n", - map_name, container_id); - return libbpf_err(-EINVAL); - } - - if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) { - pr_warn("map:%s container_name:%s is an invalid container struct\n", - map_name, container_name); - return libbpf_err(-EINVAL); - } - - key = btf_members(container_type); - value = key + 1; - - key_size = btf__resolve_size(btf, key->type); - if (key_size < 0) { - pr_warn("map:%s invalid BTF key_type_size\n", map_name); - return libbpf_err(key_size); - } - - if (expected_key_size != key_size) { - pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", - map_name, (__u32)key_size, expected_key_size); - return libbpf_err(-EINVAL); - } - - value_size = btf__resolve_size(btf, value->type); - if (value_size < 0) { - pr_warn("map:%s invalid BTF value_type_size\n", map_name); - return libbpf_err(value_size); - } - - if (expected_value_size != value_size) { - pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", - map_name, (__u32)value_size, expected_value_size); - return libbpf_err(-EINVAL); - } - - *key_type_id = key->type; - *value_type_id = value->type; - - return 0; + return btf__load_from_kernel_by_id_split(id, NULL); } static void btf_invalidate_raw_data(struct btf *btf) { - if (btf->raw_data) { - free(btf->raw_data); - btf->raw_data = NULL; - } + if (btf->raw_data) + btf_free_raw_data(btf); if (btf->raw_data_swapped) { free(btf->raw_data_swapped); btf->raw_data_swapped = NULL; @@ -1620,28 +1857,45 @@ static int btf_commit_type(struct btf *btf, int data_sz) struct btf_pipe { const struct btf *src; struct btf *dst; + struct hashmap *str_off_map; /* map string offsets from src to dst */ }; -static int btf_rewrite_str(__u32 *str_off, void *ctx) +static int btf_rewrite_str(struct btf_pipe *p, __u32 *str_off) { - struct btf_pipe *p = ctx; - int off; + long mapped_off; + int off, err; if (!*str_off) /* nothing to do for empty strings */ return 0; + if (p->str_off_map && + hashmap__find(p->str_off_map, *str_off, &mapped_off)) { + *str_off = mapped_off; + return 0; + } + off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off)); if (off < 0) return off; + /* Remember string mapping from src to dst. It avoids + * performing expensive string comparisons. + */ + if (p->str_off_map) { + err = hashmap__append(p->str_off_map, *str_off, off); + if (err) + return err; + } + *str_off = off; return 0; } -int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) +static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type) { - struct btf_pipe p = { .src = src_btf, .dst = btf }; + struct btf_field_iter it; struct btf_type *t; + __u32 *str_off; int sz, err; sz = btf_type_size(src_type); @@ -1649,37 +1903,38 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t return libbpf_err(sz); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) + if (btf_ensure_modifiable(p->dst)) return libbpf_err(-ENOMEM); - t = btf_add_type_mem(btf, sz); + t = btf_add_type_mem(p->dst, sz); if (!t) return libbpf_err(-ENOMEM); memcpy(t, src_type, sz); - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) return libbpf_err(err); - return btf_commit_type(btf, sz); + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(p, str_off); + if (err) + return libbpf_err(err); + } + + return btf_commit_type(p->dst, sz); } -static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) +int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) { - struct btf *btf = ctx; - - if (!*type_id) /* nothing to do for VOID references */ - return 0; + struct btf_pipe p = { .src = src_btf, .dst = btf }; - /* we haven't updated btf's type count yet, so - * btf->start_id + btf->nr_types - 1 is the type ID offset we should - * add to all newly added BTF types - */ - *type_id += btf->start_id + btf->nr_types - 1; - return 0; + return btf_add_type(&p, src_type); } +static size_t btf_dedup_identity_hash_fn(long key, void *ctx); +static bool btf_dedup_equal_fn(long k1, long k2, void *ctx); + int btf__add_btf(struct btf *btf, const struct btf *src_btf) { struct btf_pipe p = { .src = src_btf, .dst = btf }; @@ -1713,10 +1968,18 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) if (!off) return libbpf_err(-ENOMEM); + /* Map the string offsets from src_btf to the offsets from btf to improve performance */ + p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(p.str_off_map)) + return libbpf_err(-ENOMEM); + /* bulk copy types data for all types from src_btf */ memcpy(t, src_btf->types_data, data_sz); for (i = 0; i < cnt; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + sz = btf_type_size(t); if (sz < 0) { /* unlikely, has to be corrupted src_btf */ @@ -1728,15 +1991,31 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) *off = t - btf->types_data; /* add, dedup, and remap strings referenced by this BTF type */ - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) goto err_out; + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(&p, str_off); + if (err) + goto err_out; + } /* remap all type IDs referenced from this BTF type */ - err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (err) goto err_out; + while ((type_id = btf_field_iter_next(&it))) { + if (!*type_id) /* nothing to do for VOID references */ + continue; + + /* we haven't updated btf's type count yet, so + * btf->start_id + btf->nr_types - 1 is the type ID offset we should + * add to all newly added BTF types + */ + *type_id += btf->start_id + btf->nr_types - 1; + } + /* go to next type data and type offset index entry */ t += sz; off++; @@ -1754,6 +2033,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) btf->hdr->str_off += data_sz; btf->nr_types += cnt; + hashmap__free(p.str_off_map); + /* return type ID of the first added BTF type */ return btf->start_id + btf->nr_types - cnt; err_out: @@ -1764,9 +2045,12 @@ err_out: memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); /* and now restore original strings section size; types data size - * wasn't modified, so doesn't need restoring, see big comment above */ + * wasn't modified, so doesn't need restoring, see big comment above + */ btf->hdr->str_len = old_strs_len; + hashmap__free(p.str_off_map); + return libbpf_err(err); } @@ -1872,7 +2156,7 @@ static int validate_type_id(int id) } /* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */ -static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id) +static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id, int kflag) { struct btf_type *t; int sz, name_off = 0; @@ -1895,7 +2179,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref } t->name_off = name_off; - t->info = btf_type_info(kind, 0, 0); + t->info = btf_type_info(kind, 0, kflag); t->type = ref_type_id; return btf_commit_type(btf, sz); @@ -1910,7 +2194,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref */ int btf__add_ptr(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id, 0); } /* @@ -2086,20 +2370,8 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, return 0; } -/* - * Append new BTF_KIND_ENUM type with: - * - *name* - name of the enum, can be NULL or empty for anonymous enums; - * - *byte_sz* - size of the enum, in bytes. - * - * Enum initially has no enum values in it (and corresponds to enum forward - * declaration). Enumerator values can be added by btf__add_enum_value() - * immediately after btf__add_enum() succeeds. - * - * Returns: - * - >0, type ID of newly added BTF type; - * - <0, on error. - */ -int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) +static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz, + bool is_signed, __u8 kind) { struct btf_type *t; int sz, name_off = 0; @@ -2124,13 +2396,35 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) /* start out with vlen=0; it will be adjusted when adding enum values */ t->name_off = name_off; - t->info = btf_type_info(BTF_KIND_ENUM, 0, 0); + t->info = btf_type_info(kind, 0, is_signed); t->size = byte_sz; return btf_commit_type(btf, sz); } /* + * Append new BTF_KIND_ENUM type with: + * - *name* - name of the enum, can be NULL or empty for anonymous enums; + * - *byte_sz* - size of the enum, in bytes. + * + * Enum initially has no enum values in it (and corresponds to enum forward + * declaration). Enumerator values can be added by btf__add_enum_value() + * immediately after btf__add_enum() succeeds. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) +{ + /* + * set the signedness to be unsigned, it will change to signed + * if any later enumerator is negative. + */ + return btf_add_enum_common(btf, name, byte_sz, false, BTF_KIND_ENUM); +} + +/* * Append new enum value for the current ENUM type with: * - *name* - name of the enumerator value, can't be NULL or empty; * - *value* - integer value corresponding to enum value *name*; @@ -2177,6 +2471,82 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) t = btf_last_type(btf); btf_type_inc_vlen(t); + /* if negative value, set signedness to signed */ + if (value < 0) + t->info = btf_type_info(btf_kind(t), btf_vlen(t), true); + + btf->hdr->type_len += sz; + btf->hdr->str_off += sz; + return 0; +} + +/* + * Append new BTF_KIND_ENUM64 type with: + * - *name* - name of the enum, can be NULL or empty for anonymous enums; + * - *byte_sz* - size of the enum, in bytes. + * - *is_signed* - whether the enum values are signed or not; + * + * Enum initially has no enum values in it (and corresponds to enum forward + * declaration). Enumerator values can be added by btf__add_enum64_value() + * immediately after btf__add_enum64() succeeds. + * + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_enum64(struct btf *btf, const char *name, __u32 byte_sz, + bool is_signed) +{ + return btf_add_enum_common(btf, name, byte_sz, is_signed, + BTF_KIND_ENUM64); +} + +/* + * Append new enum value for the current ENUM64 type with: + * - *name* - name of the enumerator value, can't be NULL or empty; + * - *value* - integer value corresponding to enum value *name*; + * Returns: + * - 0, on success; + * - <0, on error. + */ +int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) +{ + struct btf_enum64 *v; + struct btf_type *t; + int sz, name_off; + + /* last type should be BTF_KIND_ENUM64 */ + if (btf->nr_types == 0) + return libbpf_err(-EINVAL); + t = btf_last_type(btf); + if (!btf_is_enum64(t)) + return libbpf_err(-EINVAL); + + /* non-empty name */ + if (!name || !name[0]) + return libbpf_err(-EINVAL); + + /* decompose and invalidate raw data */ + if (btf_ensure_modifiable(btf)) + return libbpf_err(-ENOMEM); + + sz = sizeof(struct btf_enum64); + v = btf_add_type_mem(btf, sz); + if (!v) + return libbpf_err(-ENOMEM); + + name_off = btf__add_str(btf, name); + if (name_off < 0) + return name_off; + + v->name_off = name_off; + v->val_lo32 = (__u32)value; + v->val_hi32 = value >> 32; + + /* update parent type's vlen */ + t = btf_last_type(btf); + btf_type_inc_vlen(t); + btf->hdr->type_len += sz; btf->hdr->str_off += sz; return 0; @@ -2202,7 +2572,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) struct btf_type *t; int id; - id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0); + id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0, 0); if (id <= 0) return id; t = btf_type_by_id(btf, id); @@ -2232,7 +2602,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) if (!name || !name[0]) return libbpf_err(-EINVAL); - return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id, 0); } /* @@ -2244,7 +2614,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) */ int btf__add_volatile(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id, 0); } /* @@ -2256,7 +2626,7 @@ int btf__add_volatile(struct btf *btf, int ref_type_id) */ int btf__add_const(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id, 0); } /* @@ -2268,7 +2638,7 @@ int btf__add_const(struct btf *btf, int ref_type_id) */ int btf__add_restrict(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id, 0); } /* @@ -2281,10 +2651,27 @@ int btf__add_restrict(struct btf *btf, int ref_type_id) */ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) { - if (!value|| !value[0]) + if (!value || !value[0]) return libbpf_err(-EINVAL); - return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 0); +} + +/* + * Append new BTF_KIND_TYPE_TAG type with: + * - *value*, non-empty/non-NULL tag value; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Set info->kflag to 1, indicating this tag is an __attribute__ + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id) +{ + if (!value || !value[0]) + return libbpf_err(-EINVAL); + + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 1); } /* @@ -2306,7 +2693,7 @@ int btf__add_func(struct btf *btf, const char *name, linkage != BTF_FUNC_EXTERN) return libbpf_err(-EINVAL); - id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id); + id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id, 0); if (id > 0) { struct btf_type *t = btf_type_by_id(btf, id); @@ -2541,18 +2928,8 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ return 0; } -/* - * Append new BTF_KIND_DECL_TAG type with: - * - *value* - non-empty/non-NULL string; - * - *ref_type_id* - referenced type ID, it might not exist yet; - * - *component_idx* - -1 for tagging reference type, otherwise struct/union - * member or function argument index; - * Returns: - * - >0, type ID of newly added BTF type; - * - <0, on error. - */ -int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, - int component_idx) +static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx, int kflag) { struct btf_type *t; int sz, value_off; @@ -2576,14 +2953,47 @@ int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, return value_off; t->name_off = value_off; - t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false); + t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, kflag); t->type = ref_type_id; btf_decl_tag(t)->component_idx = component_idx; return btf_commit_type(btf, sz); } -struct btf_ext_sec_setup_param { +/* + * Append new BTF_KIND_DECL_TAG type with: + * - *value* - non-empty/non-NULL string; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * - *component_idx* - -1 for tagging reference type, otherwise struct/union + * member or function argument index; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx) +{ + return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 0); +} + +/* + * Append new BTF_KIND_DECL_TAG type with: + * - *value* - non-empty/non-NULL string; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * - *component_idx* - -1 for tagging reference type, otherwise struct/union + * member or function argument index; + * Set info->kflag to 1, indicating this tag is an __attribute__ + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id, + int component_idx) +{ + return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 1); +} + +struct btf_ext_sec_info_param { __u32 off; __u32 len; __u32 min_rec_size; @@ -2591,13 +3001,20 @@ struct btf_ext_sec_setup_param { const char *desc; }; -static int btf_ext_setup_info(struct btf_ext *btf_ext, - struct btf_ext_sec_setup_param *ext_sec) +/* + * Parse a single info subsection of the BTF.ext info data: + * - validate subsection structure and elements + * - save info subsection start and sizing details in struct btf_ext + * - endian-independent operation, for calling before byte-swapping + */ +static int btf_ext_parse_sec_info(struct btf_ext *btf_ext, + struct btf_ext_sec_info_param *ext_sec, + bool is_native) { const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; - /* The start of the info sec (including the __u32 record_size). */ + size_t sec_cnt = 0; void *info; if (ext_sec->len == 0) @@ -2609,6 +3026,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } + /* The start of the info sec (including the __u32 record_size). */ info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; info_left = ext_sec->len; @@ -2624,9 +3042,13 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - /* The record size needs to meet the minimum standard */ - record_size = *(__u32 *)info; + /* The record size needs to meet either the minimum standard or, when + * handling non-native endianness data, the exact standard so as + * to allow safe byte-swapping. + */ + record_size = is_native ? *(__u32 *)info : bswap_32(*(__u32 *)info); if (record_size < ext_sec->min_rec_size || + (!is_native && record_size != ext_sec->min_rec_size) || record_size & 0x03) { pr_debug("%s section in .BTF.ext has invalid record size %u\n", ext_sec->desc, record_size); @@ -2638,7 +3060,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, /* If no records, return failure now so .BTF.ext won't be used. */ if (!info_left) { - pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); + pr_debug("%s section in .BTF.ext has no records\n", ext_sec->desc); return -EINVAL; } @@ -2653,15 +3075,14 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - num_records = sinfo->num_info; + num_records = is_native ? sinfo->num_info : bswap_32(sinfo->num_info); if (num_records == 0) { pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); return -EINVAL; } - total_record_size = sec_hdrlen + - (__u64)num_records * record_size; + total_record_size = sec_hdrlen + (__u64)num_records * record_size; if (info_left < total_record_size) { pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); @@ -2670,74 +3091,169 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, info_left -= total_record_size; sinfo = (void *)sinfo + total_record_size; + sec_cnt++; } ext_info = ext_sec->ext_info; ext_info->len = ext_sec->len - sizeof(__u32); ext_info->rec_size = record_size; ext_info->info = info + sizeof(__u32); + ext_info->sec_cnt = sec_cnt; return 0; } -static int btf_ext_setup_func_info(struct btf_ext *btf_ext) +/* Parse all info secs in the BTF.ext info data */ +static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native) { - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param func_info = { .off = btf_ext->hdr->func_info_off, .len = btf_ext->hdr->func_info_len, .min_rec_size = sizeof(struct bpf_func_info_min), .ext_info = &btf_ext->func_info, .desc = "func_info" }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_line_info(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param line_info = { .off = btf_ext->hdr->line_info_off, .len = btf_ext->hdr->line_info_len, .min_rec_size = sizeof(struct bpf_line_info_min), .ext_info = &btf_ext->line_info, .desc = "line_info", }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_core_relos(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { - .off = btf_ext->hdr->core_relo_off, - .len = btf_ext->hdr->core_relo_len, + struct btf_ext_sec_info_param core_relo = { .min_rec_size = sizeof(struct bpf_core_relo), .ext_info = &btf_ext->core_relo_info, .desc = "core_relo", }; + int err; + + err = btf_ext_parse_sec_info(btf_ext, &func_info, is_native); + if (err) + return err; + + err = btf_ext_parse_sec_info(btf_ext, &line_info, is_native); + if (err) + return err; + + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return 0; /* skip core relos parsing */ + + core_relo.off = btf_ext->hdr->core_relo_off; + core_relo.len = btf_ext->hdr->core_relo_len; + err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native); + if (err) + return err; + + return 0; +} + +/* Swap byte-order of BTF.ext header with any endianness */ +static void btf_ext_bswap_hdr(struct btf_ext_header *h) +{ + bool is_native = h->magic == BTF_MAGIC; + __u32 hdr_len; + + hdr_len = is_native ? h->hdr_len : bswap_32(h->hdr_len); + + h->magic = bswap_16(h->magic); + h->hdr_len = bswap_32(h->hdr_len); + h->func_info_off = bswap_32(h->func_info_off); + h->func_info_len = bswap_32(h->func_info_len); + h->line_info_off = bswap_32(h->line_info_off); + h->line_info_len = bswap_32(h->line_info_len); + + if (hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; + + h->core_relo_off = bswap_32(h->core_relo_off); + h->core_relo_len = bswap_32(h->core_relo_len); +} + +/* Swap byte-order of generic info subsection */ +static void btf_ext_bswap_info_sec(void *info, __u32 len, bool is_native, + info_rec_bswap_fn bswap_fn) +{ + struct btf_ext_info_sec *sec; + __u32 info_left, rec_size, *rs; + + if (len == 0) + return; + + rs = info; /* info record size */ + rec_size = is_native ? *rs : bswap_32(*rs); + *rs = bswap_32(*rs); + + sec = info + sizeof(__u32); /* info sec #1 */ + info_left = len - sizeof(__u32); + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, num_recs; + void *p; + + num_recs = is_native ? sec->num_info : bswap_32(sec->num_info); + sec->sec_name_off = bswap_32(sec->sec_name_off); + sec->num_info = bswap_32(sec->num_info); + p = sec->data; /* info rec #1 */ + for (i = 0; i < num_recs; i++, p += rec_size) + bswap_fn(p); + sec = p; + info_left -= sec_hdrlen + (__u64)rec_size * num_recs; + } +} + +/* + * Swap byte-order of all info data in a BTF.ext section + * - requires BTF.ext hdr in native endianness + */ +static void btf_ext_bswap_info(struct btf_ext *btf_ext, void *data) +{ + const bool is_native = btf_ext->swapped_endian; + const struct btf_ext_header *h = data; + void *info; + + /* Swap func_info subsection byte-order */ + info = data + h->hdr_len + h->func_info_off; + btf_ext_bswap_info_sec(info, h->func_info_len, is_native, + (info_rec_bswap_fn)bpf_func_info_bswap); - return btf_ext_setup_info(btf_ext, ¶m); + /* Swap line_info subsection byte-order */ + info = data + h->hdr_len + h->line_info_off; + btf_ext_bswap_info_sec(info, h->line_info_len, is_native, + (info_rec_bswap_fn)bpf_line_info_bswap); + + /* Swap core_relo subsection byte-order (if present) */ + if (h->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; + + info = data + h->hdr_len + h->core_relo_off; + btf_ext_bswap_info_sec(info, h->core_relo_len, is_native, + (info_rec_bswap_fn)bpf_core_relo_bswap); } -static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) +/* Parse hdr data and info sections: check and convert to native endianness */ +static int btf_ext_parse(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + __u32 hdr_len, data_size = btf_ext->data_size; + struct btf_ext_header *hdr = btf_ext->hdr; + bool swapped_endian = false; + int err; - if (data_size < offsetofend(struct btf_ext_header, hdr_len) || - data_size < hdr->hdr_len) { - pr_debug("BTF.ext header not found"); + if (data_size < offsetofend(struct btf_ext_header, hdr_len)) { + pr_debug("BTF.ext header too short\n"); return -EINVAL; } + hdr_len = hdr->hdr_len; if (hdr->magic == bswap_16(BTF_MAGIC)) { - pr_warn("BTF.ext in non-native endianness is not supported\n"); - return -ENOTSUP; + swapped_endian = true; + hdr_len = bswap_32(hdr_len); } else if (hdr->magic != BTF_MAGIC) { pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); return -EINVAL; } - if (hdr->version != BTF_VERSION) { + /* Ensure known version of structs, current BTF_VERSION == 1 */ + if (hdr->version != 1) { pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); return -ENOTSUP; } @@ -2747,11 +3263,39 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) return -ENOTSUP; } - if (data_size == hdr->hdr_len) { + if (data_size < hdr_len) { + pr_debug("BTF.ext header not found\n"); + return -EINVAL; + } else if (data_size == hdr_len) { pr_debug("BTF.ext has no data\n"); return -EINVAL; } + /* Verify mandatory hdr info details present */ + if (hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { + pr_warn("BTF.ext header missing func_info, line_info\n"); + return -EINVAL; + } + + /* Keep hdr native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_hdr(btf_ext->hdr); + + /* Validate info subsections and cache key metadata */ + err = btf_ext_parse_info(btf_ext, !swapped_endian); + if (err) + return err; + + /* Keep infos native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_info(btf_ext, btf_ext->data); + + /* + * Set btf_ext->swapped_endian only after all header and info data has + * been swapped, helping bswap functions determine if their data are + * in native byte-order when called. + */ + btf_ext->swapped_endian = swapped_endian; return 0; } @@ -2759,7 +3303,11 @@ void btf_ext__free(struct btf_ext *btf_ext) { if (IS_ERR_OR_NULL(btf_ext)) return; + free(btf_ext->func_info.sec_idxs); + free(btf_ext->line_info.sec_idxs); + free(btf_ext->core_relo_info.sec_idxs); free(btf_ext->data); + free(btf_ext->data_swapped); free(btf_ext); } @@ -2780,31 +3328,7 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) } memcpy(btf_ext->data, data, size); - err = btf_ext_parse_hdr(btf_ext->data, size); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { - err = -EINVAL; - goto done; - } - - err = btf_ext_setup_func_info(btf_ext); - if (err) - goto done; - - err = btf_ext_setup_line_info(btf_ext); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) { - err = -EINVAL; - goto done; - } - - err = btf_ext_setup_core_relos(btf_ext); - if (err) - goto done; + err = btf_ext_parse(btf_ext); done: if (err) { @@ -2815,85 +3339,65 @@ done: return btf_ext; } -const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) -{ - *size = btf_ext->data_size; - return btf_ext->data; -} - -static int btf_ext_reloc_info(const struct btf *btf, - const struct btf_ext_info *ext_info, - const char *sec_name, __u32 insns_cnt, - void **info, __u32 *cnt) +static void *btf_ext_raw_data(const struct btf_ext *btf_ext_ro, bool swap_endian) { - __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec); - __u32 i, record_size, existing_len, records_len; - struct btf_ext_info_sec *sinfo; - const char *info_sec_name; - __u64 remain_len; + struct btf_ext *btf_ext = (struct btf_ext *)btf_ext_ro; + const __u32 data_sz = btf_ext->data_size; void *data; - record_size = ext_info->rec_size; - sinfo = ext_info->info; - remain_len = ext_info->len; - while (remain_len > 0) { - records_len = sinfo->num_info * record_size; - info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); - if (strcmp(info_sec_name, sec_name)) { - remain_len -= sec_hdrlen + records_len; - sinfo = (void *)sinfo + sec_hdrlen + records_len; - continue; - } - - existing_len = (*cnt) * record_size; - data = realloc(*info, existing_len + records_len); - if (!data) - return libbpf_err(-ENOMEM); + /* Return native data (always present) or swapped data if present */ + if (!swap_endian) + return btf_ext->data; + else if (btf_ext->data_swapped) + return btf_ext->data_swapped; - memcpy(data + existing_len, sinfo->data, records_len); - /* adjust insn_off only, the rest data will be passed - * to the kernel. - */ - for (i = 0; i < sinfo->num_info; i++) { - __u32 *insn_off; - - insn_off = data + existing_len + (i * record_size); - *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt; - } - *info = data; - *cnt += sinfo->num_info; - return 0; - } + /* Recreate missing swapped data, then cache and return */ + data = calloc(1, data_sz); + if (!data) + return NULL; + memcpy(data, btf_ext->data, data_sz); - return libbpf_err(-ENOENT); + btf_ext_bswap_info(btf_ext, data); + btf_ext_bswap_hdr(data); + btf_ext->data_swapped = data; + return data; } -int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *cnt) +const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size) { - return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name, - insns_cnt, func_info, cnt); -} + void *data; -int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt) -{ - return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name, - insns_cnt, line_info, cnt); + data = btf_ext_raw_data(btf_ext, btf_ext->swapped_endian); + if (!data) + return errno = ENOMEM, NULL; + + *size = btf_ext->data_size; + return data; } -__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) +__attribute__((alias("btf_ext__raw_data"))) +const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); + +enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext) { - return btf_ext->func_info.rec_size; + if (is_host_big_endian()) + return btf_ext->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN; + else + return btf_ext->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; } -__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) +int btf_ext__set_endianness(struct btf_ext *btf_ext, enum btf_endianness endian) { - return btf_ext->line_info.rec_size; + if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN) + return libbpf_err(-EINVAL); + + btf_ext->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN); + + if (!btf_ext->swapped_endian) { + free(btf_ext->data_swapped); + btf_ext->data_swapped = NULL; + } + return 0; } struct btf_dedup; @@ -2905,6 +3409,7 @@ static int btf_dedup_strings(struct btf_dedup *d); static int btf_dedup_prim_types(struct btf_dedup *d); static int btf_dedup_struct_types(struct btf_dedup *d); static int btf_dedup_ref_types(struct btf_dedup *d); +static int btf_dedup_resolve_fwds(struct btf_dedup *d); static int btf_dedup_compact_types(struct btf_dedup *d); static int btf_dedup_remap_types(struct btf_dedup *d); @@ -3012,15 +3517,16 @@ static int btf_dedup_remap_types(struct btf_dedup *d); * Algorithm summary * ================= * - * Algorithm completes its work in 6 separate passes: + * Algorithm completes its work in 7 separate passes: * * 1. Strings deduplication. * 2. Primitive types deduplication (int, enum, fwd). * 3. Struct/union types deduplication. - * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func + * 4. Resolve unambiguous forward declarations. + * 5. Reference types deduplication (pointers, typedefs, arrays, funcs, func * protos, and const/volatile/restrict modifiers). - * 5. Types compaction. - * 6. Types remapping. + * 6. Types compaction. + * 7. Types remapping. * * Algorithm determines canonical type descriptor, which is a single * representative type for each truly unique type. This canonical type is the @@ -3045,9 +3551,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d); * deduplicating structs/unions is described in greater details in comments for * `btf_dedup_is_equiv` function. */ - -DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0) -int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts) +int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) { struct btf_dedup *d; int err; @@ -3057,7 +3561,7 @@ int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts) d = btf_dedup_new(btf, opts); if (IS_ERR(d)) { - pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + pr_debug("btf_dedup_new failed: %ld\n", PTR_ERR(d)); return libbpf_err(-EINVAL); } @@ -3068,37 +3572,42 @@ int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts) err = btf_dedup_prep(d); if (err) { - pr_debug("btf_dedup_prep failed:%d\n", err); + pr_debug("btf_dedup_prep failed: %s\n", errstr(err)); goto done; } err = btf_dedup_strings(d); if (err < 0) { - pr_debug("btf_dedup_strings failed:%d\n", err); + pr_debug("btf_dedup_strings failed: %s\n", errstr(err)); goto done; } err = btf_dedup_prim_types(d); if (err < 0) { - pr_debug("btf_dedup_prim_types failed:%d\n", err); + pr_debug("btf_dedup_prim_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_struct_types(d); if (err < 0) { - pr_debug("btf_dedup_struct_types failed:%d\n", err); + pr_debug("btf_dedup_struct_types failed: %s\n", errstr(err)); + goto done; + } + err = btf_dedup_resolve_fwds(d); + if (err < 0) { + pr_debug("btf_dedup_resolve_fwds failed: %s\n", errstr(err)); goto done; } err = btf_dedup_ref_types(d); if (err < 0) { - pr_debug("btf_dedup_ref_types failed:%d\n", err); + pr_debug("btf_dedup_ref_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_compact_types(d); if (err < 0) { - pr_debug("btf_dedup_compact_types failed:%d\n", err); + pr_debug("btf_dedup_compact_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_remap_types(d); if (err < 0) { - pr_debug("btf_dedup_remap_types failed:%d\n", err); + pr_debug("btf_dedup_remap_types failed: %s\n", errstr(err)); goto done; } @@ -3107,19 +3616,6 @@ done: return libbpf_err(err); } -COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2) -int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts) -{ - LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext); - - if (unused_opts) { - pr_warn("please use new version of btf__dedup() that supports options\n"); - return libbpf_err(-ENOTSUP); - } - - return btf__dedup(btf, &opts); -} - #define BTF_UNPROCESSED_ID ((__u32)-1) #define BTF_IN_PROGRESS_ID ((__u32)-2) @@ -3159,18 +3655,17 @@ struct btf_dedup { struct strset *strs_set; }; -static long hash_combine(long h, long value) +static unsigned long hash_combine(unsigned long h, unsigned long value) { return h * 31 + value; } #define for_each_dedup_cand(d, node, hash) \ - hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) + hashmap__for_each_key_entry(d->dedup_table, node, hash) static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id) { - return hashmap__append(d->dedup_table, - (void *)hash, (void *)(long)type_id); + return hashmap__append(d->dedup_table, hash, type_id); } static int btf_dedup_hypot_map_add(struct btf_dedup *d, @@ -3217,17 +3712,17 @@ static void btf_dedup_free(struct btf_dedup *d) free(d); } -static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx) +static size_t btf_dedup_identity_hash_fn(long key, void *ctx) { - return (size_t)key; + return key; } -static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx) +static size_t btf_dedup_collision_hash_fn(long key, void *ctx) { return 0; } -static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) +static bool btf_dedup_equal_fn(long k1, long k2, void *ctx) { return k1 == k2; } @@ -3298,11 +3793,19 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void * int i, r; for (i = 0; i < d->btf->nr_types; i++) { + struct btf_field_iter it; struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + __u32 *str_off; - r = btf_type_visit_str_offs(t, fn, ctx); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (r) return r; + + while ((str_off = btf_field_iter_next(&it))) { + r = fn(str_off, ctx); + if (r) + return r; + } } if (!d->btf_ext) @@ -3398,6 +3901,20 @@ err_out: return err; } +/* + * Calculate type signature hash of TYPEDEF, ignoring referenced type IDs, + * as referenced type IDs equivalence is established separately during type + * graph equivalence check algorithm. + */ +static long btf_hash_typedef(struct btf_type *t) +{ + long h; + + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info); + return h; +} + static long btf_hash_common(struct btf_type *t) { long h; @@ -3415,6 +3932,13 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) t1->size == t2->size; } +/* Check structural compatibility of two TYPEDEF. */ +static bool btf_equal_typedef(struct btf_type *t1, struct btf_type *t2) +{ + return t1->name_off == t2->name_off && + t1->info == t2->info; +} + /* Calculate type signature hash of INT or TAG. */ static long btf_hash_int_decl_tag(struct btf_type *t) { @@ -3438,28 +3962,22 @@ static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2) return info1 == info2; } -/* Calculate type signature hash of ENUM. */ +/* Calculate type signature hash of ENUM/ENUM64. */ static long btf_hash_enum(struct btf_type *t) { long h; - /* don't hash vlen and enum members to support enum fwd resolving */ + /* don't hash vlen, enum members and size to support enum fwd resolving */ h = hash_combine(0, t->name_off); - h = hash_combine(h, t->info & ~0xffff); - h = hash_combine(h, t->size); return h; } -/* Check structural equality of two ENUMs. */ -static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +static bool btf_equal_enum_members(struct btf_type *t1, struct btf_type *t2) { const struct btf_enum *m1, *m2; __u16 vlen; int i; - if (!btf_equal_common(t1, t2)) - return false; - vlen = btf_vlen(t1); m1 = btf_enum(t1); m2 = btf_enum(t2); @@ -3472,19 +3990,55 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) return true; } +static bool btf_equal_enum64_members(struct btf_type *t1, struct btf_type *t2) +{ + const struct btf_enum64 *m1, *m2; + __u16 vlen; + int i; + + vlen = btf_vlen(t1); + m1 = btf_enum64(t1); + m2 = btf_enum64(t2); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->val_lo32 != m2->val_lo32 || + m1->val_hi32 != m2->val_hi32) + return false; + m1++; + m2++; + } + return true; +} + +/* Check structural equality of two ENUMs or ENUM64s. */ +static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_equal_common(t1, t2)) + return false; + + /* t1 & t2 kinds are identical because of btf_equal_common */ + if (btf_kind(t1) == BTF_KIND_ENUM) + return btf_equal_enum_members(t1, t2); + else + return btf_equal_enum64_members(t1, t2); +} + static inline bool btf_is_enum_fwd(struct btf_type *t) { - return btf_is_enum(t) && btf_vlen(t) == 0; + return btf_is_any_enum(t) && btf_vlen(t) == 0; } static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) { if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) return btf_equal_enum(t1, t2); - /* ignore vlen when comparing */ + /* At this point either t1 or t2 or both are forward declarations, thus: + * - skip comparing vlen because it is zero for forward declarations; + * - skip comparing size to allow enum forward declarations + * to be compatible with enum64 full declarations; + * - skip comparing kind for the same reason. + */ return t1->name_off == t2->name_off && - (t1->info & ~0xffff) == (t2->info & ~0xffff) && - t1->size == t2->size; + btf_is_any_enum(t1) && btf_is_any_enum(t2); } /* @@ -3699,6 +4253,7 @@ static int btf_dedup_prep(struct btf_dedup *d) h = btf_hash_int_decl_tag(t); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: h = btf_hash_enum(t); break; case BTF_KIND_STRUCT: @@ -3758,7 +4313,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_INT: h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_int_tag(t, cand)) { new_id = cand_id; @@ -3768,9 +4323,10 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: h = btf_hash_enum(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_enum(t, cand)) { new_id = cand_id; @@ -3792,7 +4348,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_FLOAT: h = btf_hash_common(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_common(t, cand)) { new_id = cand_id; @@ -3870,44 +4426,109 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT; } -/* Check if given two types are identical ARRAY definitions */ -static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth) { struct btf_type *t1, *t2; + int k1, k2; +recur: + if (depth <= 0) + return false; t1 = btf_type_by_id(d->btf, id1); t2 = btf_type_by_id(d->btf, id2); - if (!btf_is_array(t1) || !btf_is_array(t2)) - return 0; - return btf_equal_array(t1, t2); -} + k1 = btf_kind(t1); + k2 = btf_kind(t2); + if (k1 != k2) + return false; -/* Check if given two types are identical STRUCT/UNION definitions */ -static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) -{ - const struct btf_member *m1, *m2; - struct btf_type *t1, *t2; - int n, i; + switch (k1) { + case BTF_KIND_UNKN: /* VOID */ + return true; + case BTF_KIND_INT: + return btf_equal_int_tag(t1, t2); + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + return btf_compat_enum(t1, t2); + case BTF_KIND_FWD: + case BTF_KIND_FLOAT: + return btf_equal_common(t1, t2); + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_TYPE_TAG: + if (t1->info != t2->info || t1->name_off != t2->name_off) + return false; + id1 = t1->type; + id2 = t2->type; + goto recur; + case BTF_KIND_ARRAY: { + struct btf_array *a1, *a2; - t1 = btf_type_by_id(d->btf, id1); - t2 = btf_type_by_id(d->btf, id2); + if (!btf_compat_array(t1, t2)) + return false; - if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) - return false; + a1 = btf_array(t1); + a2 = btf_array(t1); - if (!btf_shallow_equal_struct(t1, t2)) - return false; + if (a1->index_type != a2->index_type && + !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1)) + return false; - m1 = btf_members(t1); - m2 = btf_members(t2); - for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { - if (m1->type != m2->type) + if (a1->type != a2->type && + !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1)) return false; + + return true; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m1, *m2; + int i, n; + + if (!btf_shallow_equal_struct(t1, t2)) + return false; + + m1 = btf_members(t1); + m2 = btf_members(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { + if (m1->type == m2->type) + continue; + if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) + return false; + } + return true; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p1, *p2; + int i, n; + + if (!btf_compat_fnproto(t1, t2)) + return false; + + if (t1->type != t2->type && + !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1)) + return false; + + p1 = btf_params(t1); + p2 = btf_params(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) { + if (p1->type == p2->type) + continue; + if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1)) + return false; + } + return true; + } + default: + return false; } - return true; } + /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph @@ -3925,7 +4546,7 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id * and canonical graphs are not compatible structurally, whole graphs are * incompatible. If types are structurally equivalent (i.e., all information * except referenced type IDs is exactly the same), a mapping from `canon_id` to - * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`). + * a `cand_id` is recoded in hypothetical mapping (`btf_dedup->hypot_map`). * If a type references other types, then those referenced types are checked * for equivalence recursively. * @@ -3963,7 +4584,7 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id * consists of portions of the graph that come from multiple compilation units. * This is due to the fact that types within single compilation unit are always * deduplicated and FWDs are already resolved, if referenced struct/union - * definiton is available. So, if we had unresolved FWD and found corresponding + * definition is available. So, if we had unresolved FWD and found corresponding * STRUCT/UNION, they will be from different compilation units. This * consequently means that when we "link" FWD to corresponding STRUCT/UNION, * type graph will likely have at least two different BTF types that describe @@ -4026,19 +4647,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, * different fields within the *same* struct. This breaks type * equivalence check, which makes an assumption that candidate * types sub-graph has a consistent and deduped-by-compiler - * types within a single CU. So work around that by explicitly - * allowing identical array types here. + * types within a single CU. And similar situation can happen + * with struct/union sometimes, and event with pointers. + * So accommodate cases like this doing a structural + * comparison recursively, but avoiding being stuck in endless + * loops by limiting the depth up to which we check. */ - if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) - return 1; - /* It turns out that similar situation can happen with - * struct/union sometimes, sigh... Handle the case where - * structs/unions are exactly the same, down to the referenced - * type IDs. Anything more complicated (e.g., if referenced - * types are different, but equivalent) is *way more* - * complicated and requires a many-to-many equivalence mapping. - */ - if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) + if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16)) return 1; return 0; } @@ -4081,6 +4696,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return btf_equal_int_tag(cand_type, canon_type); case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: return btf_compat_enum(cand_type, canon_type); case BTF_KIND_FWD: @@ -4249,13 +4865,30 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) } } +static inline long btf_hash_by_kind(struct btf_type *t, __u16 kind) +{ + if (kind == BTF_KIND_TYPEDEF) + return btf_hash_typedef(t); + else + return btf_hash_struct(t); +} + +static inline bool btf_equal_by_kind(struct btf_type *t1, struct btf_type *t2, __u16 kind) +{ + if (kind == BTF_KIND_TYPEDEF) + return btf_equal_typedef(t1, t2); + else + return btf_shallow_equal_struct(t1, t2); +} + /* - * Deduplicate struct/union types. + * Deduplicate struct/union and typedef types. * * For each struct/union type its type signature hash is calculated, taking * into account type's name, size, number, order and names of fields, but * ignoring type ID's referenced from fields, because they might not be deduped - * completely until after reference types deduplication phase. This type hash + * completely until after reference types deduplication phase. For each typedef + * type, the hash is computed based on the type’s name and size. This type hash * is used to iterate over all potential canonical types, sharing same hash. * For each canonical candidate we check whether type graphs that they form * (through referenced types in fields and so on) are equivalent using algorithm @@ -4287,18 +4920,20 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) t = btf_type_by_id(d->btf, type_id); kind = btf_kind(t); - if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + if (kind != BTF_KIND_STRUCT && + kind != BTF_KIND_UNION && + kind != BTF_KIND_TYPEDEF) return 0; - h = btf_hash_struct(t); + h = btf_hash_by_kind(t, kind); for_each_dedup_cand(d, hash_entry, h) { - __u32 cand_id = (__u32)(long)hash_entry->value; + __u32 cand_id = hash_entry->value; int eq; /* * Even though btf_dedup_is_equiv() checks for - * btf_shallow_equal_struct() internally when checking two - * structs (unions) for equivalence, we need to guard here + * btf_equal_by_kind() internally when checking two + * structs (unions) or typedefs for equivalence, we need to guard here * from picking matching FWD type as a dedup candidate. * This can happen due to hash collision. In such case just * relying on btf_dedup_is_equiv() would lead to potentially @@ -4306,7 +4941,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) * FWD and compatible STRUCT/UNION are considered equivalent. */ cand_type = btf_type_by_id(d->btf, cand_id); - if (!btf_shallow_equal_struct(t, cand_type)) + if (!btf_equal_by_kind(t, cand_type, kind)) continue; btf_dedup_clear_hypot_map(d); @@ -4344,18 +4979,18 @@ static int btf_dedup_struct_types(struct btf_dedup *d) /* * Deduplicate reference type. * - * Once all primitive and struct/union types got deduplicated, we can easily + * Once all primitive, struct/union and typedef types got deduplicated, we can easily * deduplicate all other (reference) BTF types. This is done in two steps: * * 1. Resolve all referenced type IDs into their canonical type IDs. This - * resolution can be done either immediately for primitive or struct/union types - * (because they were deduped in previous two phases) or recursively for + * resolution can be done either immediately for primitive, struct/union, and typedef + * types (because they were deduped in previous two phases) or recursively for * reference types. Recursion will always terminate at either primitive or - * struct/union type, at which point we can "unwind" chain of reference types - * one by one. There is no danger of encountering cycles because in C type - * system the only way to form type cycle is through struct/union, so any chain - * of reference types, even those taking part in a type cycle, will inevitably - * reach struct/union at some point. + * struct/union and typedef types, at which point we can "unwind" chain of reference + * types one by one. There is no danger of encountering cycles in C, as the only way to + * form a type cycle is through struct or union types. Go can form such cycles through + * typedef. Thus, any chain of reference types, even those taking part in a type cycle, + * will inevitably reach a struct/union or typedef type at some point. * * 2. Once all referenced type IDs are resolved into canonical ones, BTF type * becomes "stable", in the sense that no further deduplication will cause @@ -4387,7 +5022,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_TYPE_TAG: ref_type_id = btf_dedup_ref_type(d, t->type); @@ -4397,7 +5031,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_common(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_common(t, cand)) { new_id = cand_id; @@ -4414,7 +5048,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_int_tag(t, cand)) { new_id = cand_id; @@ -4438,7 +5072,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_array(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_array(t, cand)) { new_id = cand_id; @@ -4470,7 +5104,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) h = btf_hash_fnproto(t); for_each_dedup_cand(d, hash_entry, h) { - cand_id = (__u32)(long)hash_entry->value; + cand_id = hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); if (btf_equal_fnproto(t, cand)) { new_id = cand_id; @@ -4507,6 +5141,134 @@ static int btf_dedup_ref_types(struct btf_dedup *d) } /* + * Collect a map from type names to type ids for all canonical structs + * and unions. If the same name is shared by several canonical types + * use a special value 0 to indicate this fact. + */ +static int btf_dedup_fill_unique_names_map(struct btf_dedup *d, struct hashmap *names_map) +{ + __u32 nr_types = btf__type_cnt(d->btf); + struct btf_type *t; + __u32 type_id; + __u16 kind; + int err; + + /* + * Iterate over base and split module ids in order to get all + * available structs in the map. + */ + for (type_id = 1; type_id < nr_types; ++type_id) { + t = btf_type_by_id(d->btf, type_id); + kind = btf_kind(t); + + if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + continue; + + /* Skip non-canonical types */ + if (type_id != d->map[type_id]) + continue; + + err = hashmap__add(names_map, t->name_off, type_id); + if (err == -EEXIST) + err = hashmap__set(names_map, t->name_off, 0, NULL, NULL); + + if (err) + return err; + } + + return 0; +} + +static int btf_dedup_resolve_fwd(struct btf_dedup *d, struct hashmap *names_map, __u32 type_id) +{ + struct btf_type *t = btf_type_by_id(d->btf, type_id); + enum btf_fwd_kind fwd_kind = btf_kflag(t); + __u16 cand_kind, kind = btf_kind(t); + struct btf_type *cand_t; + uintptr_t cand_id; + + if (kind != BTF_KIND_FWD) + return 0; + + /* Skip if this FWD already has a mapping */ + if (type_id != d->map[type_id]) + return 0; + + if (!hashmap__find(names_map, t->name_off, &cand_id)) + return 0; + + /* Zero is a special value indicating that name is not unique */ + if (!cand_id) + return 0; + + cand_t = btf_type_by_id(d->btf, cand_id); + cand_kind = btf_kind(cand_t); + if ((cand_kind == BTF_KIND_STRUCT && fwd_kind != BTF_FWD_STRUCT) || + (cand_kind == BTF_KIND_UNION && fwd_kind != BTF_FWD_UNION)) + return 0; + + d->map[type_id] = cand_id; + + return 0; +} + +/* + * Resolve unambiguous forward declarations. + * + * The lion's share of all FWD declarations is resolved during + * `btf_dedup_struct_types` phase when different type graphs are + * compared against each other. However, if in some compilation unit a + * FWD declaration is not a part of a type graph compared against + * another type graph that declaration's canonical type would not be + * changed. Example: + * + * CU #1: + * + * struct foo; + * struct foo *some_global; + * + * CU #2: + * + * struct foo { int u; }; + * struct foo *another_global; + * + * After `btf_dedup_struct_types` the BTF looks as follows: + * + * [1] STRUCT 'foo' size=4 vlen=1 ... + * [2] INT 'int' size=4 ... + * [3] PTR '(anon)' type_id=1 + * [4] FWD 'foo' fwd_kind=struct + * [5] PTR '(anon)' type_id=4 + * + * This pass assumes that such FWD declarations should be mapped to + * structs or unions with identical name in case if the name is not + * ambiguous. + */ +static int btf_dedup_resolve_fwds(struct btf_dedup *d) +{ + int i, err; + struct hashmap *names_map; + + names_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(names_map)) + return PTR_ERR(names_map); + + err = btf_dedup_fill_unique_names_map(d, names_map); + if (err < 0) + goto exit; + + for (i = 0; i < d->btf->nr_types; i++) { + err = btf_dedup_resolve_fwd(d, names_map, d->btf->start_id + i); + if (err < 0) + break; + } + +exit: + hashmap__free(names_map); + return err; +} + +/* * Compact types. * * After we established for each type its corresponding canonical representative @@ -4601,10 +5363,23 @@ static int btf_dedup_remap_types(struct btf_dedup *d) for (i = 0; i < d->btf->nr_types; i++) { struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + struct btf_field_iter it; + __u32 *type_id; - r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (r) return r; + + while ((type_id = btf_field_iter_next(&it))) { + __u32 resolved_id, new_id; + + resolved_id = resolve_type_id(d, *type_id); + new_id = d->hypot_map[resolved_id]; + if (new_id > BTF_MAX_NR_TYPES) + return -EINVAL; + + *type_id = new_id; + } } if (!d->btf_ext) @@ -4623,40 +5398,52 @@ static int btf_dedup_remap_types(struct btf_dedup *d) */ struct btf *btf__load_vmlinux_btf(void) { - struct { - const char *path_fmt; - bool raw_btf; - } locations[] = { - /* try canonical vmlinux BTF through sysfs first */ - { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, - /* fall back to trying to find vmlinux ELF on disk otherwise */ - { "/boot/vmlinux-%1$s" }, - { "/lib/modules/%1$s/vmlinux-%1$s" }, - { "/lib/modules/%1$s/build/vmlinux" }, - { "/usr/lib/modules/%1$s/kernel/vmlinux" }, - { "/usr/lib/debug/boot/vmlinux-%1$s" }, - { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, - { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + const char *sysfs_btf_path = "/sys/kernel/btf/vmlinux"; + /* fall back locations, trying to find vmlinux on disk */ + const char *locations[] = { + "/boot/vmlinux-%1$s", + "/lib/modules/%1$s/vmlinux-%1$s", + "/lib/modules/%1$s/build/vmlinux", + "/usr/lib/modules/%1$s/kernel/vmlinux", + "/usr/lib/debug/boot/vmlinux-%1$s", + "/usr/lib/debug/boot/vmlinux-%1$s.debug", + "/usr/lib/debug/lib/modules/%1$s/vmlinux", }; char path[PATH_MAX + 1]; struct utsname buf; struct btf *btf; int i, err; - uname(&buf); + /* is canonical sysfs location accessible? */ + if (faccessat(AT_FDCWD, sysfs_btf_path, F_OK, AT_EACCESS) < 0) { + pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n", + sysfs_btf_path); + } else { + btf = btf_parse_raw_mmap(sysfs_btf_path, NULL); + if (IS_ERR(btf)) + btf = btf__parse(sysfs_btf_path, NULL); + + if (!btf) { + err = -errno; + pr_warn("failed to read kernel BTF from '%s': %s\n", + sysfs_btf_path, errstr(err)); + return libbpf_err_ptr(err); + } + pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path); + return btf; + } + /* try fallback locations */ + uname(&buf); for (i = 0; i < ARRAY_SIZE(locations); i++) { - snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + snprintf(path, PATH_MAX, locations[i], buf.release); - if (access(path, R_OK)) + if (faccessat(AT_FDCWD, path, R_OK, AT_EACCESS)) continue; - if (locations[i].raw_btf) - btf = btf__parse_raw(path); - else - btf = btf__parse_elf(path, NULL); + btf = btf__parse(path, NULL); err = libbpf_get_error(btf); - pr_debug("loading kernel BTF '%s': %d\n", path, err); + pr_debug("loading kernel BTF '%s': %s\n", path, errstr(err)); if (err) continue; @@ -4677,125 +5464,6 @@ struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_bt return btf__parse_split(path, vmlinux_btf); } -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx) -{ - int i, n, err; - - switch (btf_kind(t)) { - case BTF_KIND_INT: - case BTF_KIND_FLOAT: - case BTF_KIND_ENUM: - return 0; - - case BTF_KIND_FWD: - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - case BTF_KIND_VAR: - case BTF_KIND_DECL_TAG: - case BTF_KIND_TYPE_TAG: - return visit(&t->type, ctx); - - case BTF_KIND_ARRAY: { - struct btf_array *a = btf_array(t); - - err = visit(&a->type, ctx); - err = err ?: visit(&a->index_type, ctx); - return err; - } - - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - - err = visit(&t->type, ctx); - if (err) - return err; - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_DATASEC: { - struct btf_var_secinfo *m = btf_var_secinfos(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - default: - return -EINVAL; - } -} - -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx) -{ - int i, n, err; - - err = visit(&t->name_off, ctx); - if (err) - return err; - - switch (btf_kind(t)) { - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_ENUM: { - struct btf_enum *m = btf_enum(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - default: - break; - } - - return 0; -} - int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx) { const struct btf_ext_info *seg; @@ -4875,3 +5543,328 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void return 0; } + +struct btf_distill { + struct btf_pipe pipe; + int *id_map; + unsigned int split_start_id; + unsigned int split_start_str; + int diff_id; +}; + +static int btf_add_distilled_type_ids(struct btf_distill *dist, __u32 i) +{ + struct btf_type *split_t = btf_type_by_id(dist->pipe.src, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, split_t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((id = btf_field_iter_next(&it))) { + struct btf_type *base_t; + + if (!*id) + continue; + /* split BTF id, not needed */ + if (*id >= dist->split_start_id) + continue; + /* already added ? */ + if (dist->id_map[*id] > 0) + continue; + + /* only a subset of base BTF types should be referenced from + * split BTF; ensure nothing unexpected is referenced. + */ + base_t = btf_type_by_id(dist->pipe.src, *id); + switch (btf_kind(base_t)) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + case BTF_KIND_ARRAY: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_TYPEDEF: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: + dist->id_map[*id] = *id; + break; + default: + pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n", + *id, btf_kind(base_t)); + return -EINVAL; + } + /* If a base type is used, ensure types it refers to are + * marked as used also; so for example if we find a PTR to INT + * we need both the PTR and INT. + * + * The only exception is named struct/unions, since distilled + * base BTF composite types have no members. + */ + if (btf_is_composite(base_t) && base_t->name_off) + continue; + err = btf_add_distilled_type_ids(dist, *id); + if (err) + return err; + } + return 0; +} + +static int btf_add_distilled_types(struct btf_distill *dist) +{ + bool adding_to_base = dist->pipe.dst->start_id == 1; + int id = btf__type_cnt(dist->pipe.dst); + struct btf_type *t; + int i, err = 0; + + + /* Add types for each of the required references to either distilled + * base or split BTF, depending on type characteristics. + */ + for (i = 1; i < dist->split_start_id; i++) { + const char *name; + int kind; + + if (!dist->id_map[i]) + continue; + t = btf_type_by_id(dist->pipe.src, i); + kind = btf_kind(t); + name = btf__name_by_offset(dist->pipe.src, t->name_off); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + /* Named int, float, fwd are added to base. */ + if (!adding_to_base) + continue; + err = btf_add_type(&dist->pipe, t); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* Named struct/union are added to base as 0-vlen + * struct/union of same size. Anonymous struct/unions + * are added to split BTF as-is. + */ + if (adding_to_base) { + if (!t->name_off) + continue; + err = btf_add_composite(dist->pipe.dst, kind, name, t->size); + } else { + if (t->name_off) + continue; + err = btf_add_type(&dist->pipe, t); + } + break; + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* Named enum[64]s are added to base as a sized + * enum; relocation will match with appropriately-named + * and sized enum or enum64. + * + * Anonymous enums are added to split BTF as-is. + */ + if (adding_to_base) { + if (!t->name_off) + continue; + err = btf__add_enum(dist->pipe.dst, name, t->size); + } else { + if (t->name_off) + continue; + err = btf_add_type(&dist->pipe, t); + } + break; + case BTF_KIND_ARRAY: + case BTF_KIND_TYPEDEF: + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: + /* All other types are added to split BTF. */ + if (adding_to_base) + continue; + err = btf_add_type(&dist->pipe, t); + break; + default: + pr_warn("unexpected kind when adding base type '%s'[%u] of kind [%u] to distilled base BTF.\n", + name, i, kind); + return -EINVAL; + + } + if (err < 0) + break; + dist->id_map[i] = id++; + } + return err; +} + +/* Split BTF ids without a mapping will be shifted downwards since distilled + * base BTF is smaller than the original base BTF. For those that have a + * mapping (either to base or updated split BTF), update the id based on + * that mapping. + */ +static int btf_update_distilled_type_ids(struct btf_distill *dist, __u32 i) +{ + struct btf_type *t = btf_type_by_id(dist->pipe.dst, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((id = btf_field_iter_next(&it))) { + if (dist->id_map[*id]) + *id = dist->id_map[*id]; + else if (*id >= dist->split_start_id) + *id -= dist->diff_id; + } + return 0; +} + +/* Create updated split BTF with distilled base BTF; distilled base BTF + * consists of BTF information required to clarify the types that split + * BTF refers to, omitting unneeded details. Specifically it will contain + * base types and memberless definitions of named structs, unions and enumerated + * types. Associated reference types like pointers, arrays and anonymous + * structs, unions and enumerated types will be added to split BTF. + * Size is recorded for named struct/unions to help guide matching to the + * target base BTF during later relocation. + * + * The only case where structs, unions or enumerated types are fully represented + * is when they are anonymous; in such cases, the anonymous type is added to + * split BTF in full. + * + * We return newly-created split BTF where the split BTF refers to a newly-created + * distilled base BTF. Both must be freed separately by the caller. + */ +int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, + struct btf **new_split_btf) +{ + struct btf *new_base = NULL, *new_split = NULL; + const struct btf *old_base; + unsigned int n = btf__type_cnt(src_btf); + struct btf_distill dist = {}; + struct btf_type *t; + int i, err = 0; + + /* src BTF must be split BTF. */ + old_base = btf__base_btf(src_btf); + if (!new_base_btf || !new_split_btf || !old_base) + return libbpf_err(-EINVAL); + + new_base = btf__new_empty(); + if (!new_base) + return libbpf_err(-ENOMEM); + + btf__set_endianness(new_base, btf__endianness(src_btf)); + + dist.id_map = calloc(n, sizeof(*dist.id_map)); + if (!dist.id_map) { + err = -ENOMEM; + goto done; + } + dist.pipe.src = src_btf; + dist.pipe.dst = new_base; + dist.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(dist.pipe.str_off_map)) { + err = -ENOMEM; + goto done; + } + dist.split_start_id = btf__type_cnt(old_base); + dist.split_start_str = old_base->hdr->str_len; + + /* Pass over src split BTF; generate the list of base BTF type ids it + * references; these will constitute our distilled BTF set to be + * distributed over base and split BTF as appropriate. + */ + for (i = src_btf->start_id; i < n; i++) { + err = btf_add_distilled_type_ids(&dist, i); + if (err < 0) + goto done; + } + /* Next add types for each of the required references to base BTF and split BTF + * in turn. + */ + err = btf_add_distilled_types(&dist); + if (err < 0) + goto done; + + /* Create new split BTF with distilled base BTF as its base; the final + * state is split BTF with distilled base BTF that represents enough + * about its base references to allow it to be relocated with the base + * BTF available. + */ + new_split = btf__new_empty_split(new_base); + if (!new_split) { + err = -errno; + goto done; + } + dist.pipe.dst = new_split; + /* First add all split types */ + for (i = src_btf->start_id; i < n; i++) { + t = btf_type_by_id(src_btf, i); + err = btf_add_type(&dist.pipe, t); + if (err < 0) + goto done; + } + /* Now add distilled types to split BTF that are not added to base. */ + err = btf_add_distilled_types(&dist); + if (err < 0) + goto done; + + /* All split BTF ids will be shifted downwards since there are less base + * BTF ids in distilled base BTF. + */ + dist.diff_id = dist.split_start_id - btf__type_cnt(new_base); + + n = btf__type_cnt(new_split); + /* Now update base/split BTF ids. */ + for (i = 1; i < n; i++) { + err = btf_update_distilled_type_ids(&dist, i); + if (err < 0) + break; + } +done: + free(dist.id_map); + hashmap__free(dist.pipe.str_off_map); + if (err) { + btf__free(new_split); + btf__free(new_base); + return libbpf_err(err); + } + *new_base_btf = new_base; + *new_split_btf = new_split; + + return 0; +} + +const struct btf_header *btf_header(const struct btf *btf) +{ + return btf->hdr; +} + +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) +{ + btf->base_btf = (struct btf *)base_btf; + btf->start_id = btf__type_cnt(base_btf); + btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; +} + +int btf__relocate(struct btf *btf, const struct btf *base_btf) +{ + int err = btf_relocate(btf, base_btf, NULL); + + if (!err) + btf->owns_base = false; + return libbpf_err(err); +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 061839f04525..cc01494d6210 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -18,6 +18,7 @@ extern "C" { #define BTF_ELF_SEC ".BTF" #define BTF_EXT_ELF_SEC ".BTF.ext" +#define BTF_BASE_ELF_SEC ".BTF.base" #define MAPS_ELF_SEC ".maps" struct btf; @@ -93,6 +94,7 @@ LIBBPF_API struct btf *btf__new_empty(void); * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an * ELF BTF section except with a base BTF on top of which split BTF should be * based + * @param base_btf base BTF object * @return new BTF object instance which has to be eventually freed with * **btf__free()** * @@ -107,6 +109,31 @@ LIBBPF_API struct btf *btf__new_empty(void); */ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); +/** + * @brief **btf__distill_base()** creates new versions of the split BTF + * *src_btf* and its base BTF. The new base BTF will only contain the types + * needed to improve robustness of the split BTF to small changes in base BTF. + * When that split BTF is loaded against a (possibly changed) base, this + * distilled base BTF will help update references to that (possibly changed) + * base BTF. + * @param src_btf source split BTF object + * @param new_base_btf pointer to where the new base BTF object pointer will be stored + * @param new_split_btf pointer to where the new split BTF object pointer will be stored + * @return 0 on success; negative error code, otherwise + * + * Both the new split and its associated new base BTF must be freed by + * the caller. + * + * If successful, 0 is returned and **new_base_btf** and **new_split_btf** + * will point at new base/split BTF. Both the new split and its associated + * new base BTF must be freed by the caller. + * + * A negative value is returned on error and the thread-local `errno` variable + * is set to the error code as well. + */ +LIBBPF_API int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, + struct btf **new_split_btf); + LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf); LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext); @@ -116,24 +143,15 @@ LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_b LIBBPF_API struct btf *btf__load_vmlinux_btf(void); LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf); -LIBBPF_API struct btf *libbpf_find_kernel_btf(void); LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); -LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead") -LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); -LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only") -LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); -LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead") -LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API int btf__load_into_kernel(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, __u32 kind); -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1") -LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); LIBBPF_API __u32 btf__type_cnt(const struct btf *btf); LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, @@ -147,32 +165,16 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API void btf__set_fd(struct btf *btf, int fd); -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__raw_data() instead") -LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, - __u32 expected_key_size, - __u32 expected_value_size, - __u32 *key_type_id, __u32 *value_type_id); LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); -LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, - __u32 *size); -LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") -int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *cnt); -LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") -int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt); -LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); -LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); +LIBBPF_API enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext); +LIBBPF_API int btf_ext__set_endianness(struct btf_ext *btf_ext, + enum btf_endianness endian); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); @@ -215,6 +217,8 @@ LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_ /* enum construction APIs */ LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz); LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value); +LIBBPF_API int btf__add_enum64(struct btf *btf, const char *name, __u32 bytes_sz, bool is_signed); +LIBBPF_API int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value); enum btf_fwd_kind { BTF_FWD_STRUCT = 0, @@ -228,6 +232,7 @@ LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id); +LIBBPF_API int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id); /* func and func_proto construction APIs */ LIBBPF_API int btf__add_func(struct btf *btf, const char *name, @@ -244,6 +249,8 @@ LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, /* tag construction API */ LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, int component_idx); +LIBBPF_API int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id, + int component_idx); struct btf_dedup_opts { size_t sz; @@ -257,22 +264,29 @@ struct btf_dedup_opts { LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); -LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts); - -LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead") -LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts); -#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__) -#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts) -#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts) +/** + * @brief **btf__relocate()** will check the split BTF *btf* for references + * to base BTF kinds, and verify those references are compatible with + * *base_btf*; if they are, *btf* is adjusted such that is re-parented to + * *base_btf* and type ids and strings are adjusted to accommodate this. + * @param btf split BTF object to relocate + * @param base_btf base BTF object + * @return 0 on success; negative error code, otherwise + * + * If successful, 0 is returned and **btf** now has **base_btf** as its + * base. + * + * A negative value is returned on error and the thread-local `errno` variable + * is set to the error code as well. + */ +LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf); struct btf_dump; struct btf_dump_opts { - union { - size_t sz; - void *ctx; /* DEPRECATED: will be gone in v1.0 */ - }; + size_t sz; }; +#define btf_dump_opts__last_field sz typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); @@ -281,57 +295,12 @@ LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, void *ctx, const struct btf_dump_opts *opts); -LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, - btf_dump_printf_fn_t printf_fn, - void *ctx, - const struct btf_dump_opts *opts); - -LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn); - -/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the - * type of 4th argument. If it's btf_dump's print callback, use deprecated - * API; otherwise, choose the new btf_dump__new(). ___libbpf_override() - * doesn't work here because both variants have 4 input arguments. - * - * (void *) casts are necessary to avoid compilation warnings about type - * mismatches, because even though __builtin_choose_expr() only ever evaluates - * one side the other side still has to satisfy type constraints (this is - * compiler implementation limitation which might be lifted eventually, - * according to the documentation). So passing struct btf_ext in place of - * btf_dump_printf_fn_t would be generating compilation warning. Casting to - * void * avoids this issue. - * - * Also, two type compatibility checks for a function and function pointer are - * required because passing function reference into btf_dump__new() as - * btf_dump__new(..., my_callback, ...) and as btf_dump__new(..., - * &my_callback, ...) (not explicit ampersand in the latter case) actually - * differs as far as __builtin_types_compatible_p() is concerned. Thus two - * checks are combined to detect callback argument. - * - * The rest works just like in case of ___libbpf_override() usage with symbol - * versioning. - * - * C++ compilers don't support __builtin_types_compatible_p(), so at least - * don't screw up compilation for them and let C++ users pick btf_dump__new - * vs btf_dump__new_deprecated explicitly. - */ -#ifndef __cplusplus -#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) || \ - __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)), \ - btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4), \ - btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4)) -#endif - LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); struct btf_dump_emit_type_decl_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* optional field name for type declaration, e.g.: * - struct my_struct <FNAME> @@ -365,9 +334,10 @@ struct btf_dump_type_data_opts { bool compact; /* no newlines/indentation */ bool skip_names; /* skip member/type names */ bool emit_zeroes; /* show 0-valued fields */ + bool emit_strings; /* print char arrays as strings */ size_t :0; }; -#define btf_dump_type_data_opts__last_field emit_zeroes +#define btf_dump_type_data_opts__last_field emit_strings LIBBPF_API int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, @@ -375,8 +345,29 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id, const struct btf_dump_type_data_opts *opts); /* - * A set of helpers for easier BTF types handling + * A set of helpers for easier BTF types handling. + * + * The inline functions below rely on constants from the kernel headers which + * may not be available for applications including this header file. To avoid + * compilation errors, we define all the constants here that were added after + * the initial introduction of the BTF_KIND* constants. */ +#ifndef BTF_KIND_FUNC +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#endif +#ifndef BTF_KIND_VAR +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#endif +#ifndef BTF_KIND_FLOAT +#define BTF_KIND_FLOAT 16 /* Floating point */ +#endif +/* The kernel header switched to enums, so the following were never #defined */ +#define BTF_KIND_DECL_TAG 17 /* Decl Tag */ +#define BTF_KIND_TYPE_TAG 18 /* Type Tag */ +#define BTF_KIND_ENUM64 19 /* Enum for up-to 64bit values */ + static inline __u16 btf_kind(const struct btf_type *t) { return BTF_INFO_KIND(t->info); @@ -434,6 +425,11 @@ static inline bool btf_is_enum(const struct btf_type *t) return btf_kind(t) == BTF_KIND_ENUM; } +static inline bool btf_is_enum64(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM64; +} + static inline bool btf_is_fwd(const struct btf_type *t) { return btf_kind(t) == BTF_KIND_FWD; @@ -504,6 +500,18 @@ static inline bool btf_is_type_tag(const struct btf_type *t) return btf_kind(t) == BTF_KIND_TYPE_TAG; } +static inline bool btf_is_any_enum(const struct btf_type *t) +{ + return btf_is_enum(t) || btf_is_enum64(t); +} + +static inline bool btf_kind_core_compat(const struct btf_type *t1, + const struct btf_type *t2) +{ + return btf_kind(t1) == btf_kind(t2) || + (btf_is_any_enum(t1) && btf_is_any_enum(t2)); +} + static inline __u8 btf_int_encoding(const struct btf_type *t) { return BTF_INT_ENCODING(*(__u32 *)(t + 1)); @@ -529,6 +537,39 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t) return (struct btf_enum *)(t + 1); } +struct btf_enum64; + +static inline struct btf_enum64 *btf_enum64(const struct btf_type *t) +{ + return (struct btf_enum64 *)(t + 1); +} + +static inline __u64 btf_enum64_value(const struct btf_enum64 *e) +{ + /* struct btf_enum64 is introduced in Linux 6.0, which is very + * bleeding-edge. Here we are avoiding relying on struct btf_enum64 + * definition coming from kernel UAPI headers to support wider range + * of system-wide kernel headers. + * + * Given this header can be also included from C++ applications, that + * further restricts C tricks we can use (like using compatible + * anonymous struct). So just treat struct btf_enum64 as + * a three-element array of u32 and access second (lo32) and third + * (hi32) elements directly. + * + * For reference, here is a struct btf_enum64 definition: + * + * const struct btf_enum64 { + * __u32 name_off; + * __u32 val_lo32; + * __u32 val_hi32; + * }; + */ + const __u32 *e64 = (const __u32 *)e; + + return ((__u64)e64[2] << 32) | e64[1]; +} + static inline struct btf_member *btf_members(const struct btf_type *t) { return (struct btf_member *)(t + 1); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index b9a3260c83cb..6388392f49a0 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -13,6 +13,7 @@ #include <ctype.h> #include <endian.h> #include <errno.h> +#include <limits.h> #include <linux/err.h> #include <linux/btf.h> #include <linux/kernel.h> @@ -66,6 +67,7 @@ struct btf_dump_data { bool compact; bool skip_names; bool emit_zeroes; + bool emit_strings; __u8 indent_lvl; /* base indent level */ char indent_str[BTF_DATA_INDENT_STR_LEN]; /* below are used during iteration */ @@ -117,14 +119,14 @@ struct btf_dump { struct btf_dump_data *typed_dump; }; -static size_t str_hash_fn(const void *key, void *ctx) +static size_t str_hash_fn(long key, void *ctx) { - return str_hash(key); + return str_hash((void *)key); } -static bool str_equal_fn(const void *a, const void *b, void *ctx) +static bool str_equal_fn(long a, long b, void *ctx) { - return strcmp(a, b) == 0; + return strcmp((void *)a, (void *)b) == 0; } static const char *btf_name_of(const struct btf_dump *d, __u32 name_off) @@ -144,15 +146,17 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) static int btf_dump_mark_referenced(struct btf_dump *d); static int btf_dump_resize(struct btf_dump *d); -DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0) -struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, - btf_dump_printf_fn_t printf_fn, - void *ctx, - const struct btf_dump_opts *opts) +struct btf_dump *btf_dump__new(const struct btf *btf, + btf_dump_printf_fn_t printf_fn, + void *ctx, + const struct btf_dump_opts *opts) { struct btf_dump *d; int err; + if (!OPTS_VALID(opts, btf_dump_opts)) + return libbpf_err_ptr(-EINVAL); + if (!printf_fn) return libbpf_err_ptr(-EINVAL); @@ -188,17 +192,6 @@ err: return libbpf_err_ptr(err); } -COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4) -struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn) -{ - if (!printf_fn) - return libbpf_err_ptr(-EINVAL); - return btf_dump__new_v0_6_0(btf, printf_fn, opts ? opts->ctx : NULL, opts); -} - static int btf_dump_resize(struct btf_dump *d) { int err, last_id = btf__type_cnt(d->btf) - 1; @@ -228,6 +221,20 @@ static int btf_dump_resize(struct btf_dump *d) return 0; } +static void btf_dump_free_names(struct hashmap *map) +{ + size_t bkt; + struct hashmap_entry *cur; + + if (!map) + return; + + hashmap__for_each_entry(map, cur, bkt) + free((void *)cur->pkey); + + hashmap__free(map); +} + void btf_dump__free(struct btf_dump *d) { int i; @@ -246,8 +253,8 @@ void btf_dump__free(struct btf_dump *d) free(d->cached_names); free(d->emit_queue); free(d->decl_stack); - hashmap__free(d->type_names); - hashmap__free(d->ident_names); + btf_dump_free_names(d->type_names); + btf_dump_free_names(d->ident_names); free(d); } @@ -301,7 +308,7 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) * definition, in which case they have to be declared inline as part of field * type declaration; or as a top-level anonymous enum, typically used for * declaring global constants. It's impossible to distinguish between two - * without knowning whether given enum type was referenced from other type: + * without knowing whether given enum type was referenced from other type: * top-level anonymous enum won't be referenced by anything, while embedded * one will. */ @@ -318,6 +325,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) switch (btf_kind(t)) { case BTF_KIND_INT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: case BTF_KIND_FLOAT: break; @@ -538,6 +546,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) return 1; } case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: /* * non-anonymous or non-referenced enums are top-level @@ -739,6 +748,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) tstate->emit_state = EMITTED; break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: if (top_level_def) { btf_dump_emit_enum_def(d, id, t, 0); btf_dump_printf(d, ";\n\n"); @@ -828,14 +838,9 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, const struct btf_type *t) { const struct btf_member *m; - int align, i, bit_sz; + int max_align = 1, align, i, bit_sz; __u16 vlen; - align = btf__align_of(btf, id); - /* size of a non-packed struct has to be a multiple of its alignment*/ - if (align && t->size % align) - return true; - m = btf_members(t); vlen = btf_vlen(t); /* all non-bitfield fields have to be naturally aligned */ @@ -844,8 +849,11 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, bit_sz = btf_member_bitfield_size(t, i); if (align && bit_sz == 0 && m->offset % (8 * align) != 0) return true; + max_align = max(align, max_align); } - + /* size of a non-packed struct has to be a multiple of its alignment */ + if (t->size % max_align != 0) + return true; /* * if original struct was marked as packed, but its layout is * naturally aligned, we'll detect that it's not packed @@ -853,44 +861,97 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, return false; } -static int chip_away_bits(int total, int at_most) -{ - return total % at_most ? : at_most; -} - static void btf_dump_emit_bit_padding(const struct btf_dump *d, - int cur_off, int m_off, int m_bit_sz, - int align, int lvl) + int cur_off, int next_off, int next_align, + bool in_bitfield, int lvl) { - int off_diff = m_off - cur_off; - int ptr_bits = d->ptr_sz * 8; + const struct { + const char *name; + int bits; + } pads[] = { + {"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8} + }; + int new_off = 0, pad_bits = 0, bits, i; + const char *pad_type = NULL; + + if (cur_off >= next_off) + return; /* no gap */ + + /* For filling out padding we want to take advantage of + * natural alignment rules to minimize unnecessary explicit + * padding. First, we find the largest type (among long, int, + * short, or char) that can be used to force naturally aligned + * boundary. Once determined, we'll use such type to fill in + * the remaining padding gap. In some cases we can rely on + * compiler filling some gaps, but sometimes we need to force + * alignment to close natural alignment with markers like + * `long: 0` (this is always the case for bitfields). Note + * that even if struct itself has, let's say 4-byte alignment + * (i.e., it only uses up to int-aligned types), using `long: + * X;` explicit padding doesn't actually change struct's + * overall alignment requirements, but compiler does take into + * account that type's (long, in this example) natural + * alignment requirements when adding implicit padding. We use + * this fact heavily and don't worry about ruining correct + * struct alignment requirement. + */ + for (i = 0; i < ARRAY_SIZE(pads); i++) { + pad_bits = pads[i].bits; + pad_type = pads[i].name; - if (off_diff <= 0) - /* no gap */ - return; - if (m_bit_sz == 0 && off_diff < align * 8) - /* natural padding will take care of a gap */ - return; + new_off = roundup(cur_off, pad_bits); + if (new_off <= next_off) + break; + } - while (off_diff > 0) { - const char *pad_type; - int pad_bits; - - if (ptr_bits > 32 && off_diff > 32) { - pad_type = "long"; - pad_bits = chip_away_bits(off_diff, ptr_bits); - } else if (off_diff > 16) { - pad_type = "int"; - pad_bits = chip_away_bits(off_diff, 32); - } else if (off_diff > 8) { - pad_type = "short"; - pad_bits = chip_away_bits(off_diff, 16); - } else { - pad_type = "char"; - pad_bits = chip_away_bits(off_diff, 8); + if (new_off > cur_off && new_off <= next_off) { + /* We need explicit `<type>: 0` aligning mark if next + * field is right on alignment offset and its + * alignment requirement is less strict than <type>'s + * alignment (so compiler won't naturally align to the + * offset we expect), or if subsequent `<type>: X`, + * will actually completely fit in the remaining hole, + * making compiler basically ignore `<type>: X` + * completely. + */ + if (in_bitfield || + (new_off == next_off && roundup(cur_off, next_align * 8) != new_off) || + (new_off != next_off && next_off - new_off <= new_off - cur_off)) + /* but for bitfields we'll emit explicit bit count */ + btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, + in_bitfield ? new_off - cur_off : 0); + cur_off = new_off; + } + + /* Now we know we start at naturally aligned offset for a chosen + * padding type (long, int, short, or char), and so the rest is just + * a straightforward filling of remaining padding gap with full + * `<type>: sizeof(<type>);` markers, except for the last one, which + * might need smaller than sizeof(<type>) padding. + */ + while (cur_off != next_off) { + bits = min(next_off - cur_off, pad_bits); + if (bits == pad_bits) { + btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); + cur_off += bits; + continue; + } + /* For the remainder padding that doesn't cover entire + * pad_type bit length, we pick the smallest necessary type. + * This is pure aesthetics, we could have just used `long`, + * but having smallest necessary one communicates better the + * scale of the padding gap. + */ + for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) { + pad_type = pads[i].name; + pad_bits = pads[i].bits; + if (pad_bits < bits) + continue; + + btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits); + cur_off += bits; + break; } - btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); - off_diff -= pad_bits; } } @@ -910,9 +971,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, { const struct btf_member *m = btf_members(t); bool is_struct = btf_is_struct(t); - int align, i, packed, off = 0; + bool packed, prev_bitfield = false; + int align, i, off = 0; __u16 vlen = btf_vlen(t); + align = btf__align_of(d->btf, id); packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; btf_dump_printf(d, "%s%s%s {", @@ -922,37 +985,47 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, for (i = 0; i < vlen; i++, m++) { const char *fname; - int m_off, m_sz; + int m_off, m_sz, m_align; + bool in_bitfield; fname = btf_name_of(d, m->name_off); m_sz = btf_member_bitfield_size(t, i); m_off = btf_member_bit_offset(t, i); - align = packed ? 1 : btf__align_of(d->btf, m->type); + m_align = packed ? 1 : btf__align_of(d->btf, m->type); - btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); + in_bitfield = prev_bitfield && m_sz != 0; + + btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1); btf_dump_printf(d, "\n%s", pfx(lvl + 1)); btf_dump_emit_type_decl(d, m->type, fname, lvl + 1); if (m_sz) { btf_dump_printf(d, ": %d", m_sz); off = m_off + m_sz; + prev_bitfield = true; } else { m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type)); off = m_off + m_sz * 8; + prev_bitfield = false; } + btf_dump_printf(d, ";"); } /* pad at the end, if necessary */ - if (is_struct) { - align = packed ? 1 : btf__align_of(d->btf, id); - btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, - lvl + 1); - } + if (is_struct) + btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1); - if (vlen) + /* + * Keep `struct empty {}` on a single line, + * only print newline when there are regular or padding fields. + */ + if (vlen || t->size) { btf_dump_printf(d, "\n"); - btf_dump_printf(d, "%s}", pfx(lvl)); + btf_dump_printf(d, "%s}", pfx(lvl)); + } else { + btf_dump_printf(d, "}"); + } if (packed) btf_dump_printf(d, " __attribute__((packed))"); } @@ -989,38 +1062,118 @@ static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id)); } -static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, - const struct btf_type *t, - int lvl) +static void btf_dump_emit_enum32_val(struct btf_dump *d, + const struct btf_type *t, + int lvl, __u16 vlen) { const struct btf_enum *v = btf_enum(t); - __u16 vlen = btf_vlen(t); + bool is_signed = btf_kflag(t); + const char *fmt_str; const char *name; size_t dup_cnt; int i; + for (i = 0; i < vlen; i++, v++) { + name = btf_name_of(d, v->name_off); + /* enumerators share namespace with typedef idents */ + dup_cnt = btf_dump_name_dups(d, d->ident_names, name); + if (dup_cnt > 1) { + fmt_str = is_signed ? "\n%s%s___%zd = %d," : "\n%s%s___%zd = %u,"; + btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, dup_cnt, v->val); + } else { + fmt_str = is_signed ? "\n%s%s = %d," : "\n%s%s = %u,"; + btf_dump_printf(d, fmt_str, pfx(lvl + 1), name, v->val); + } + } +} + +static void btf_dump_emit_enum64_val(struct btf_dump *d, + const struct btf_type *t, + int lvl, __u16 vlen) +{ + const struct btf_enum64 *v = btf_enum64(t); + bool is_signed = btf_kflag(t); + const char *fmt_str; + const char *name; + size_t dup_cnt; + __u64 val; + int i; + + for (i = 0; i < vlen; i++, v++) { + name = btf_name_of(d, v->name_off); + dup_cnt = btf_dump_name_dups(d, d->ident_names, name); + val = btf_enum64_value(v); + if (dup_cnt > 1) { + fmt_str = is_signed ? "\n%s%s___%zd = %lldLL," + : "\n%s%s___%zd = %lluULL,"; + btf_dump_printf(d, fmt_str, + pfx(lvl + 1), name, dup_cnt, + (unsigned long long)val); + } else { + fmt_str = is_signed ? "\n%s%s = %lldLL," + : "\n%s%s = %lluULL,"; + btf_dump_printf(d, fmt_str, + pfx(lvl + 1), name, + (unsigned long long)val); + } + } +} +static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, + int lvl) +{ + __u16 vlen = btf_vlen(t); + btf_dump_printf(d, "enum%s%s", t->name_off ? " " : "", btf_dump_type_name(d, id)); - if (vlen) { - btf_dump_printf(d, " {"); - for (i = 0; i < vlen; i++, v++) { - name = btf_name_of(d, v->name_off); - /* enumerators share namespace with typedef idents */ - dup_cnt = btf_dump_name_dups(d, d->ident_names, name); - if (dup_cnt > 1) { - btf_dump_printf(d, "\n%s%s___%zu = %u,", - pfx(lvl + 1), name, dup_cnt, - (__u32)v->val); - } else { - btf_dump_printf(d, "\n%s%s = %u,", - pfx(lvl + 1), name, - (__u32)v->val); + if (!vlen) + return; + + btf_dump_printf(d, " {"); + if (btf_is_enum(t)) + btf_dump_emit_enum32_val(d, t, lvl, vlen); + else + btf_dump_emit_enum64_val(d, t, lvl, vlen); + btf_dump_printf(d, "\n%s}", pfx(lvl)); + + /* special case enums with special sizes */ + if (t->size == 1) { + /* one-byte enums can be forced with mode(byte) attribute */ + btf_dump_printf(d, " __attribute__((mode(byte)))"); + } else if (t->size == 8 && d->ptr_sz == 8) { + /* enum can be 8-byte sized if one of the enumerator values + * doesn't fit in 32-bit integer, or by adding mode(word) + * attribute (but probably only on 64-bit architectures); do + * our best here to try to satisfy the contract without adding + * unnecessary attributes + */ + bool needs_word_mode; + + if (btf_is_enum(t)) { + /* enum can't represent 64-bit values, so we need word mode */ + needs_word_mode = true; + } else { + /* enum64 needs mode(word) if none of its values has + * non-zero upper 32-bits (which means that all values + * fit in 32-bit integers and won't cause compiler to + * bump enum to be 64-bit naturally + */ + int i; + + needs_word_mode = true; + for (i = 0; i < vlen; i++) { + if (btf_enum64(t)[i].val_hi32 != 0) { + needs_word_mode = false; + break; + } } } - btf_dump_printf(d, "\n%s}", pfx(lvl)); + if (needs_word_mode) + btf_dump_printf(d, " __attribute__((mode(word)))"); } + } static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, @@ -1155,7 +1308,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, * chain, restore stack, emit warning, and try to * proceed nevertheless */ - pr_warn("not enough memory for decl stack:%d", err); + pr_warn("not enough memory for decl stack: %s\n", errstr(err)); d->decl_stack_cnt = stack_start; return; } @@ -1178,6 +1331,7 @@ skip_mod: break; case BTF_KIND_INT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: case BTF_KIND_STRUCT: case BTF_KIND_UNION: @@ -1312,6 +1466,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, btf_dump_emit_struct_fwd(d, id, t); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: btf_dump_emit_mods(d, decls); /* inline anonymous enum */ if (t->name_off == 0 && !d->skip_anon_defs) @@ -1342,7 +1497,10 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_TYPE_TAG: btf_dump_emit_mods(d, decls); name = btf_name_of(d, t->name_off); - btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); + if (btf_kflag(t)) + btf_dump_printf(d, " __attribute__((%s))", name); + else + btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); break; case BTF_KIND_ARRAY: { const struct btf_array *a = btf_array(t); @@ -1408,10 +1566,12 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, * Clang for BPF target generates func_proto with no * args as a func_proto with a single void arg (e.g., * `int (*f)(void)` vs just `int (*f)()`). We are - * going to pretend there are no args for such case. + * going to emit valid empty args (void) syntax for + * such case. Similarly and conveniently, valid + * no args case can be special-cased here as well. */ - if (vlen == 1 && p->type == 0) { - btf_dump_printf(d, ")"); + if (vlen == 0 || (vlen == 1 && p->type == 0)) { + btf_dump_printf(d, "void)"); return; } @@ -1481,11 +1641,22 @@ static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id, static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, const char *orig_name) { + char *old_name, *new_name; size_t dup_cnt = 0; + int err; - hashmap__find(name_map, orig_name, (void **)&dup_cnt); + new_name = strdup(orig_name); + if (!new_name) + return 1; + + (void)hashmap__find(name_map, orig_name, &dup_cnt); dup_cnt++; - hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); + + err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL); + if (err) + free(new_name); + + free(old_name); return dup_cnt; } @@ -1505,6 +1676,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, if (s->name_resolved) return *cached_name ? *cached_name : orig_name; + if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) { + s->name_resolved = 1; + return orig_name; + } + dup_cnt = btf_dump_name_dups(d, name_map, orig_name); if (dup_cnt > 1) { const size_t max_len = 256; @@ -1762,6 +1938,7 @@ static int btf_dump_int_data(struct btf_dump *d, if (d->typed_dump->is_array_terminated) break; if (*(char *)data == '\0') { + btf_dump_type_values(d, "'\\0'"); d->typed_dump->is_array_terminated = true; break; } @@ -1854,6 +2031,52 @@ static int btf_dump_var_data(struct btf_dump *d, return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); } +static int btf_dump_string_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_array *array = btf_array(t); + const char *chars = data; + __u32 i; + + /* Make sure it is a NUL-terminated string. */ + for (i = 0; i < array->nelems; i++) { + if ((void *)(chars + i) >= d->typed_dump->data_end) + return -E2BIG; + if (chars[i] == '\0') + break; + } + if (i == array->nelems) { + /* The caller will print this as a regular array. */ + return -EINVAL; + } + + btf_dump_data_pfx(d); + btf_dump_printf(d, "\""); + + for (i = 0; i < array->nelems; i++) { + char c = chars[i]; + + if (c == '\0') { + /* + * When printing character arrays as strings, NUL bytes + * are always treated as string terminators; they are + * never printed. + */ + break; + } + if (isprint(c)) + btf_dump_printf(d, "%c", c); + else + btf_dump_printf(d, "\\x%02x", (__u8)c); + } + + btf_dump_printf(d, "\""); + + return 0; +} + static int btf_dump_array_data(struct btf_dump *d, const struct btf_type *t, __u32 id, @@ -1861,14 +2084,17 @@ static int btf_dump_array_data(struct btf_dump *d, { const struct btf_array *array = btf_array(t); const struct btf_type *elem_type; - __u32 i, elem_size = 0, elem_type_id; + __u32 i, elem_type_id; + __s64 elem_size; bool is_array_member; + bool is_array_terminated; elem_type_id = array->type; elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); elem_size = btf__resolve_size(d->btf, elem_type_id); if (elem_size <= 0) { - pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); + pr_warn("unexpected elem size %zd for array type [%u]\n", + (ssize_t)elem_size, id); return -EINVAL; } @@ -1878,8 +2104,13 @@ static int btf_dump_array_data(struct btf_dump *d, * char arrays, so if size is 1 and element is * printable as a char, we'll do that. */ - if (elem_size == 1) + if (elem_size == 1) { + if (d->typed_dump->emit_strings && + btf_dump_string_data(d, t, id, data) == 0) { + return 0; + } d->typed_dump->is_array_char = true; + } } /* note that we increment depth before calling btf_dump_print() below; @@ -1897,12 +2128,15 @@ static int btf_dump_array_data(struct btf_dump *d, */ is_array_member = d->typed_dump->is_array_member; d->typed_dump->is_array_member = true; + is_array_terminated = d->typed_dump->is_array_terminated; + d->typed_dump->is_array_terminated = false; for (i = 0; i < array->nelems; i++, data += elem_size) { if (d->typed_dump->is_array_terminated) break; btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0); } d->typed_dump->is_array_member = is_array_member; + d->typed_dump->is_array_terminated = is_array_terminated; d->typed_dump->depth--; btf_dump_data_pfx(d); btf_dump_type_values(d, "]"); @@ -1917,7 +2151,7 @@ static int btf_dump_struct_data(struct btf_dump *d, { const struct btf_member *m = btf_members(t); __u16 n = btf_vlen(t); - int i, err; + int i, err = 0; /* note that we increment depth before calling btf_dump_print() below; * this is intentional. btf_dump_data_newline() will not print a @@ -1981,7 +2215,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d, __u32 id, __s64 *value) { - /* handle unaligned enum value */ + bool is_signed = btf_kflag(t); + if (!ptr_is_aligned(d->btf, id, data)) { __u64 val; int err; @@ -1998,13 +2233,13 @@ static int btf_dump_get_enum_value(struct btf_dump *d, *value = *(__s64 *)data; return 0; case 4: - *value = *(__s32 *)data; + *value = is_signed ? (__s64)*(__s32 *)data : *(__u32 *)data; return 0; case 2: - *value = *(__s16 *)data; + *value = is_signed ? *(__s16 *)data : *(__u16 *)data; return 0; case 1: - *value = *(__s8 *)data; + *value = is_signed ? *(__s8 *)data : *(__u8 *)data; return 0; default: pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id); @@ -2017,7 +2252,7 @@ static int btf_dump_enum_data(struct btf_dump *d, __u32 id, const void *data) { - const struct btf_enum *e; + bool is_signed; __s64 value; int i, err; @@ -2025,14 +2260,31 @@ static int btf_dump_enum_data(struct btf_dump *d, if (err) return err; - for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { - if (value != e->val) - continue; - btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); - return 0; - } + is_signed = btf_kflag(t); + if (btf_is_enum(t)) { + const struct btf_enum *e; - btf_dump_type_values(d, "%d", value); + for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { + if (value != e->val) + continue; + btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); + return 0; + } + + btf_dump_type_values(d, is_signed ? "%d" : "%u", value); + } else { + const struct btf_enum64 *e; + + for (i = 0, e = btf_enum64(t); i < btf_vlen(t); i++, e++) { + if (value != btf_enum64_value(e)) + continue; + btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); + return 0; + } + + btf_dump_type_values(d, is_signed ? "%lldLL" : "%lluULL", + (unsigned long long)value); + } return 0; } @@ -2063,9 +2315,25 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d, const struct btf_type *t, __u32 id, const void *data, - __u8 bits_offset) + __u8 bits_offset, + __u8 bit_sz) { - __s64 size = btf__resolve_size(d->btf, id); + __s64 size; + + if (bit_sz) { + /* bits_offset is at most 7. bit_sz is at most 128. */ + __u8 nr_bytes = (bits_offset + bit_sz + 7) / 8; + + /* When bit_sz is non zero, it is called from + * btf_dump_struct_data() where it only cares about + * negative error value. + * Return nr_bytes in success case to make it + * consistent as the regular integer case below. + */ + return data + nr_bytes > d->typed_dump->data_end ? -E2BIG : nr_bytes; + } + + size = btf__resolve_size(d->btf, id); if (size < 0 || size >= INT_MAX) { pr_warn("unexpected size [%zu] for id [%u]\n", @@ -2092,6 +2360,7 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d, case BTF_KIND_FLOAT: case BTF_KIND_PTR: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: if (data + bits_offset / 8 + size > d->typed_dump->data_end) return -E2BIG; break; @@ -2196,6 +2465,7 @@ static int btf_dump_type_data_check_zero(struct btf_dump *d, return -ENODATA; } case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: err = btf_dump_get_enum_value(d, t, data, id, &value); if (err) return err; @@ -2218,7 +2488,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, { int size, err = 0; - size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); + size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset, bit_sz); if (size < 0) return size; err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz); @@ -2268,6 +2538,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, err = btf_dump_struct_data(d, t, id, data); break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: /* handle bitfield and int enum values */ if (bit_sz) { __u64 print_num; @@ -2318,7 +2589,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0); /* default indent string is a tab */ - if (!opts->indent_str) + if (!OPTS_GET(opts, indent_str, NULL)) d->typed_dump->indent_str[0] = '\t'; else libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str, @@ -2327,6 +2598,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, d->typed_dump->compact = OPTS_GET(opts, compact, false); d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); + d->typed_dump->emit_strings = OPTS_GET(opts, emit_strings, false); ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); diff --git a/tools/lib/bpf/btf_iter.c b/tools/lib/bpf/btf_iter.c new file mode 100644 index 000000000000..9a6c822c2294 --- /dev/null +++ b/tools/lib/bpf/btf_iter.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Facebook */ +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/btf.h> + +#define btf_var_secinfos(t) (struct btf_var_secinfo *)btf_type_var_secinfo(t) + +#else +#include "btf.h" +#include "libbpf_internal.h" +#endif + +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, + enum btf_field_iter_kind iter_kind) +{ + it->p = NULL; + it->m_idx = -1; + it->off_idx = 0; + it->vlen = 0; + + switch (iter_kind) { + case BTF_FIELD_ITER_IDS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + it->desc = (struct btf_field_desc) { 1, {offsetof(struct btf_type, type)} }; + break; + case BTF_KIND_ARRAY: + it->desc = (struct btf_field_desc) { + 2, {sizeof(struct btf_type) + offsetof(struct btf_array, type), + sizeof(struct btf_type) + offsetof(struct btf_array, index_type)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, type)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, type)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, type)} + }; + break; + case BTF_KIND_DATASEC: + it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_var_secinfo), + 1, {offsetof(struct btf_var_secinfo, type)} + }; + break; + default: + return -EINVAL; + } + break; + case BTF_FIELD_ITER_STRS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + case BTF_KIND_ARRAY: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + case BTF_KIND_DATASEC: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)} + }; + break; + case BTF_KIND_ENUM: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum), + 1, {offsetof(struct btf_enum, name_off)} + }; + break; + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum64), + 1, {offsetof(struct btf_enum64, name_off)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, name_off)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, name_off)} + }; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + if (it->desc.m_sz) + it->vlen = btf_vlen(t); + + it->p = t; + return 0; +} + +__u32 *btf_field_iter_next(struct btf_field_iter *it) +{ + if (!it->p) + return NULL; + + if (it->m_idx < 0) { + if (it->off_idx < it->desc.t_off_cnt) + return it->p + it->desc.t_offs[it->off_idx++]; + /* move to per-member iteration */ + it->m_idx = 0; + it->p += sizeof(struct btf_type); + it->off_idx = 0; + } + + /* if type doesn't have members, stop */ + if (it->desc.m_sz == 0) { + it->p = NULL; + return NULL; + } + + if (it->off_idx >= it->desc.m_off_cnt) { + /* exhausted this member's fields, go to the next member */ + it->m_idx++; + it->p += it->desc.m_sz; + it->off_idx = 0; + } + + if (it->m_idx < it->vlen) + return it->p + it->desc.m_offs[it->off_idx++]; + + it->p = NULL; + return NULL; +} diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c new file mode 100644 index 000000000000..53d1f3541bce --- /dev/null +++ b/tools/lib/bpf/btf_relocate.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/bsearch.h> +#include <linux/btf.h> +#include <linux/sort.h> +#include <linux/string.h> +#include <linux/bpf_verifier.h> + +#define btf_type_by_id (struct btf_type *)btf_type_by_id +#define btf__type_cnt btf_nr_types +#define btf__base_btf btf_base_btf +#define btf__name_by_offset btf_name_by_offset +#define btf__str_by_offset btf_str_by_offset +#define btf_kflag btf_type_kflag + +#define calloc(nmemb, sz) kvcalloc(nmemb, sz, GFP_KERNEL | __GFP_NOWARN) +#define free(ptr) kvfree(ptr) +#define qsort(base, num, sz, cmp) sort(base, num, sz, cmp, NULL) + +#else + +#include "btf.h" +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" + +#endif /* __KERNEL__ */ + +struct btf; + +struct btf_relocate { + struct btf *btf; + const struct btf *base_btf; + const struct btf *dist_base_btf; + unsigned int nr_base_types; + unsigned int nr_split_types; + unsigned int nr_dist_base_types; + int dist_str_len; + int base_str_len; + __u32 *id_map; + __u32 *str_map; +}; + +/* Set temporarily in relocation id_map if distilled base struct/union is + * embedded in a split BTF struct/union; in such a case, size information must + * match between distilled base BTF and base BTF representation of type. + */ +#define BTF_IS_EMBEDDED ((__u32)-1) + +/* <name, size, id> triple used in sorting/searching distilled base BTF. */ +struct btf_name_info { + const char *name; + /* set when search requires a size match */ + bool needs_size: 1; + unsigned int size: 31; + __u32 id; +}; + +static int btf_relocate_rewrite_type_id(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((id = btf_field_iter_next(&it))) + *id = r->id_map[*id]; + return 0; +} + +/* Simple string comparison used for sorting within BTF, since all distilled + * types are named. If strings match, and size is non-zero for both elements + * fall back to using size for ordering. + */ +static int cmp_btf_name_size(const void *n1, const void *n2) +{ + const struct btf_name_info *ni1 = n1; + const struct btf_name_info *ni2 = n2; + int name_diff = strcmp(ni1->name, ni2->name); + + if (!name_diff && ni1->needs_size && ni2->needs_size) + return ni2->size - ni1->size; + return name_diff; +} + +/* Binary search with a small twist; find leftmost element that matches + * so that we can then iterate through all exact matches. So for example + * searching { "a", "bb", "bb", "c" } we would always match on the + * leftmost "bb". + */ +static struct btf_name_info *search_btf_name_size(struct btf_name_info *key, + struct btf_name_info *vals, + int nelems) +{ + struct btf_name_info *ret = NULL; + int high = nelems - 1; + int low = 0; + + while (low <= high) { + int mid = (low + high)/2; + struct btf_name_info *val = &vals[mid]; + int diff = cmp_btf_name_size(key, val); + + if (diff == 0) + ret = val; + /* even if found, keep searching for leftmost match */ + if (diff <= 0) + high = mid - 1; + else + low = mid + 1; + } + return ret; +} + +/* If a member of a split BTF struct/union refers to a base BTF + * struct/union, mark that struct/union id temporarily in the id_map + * with BTF_IS_EMBEDDED. Members can be const/restrict/volatile/typedef + * reference types, but if a pointer is encountered, the type is no longer + * considered embedded. + */ +static int btf_mark_embedded_composite_type_ids(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *id; + int err; + + if (!btf_is_composite(t)) + return 0; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((id = btf_field_iter_next(&it))) { + __u32 next_id = *id; + + while (next_id) { + t = btf_type_by_id(r->btf, next_id); + switch (btf_kind(t)) { + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_TYPEDEF: + case BTF_KIND_TYPE_TAG: + next_id = t->type; + break; + case BTF_KIND_ARRAY: { + struct btf_array *a = btf_array(t); + + next_id = a->type; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + if (next_id < r->nr_dist_base_types) + r->id_map[next_id] = BTF_IS_EMBEDDED; + next_id = 0; + break; + default: + next_id = 0; + break; + } + } + } + + return 0; +} + +/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate + * through base BTF looking up distilled type (using binary search) equivalents. + */ +static int btf_relocate_map_distilled_base(struct btf_relocate *r) +{ + struct btf_name_info *info, *info_end; + struct btf_type *base_t, *dist_t; + __u8 *base_name_cnt = NULL; + int err = 0; + __u32 id; + + /* generate a sort index array of name/type ids sorted by name for + * distilled base BTF to speed name-based lookups. + */ + info = calloc(r->nr_dist_base_types, sizeof(*info)); + if (!info) { + err = -ENOMEM; + goto done; + } + info_end = info + r->nr_dist_base_types; + for (id = 0; id < r->nr_dist_base_types; id++) { + dist_t = btf_type_by_id(r->dist_base_btf, id); + info[id].name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off); + info[id].id = id; + info[id].size = dist_t->size; + info[id].needs_size = true; + } + qsort(info, r->nr_dist_base_types, sizeof(*info), cmp_btf_name_size); + + /* Mark distilled base struct/union members of split BTF structs/unions + * in id_map with BTF_IS_EMBEDDED; this signals that these types + * need to match both name and size, otherwise embedding the base + * struct/union in the split type is invalid. + */ + for (id = r->nr_dist_base_types; id < r->nr_dist_base_types + r->nr_split_types; id++) { + err = btf_mark_embedded_composite_type_ids(r, id); + if (err) + goto done; + } + + /* Collect name counts for composite types in base BTF. If multiple + * instances of a struct/union of the same name exist, we need to use + * size to determine which to map to since name alone is ambiguous. + */ + base_name_cnt = calloc(r->base_str_len, sizeof(*base_name_cnt)); + if (!base_name_cnt) { + err = -ENOMEM; + goto done; + } + for (id = 1; id < r->nr_base_types; id++) { + base_t = btf_type_by_id(r->base_btf, id); + if (!btf_is_composite(base_t) || !base_t->name_off) + continue; + if (base_name_cnt[base_t->name_off] < 255) + base_name_cnt[base_t->name_off]++; + } + + /* Now search base BTF for matching distilled base BTF types. */ + for (id = 1; id < r->nr_base_types; id++) { + struct btf_name_info *dist_info, base_info = {}; + int dist_kind, base_kind; + + base_t = btf_type_by_id(r->base_btf, id); + /* distilled base consists of named types only. */ + if (!base_t->name_off) + continue; + base_kind = btf_kind(base_t); + base_info.id = id; + base_info.name = btf__name_by_offset(r->base_btf, base_t->name_off); + switch (base_kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* These types should match both name and size */ + base_info.needs_size = true; + base_info.size = base_t->size; + break; + case BTF_KIND_FWD: + /* No size considerations for fwds. */ + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* Size only needs to be used for struct/union if there + * are multiple types in base BTF with the same name. + * If there are multiple _distilled_ types with the same + * name (a very unlikely scenario), that doesn't matter + * unless corresponding _base_ types to match them are + * missing. + */ + base_info.needs_size = base_name_cnt[base_t->name_off] > 1; + base_info.size = base_t->size; + break; + default: + continue; + } + /* iterate over all matching distilled base types */ + for (dist_info = search_btf_name_size(&base_info, info, r->nr_dist_base_types); + dist_info != NULL && dist_info < info_end && + cmp_btf_name_size(&base_info, dist_info) == 0; + dist_info++) { + if (!dist_info->id || dist_info->id >= r->nr_dist_base_types) { + pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n", + id, dist_info->id); + err = -EINVAL; + goto done; + } + dist_t = btf_type_by_id(r->dist_base_btf, dist_info->id); + dist_kind = btf_kind(dist_t); + + /* Validate that the found distilled type is compatible. + * Do not error out on mismatch as another match may + * occur for an identically-named type. + */ + switch (dist_kind) { + case BTF_KIND_FWD: + switch (base_kind) { + case BTF_KIND_FWD: + if (btf_kflag(dist_t) != btf_kflag(base_t)) + continue; + break; + case BTF_KIND_STRUCT: + if (btf_kflag(base_t)) + continue; + break; + case BTF_KIND_UNION: + if (!btf_kflag(base_t)) + continue; + break; + default: + continue; + } + break; + case BTF_KIND_INT: + if (dist_kind != base_kind || + btf_int_encoding(base_t) != btf_int_encoding(dist_t)) + continue; + break; + case BTF_KIND_FLOAT: + if (dist_kind != base_kind) + continue; + break; + case BTF_KIND_ENUM: + /* ENUM and ENUM64 are encoded as sized ENUM in + * distilled base BTF. + */ + if (base_kind != dist_kind && base_kind != BTF_KIND_ENUM64) + continue; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* size verification is required for embedded + * struct/unions. + */ + if (r->id_map[dist_info->id] == BTF_IS_EMBEDDED && + base_t->size != dist_t->size) + continue; + break; + default: + continue; + } + if (r->id_map[dist_info->id] && + r->id_map[dist_info->id] != BTF_IS_EMBEDDED) { + /* we already have a match; this tells us that + * multiple base types of the same name + * have the same size, since for cases where + * multiple types have the same name we match + * on name and size. In this case, we have + * no way of determining which to relocate + * to in base BTF, so error out. + */ + pr_warn("distilled base BTF type '%s' [%u], size %u has multiple candidates of the same size (ids [%u, %u]) in base BTF\n", + base_info.name, dist_info->id, + base_t->size, id, r->id_map[dist_info->id]); + err = -EINVAL; + goto done; + } + /* map id and name */ + r->id_map[dist_info->id] = id; + r->str_map[dist_t->name_off] = base_t->name_off; + } + } + /* ensure all distilled BTF ids now have a mapping... */ + for (id = 1; id < r->nr_dist_base_types; id++) { + const char *name; + + if (r->id_map[id] && r->id_map[id] != BTF_IS_EMBEDDED) + continue; + dist_t = btf_type_by_id(r->dist_base_btf, id); + name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off); + pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n", + name, id); + err = -EINVAL; + break; + } +done: + free(base_name_cnt); + free(info); + return err; +} + +/* distilled base should only have named int/float/enum/fwd/struct/union types. */ +static int btf_relocate_validate_distilled_base(struct btf_relocate *r) +{ + unsigned int i; + + for (i = 1; i < r->nr_dist_base_types; i++) { + struct btf_type *t = btf_type_by_id(r->dist_base_btf, i); + int kind = btf_kind(t); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_FWD: + if (t->name_off) + break; + pr_warn("type [%d], kind [%d] is invalid for distilled base BTF; it is anonymous\n", + i, kind); + return -EINVAL; + default: + pr_warn("type [%d] in distilled based BTF has unexpected kind [%d]\n", + i, kind); + return -EINVAL; + } + } + return 0; +} + +static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *str_off; + int off, err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); + if (err) + return err; + + while ((str_off = btf_field_iter_next(&it))) { + if (!*str_off) + continue; + if (*str_off >= r->dist_str_len) { + *str_off += r->base_str_len - r->dist_str_len; + } else { + off = r->str_map[*str_off]; + if (!off) { + pr_warn("string '%s' [offset %u] is not mapped to base BTF\n", + btf__str_by_offset(r->btf, off), *str_off); + return -ENOENT; + } + *str_off = off; + } + } + return 0; +} + +/* If successful, output of relocation is updated BTF with base BTF pointing + * at base_btf, and type ids, strings adjusted accordingly. + */ +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map) +{ + unsigned int nr_types = btf__type_cnt(btf); + const struct btf_header *dist_base_hdr; + const struct btf_header *base_hdr; + struct btf_relocate r = {}; + int err = 0; + __u32 id, i; + + r.dist_base_btf = btf__base_btf(btf); + if (!base_btf || r.dist_base_btf == base_btf) + return -EINVAL; + + r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf); + r.nr_base_types = btf__type_cnt(base_btf); + r.nr_split_types = nr_types - r.nr_dist_base_types; + r.btf = btf; + r.base_btf = base_btf; + + r.id_map = calloc(nr_types, sizeof(*r.id_map)); + r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map)); + dist_base_hdr = btf_header(r.dist_base_btf); + base_hdr = btf_header(r.base_btf); + r.dist_str_len = dist_base_hdr->str_len; + r.base_str_len = base_hdr->str_len; + if (!r.id_map || !r.str_map) { + err = -ENOMEM; + goto err_out; + } + + err = btf_relocate_validate_distilled_base(&r); + if (err) + goto err_out; + + /* Split BTF ids need to be adjusted as base and distilled base + * have different numbers of types, changing the start id of split + * BTF. + */ + for (id = r.nr_dist_base_types; id < nr_types; id++) + r.id_map[id] = id + r.nr_base_types - r.nr_dist_base_types; + + /* Build a map from distilled base ids to actual base BTF ids; it is used + * to update split BTF id references. Also build a str_map mapping from + * distilled base BTF names to base BTF names. + */ + err = btf_relocate_map_distilled_base(&r); + if (err) + goto err_out; + + /* Next, rewrite type ids in split BTF, replacing split ids with updated + * ids based on number of types in base BTF, and base ids with + * relocated ids from base_btf. + */ + for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) { + err = btf_relocate_rewrite_type_id(&r, id); + if (err) + goto err_out; + } + /* String offsets now need to be updated using the str_map. */ + for (i = 0; i < r.nr_split_types; i++) { + err = btf_relocate_rewrite_strs(&r, i + r.nr_dist_base_types); + if (err) + goto err_out; + } + /* Finally reset base BTF to be base_btf */ + btf_set_base_btf(btf, base_btf); + + if (id_map) { + *id_map = r.id_map; + r.id_map = NULL; + } +err_out: + free(r.id_map); + free(r.str_map); + return err; +} diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c new file mode 100644 index 000000000000..295dbda24580 --- /dev/null +++ b/tools/lib/bpf/elf.c @@ -0,0 +1,558 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <libelf.h> +#include <gelf.h> +#include <fcntl.h> +#include <linux/kernel.h> + +#include "libbpf_internal.h" + +/* A SHT_GNU_versym section holds 16-bit words. This bit is set if + * the symbol is hidden and can only be seen when referenced using an + * explicit version number. This is a GNU extension. + */ +#define VERSYM_HIDDEN 0x8000 + +/* This is the mask for the rest of the data in a word read from a + * SHT_GNU_versym section. + */ +#define VERSYM_VERSION 0x7fff + +int elf_open(const char *binary_path, struct elf_fd *elf_fd) +{ + int fd, ret; + Elf *elf; + + elf_fd->elf = NULL; + elf_fd->fd = -1; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn("elf: failed to init libelf for %s\n", binary_path); + return -LIBBPF_ERRNO__LIBELF; + } + fd = open(binary_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = -errno; + pr_warn("elf: failed to open %s: %s\n", binary_path, errstr(ret)); + return ret; + } + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); + close(fd); + return -LIBBPF_ERRNO__FORMAT; + } + elf_fd->fd = fd; + elf_fd->elf = elf; + return 0; +} + +void elf_close(struct elf_fd *elf_fd) +{ + if (!elf_fd) + return; + elf_end(elf_fd->elf); + close(elf_fd->fd); +} + +/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ +static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) +{ + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + + if (!gelf_getshdr(scn, &sh)) + continue; + if (sh.sh_type == sh_type) + return scn; + } + return NULL; +} + +struct elf_sym { + const char *name; + GElf_Sym sym; + GElf_Shdr sh; + int ver; + bool hidden; +}; + +struct elf_sym_iter { + Elf *elf; + Elf_Data *syms; + Elf_Data *versyms; + Elf_Data *verdefs; + size_t nr_syms; + size_t strtabidx; + size_t verdef_strtabidx; + size_t next_sym_idx; + struct elf_sym sym; + int st_type; +}; + +static int elf_sym_iter_new(struct elf_sym_iter *iter, + Elf *elf, const char *binary_path, + int sh_type, int st_type) +{ + Elf_Scn *scn = NULL; + GElf_Ehdr ehdr; + GElf_Shdr sh; + + memset(iter, 0, sizeof(*iter)); + + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); + return -EINVAL; + } + + scn = elf_find_next_scn_by_type(elf, sh_type, NULL); + if (!scn) { + pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", + binary_path); + return -ENOENT; + } + + if (!gelf_getshdr(scn, &sh)) + return -EINVAL; + + iter->strtabidx = sh.sh_link; + iter->syms = elf_getdata(scn, 0); + if (!iter->syms) { + pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", + binary_path, elf_errmsg(-1)); + return -EINVAL; + } + iter->nr_syms = iter->syms->d_size / sh.sh_entsize; + iter->elf = elf; + iter->st_type = st_type; + + /* Version symbol table is meaningful to dynsym only */ + if (sh_type != SHT_DYNSYM) + return 0; + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL); + if (!scn) + return 0; + iter->versyms = elf_getdata(scn, 0); + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL); + if (!scn) + return 0; + + iter->verdefs = elf_getdata(scn, 0); + if (!iter->verdefs || !gelf_getshdr(scn, &sh)) { + pr_warn("elf: failed to get verdef ELF section in '%s'\n", binary_path); + return -EINVAL; + } + iter->verdef_strtabidx = sh.sh_link; + + return 0; +} + +static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) +{ + struct elf_sym *ret = &iter->sym; + GElf_Sym *sym = &ret->sym; + const char *name = NULL; + GElf_Versym versym; + Elf_Scn *sym_scn; + size_t idx; + + for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) { + if (!gelf_getsym(iter->syms, idx, sym)) + continue; + if (GELF_ST_TYPE(sym->st_info) != iter->st_type) + continue; + name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name); + if (!name) + continue; + sym_scn = elf_getscn(iter->elf, sym->st_shndx); + if (!sym_scn) + continue; + if (!gelf_getshdr(sym_scn, &ret->sh)) + continue; + + iter->next_sym_idx = idx + 1; + ret->name = name; + ret->ver = 0; + ret->hidden = false; + + if (iter->versyms) { + if (!gelf_getversym(iter->versyms, idx, &versym)) + continue; + ret->ver = versym & VERSYM_VERSION; + ret->hidden = versym & VERSYM_HIDDEN; + } + return ret; + } + + return NULL; +} + +static const char *elf_get_vername(struct elf_sym_iter *iter, int ver) +{ + GElf_Verdaux verdaux; + GElf_Verdef verdef; + int offset; + + if (!iter->verdefs) + return NULL; + + offset = 0; + while (gelf_getverdef(iter->verdefs, offset, &verdef)) { + if (verdef.vd_ndx != ver) { + if (!verdef.vd_next) + break; + + offset += verdef.vd_next; + continue; + } + + if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux)) + break; + + return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name); + + } + return NULL; +} + +static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym, + const char *name, size_t name_len, const char *lib_ver) +{ + const char *ver_name; + + /* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER + * make sure the func part matches the user specified name + */ + if (strncmp(sym->name, name, name_len) != 0) + return false; + + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (sym->name[name_len] != '\0' && sym->name[name_len] != '@') + return false; + + /* If user does not specify symbol version, then we got a match */ + if (!lib_ver) + return true; + + /* If user specifies symbol version, for dynamic symbols, + * get version name from ELF verdef section for comparison. + */ + if (sh_type == SHT_DYNSYM) { + ver_name = elf_get_vername(iter, sym->ver); + if (!ver_name) + return false; + return strcmp(ver_name, lib_ver) == 0; + } + + /* For normal symbols, it is already in form of func@LIB_VER */ + return strcmp(sym->name, name) == 0; +} + +/* Transform symbol's virtual address (absolute for binaries and relative + * for shared libs) into file offset, which is what kernel is expecting + * for uprobe/uretprobe attachment. + * See Documentation/trace/uprobetracer.rst for more details. This is done + * by looking up symbol's containing section's header and using iter's virtual + * address (sh_addr) and corresponding file offset (sh_offset) to transform + * sym.st_value (virtual address) into desired final file offset. + */ +static unsigned long elf_sym_offset(struct elf_sym *sym) +{ + return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset; +} + +/* Find offset of function name in the provided ELF object. "binary_path" is + * the path to the ELF binary represented by "elf", and only used for error + * reporting matters. "name" matches symbol name or name@@LIB for library + * functions. + */ +long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) +{ + int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + const char *at_symbol, *lib_ver; + bool is_shared_lib; + long ret = -ENOENT; + size_t name_len; + GElf_Ehdr ehdr; + + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + /* for shared lib case, we do not need to calculate relative offset */ + is_shared_lib = ehdr.e_type == ET_DYN; + + /* Does name specify "@@LIB_VER" or "@LIB_VER" ? */ + at_symbol = strchr(name, '@'); + if (at_symbol) { + name_len = at_symbol - name; + /* skip second @ if it's @@LIB_VER case */ + if (at_symbol[1] == '@') + at_symbol++; + lib_ver = at_symbol + 1; + } else { + name_len = strlen(name); + lib_ver = NULL; + } + + /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if + * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically + * linked binary may not have SHT_DYMSYM, so absence of a section should not be + * reported as a warning/error. + */ + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + struct elf_sym_iter iter; + struct elf_sym *sym; + int last_bind = -1; + int cur_bind; + + ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC); + if (ret == -ENOENT) + continue; + if (ret) + goto out; + + while ((sym = elf_sym_iter_next(&iter))) { + if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver)) + continue; + + cur_bind = GELF_ST_BIND(sym->sym.st_info); + + if (ret > 0) { + /* handle multiple matches */ + if (elf_sym_offset(sym) == ret) { + /* same offset, no problem */ + continue; + } else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { + /* Only accept one non-weak bind. */ + pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", + sym->name, name, binary_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } else if (cur_bind == STB_WEAK) { + /* already have a non-weak bind, and + * this is a weak bind, so ignore. + */ + continue; + } + } + + ret = elf_sym_offset(sym); + last_bind = cur_bind; + } + if (ret > 0) + break; + } + + if (ret > 0) { + pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, + ret); + } else { + if (ret == 0) { + pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, + is_shared_lib ? "should not be 0 in a shared library" : + "try using shared library path instead"); + ret = -ENOENT; + } else { + pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); + } + } +out: + return ret; +} + +/* Find offset of function name in ELF object specified by path. "name" matches + * symbol name or name@@LIB for library functions. + */ +long elf_find_func_offset_from_file(const char *binary_path, const char *name) +{ + struct elf_fd elf_fd; + long ret = -ENOENT; + + ret = elf_open(binary_path, &elf_fd); + if (ret) + return ret; + ret = elf_find_func_offset(elf_fd.elf, binary_path, name); + elf_close(&elf_fd); + return ret; +} + +struct symbol { + const char *name; + int bind; + int idx; +}; + +static int symbol_cmp(const void *a, const void *b) +{ + const struct symbol *sym_a = a; + const struct symbol *sym_b = b; + + return strcmp(sym_a->name, sym_b->name); +} + +/* + * Return offsets in @poffsets for symbols specified in @syms array argument. + * On success returns 0 and offsets are returned in allocated array with @cnt + * size, that needs to be released by the caller. + */ +int elf_resolve_syms_offsets(const char *binary_path, int cnt, + const char **syms, unsigned long **poffsets, + int st_type) +{ + int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + int err = 0, i, cnt_done = 0; + unsigned long *offsets; + struct symbol *symbols; + struct elf_fd elf_fd; + + err = elf_open(binary_path, &elf_fd); + if (err) + return err; + + offsets = calloc(cnt, sizeof(*offsets)); + symbols = calloc(cnt, sizeof(*symbols)); + + if (!offsets || !symbols) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < cnt; i++) { + symbols[i].name = syms[i]; + symbols[i].idx = i; + } + + qsort(symbols, cnt, sizeof(*symbols), symbol_cmp); + + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + struct elf_sym_iter iter; + struct elf_sym *sym; + + err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], st_type); + if (err == -ENOENT) + continue; + if (err) + goto out; + + while ((sym = elf_sym_iter_next(&iter))) { + unsigned long sym_offset = elf_sym_offset(sym); + int bind = GELF_ST_BIND(sym->sym.st_info); + struct symbol *found, tmp = { + .name = sym->name, + }; + unsigned long *offset; + + found = bsearch(&tmp, symbols, cnt, sizeof(*symbols), symbol_cmp); + if (!found) + continue; + + offset = &offsets[found->idx]; + if (*offset > 0) { + /* same offset, no problem */ + if (*offset == sym_offset) + continue; + /* handle multiple matches */ + if (found->bind != STB_WEAK && bind != STB_WEAK) { + /* Only accept one non-weak bind. */ + pr_warn("elf: ambiguous match found '%s@%lu' in '%s' previous offset %lu\n", + sym->name, sym_offset, binary_path, *offset); + err = -ESRCH; + goto out; + } else if (bind == STB_WEAK) { + /* already have a non-weak bind, and + * this is a weak bind, so ignore. + */ + continue; + } + } else { + cnt_done++; + } + *offset = sym_offset; + found->bind = bind; + } + } + + if (cnt != cnt_done) { + err = -ENOENT; + goto out; + } + + *poffsets = offsets; + +out: + free(symbols); + if (err) + free(offsets); + elf_close(&elf_fd); + return err; +} + +/* + * Return offsets in @poffsets for symbols specified by @pattern argument. + * On success returns 0 and offsets are returned in allocated @poffsets + * array with the @pctn size, that needs to be released by the caller. + */ +int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, + unsigned long **poffsets, size_t *pcnt) +{ + int sh_types[2] = { SHT_SYMTAB, SHT_DYNSYM }; + unsigned long *offsets = NULL; + size_t cap = 0, cnt = 0; + struct elf_fd elf_fd; + int err = 0, i; + + err = elf_open(binary_path, &elf_fd); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + struct elf_sym_iter iter; + struct elf_sym *sym; + + err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC); + if (err == -ENOENT) + continue; + if (err) + goto out; + + while ((sym = elf_sym_iter_next(&iter))) { + if (!glob_match(sym->name, pattern)) + continue; + + err = libbpf_ensure_mem((void **) &offsets, &cap, sizeof(*offsets), + cnt + 1); + if (err) + goto out; + + offsets[cnt++] = elf_sym_offset(sym); + } + + /* If we found anything in the first symbol section, + * do not search others to avoid duplicates. + */ + if (cnt) + break; + } + + if (cnt) { + *poffsets = offsets; + *pcnt = cnt; + } else { + err = -ENOENT; + } + +out: + if (err) + free(offsets); + elf_close(&elf_fd); + return err; +} diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c new file mode 100644 index 000000000000..b842b83e2480 --- /dev/null +++ b/tools/lib/bpf/features.c @@ -0,0 +1,609 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include <linux/kernel.h> +#include <linux/filter.h> +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(int token_fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + attr.prog_token_fd = token_fd; + if (token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); + if (map < 0) { + ret = -errno; + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_func(int token_fd) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_func_global(int token_fd) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_datasec(int token_fd) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_qmark_datasec(int token_fd) +{ + static const char strs[] = "\0x\0?.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC ?.data */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_float(int token_fd) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_decl_tag(int token_fd) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_type_tag(int token_fd) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_array_mmap(int token_fd) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_MMAPABLE | (token_fd ? BPF_F_TOKEN_FD : 0), + .token_fd = token_fd, + ); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_prog_bind_map(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); + if (map < 0) { + ret = -errno; + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. + */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), &opts); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. */ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0 || err != -EBADF) { + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + return 0; + } + + /* Initial multi-uprobe support in kernel didn't handle PID filtering + * correctly (it was doing thread filtering, not process filtering). + * So now we'll detect if PID filtering logic was fixed, and, if not, + * we'll pretend multi-uprobes are not supported, if not. + * Multi-uprobes are used in USDT attachment logic, and we need to be + * conservative here, because multi-uprobe selection happens early at + * load time, while the use of PID filtering is known late at + * attachment time, at which point it's too late to undo multi-uprobe + * selection. + * + * Creating uprobe with pid == -1 for (invalid) '/' binary will fail + * early with -EINVAL on kernels with fixed PID filtering logic; + * otherwise -ESRCH would be returned if passed correct binary path + * (but we'll just get -BADF, of course). + */ + link_opts.uprobe_multi.pid = -1; /* invalid PID */ + link_opts.uprobe_multi.path = "/"; /* invalid path */ + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EINVAL; +} + +static int probe_kern_bpf_cookie(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(int token_fd) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_arg_ctx_tag(int token_fd) +{ + static const char strs[] = "\0a\0b\0arg:ctx\0"; + const __u32 types[] = { + /* [1] INT */ + BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4), + /* [2] PTR -> VOID */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + /* [3] FUNC_PROTO `int(void *a)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(1 /* "a" */, 2), + /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */ + BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3), + /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), + BTF_PARAM_ENC(3 /* "b" */, 2), + /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */ + BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5), + /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */ + BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0), + }; + const struct bpf_insn insns[] = { + /* main prog */ + BPF_CALL_REL(+1), + BPF_EXIT_INSN(), + /* global subprog */ + BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */ + BPF_EXIT_INSN(), + }; + const struct bpf_func_info_min func_infos[] = { + { 0, 4 }, /* main prog -> FUNC 'a' */ + { 2, 6 }, /* subprog -> FUNC 'b' */ + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns); + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); + if (btf_fd < 0) + return 0; + + opts.prog_btf_fd = btf_fd; + opts.func_info = &func_infos; + opts.func_info_cnt = ARRAY_SIZE(func_infos); + opts.func_info_rec_size = sizeof(func_infos[0]); + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx", + "GPL", insns, insn_cnt, &opts); + close(btf_fd); + + return probe_fd(prog_fd); +} + +typedef int (*feature_probe_fn)(int /* token_fd */); + +static struct kern_feature_cache feature_cache; + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, + [FEAT_ARG_CTX_TAG] = { + "kernel-side __arg_ctx tag", probe_kern_arg_ctx_tag, + }, + [FEAT_BTF_QMARK_DATASEC] = { + "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec, + }, +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; + + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { + ret = feat->probe(cache->token_fd); + if (ret > 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %s\n", + feat->desc, errstr(ret)); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } + } + + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; +} diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 8ecef1088ba2..cd5c2543f54d 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -4,6 +4,7 @@ #include <stdlib.h> #include <string.h> #include <errno.h> +#include <asm/byteorder.h> #include <linux/filter.h> #include <sys/param.h> #include "btf.h" @@ -13,7 +14,6 @@ #include "hashmap.h" #include "bpf_gen_internal.h" #include "skel_internal.h" -#include <asm/byteorder.h> #define MAX_USED_MAPS 64 #define MAX_USED_PROGS 32 @@ -109,6 +109,7 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in static int add_data(struct bpf_gen *gen, const void *data, __u32 size); static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off); +static void emit_signature_match(struct bpf_gen *gen); void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps) { @@ -151,6 +152,8 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); emit(gen, BPF_EXIT_INSN()); + if (OPTS_GET(gen->opts, gen_hash, false)) + emit_signature_match(gen); } static int add_data(struct bpf_gen *gen, const void *data, __u32 size) @@ -367,6 +370,8 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off) __emit_sys_close(gen); } +static void compute_sha_update_offsets(struct bpf_gen *gen); + int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) { int i; @@ -393,7 +398,10 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); - pr_debug("gen: finish %d\n", gen->error); + if (OPTS_GET(gen->opts, gen_hash, false)) + compute_sha_update_offsets(gen); + + pr_debug("gen: finish %s\n", errstr(gen->error)); if (!gen->error) { struct gen_loader_opts *opts = gen->opts; @@ -401,6 +409,15 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) opts->insns_sz = gen->insn_cur - gen->insn_start; opts->data = gen->data_start; opts->data_sz = gen->data_cur - gen->data_start; + + /* use target endianness for embedded loader */ + if (gen->swapped_endian) { + struct bpf_insn *insn = (struct bpf_insn *)opts->insns; + int insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); + + for (i = 0; i < insn_cnt; i++) + bpf_insn_bswap(insn++); + } } return gen->error; } @@ -414,6 +431,44 @@ void bpf_gen__free(struct bpf_gen *gen) free(gen); } +/* + * Fields of bpf_attr are set to values in native byte-order before being + * written to the target-bound data blob, and may need endian conversion. + * This macro allows providing the correct value in situ more simply than + * writing a separate converter for *all fields* of *all records* included + * in union bpf_attr. Note that sizeof(rval) should match the assignment + * target to avoid runtime problems. + */ +#define tgt_endian(rval) ({ \ + typeof(rval) _val = (rval); \ + if (gen->swapped_endian) { \ + switch (sizeof(_val)) { \ + case 1: break; \ + case 2: _val = bswap_16(_val); break; \ + case 4: _val = bswap_32(_val); break; \ + case 8: _val = bswap_64(_val); break; \ + default: pr_warn("unsupported bswap size!\n"); \ + } \ + } \ + _val; \ +}) + +static void compute_sha_update_offsets(struct bpf_gen *gen) +{ + __u64 sha[SHA256_DWORD_SIZE]; + __u64 sha_dw; + int i; + + libbpf_sha256(gen->data_start, gen->data_cur - gen->data_start, (__u8 *)sha); + for (i = 0; i < SHA256_DWORD_SIZE; i++) { + struct bpf_insn *insn = + (struct bpf_insn *)(gen->insn_start + gen->hash_insn_offset[i]); + sha_dw = tgt_endian(sha[i]); + insn[0].imm = (__u32)sha_dw; + insn[1].imm = sha_dw >> 32; + } +} + void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, __u32 btf_raw_size) { @@ -422,11 +477,12 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: load_btf: size %d\n", btf_raw_size); btf_data = add_data(gen, btf_raw_data, btf_raw_size); - attr.btf_size = btf_raw_size; + attr.btf_size = tgt_endian(btf_raw_size); btf_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: load_btf: off %d size %d, attr: off %d size %d\n", + btf_data, btf_raw_size, btf_load_attr, attr_size); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4, @@ -457,28 +513,29 @@ void bpf_gen__map_create(struct bpf_gen *gen, union bpf_attr attr; memset(&attr, 0, attr_size); - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.map_flags = map_attr->map_flags; - attr.map_extra = map_attr->map_extra; + attr.map_type = tgt_endian(map_type); + attr.key_size = tgt_endian(key_size); + attr.value_size = tgt_endian(value_size); + attr.map_flags = tgt_endian(map_attr->map_flags); + attr.map_extra = tgt_endian(map_attr->map_extra); if (map_name) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); - attr.numa_node = map_attr->numa_node; - attr.map_ifindex = map_attr->map_ifindex; - attr.max_entries = max_entries; - attr.btf_key_type_id = map_attr->btf_key_type_id; - attr.btf_value_type_id = map_attr->btf_value_type_id; - - pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", - attr.map_name, map_idx, map_type, attr.btf_value_type_id); + attr.numa_node = tgt_endian(map_attr->numa_node); + attr.map_ifindex = tgt_endian(map_attr->map_ifindex); + attr.max_entries = tgt_endian(max_entries); + attr.btf_key_type_id = tgt_endian(map_attr->btf_key_type_id); + attr.btf_value_type_id = tgt_endian(map_attr->btf_value_type_id); map_create_attr = add_data(gen, &attr, attr_size); - if (attr.btf_value_type_id) + pr_debug("gen: map_create: %s idx %d type %d value_type_id %d, attr: off %d size %d\n", + map_name, map_idx, map_type, map_attr->btf_value_type_id, + map_create_attr, attr_size); + + if (map_attr->btf_value_type_id) /* populate union bpf_attr with btf_fd saved in the stack earlier */ move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4, stack_off(btf_fd)); - switch (attr.map_type) { + switch (map_type) { case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4, @@ -498,8 +555,8 @@ void bpf_gen__map_create(struct bpf_gen *gen, /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", - attr.map_name, map_idx, map_type, value_size, - attr.btf_value_type_id); + map_name, map_idx, map_type, value_size, + map_attr->btf_value_type_id); emit_check_err(gen); /* remember map_fd in the stack, if successful */ if (map_idx < 0) { @@ -523,6 +580,29 @@ void bpf_gen__map_create(struct bpf_gen *gen, emit_sys_close_stack(gen, stack_off(inner_map_fd)); } +static void emit_signature_match(struct bpf_gen *gen) +{ + __s64 off; + int i; + + for (i = 0; i < SHA256_DWORD_SIZE; i++) { + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX, + 0, 0, 0, 0)); + emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, i * sizeof(__u64))); + gen->hash_insn_offset[i] = gen->insn_cur - gen->insn_start; + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_3, 0, 0, 0, 0, 0)); + + off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1; + if (is_simm16(off)) { + emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL)); + emit(gen, BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, off)); + } else { + gen->error = -ERANGE; + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1)); + } + } +} + void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name, enum bpf_attach_type type) { @@ -533,7 +613,7 @@ void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name, gen->attach_kind = kind; ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s", prefix, attach_name); - if (ret == sizeof(gen->attach_target)) + if (ret >= sizeof(gen->attach_target)) gen->error = -ENOSPC; } @@ -560,7 +640,7 @@ static void emit_find_attach_target(struct bpf_gen *gen) } void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, - bool is_typeless, int kind, int insn_idx) + bool is_typeless, bool is_ld64, int kind, int insn_idx) { struct ksym_relo_desc *relo; @@ -574,6 +654,7 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, relo->name = name; relo->is_weak = is_weak; relo->is_typeless = is_typeless; + relo->is_ld64 = is_ld64; relo->kind = kind; relo->insn_idx = insn_idx; gen->relo_cnt++; @@ -586,9 +667,11 @@ static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_des int i; for (i = 0; i < gen->nr_ksyms; i++) { - if (!strcmp(gen->ksyms[i].name, relo->name)) { - gen->ksyms[i].ref++; - return &gen->ksyms[i]; + kdesc = &gen->ksyms[i]; + if (kdesc->kind == relo->kind && kdesc->is_ld64 == relo->is_ld64 && + !strcmp(kdesc->name, relo->name)) { + kdesc->ref++; + return kdesc; } } kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc)); @@ -603,6 +686,7 @@ static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_des kdesc->ref = 1; kdesc->off = 0; kdesc->insn = 0; + kdesc->is_ld64 = relo->is_ld64; return kdesc; } @@ -699,17 +783,17 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo /* obtain fd in BPF_REG_9 */ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); - /* jump to fd_array store if fd denotes module BTF */ - emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); - /* set the default value for off */ - emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); - /* skip BTF fd store for vmlinux BTF */ - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); /* load fd_array slot pointer */ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); - /* store BTF fd in slot */ + /* store BTF fd in slot, 0 for vmlinux */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); + /* jump to insn[insn_idx].off store if fd denotes module BTF */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); + /* set the default value for off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip BTF fd store for vmlinux BTF */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); /* store index into insn[insn_idx].off */ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); log: @@ -780,12 +864,12 @@ log: emit_ksym_relo_log(gen, relo, kdesc->ref); } -static __u32 src_reg_mask(void) +static __u32 src_reg_mask(struct bpf_gen *gen) { -#if defined(__LITTLE_ENDIAN_BITFIELD) - return 0x0f; /* src_reg,dst_reg,... */ -#elif defined(__BIG_ENDIAN_BITFIELD) - return 0xf0; /* dst_reg,src_reg,... */ +#if defined(__LITTLE_ENDIAN_BITFIELD) /* src_reg,dst_reg,... */ + return gen->swapped_endian ? 0xf0 : 0x0f; +#elif defined(__BIG_ENDIAN_BITFIELD) /* dst_reg,src_reg,... */ + return gen->swapped_endian ? 0x0f : 0xf0; #else #error "Unsupported bit endianness, cannot proceed" #endif @@ -804,11 +888,13 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, return; /* try to copy from existing ldimm64 insn */ if (kdesc->ref > 1) { - move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, - kdesc->insn + offsetof(struct bpf_insn, imm)); move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); - /* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */ + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + /* jump over src_reg adjustment if imm (btf_id) is not 0, reuse BPF_REG_0 from move_blob2blob + * If btf_id is zero, clear BPF_PSEUDO_BTF_ID flag in src_reg of ld_imm64 insn + */ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3)); goto clear_src_reg; } @@ -831,10 +917,10 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); /* skip src_reg adjustment */ - emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3)); clear_src_reg: /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ - reg_mask = src_reg_mask(); + reg_mask = src_reg_mask(gen); emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); @@ -862,23 +948,17 @@ static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn { int insn; - pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx); + pr_debug("gen: emit_relo (%d): %s at %d %s\n", + relo->kind, relo->name, relo->insn_idx, relo->is_ld64 ? "ld64" : "call"); insn = insns + sizeof(struct bpf_insn) * relo->insn_idx; emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn)); - switch (relo->kind) { - case BTF_KIND_VAR: + if (relo->is_ld64) { if (relo->is_typeless) emit_relo_ksym_typeless(gen, relo, insn); else emit_relo_ksym_btf(gen, relo, insn); - break; - case BTF_KIND_FUNC: + } else { emit_relo_kfunc_btf(gen, relo, insn); - break; - default: - pr_warn("Unknown relocation kind '%d'\n", relo->kind); - gen->error = -EDOM; - return; } } @@ -901,18 +981,20 @@ static void cleanup_core_relo(struct bpf_gen *gen) static void cleanup_relos(struct bpf_gen *gen, int insns) { + struct ksym_desc *kdesc; int i, insn; for (i = 0; i < gen->nr_ksyms; i++) { + kdesc = &gen->ksyms[i]; /* only close fds for typed ksyms and kfuncs */ - if (gen->ksyms[i].kind == BTF_KIND_VAR && !gen->ksyms[i].typeless) { + if (kdesc->is_ld64 && !kdesc->typeless) { /* close fd recorded in insn[insn_idx + 1].imm */ - insn = gen->ksyms[i].insn; + insn = kdesc->insn; insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm); emit_sys_close_blob(gen, insn); - } else if (gen->ksyms[i].kind == BTF_KIND_FUNC) { - emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off)); - if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ) + } else if (!kdesc->is_ld64) { + emit_sys_close_blob(gen, blob_fd_array_off(gen, kdesc->off)); + if (kdesc->off < MAX_FD_ARRAY_SZ) gen->nr_fd_array--; } } @@ -929,48 +1011,94 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) cleanup_core_relo(gen); } +/* Convert func, line, and core relo info blobs to target endianness */ +static void info_blob_bswap(struct bpf_gen *gen, int func_info, int line_info, + int core_relos, struct bpf_prog_load_opts *load_attr) +{ + struct bpf_func_info *fi = gen->data_start + func_info; + struct bpf_line_info *li = gen->data_start + line_info; + struct bpf_core_relo *cr = gen->data_start + core_relos; + int i; + + for (i = 0; i < load_attr->func_info_cnt; i++) + bpf_func_info_bswap(fi++); + + for (i = 0; i < load_attr->line_info_cnt; i++) + bpf_line_info_bswap(li++); + + for (i = 0; i < gen->core_relo_cnt; i++) + bpf_core_relo_bswap(cr++); +} + void bpf_gen__prog_load(struct bpf_gen *gen, enum bpf_prog_type prog_type, const char *prog_name, const char *license, struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *load_attr, int prog_idx) { + int func_info_tot_sz = load_attr->func_info_cnt * + load_attr->func_info_rec_size; + int line_info_tot_sz = load_attr->line_info_cnt * + load_attr->line_info_rec_size; + int core_relo_tot_sz = gen->core_relo_cnt * + sizeof(struct bpf_core_relo); int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", - prog_type, insn_cnt, prog_idx); /* add license string to blob of bytes */ license_off = add_data(gen, license, strlen(license) + 1); /* add insns to blob of bytes */ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); + pr_debug("gen: prog_load: prog_idx %d type %d insn off %d insns_cnt %zd license off %d\n", + prog_idx, prog_type, insns_off, insn_cnt, license_off); - attr.prog_type = prog_type; - attr.expected_attach_type = load_attr->expected_attach_type; - attr.attach_btf_id = load_attr->attach_btf_id; - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = 0; - attr.insn_cnt = (__u32)insn_cnt; - attr.prog_flags = load_attr->prog_flags; - - attr.func_info_rec_size = load_attr->func_info_rec_size; - attr.func_info_cnt = load_attr->func_info_cnt; - func_info = add_data(gen, load_attr->func_info, - attr.func_info_cnt * attr.func_info_rec_size); + /* convert blob insns to target endianness */ + if (gen->swapped_endian) { + struct bpf_insn *insn = gen->data_start + insns_off; + int i; - attr.line_info_rec_size = load_attr->line_info_rec_size; - attr.line_info_cnt = load_attr->line_info_cnt; - line_info = add_data(gen, load_attr->line_info, - attr.line_info_cnt * attr.line_info_rec_size); + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); + } - attr.core_relo_rec_size = sizeof(struct bpf_core_relo); - attr.core_relo_cnt = gen->core_relo_cnt; - core_relos = add_data(gen, gen->core_relos, - attr.core_relo_cnt * attr.core_relo_rec_size); + attr.prog_type = tgt_endian(prog_type); + attr.expected_attach_type = tgt_endian(load_attr->expected_attach_type); + attr.attach_btf_id = tgt_endian(load_attr->attach_btf_id); + attr.prog_ifindex = tgt_endian(load_attr->prog_ifindex); + attr.kern_version = 0; + attr.insn_cnt = tgt_endian((__u32)insn_cnt); + attr.prog_flags = tgt_endian(load_attr->prog_flags); + + attr.func_info_rec_size = tgt_endian(load_attr->func_info_rec_size); + attr.func_info_cnt = tgt_endian(load_attr->func_info_cnt); + func_info = add_data(gen, load_attr->func_info, func_info_tot_sz); + pr_debug("gen: prog_load: func_info: off %d cnt %d rec size %d\n", + func_info, load_attr->func_info_cnt, + load_attr->func_info_rec_size); + + attr.line_info_rec_size = tgt_endian(load_attr->line_info_rec_size); + attr.line_info_cnt = tgt_endian(load_attr->line_info_cnt); + line_info = add_data(gen, load_attr->line_info, line_info_tot_sz); + pr_debug("gen: prog_load: line_info: off %d cnt %d rec size %d\n", + line_info, load_attr->line_info_cnt, + load_attr->line_info_rec_size); + + attr.core_relo_rec_size = tgt_endian((__u32)sizeof(struct bpf_core_relo)); + attr.core_relo_cnt = tgt_endian(gen->core_relo_cnt); + core_relos = add_data(gen, gen->core_relos, core_relo_tot_sz); + pr_debug("gen: prog_load: core_relos: off %d cnt %d rec size %zd\n", + core_relos, gen->core_relo_cnt, + sizeof(struct bpf_core_relo)); + + /* convert all info blobs to target endianness */ + if (gen->swapped_endian) + info_blob_bswap(gen, func_info, line_info, core_relos, load_attr); libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); prog_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: prog_load: attr: off %d size %d\n", + prog_load_attr, attr_size); /* populate union bpf_attr with a pointer to license */ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); @@ -1038,25 +1166,35 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, int zero = 0; memset(&attr, 0, attr_size); - pr_debug("gen: map_update_elem: idx %d\n", map_idx); value = add_data(gen, pvalue, value_size); key = add_data(gen, &zero, sizeof(zero)); - /* if (map_desc[map_idx].initial_value) - * copy_from_user(value, initial_value, value_size); + /* if (map_desc[map_idx].initial_value) { + * if (ctx->flags & BPF_SKEL_KERNEL) + * bpf_probe_read_kernel(value, value_size, initial_value); + * else + * bpf_copy_from_user(value, value_size, initial_value); + * } */ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, sizeof(struct bpf_loader_ctx) + sizeof(struct bpf_map_desc) * map_idx + offsetof(struct bpf_map_desc, initial_value))); - emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4)); + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 8)); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, value)); emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size)); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct bpf_loader_ctx, flags))); + emit(gen, BPF_JMP_IMM(BPF_JSET, BPF_REG_0, BPF_SKEL_KERNEL, 2)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_update_elem: idx %d, value: off %d size %d, attr: off %d size %d\n", + map_idx, value, value_size, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1073,14 +1211,16 @@ void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slo int attr_size = offsetofend(union bpf_attr, flags); int map_update_attr, key; union bpf_attr attr; + int tgt_slot; memset(&attr, 0, attr_size); - pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", - outer_map_idx, slot, inner_map_idx); - key = add_data(gen, &slot, sizeof(slot)); + tgt_slot = tgt_endian(slot); + key = add_data(gen, &tgt_slot, sizeof(tgt_slot)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: populate_outer_map: outer %d key %d inner %d, attr: off %d size %d\n", + outer_map_idx, slot, inner_map_idx, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, outer_map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1101,8 +1241,9 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_freeze: idx %d, attr: off %d size %d\n", + map_idx, map_freeze_attr, attr_size); move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index 3c20b126d60d..140ee4055676 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map) void hashmap__free(struct hashmap *map) { - if (!map) + if (IS_ERR_OR_NULL(map)) return; hashmap__clear(map); @@ -128,7 +128,7 @@ static int hashmap_grow(struct hashmap *map) } static bool hashmap_find_entry(const struct hashmap *map, - const void *key, size_t hash, + const long key, size_t hash, struct hashmap_entry ***pprev, struct hashmap_entry **entry) { @@ -151,18 +151,18 @@ static bool hashmap_find_entry(const struct hashmap *map, return false; } -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value) +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value) { struct hashmap_entry *entry; size_t h; int err; if (old_key) - *old_key = NULL; + *old_key = 0; if (old_value) - *old_value = NULL; + *old_value = 0; h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); if (strategy != HASHMAP_APPEND && @@ -203,7 +203,7 @@ int hashmap__insert(struct hashmap *map, const void *key, void *value, return 0; } -bool hashmap__find(const struct hashmap *map, const void *key, void **value) +bool hashmap_find(const struct hashmap *map, long key, long *value) { struct hashmap_entry *entry; size_t h; @@ -217,8 +217,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value) return true; } -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value) +bool hashmap_delete(struct hashmap *map, long key, + long *old_key, long *old_value) { struct hashmap_entry **pprev, *entry; size_t h; @@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key, return true; } - diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index 10a4c4cd13cf..0c4f155e8eb7 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -40,12 +40,32 @@ static inline size_t str_hash(const char *s) return h; } -typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); -typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); +typedef size_t (*hashmap_hash_fn)(long key, void *ctx); +typedef bool (*hashmap_equal_fn)(long key1, long key2, void *ctx); +/* + * Hashmap interface is polymorphic, keys and values could be either + * long-sized integers or pointers, this is achieved as follows: + * - interface functions that operate on keys and values are hidden + * behind auxiliary macros, e.g. hashmap_insert <-> hashmap__insert; + * - these auxiliary macros cast the key and value parameters as + * long or long *, so the user does not have to specify the casts explicitly; + * - for pointer parameters (e.g. old_key) the size of the pointed + * type is verified by hashmap_cast_ptr using _Static_assert; + * - when iterating using hashmap__for_each_* forms + * hasmap_entry->key should be used for integer keys and + * hasmap_entry->pkey should be used for pointer keys, + * same goes for values. + */ struct hashmap_entry { - const void *key; - void *value; + union { + long key; + const void *pkey; + }; + union { + long value; + void *pvalue; + }; struct hashmap_entry *next; }; @@ -60,16 +80,6 @@ struct hashmap { size_t sz; }; -#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ - .hash_fn = (hash_fn), \ - .equal_fn = (equal_fn), \ - .ctx = (ctx), \ - .buckets = NULL, \ - .cap = 0, \ - .cap_bits = 0, \ - .sz = 0, \ -} - void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, hashmap_equal_fn equal_fn, void *ctx); struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, @@ -102,6 +112,13 @@ enum hashmap_insert_strategy { HASHMAP_APPEND, }; +#define hashmap_cast_ptr(p) ({ \ + _Static_assert((__builtin_constant_p((p)) ? (p) == NULL : 0) || \ + sizeof(*(p)) == sizeof(long), \ + #p " pointee should be a long-sized integer or a pointer"); \ + (long *)(p); \ +}) + /* * hashmap__insert() adds key/value entry w/ various semantics, depending on * provided strategy value. If a given key/value pair replaced already @@ -109,42 +126,38 @@ enum hashmap_insert_strategy { * through old_key and old_value to allow calling code do proper memory * management. */ -int hashmap__insert(struct hashmap *map, const void *key, void *value, - enum hashmap_insert_strategy strategy, - const void **old_key, void **old_value); +int hashmap_insert(struct hashmap *map, long key, long value, + enum hashmap_insert_strategy strategy, + long *old_key, long *old_value); -static inline int hashmap__add(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); -} +#define hashmap__insert(map, key, value, strategy, old_key, old_value) \ + hashmap_insert((map), (long)(key), (long)(value), (strategy), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -static inline int hashmap__set(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_SET, - old_key, old_value); -} +#define hashmap__add(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_ADD, NULL, NULL) -static inline int hashmap__update(struct hashmap *map, - const void *key, void *value, - const void **old_key, void **old_value) -{ - return hashmap__insert(map, key, value, HASHMAP_UPDATE, - old_key, old_value); -} +#define hashmap__set(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_SET, (old_key), (old_value)) -static inline int hashmap__append(struct hashmap *map, - const void *key, void *value) -{ - return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); -} +#define hashmap__update(map, key, value, old_key, old_value) \ + hashmap__insert((map), (key), (value), HASHMAP_UPDATE, (old_key), (old_value)) + +#define hashmap__append(map, key, value) \ + hashmap__insert((map), (key), (value), HASHMAP_APPEND, NULL, NULL) + +bool hashmap_delete(struct hashmap *map, long key, long *old_key, long *old_value); + +#define hashmap__delete(map, key, old_key, old_value) \ + hashmap_delete((map), (long)(key), \ + hashmap_cast_ptr(old_key), \ + hashmap_cast_ptr(old_value)) -bool hashmap__delete(struct hashmap *map, const void *key, - const void **old_key, void **old_value); +bool hashmap_find(const struct hashmap *map, long key, long *value); -bool hashmap__find(const struct hashmap *map, const void *key, void **value); +#define hashmap__find(map, key, value) \ + hashmap_find((map), (long)(key), hashmap_cast_ptr(value)) /* * hashmap__for_each_entry - iterate over all entries in hashmap @@ -153,8 +166,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; cur; cur = cur->next) /* * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe @@ -165,8 +178,8 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; \ cur && ({tmp = cur->next; true; }); \ cur = tmp) @@ -177,19 +190,19 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur; \ cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7f10dd501a52..3dc8a8078815 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -31,11 +31,10 @@ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/filter.h> -#include <linux/list.h> #include <linux/limits.h> #include <linux/perf_event.h> +#include <linux/bpf_perf_event.h> #include <linux/ring_buffer.h> -#include <linux/version.h> #include <sys/epoll.h> #include <sys/ioctl.h> #include <sys/mman.h> @@ -51,15 +50,19 @@ #include "libbpf.h" #include "bpf.h" #include "btf.h" -#include "str_error.h" #include "libbpf_internal.h" #include "hashmap.h" #include "bpf_gen_internal.h" +#include "zip.h" #ifndef BPF_FS_MAGIC #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define MAX_EVENT_NAME_LEN 64 + +#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" + #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. It may break @@ -71,11 +74,188 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); +static int map_set_def_max_entries(struct bpf_map *map); + +static const char * const attach_type_name[] = { + [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", + [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress", + [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create", + [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release", + [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops", + [BPF_CGROUP_DEVICE] = "cgroup_device", + [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind", + [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", + [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", + [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", + [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect", + [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", + [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", + [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", + [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", + [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername", + [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", + [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", + [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname", + [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", + [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", + [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg", + [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", + [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", + [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg", + [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", + [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", + [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", + [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", + [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", + [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", + [BPF_LIRC_MODE2] = "lirc_mode2", + [BPF_FLOW_DISSECTOR] = "flow_dissector", + [BPF_TRACE_RAW_TP] = "trace_raw_tp", + [BPF_TRACE_FENTRY] = "trace_fentry", + [BPF_TRACE_FEXIT] = "trace_fexit", + [BPF_MODIFY_RETURN] = "modify_return", + [BPF_LSM_MAC] = "lsm_mac", + [BPF_LSM_CGROUP] = "lsm_cgroup", + [BPF_SK_LOOKUP] = "sk_lookup", + [BPF_TRACE_ITER] = "trace_iter", + [BPF_XDP_DEVMAP] = "xdp_devmap", + [BPF_XDP_CPUMAP] = "xdp_cpumap", + [BPF_XDP] = "xdp", + [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select", + [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate", + [BPF_PERF_EVENT] = "perf_event", + [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", + [BPF_STRUCT_OPS] = "struct_ops", + [BPF_NETFILTER] = "netfilter", + [BPF_TCX_INGRESS] = "tcx_ingress", + [BPF_TCX_EGRESS] = "tcx_egress", + [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", + [BPF_NETKIT_PRIMARY] = "netkit_primary", + [BPF_NETKIT_PEER] = "netkit_peer", + [BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session", + [BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session", +}; + +static const char * const link_type_name[] = { + [BPF_LINK_TYPE_UNSPEC] = "unspec", + [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_LINK_TYPE_TRACING] = "tracing", + [BPF_LINK_TYPE_CGROUP] = "cgroup", + [BPF_LINK_TYPE_ITER] = "iter", + [BPF_LINK_TYPE_NETNS] = "netns", + [BPF_LINK_TYPE_XDP] = "xdp", + [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", + [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", + [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_LINK_TYPE_NETFILTER] = "netfilter", + [BPF_LINK_TYPE_TCX] = "tcx", + [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", + [BPF_LINK_TYPE_NETKIT] = "netkit", + [BPF_LINK_TYPE_SOCKMAP] = "sockmap", +}; + +static const char * const map_type_name[] = { + [BPF_MAP_TYPE_UNSPEC] = "unspec", + [BPF_MAP_TYPE_HASH] = "hash", + [BPF_MAP_TYPE_ARRAY] = "array", + [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", + [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", + [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", + [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", + [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", + [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", + [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", + [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", + [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", + [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", + [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", + [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", + [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", + [BPF_MAP_TYPE_XSKMAP] = "xskmap", + [BPF_MAP_TYPE_SOCKHASH] = "sockhash", + [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", + [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", + [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", + [BPF_MAP_TYPE_QUEUE] = "queue", + [BPF_MAP_TYPE_STACK] = "stack", + [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", + [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_MAP_TYPE_RINGBUF] = "ringbuf", + [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", + [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", + [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", + [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", + [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", + [BPF_MAP_TYPE_ARENA] = "arena", + [BPF_MAP_TYPE_INSN_ARRAY] = "insn_array", +}; + +static const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", + [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", + [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", + [BPF_PROG_TYPE_TRACING] = "tracing", + [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_PROG_TYPE_EXT] = "ext", + [BPF_PROG_TYPE_LSM] = "lsm", + [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", + [BPF_PROG_TYPE_SYSCALL] = "syscall", + [BPF_PROG_TYPE_NETFILTER] = "netfilter", +}; static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { - if (level == LIBBPF_DEBUG) + const char *env_var = "LIBBPF_LOG_LEVEL"; + static enum libbpf_print_level min_level = LIBBPF_INFO; + static bool initialized; + + if (!initialized) { + char *verbosity; + + initialized = true; + verbosity = getenv(env_var); + if (verbosity) { + if (strcasecmp(verbosity, "warn") == 0) + min_level = LIBBPF_WARN; + else if (strcasecmp(verbosity, "debug") == 0) + min_level = LIBBPF_DEBUG; + else if (strcasecmp(verbosity, "info") == 0) + min_level = LIBBPF_INFO; + else + fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n", + env_var, verbosity); + } + } + + /* if too verbose, skip logging */ + if (level > min_level) return 0; return vfprintf(stderr, format, args); @@ -85,9 +265,10 @@ static libbpf_print_fn_t __libbpf_pr = __base_pr; libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) { - libbpf_print_fn_t old_print_fn = __libbpf_pr; + libbpf_print_fn_t old_print_fn; + + old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED); - __libbpf_pr = fn; return old_print_fn; } @@ -95,13 +276,20 @@ __printf(2, 3) void libbpf_print(enum libbpf_print_level level, const char *format, ...) { va_list args; + int old_errno; + libbpf_print_fn_t print_fn; - if (!__libbpf_pr) + print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED); + if (!print_fn) return; + old_errno = errno; + va_start(args, format); - __libbpf_pr(level, format, args); + print_fn(level, format, args); va_end(args); + + errno = old_errno; } static void pr_perm_msg(int err) @@ -130,8 +318,6 @@ static void pr_perm_msg(int err) buf); } -#define STRERR_BUFSIZE 128 - /* Copied from tools/perf/util/util.h */ #ifndef zfree # define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) @@ -151,20 +337,9 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } -/* this goes away in libbpf 1.0 */ -enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE; - int libbpf_set_strict_mode(enum libbpf_strict_mode mode) { - /* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to - * get all possible values we compensate last +1, and then (2*x - 1) - * to get the bit mask - */ - if (mode != LIBBPF_STRICT_ALL - && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1))) - return errno = EINVAL, -EINVAL; - - libbpf_mode = mode; + /* as of v1.0 libbpf_set_strict_mode() is a no-op */ return 0; } @@ -191,10 +366,11 @@ enum reloc_type { RELO_LD64, RELO_CALL, RELO_DATA, - RELO_EXTERN_VAR, - RELO_EXTERN_FUNC, + RELO_EXTERN_LD64, + RELO_EXTERN_CALL, RELO_SUBPROG_ADDR, RELO_CORE, + RELO_INSN_ARRAY, }; struct reloc_desc { @@ -205,16 +381,20 @@ struct reloc_desc { struct { int map_idx; int sym_off; + /* + * The following two fields can be unionized, as the + * ext_idx field is used for extern symbols, and the + * sym_size is used for jump tables, which are never + * extern + */ + union { + int ext_idx; + int sym_size; + }; }; }; }; -struct bpf_sec_def; - -typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); -typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie); -typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie); - /* stored as sec_def->cookie for all libbpf-supported SEC()s */ enum sec_def_flags { SEC_NONE = 0, @@ -229,23 +409,32 @@ enum sec_def_flags { SEC_ATTACHABLE = 2, SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, /* attachment target is specified through BTF ID in either kernel or - * other BPF program's BTF object */ + * other BPF program's BTF object + */ SEC_ATTACH_BTF = 4, /* BPF program type allows sleeping/blocking in kernel */ SEC_SLEEPABLE = 8, - /* allow non-strict prefix matching */ - SEC_SLOPPY_PFX = 16, + /* BPF program support non-linear XDP buffer */ + SEC_XDP_FRAGS = 16, + /* Setup proper attach type for usdt probes. */ + SEC_USDT = 32, }; struct bpf_sec_def { - const char *sec; + char *sec; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; long cookie; + int handler_id; + + libbpf_prog_setup_fn_t prog_setup_fn; + libbpf_prog_prepare_load_fn_t prog_prepare_load_fn; + libbpf_prog_attach_fn_t prog_attach_fn; +}; - init_fn_t init_fn; - preload_fn_t preload_fn; - attach_fn_t attach_fn; +struct bpf_light_subprog { + __u32 sec_insn_off; + __u32 sub_insn_off; }; /* @@ -253,9 +442,10 @@ struct bpf_sec_def { * linux/filter.h. */ struct bpf_program { - const struct bpf_sec_def *sec_def; + char *name; char *sec_name; size_t sec_idx; + const struct bpf_sec_def *sec_def; /* this program's instruction offset (in number of instructions) * within its containing ELF section */ @@ -275,12 +465,6 @@ struct bpf_program { */ size_t sub_insn_off; - char *name; - /* name with / replaced by _; makes recursive pinning - * in bpf_object__pin_programs easier - */ - char *pin_name; - /* instructions that belong to BPF program; insns[0] is located at * sec_insn_off instruction within its ELF section in ELF file, so * when mapping ELF file instruction index to the local instruction, @@ -301,24 +485,22 @@ struct bpf_program { size_t log_size; __u32 log_level; - struct { - int nr; - int *fds; - } instances; - bpf_program_prep_t preprocessor; - struct bpf_object *obj; - void *priv; - bpf_program_clear_priv_t clear_priv; - bool load; + int fd; + bool autoload; + bool autoattach; + bool sym_global; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; + int exception_cb_idx; + int prog_ifindex; __u32 attach_btf_obj_fd; __u32 attach_btf_id; __u32 attach_prog_fd; + void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@ -327,11 +509,13 @@ struct bpf_program { __u32 line_info_rec_size; __u32 line_info_cnt; __u32 prog_flags; + __u8 hash[SHA256_DIGEST_LENGTH]; + + struct bpf_light_subprog *subprogs; + __u32 subprog_cnt; }; struct bpf_struct_ops { - const char *tname; - const struct btf_type *type; struct bpf_program **progs; __u32 *kern_func_off; /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ @@ -356,6 +540,8 @@ struct bpf_struct_ops { #define KCONFIG_SEC ".kconfig" #define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" +#define STRUCT_OPS_LINK_SEC ".struct_ops.link" +#define ARENA_SEC ".addr_space.1" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, @@ -365,11 +551,20 @@ enum libbpf_map_type { LIBBPF_MAP_KCONFIG, }; +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + struct bpf_map { + struct bpf_object *obj; char *name; /* real_name is defined for special internal maps (.rodata*, * .data*, .bss, .kconfig) and preserves their original ELF section - * name. This is important to be be able to find corresponding BTF + * name. This is important to be able to find corresponding BTF * DATASEC information. */ char *real_name; @@ -381,11 +576,10 @@ struct bpf_map { struct bpf_map_def def; __u32 numa_node; __u32 btf_var_idx; + int mod_btf_fd; __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 btf_vmlinux_value_type_id; - void *priv; - bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; void *mmaped; struct bpf_struct_ops *st_ops; @@ -395,8 +589,10 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; - bool skipped; + bool autocreate; + bool autoattach; __u64 map_extra; + struct bpf_program *excl_prog; }; enum extern_type { @@ -419,7 +615,8 @@ struct extern_desc { int sym_idx; int btf_id; int sec_btf_id; - const char *name; + char *name; + char *essent_name; bool is_set; bool is_weak; union { @@ -448,8 +645,6 @@ struct extern_desc { }; }; -static LIST_HEAD(bpf_objects_list); - struct module_btf { struct btf *btf; char *name; @@ -464,6 +659,7 @@ enum sec_type { SEC_BSS, SEC_DATA, SEC_RODATA, + SEC_ST_OPS, }; struct elf_sec_desc { @@ -479,17 +675,26 @@ struct elf_state { Elf *elf; Elf64_Ehdr *ehdr; Elf_Data *symbols; - Elf_Data *st_ops_data; + Elf_Data *arena_data; size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct elf_sec_desc *secs; - int sec_cnt; - int maps_shndx; + size_t sec_cnt; int btf_maps_shndx; __u32 btf_maps_sec_btf_id; int text_shndx; int symbols_shndx; - int st_ops_shndx; + bool has_st_ops; + int arena_data_shndx; + int jumptables_data_shndx; +}; + +struct usdt_manager; + +enum bpf_object_state { + OBJ_OPEN, + OBJ_PREPARED, + OBJ_LOADED, }; struct bpf_object { @@ -497,6 +702,7 @@ struct bpf_object { char license[64]; __u32 kern_version; + enum bpf_object_state state; struct bpf_program *programs; size_t nr_programs; struct bpf_map *maps; @@ -508,7 +714,6 @@ struct bpf_object { int nr_extern; int kconfig_map_idx; - bool loaded; bool has_subcalls; bool has_rodata; @@ -516,12 +721,8 @@ struct bpf_object { /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ struct elf_state efile; - /* - * All loaded bpf_object are linked in a list, which is - * hidden to caller. bpf_objects__<func> handlers deal with - * all objects. - */ - struct list_head list; + + unsigned char byteorder; struct btf *btf; struct btf_ext *btf_ext; @@ -547,13 +748,30 @@ struct bpf_object { size_t log_size; __u32 log_level; - void *priv; - bpf_object_clear_priv_t clear_priv; - int *fd_array; size_t fd_array_cap; size_t fd_array_cnt; + struct usdt_manager *usdt_man; + + int arena_map_idx; + void *arena_data; + size_t arena_data_sz; + + void *jumptables_data; + size_t jumptables_data_sz; + + struct { + struct bpf_program *prog; + int sym_off; + int fd; + } *jumptable_maps; + size_t jumptable_map_cnt; + + struct kern_feature_cache *feat_cache; + char *token_path; + int token_fd; + char path[]; }; @@ -569,28 +787,14 @@ static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); void bpf_program__unload(struct bpf_program *prog) { - int i; - if (!prog) return; - /* - * If the object is opened but the program was never loaded, - * it is possible that prog->instances.nr == -1. - */ - if (prog->instances.nr > 0) { - for (i = 0; i < prog->instances.nr; i++) - zclose(prog->instances.fds[i]); - } else if (prog->instances.nr != -1) { - pr_warn("Internal error: instances.nr is %d\n", - prog->instances.nr); - } - - prog->instances.nr = -1; - zfree(&prog->instances.fds); + zclose(prog->fd); zfree(&prog->func_info); zfree(&prog->line_info); + zfree(&prog->subprogs); } static void bpf_program__exit(struct bpf_program *prog) @@ -598,16 +802,9 @@ static void bpf_program__exit(struct bpf_program *prog) if (!prog) return; - if (prog->clear_priv) - prog->clear_priv(prog, prog->priv); - - prog->priv = NULL; - prog->clear_priv = NULL; - bpf_program__unload(prog); zfree(&prog->name); zfree(&prog->sec_name); - zfree(&prog->pin_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -616,26 +813,6 @@ static void bpf_program__exit(struct bpf_program *prog) prog->sec_idx = -1; } -static char *__bpf_program__pin_name(struct bpf_program *prog) -{ - char *name, *p; - - if (libbpf_mode & LIBBPF_STRICT_SEC_NAME) - name = strdup(prog->name); - else - name = strdup(prog->sec_name); - - if (!name) - return NULL; - - p = name; - - while ((p = strchr(p, '/'))) - *p = '_'; - - return name; -} - static bool insn_is_subprog_call(const struct bpf_insn *insn) { return BPF_CLASS(insn->code) == BPF_JMP && @@ -677,10 +854,22 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->insns_cnt = prog->sec_insn_cnt; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->load = true; + prog->fd = -1; + prog->exception_cb_idx = -1; + + /* libbpf's convention for SEC("?abc...") is that it's just like + * SEC("abc...") but the corresponding bpf_program starts out with + * autoload set to false. + */ + if (sec_name[0] == '?') { + prog->autoload = false; + /* from now on forget there was ? in section name */ + sec_name++; + } else { + prog->autoload = true; + } - prog->instances.fds = NULL; - prog->instances.nr = -1; + prog->autoattach = true; /* inherit object's log_level */ prog->log_level = obj->log_level; @@ -693,10 +882,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, if (!prog->name) goto errout; - prog->pin_name = __bpf_program__pin_name(prog); - if (!prog->pin_name) - goto errout; - prog->insns = malloc(insn_data_sz); if (!prog->insns) goto errout; @@ -724,7 +909,6 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, progs = obj->programs; nr_progs = obj->nr_programs; nr_syms = symbols->d_size / sizeof(Elf64_Sym); - sec_off = 0; for (i = 0; i < nr_syms; i++) { sym = elf_sym_by_idx(obj, i); @@ -744,7 +928,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_off + prog_sz > sec_sz) { + if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) { pr_warn("sec '%s': program at offset %zu crosses section boundary\n", sec_name, sec_off); return -LIBBPF_ERRNO__FORMAT; @@ -778,14 +962,16 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, if (err) return err; + if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL) + prog->sym_global = true; + /* if function is a global/weak symbol, but has restricted * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC * as static to enable more permissive BPF verification mode * with more outside context available to BPF verifier */ - if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL - && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN - || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) + if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -795,40 +981,18 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } -__u32 get_kernel_version(void) +static void bpf_object_bswap_progs(struct bpf_object *obj) { - /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, - * but Ubuntu provides /proc/version_signature file, as described at - * https://ubuntu.com/kernel, with an example contents below, which we - * can use to get a proper LINUX_VERSION_CODE. - * - * Ubuntu 5.4.0-12.15-generic 5.4.8 - * - * In the above, 5.4.8 is what kernel is actually expecting, while - * uname() call will return 5.4.0 in info.release. - */ - const char *ubuntu_kver_file = "/proc/version_signature"; - __u32 major, minor, patch; - struct utsname info; - - if (access(ubuntu_kver_file, R_OK) == 0) { - FILE *f; - - f = fopen(ubuntu_kver_file, "r"); - if (f) { - if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) { - fclose(f); - return KERNEL_VERSION(major, minor, patch); - } - fclose(f); - } - /* something went wrong, fall back to uname() approach */ - } + struct bpf_program *prog = obj->programs; + struct bpf_insn *insn; + int p, i; - uname(&info); - if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) - return 0; - return KERNEL_VERSION(major, minor, patch); + for (p = 0; p < obj->nr_programs; p++, prog++) { + insn = prog->insns; + for (i = 0; i < prog->insns_cnt; i++, insn++) + bpf_insn_bswap(insn); + } + pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs); } static const struct btf_member * @@ -860,43 +1024,52 @@ find_member_by_name(const struct btf *btf, const struct btf_type *t, return NULL; } +static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, + __u16 kind, struct btf **res_btf, + struct module_btf **res_mod_btf); + #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, const char *name, __u32 kind); static int -find_struct_ops_kern_types(const struct btf *btf, const char *tname, +find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw, + struct module_btf **mod_btf, const struct btf_type **type, __u32 *type_id, const struct btf_type **vtype, __u32 *vtype_id, const struct btf_member **data_member) { const struct btf_type *kern_type, *kern_vtype; const struct btf_member *kern_data_member; + struct btf *btf = NULL; __s32 kern_vtype_id, kern_type_id; + char tname[192], stname[256]; __u32 i; - kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); - if (kern_type_id < 0) { - pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", - tname); - return kern_type_id; - } - kern_type = btf__type_by_id(btf, kern_type_id); + snprintf(tname, sizeof(tname), "%.*s", + (int)bpf_core_essential_name_len(tname_raw), tname_raw); + + snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname); - /* Find the corresponding "map_value" type that will be used - * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, - * find "struct bpf_struct_ops_tcp_congestion_ops" from the - * btf_vmlinux. + /* Look for the corresponding "map_value" type that will be used + * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf + * and the mod_btf. + * For example, find "struct bpf_struct_ops_tcp_congestion_ops". */ - kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, - tname, BTF_KIND_STRUCT); + kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf); if (kern_vtype_id < 0) { - pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", - STRUCT_OPS_VALUE_PREFIX, tname); + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname); return kern_vtype_id; } kern_vtype = btf__type_by_id(btf, kern_vtype_id); + kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (kern_type_id < 0) { + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname); + return kern_type_id; + } + kern_type = btf__type_by_id(btf, kern_type_id); + /* Find "struct tcp_congestion_ops" from * struct bpf_struct_ops_tcp_congestion_ops { * [ ... ] @@ -909,8 +1082,8 @@ find_struct_ops_kern_types(const struct btf *btf, const char *tname, break; } if (i == btf_vlen(kern_vtype)) { - pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", - tname, STRUCT_OPS_VALUE_PREFIX, tname); + pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n", + tname, stname); return -EINVAL; } @@ -928,32 +1101,95 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map) return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; } +static bool is_valid_st_ops_program(struct bpf_object *obj, + const struct bpf_program *prog) +{ + int i; + + for (i = 0; i < obj->nr_programs; i++) { + if (&obj->programs[i] == prog) + return prog->type == BPF_PROG_TYPE_STRUCT_OPS; + } + + return false; +} + +/* For each struct_ops program P, referenced from some struct_ops map M, + * enable P.autoload if there are Ms for which M.autocreate is true, + * disable P.autoload if for all Ms M.autocreate is false. + * Don't change P.autoload for programs that are not referenced from any maps. + */ +static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj) +{ + struct bpf_program *prog, *slot_prog; + struct bpf_map *map; + int i, j, k, vlen; + + for (i = 0; i < obj->nr_programs; ++i) { + int should_load = false; + int use_cnt = 0; + + prog = &obj->programs[i]; + if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) + continue; + + for (j = 0; j < obj->nr_maps; ++j) { + const struct btf_type *type; + + map = &obj->maps[j]; + if (!bpf_map__is_struct_ops(map)) + continue; + + type = btf__type_by_id(obj->btf, map->st_ops->type_id); + vlen = btf_vlen(type); + for (k = 0; k < vlen; ++k) { + slot_prog = map->st_ops->progs[k]; + if (prog != slot_prog) + continue; + + use_cnt++; + if (map->autocreate) + should_load = true; + } + } + if (use_cnt) + prog->autoload = should_load; + } + + return 0; +} + /* Init the map's fields that depend on kern_btf */ -static int bpf_map__init_kern_struct_ops(struct bpf_map *map, - const struct btf *btf, - const struct btf *kern_btf) +static int bpf_map__init_kern_struct_ops(struct bpf_map *map) { const struct btf_member *member, *kern_member, *kern_data_member; const struct btf_type *type, *kern_type, *kern_vtype; __u32 i, kern_type_id, kern_vtype_id, kern_data_off; + struct bpf_object *obj = map->obj; + const struct btf *btf = obj->btf; struct bpf_struct_ops *st_ops; + const struct btf *kern_btf; + struct module_btf *mod_btf = NULL; void *data, *kern_data; const char *tname; int err; st_ops = map->st_ops; - type = st_ops->type; - tname = st_ops->tname; - err = find_struct_ops_kern_types(kern_btf, tname, + type = btf__type_by_id(btf, st_ops->type_id); + tname = btf__name_by_offset(btf, type->name_off); + err = find_struct_ops_kern_types(obj, tname, &mod_btf, &kern_type, &kern_type_id, &kern_vtype, &kern_vtype_id, &kern_data_member); if (err) return err; + kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux; + pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", map->name, st_ops->type_id, kern_type_id, kern_vtype_id); + map->mod_btf_fd = mod_btf ? mod_btf->fd : -1; map->def.value_size = kern_vtype->size; map->btf_vmlinux_value_type_id = kern_vtype_id; @@ -970,17 +1206,46 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, const struct btf_type *mtype, *kern_mtype; __u32 mtype_id, kern_mtype_id; void *mdata, *kern_mdata; + struct bpf_program *prog; __s64 msize, kern_msize; __u32 moff, kern_moff; __u32 kern_member_idx; const char *mname; mname = btf__name_by_offset(btf, member->name_off); + moff = member->offset / 8; + mdata = data + moff; + msize = btf__resolve_size(btf, member->type); + if (msize < 0) { + pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n", + map->name, mname); + return msize; + } + kern_member = find_member_by_name(kern_btf, kern_type, mname); if (!kern_member) { - pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", + if (!libbpf_is_mem_zeroed(mdata, msize)) { + pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", + map->name, mname); + return -ENOTSUP; + } + + if (st_ops->progs[i]) { + /* If we had declaratively set struct_ops callback, we need to + * force its autoload to false, because it doesn't have + * a chance of succeeding from POV of the current struct_ops map. + * If this program is still referenced somewhere else, though, + * then bpf_object_adjust_struct_ops_autoload() will update its + * autoload accordingly. + */ + st_ops->progs[i]->autoload = false; + st_ops->progs[i] = NULL; + } + + /* Skip all-zero/NULL fields if they are not present in the kernel BTF */ + pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n", map->name, mname); - return -ENOTSUP; + continue; } kern_member_idx = kern_member - btf_members(kern_type); @@ -991,10 +1256,7 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, return -ENOTSUP; } - moff = member->offset / 8; kern_moff = kern_member->offset / 8; - - mdata = data + moff; kern_mdata = kern_data + kern_moff; mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); @@ -1009,12 +1271,25 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, } if (btf_is_ptr(mtype)) { - struct bpf_program *prog; + prog = *(void **)mdata; + /* just like for !kern_member case above, reset declaratively + * set (at compile time) program's autload to false, + * if user replaced it with another program or NULL + */ + if (st_ops->progs[i] && st_ops->progs[i] != prog) + st_ops->progs[i]->autoload = false; - prog = st_ops->progs[i]; + /* Update the value from the shadow type */ + st_ops->progs[i] = prog; if (!prog) continue; + if (!is_valid_st_ops_program(obj, prog)) { + pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n", + map->name, mname); + return -ENOTSUP; + } + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_mtype->type, &kern_mtype_id); @@ -1029,8 +1304,34 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, return -ENOTSUP; } - prog->attach_btf_id = kern_type_id; - prog->expected_attach_type = kern_member_idx; + if (mod_btf) + prog->attach_btf_obj_fd = mod_btf->fd; + + /* if we haven't yet processed this BPF program, record proper + * attach_btf_id and member_idx + */ + if (!prog->attach_btf_id) { + prog->attach_btf_id = kern_type_id; + prog->expected_attach_type = kern_member_idx; + } + + /* struct_ops BPF prog can be re-used between multiple + * .struct_ops & .struct_ops.link as long as it's the + * same struct_ops struct definition and the same + * function pointer field + */ + if (prog->attach_btf_id != kern_type_id) { + pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n", + map->name, mname, prog->name, prog->sec_name, prog->type, + prog->attach_btf_id, kern_type_id); + return -EINVAL; + } + if (prog->expected_attach_type != kern_member_idx) { + pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n", + map->name, mname, prog->name, prog->sec_name, prog->type, + prog->expected_attach_type, kern_member_idx); + return -EINVAL; + } st_ops->kern_func_off[i] = kern_data_off + kern_moff; @@ -1041,9 +1342,8 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, continue; } - msize = btf__resolve_size(btf, mtype_id); kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); - if (msize < 0 || kern_msize < 0 || msize != kern_msize) { + if (kern_msize < 0 || msize != kern_msize) { pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", map->name, mname, (ssize_t)msize, (ssize_t)kern_msize); @@ -1071,8 +1371,10 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) if (!bpf_map__is_struct_ops(map)) continue; - err = bpf_map__init_kern_struct_ops(map, obj->btf, - obj->btf_vmlinux); + if (!map->autocreate) + continue; + + err = bpf_map__init_kern_struct_ops(map); if (err) return err; } @@ -1080,7 +1382,8 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) return 0; } -static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) +static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, + int shndx, Elf_Data *data) { const struct btf_type *type, *datasec; const struct btf_var_secinfo *vsi; @@ -1091,15 +1394,15 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) struct bpf_map *map; __u32 i; - if (obj->efile.st_ops_shndx == -1) + if (shndx == -1) return 0; btf = obj->btf; - datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC, + datasec_id = btf__find_by_name_kind(btf, sec_name, BTF_KIND_DATASEC); if (datasec_id < 0) { pr_warn("struct_ops init: DATASEC %s not found\n", - STRUCT_OPS_SEC); + sec_name); return -EINVAL; } @@ -1112,7 +1415,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) type_id = btf__resolve_type(obj->btf, vsi->type); if (type_id < 0) { pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", - vsi->type, STRUCT_OPS_SEC); + vsi->type, sec_name); return -EINVAL; } @@ -1131,16 +1434,28 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) if (IS_ERR(map)) return PTR_ERR(map); - map->sec_idx = obj->efile.st_ops_shndx; + map->sec_idx = shndx; map->sec_offset = vsi->offset; map->name = strdup(var_name); if (!map->name) return -ENOMEM; + map->btf_value_type_id = type_id; + + /* Follow same convention as for programs autoload: + * SEC("?.struct_ops") means map is not created by default. + */ + if (sec_name[0] == '?') { + map->autocreate = false; + /* from now on forget there was ? in section name */ + sec_name++; + } map->def.type = BPF_MAP_TYPE_STRUCT_OPS; map->def.key_size = sizeof(int); map->def.value_size = type->size; map->def.max_entries = 1; + map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0; + map->autoattach = true; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -1153,17 +1468,15 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) return -ENOMEM; - if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) { + if (vsi->offset + type->size > data->d_size) { pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", - var_name, STRUCT_OPS_SEC); + var_name, sec_name); return -EINVAL; } memcpy(st_ops->data, - obj->efile.st_ops_data->d_buf + vsi->offset, + data->d_buf + vsi->offset, type->size); - st_ops->tname = tname; - st_ops->type = type; st_ops->type_id = type_id; pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", @@ -1173,12 +1486,34 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) return 0; } +static int bpf_object_init_struct_ops(struct bpf_object *obj) +{ + const char *sec_name; + int sec_idx, err; + + for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) { + struct elf_sec_desc *desc = &obj->efile.secs[sec_idx]; + + if (desc->sec_type != SEC_ST_OPS) + continue; + + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + if (!sec_name) + return -LIBBPF_ERRNO__FORMAT; + + err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data); + if (err) + return err; + } + + return 0; +} + static struct bpf_object *bpf_object__new(const char *path, const void *obj_buf, size_t obj_buf_sz, const char *obj_name) { - bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); struct bpf_object *obj; char *end; @@ -1208,17 +1543,13 @@ static struct bpf_object *bpf_object__new(const char *path, */ obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; - obj->efile.maps_shndx = -1; obj->efile.btf_maps_shndx = -1; - obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; + obj->arena_map_idx = -1; obj->kern_version = get_kernel_version(); - obj->loaded = false; + obj->state = OBJ_OPEN; - INIT_LIST_HEAD(&obj->list); - if (!strict) - list_add(&obj->list, &bpf_objects_list); return obj; } @@ -1227,12 +1558,11 @@ static void bpf_object__elf_finish(struct bpf_object *obj) if (!obj->efile.elf) return; - if (obj->efile.elf) { - elf_end(obj->efile.elf); - obj->efile.elf = NULL; - } + elf_end(obj->efile.elf); + obj->efile.elf = NULL; + obj->efile.ehdr = NULL; obj->efile.symbols = NULL; - obj->efile.st_ops_data = NULL; + obj->efile.arena_data = NULL; zfree(&obj->efile.secs); obj->efile.sec_cnt = 0; @@ -1253,19 +1583,13 @@ static int bpf_object__elf_init(struct bpf_object *obj) } if (obj->efile.obj_buf_sz > 0) { - /* - * obj_buf should have been validated by - * bpf_object__open_buffer(). - */ + /* obj_buf should have been validated by bpf_object__open_mem(). */ elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); } else { obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); if (obj->efile.fd < 0) { - char errmsg[STRERR_BUFSIZE], *cp; - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("elf: failed to open %s: %s\n", obj->path, cp); + pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err)); return err; } @@ -1299,6 +1623,16 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } + /* Validate ELF object endianness... */ + if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + err = -LIBBPF_ERRNO__ENDIAN; + pr_warn("elf: '%s' has unknown byte order\n", obj->path); + goto errout; + } + /* and save after bpf_object_open() frees ELF data */ + obj->byteorder = ehdr->e_ident[EI_DATA]; + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); @@ -1306,7 +1640,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } - /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + /* ELF is corrupted/truncated, avoid calling elf_strptr. */ if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); @@ -1327,24 +1661,24 @@ errout: return err; } -static int bpf_object__check_endianness(struct bpf_object *obj) +static bool is_native_endianness(struct bpf_object *obj) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - return 0; + return obj->byteorder == ELFDATA2LSB; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - return 0; + return obj->byteorder == ELFDATA2MSB; #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warn("elf: endianness mismatch in %s.\n", obj->path); - return -LIBBPF_ERRNO__ENDIAN; } static int bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) { + if (!data) { + pr_warn("invalid license section in %s\n", obj->path); + return -LIBBPF_ERRNO__FORMAT; + } /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't * go over allowed ELF data section buffer */ @@ -1358,7 +1692,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) { __u32 kver; - if (size != sizeof(kver)) { + if (!data || size != sizeof(kver)) { pr_warn("invalid kver section in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } @@ -1378,98 +1712,153 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) { - int ret = -ENOENT; Elf_Data *data; Elf_Scn *scn; - *size = 0; if (!name) return -EINVAL; scn = elf_sec_by_name(obj, name); data = elf_sec_data(obj, scn); if (data) { - ret = 0; /* found it */ *size = data->d_size; + return 0; /* found it */ } - return *size ? 0 : ret; + return -ENOENT; } -static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) +static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name) { Elf_Data *symbols = obj->efile.symbols; const char *sname; size_t si; - if (!name || !off) - return -EINVAL; - for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { Elf64_Sym *sym = elf_sym_by_idx(obj, si); - if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL || - ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + continue; + + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && + ELF64_ST_BIND(sym->st_info) != STB_WEAK) continue; sname = elf_sym_str(obj, sym->st_name); if (!sname) { pr_warn("failed to get sym name string for var %s\n", name); - return -EIO; - } - if (strcmp(name, sname) == 0) { - *off = sym->st_value; - return 0; + return ERR_PTR(-EIO); } + if (strcmp(name, sname) == 0) + return sym; } - return -ENOENT; + return ERR_PTR(-ENOENT); } -static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif +#ifndef MFD_NOEXEC_SEAL +#define MFD_NOEXEC_SEAL 0x0008U +#endif + +static int create_placeholder_fd(void) { - struct bpf_map *new_maps; - size_t new_cap; - int i; + unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL; + const char *name = "libbpf-placeholder-fd"; + int fd; - if (obj->nr_maps < obj->maps_cap) - return &obj->maps[obj->nr_maps++]; + fd = ensure_good_fd(sys_memfd_create(name, flags)); + if (fd >= 0) + return fd; + else if (errno != EINVAL) + return -errno; - new_cap = max((size_t)4, obj->maps_cap * 3 / 2); - new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps)); - if (!new_maps) { - pr_warn("alloc maps for object failed\n"); - return ERR_PTR(-ENOMEM); - } + /* Possibly running on kernel without MFD_NOEXEC_SEAL */ + fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL)); + if (fd < 0) + return -errno; + return fd; +} - obj->maps_cap = new_cap; - obj->maps = new_maps; +static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) +{ + struct bpf_map *map; + int err; - /* zero out new maps */ - memset(obj->maps + obj->nr_maps, 0, - (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps)); - /* - * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin) - * when failure (zclose won't close negative fd)). + err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap, + sizeof(*obj->maps), obj->nr_maps + 1); + if (err) + return ERR_PTR(err); + + map = &obj->maps[obj->nr_maps++]; + map->obj = obj; + /* Preallocate map FD without actually creating BPF map just yet. + * These map FD "placeholders" will be reused later without changing + * FD value when map is actually created in the kernel. + * + * This is useful to be able to perform BPF program relocations + * without having to create BPF maps before that step. This allows us + * to finalize and load BTF very late in BPF object's loading phase, + * right before BPF maps have to be created and BPF programs have to + * be loaded. By having these map FD placeholders we can perform all + * the sanitizations, relocations, and any other adjustments before we + * start creating actual BPF kernel objects (BTF, maps, progs). */ - for (i = obj->nr_maps; i < obj->maps_cap; i++) { - obj->maps[i].fd = -1; - obj->maps[i].inner_map_fd = -1; - } + map->fd = create_placeholder_fd(); + if (map->fd < 0) + return ERR_PTR(map->fd); + map->inner_map_fd = -1; + map->autocreate = true; - return &obj->maps[obj->nr_maps++]; + return map; } -static size_t bpf_map_mmap_sz(const struct bpf_map *map) +static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) { - long page_sz = sysconf(_SC_PAGE_SIZE); + const long page_sz = sysconf(_SC_PAGE_SIZE); size_t map_sz; - map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; + map_sz = (size_t)roundup(value_sz, 8) * max_entries; map_sz = roundup(map_sz, page_sz); return map_sz; } +static size_t bpf_map_mmap_sz(const struct bpf_map *map) +{ + const long page_sz = sysconf(_SC_PAGE_SIZE); + + switch (map->def.type) { + case BPF_MAP_TYPE_ARRAY: + return array_map_mmap_sz(map->def.value_size, map->def.max_entries); + case BPF_MAP_TYPE_ARENA: + return page_sz * map->def.max_entries; + default: + return 0; /* not supported */ + } +} + +static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) +{ + void *mmaped; + + if (!map->mmaped) + return -EINVAL; + + if (old_sz == new_sz) + return 0; + + mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (mmaped == MAP_FAILED) + return -errno; + + memcpy(mmaped, map->mmaped, min(old_sz, new_sz)); + munmap(map->mmaped, old_sz); + map->mmaped = mmaped; + return 0; +} + static char *internal_map_name(struct bpf_object *obj, const char *real_name) { char map_name[BPF_OBJ_NAME_LEN], *p; @@ -1520,7 +1909,7 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name) snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, sfx_len, real_name); - /* sanitise map name to characters allowed by kernel */ + /* sanities map name to characters allowed by kernel */ for (p = map_name; *p && p < map_name + sizeof(map_name); p++) if (!isalnum(*p) && *p != '_' && *p != '.') *p = '_'; @@ -1529,11 +1918,46 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name) } static int +map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map); + +/* Internal BPF map is mmap()'able only if at least one of corresponding + * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL + * variable and it's not marked as __hidden (which turns it into, effectively, + * a STATIC variable). + */ +static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) +{ + const struct btf_type *t, *vt; + struct btf_var_secinfo *vsi; + int i, n; + + if (!map->btf_value_type_id) + return false; + + t = btf__type_by_id(obj->btf, map->btf_value_type_id); + if (!btf_is_datasec(t)) + return false; + + vsi = btf_var_secinfos(t); + for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) { + vt = btf__type_by_id(obj->btf, vsi->type); + if (!btf_is_var(vt)) + continue; + + if (btf_var(vt)->linkage != BTF_VAR_STATIC) + return true; + } + + return false; +} + +static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, const char *real_name, int sec_idx, void *data, size_t data_sz) { struct bpf_map_def *def; struct bpf_map *map; + size_t mmap_sz; int err; map = bpf_object__add_map(obj); @@ -1557,19 +1981,24 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def->value_size = data_sz; def->max_entries = 1; def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG - ? BPF_F_RDONLY_PROG : 0; - def->map_flags |= BPF_F_MMAPABLE; + ? BPF_F_RDONLY_PROG : 0; + + /* failures are fine because of maps like .rodata.str1.1 */ + (void) map_fill_btf_type_info(obj, map); + + if (map_is_mmapable(obj, map)) + def->map_flags |= BPF_F_MMAPABLE; pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", map->name, map->sec_idx, map->sec_offset, def->map_flags); - map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + mmap_sz = bpf_map_mmap_sz(map); + map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (map->mmaped == MAP_FAILED) { err = -errno; map->mmaped = NULL; - pr_warn("failed to alloc map '%s' content buffer: %d\n", - map->name, err); + pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err)); zfree(&map->real_name); zfree(&map->name); return err; @@ -1594,6 +2023,10 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { sec_desc = &obj->efile.secs[sec_idx]; + /* Skip recognized sections with size 0. */ + if (!sec_desc->data || sec_desc->data->d_size == 0) + continue; + switch (sec_desc->sec_type) { case SEC_DATA: sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); @@ -1640,13 +2073,27 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, return NULL; } +static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj, + const void *name, int len) +{ + const char *ext_name; + int i; + + for (i = 0; i < obj->nr_extern; i++) { + ext_name = obj->externs[i].name; + if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0) + return &obj->externs[i]; + } + return NULL; +} + static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, char value) { switch (ext->kcfg.type) { case KCFG_BOOL: if (value == 'm') { - pr_warn("extern (kcfg) %s=%c should be tristate or char\n", + pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", ext->name, value); return -EINVAL; } @@ -1667,7 +2114,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, case KCFG_INT: case KCFG_CHAR_ARR: default: - pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n", + pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", ext->name, value); return -EINVAL; } @@ -1681,12 +2128,13 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, size_t len; if (ext->kcfg.type != KCFG_CHAR_ARR) { - pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value); + pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", + ext->name, value); return -EINVAL; } len = strlen(value); - if (value[len - 1] != '"') { + if (len < 2 || value[len - 1] != '"') { pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", ext->name, value); return -EINVAL; @@ -1695,7 +2143,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, /* strip quotes */ len -= 2; if (len >= ext->kcfg.sz) { - pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", ext->name, value, len, ext->kcfg.sz - 1); len = ext->kcfg.sz - 1; } @@ -1714,7 +2162,7 @@ static int parse_u64(const char *value, __u64 *res) *res = strtoull(value, &value_end, 0); if (errno) { err = -errno; - pr_warn("failed to parse '%s' as integer: %d\n", value, err); + pr_warn("failed to parse '%s': %s\n", value, errstr(err)); return err; } if (*value_end) { @@ -1752,23 +2200,38 @@ static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, __u64 value) { - if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { - pr_warn("extern (kcfg) %s=%llu should be integer\n", + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && + ext->kcfg.type != KCFG_BOOL) { + pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", ext->name, (unsigned long long)value); return -EINVAL; } + if (ext->kcfg.type == KCFG_BOOL && value > 1) { + pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", + ext->name, (unsigned long long)value); + return -EINVAL; + + } if (!is_kcfg_value_in_range(ext, value)) { - pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n", + pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", ext->name, (unsigned long long)value, ext->kcfg.sz); return -ERANGE; } switch (ext->kcfg.sz) { - case 1: *(__u8 *)ext_val = value; break; - case 2: *(__u16 *)ext_val = value; break; - case 4: *(__u32 *)ext_val = value; break; - case 8: *(__u64 *)ext_val = value; break; - default: - return -EINVAL; + case 1: + *(__u8 *)ext_val = value; + break; + case 2: + *(__u16 *)ext_val = value; + break; + case 4: + *(__u32 *)ext_val = value; + break; + case 8: + *(__u64 *)ext_val = value; + break; + default: + return -EINVAL; } ext->is_set = true; return 0; @@ -1822,16 +2285,19 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, /* assume integer */ err = parse_u64(value, &num); if (err) { - pr_warn("extern (kcfg) %s=%s should be integer\n", - ext->name, value); + pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); return err; } + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { + pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); + return -EINVAL; + } err = set_kcfg_value_num(ext, ext_val, num); break; } if (err) return err; - pr_debug("extern (kcfg) %s=%s\n", ext->name, value); + pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); return 0; } @@ -1850,9 +2316,9 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) return -ENAMETOOLONG; /* gzopen also accepts uncompressed files. */ - file = gzopen(buf, "r"); + file = gzopen(buf, "re"); if (!file) - file = gzopen("/proc/config.gz", "r"); + file = gzopen("/proc/config.gz", "re"); if (!file) { pr_warn("failed to open system Kconfig\n"); @@ -1862,8 +2328,8 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) while (gzgets(file, buf, sizeof(buf))) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing system Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing system Kconfig line '%s': %s\n", + buf, errstr(err)); goto out; } } @@ -1883,15 +2349,15 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj, file = fmemopen((void *)config, strlen(config), "r"); if (!file) { err = -errno; - pr_warn("failed to open in-memory Kconfig: %d\n", err); + pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err)); return err; } while (fgets(buf, sizeof(buf), file)) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing in-memory Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing in-memory Kconfig line '%s': %s\n", + buf, errstr(err)); break; } } @@ -1927,133 +2393,6 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj) return 0; } -static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) -{ - Elf_Data *symbols = obj->efile.symbols; - int i, map_def_sz = 0, nr_maps = 0, nr_syms; - Elf_Data *data = NULL; - Elf_Scn *scn; - - if (obj->efile.maps_shndx < 0) - return 0; - - if (!symbols) - return -EINVAL; - - scn = elf_sec_by_idx(obj, obj->efile.maps_shndx); - data = elf_sec_data(obj, scn); - if (!scn || !data) { - pr_warn("elf: failed to get legacy map definitions for %s\n", - obj->path); - return -EINVAL; - } - - /* - * Count number of maps. Each map has a name. - * Array of maps is not supported: only the first element is - * considered. - * - * TODO: Detect array of map and report error. - */ - nr_syms = symbols->d_size / sizeof(Elf64_Sym); - for (i = 0; i < nr_syms; i++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, i); - - if (sym->st_shndx != obj->efile.maps_shndx) - continue; - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) - continue; - nr_maps++; - } - /* Assume equally sized map definitions */ - pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n", - nr_maps, data->d_size, obj->path); - - if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { - pr_warn("elf: unable to determine legacy map definition size in %s\n", - obj->path); - return -EINVAL; - } - map_def_sz = data->d_size / nr_maps; - - /* Fill obj->maps using data in "maps" section. */ - for (i = 0; i < nr_syms; i++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, i); - const char *map_name; - struct bpf_map_def *def; - struct bpf_map *map; - - if (sym->st_shndx != obj->efile.maps_shndx) - continue; - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) - continue; - - map = bpf_object__add_map(obj); - if (IS_ERR(map)) - return PTR_ERR(map); - - map_name = elf_sym_str(obj, sym->st_name); - if (!map_name) { - pr_warn("failed to get map #%d name sym string for obj %s\n", - i, obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - - if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { - pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); - return -ENOTSUP; - } - - map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->sec_idx = sym->st_shndx; - map->sec_offset = sym->st_value; - pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", - map_name, map->sec_idx, map->sec_offset); - if (sym->st_value + map_def_sz > data->d_size) { - pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", - obj->path, map_name); - return -EINVAL; - } - - map->name = strdup(map_name); - if (!map->name) { - pr_warn("map '%s': failed to alloc map name\n", map_name); - return -ENOMEM; - } - pr_debug("map %d is \"%s\"\n", i, map->name); - def = (struct bpf_map_def *)(data->d_buf + sym->st_value); - /* - * If the definition of the map in the object file fits in - * bpf_map_def, copy it. Any extra fields in our version - * of bpf_map_def will default to zero as a result of the - * calloc above. - */ - if (map_def_sz <= sizeof(struct bpf_map_def)) { - memcpy(&map->def, def, map_def_sz); - } else { - /* - * Here the map structure being read is bigger than what - * we expect, truncate if the excess bits are all zero. - * If they are not zero, reject this map as - * incompatible. - */ - char *b; - - for (b = ((char *)def) + sizeof(struct bpf_map_def); - b < ((char *)def) + map_def_sz; b++) { - if (*b != 0) { - pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n", - obj->path, map_name); - if (strict) - return -EINVAL; - } - } - memcpy(&map->def, def, sizeof(struct bpf_map_def)); - } - } - return 0; -} - const struct btf_type * skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) { @@ -2107,6 +2446,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_FLOAT: return "float"; case BTF_KIND_DECL_TAG: return "decl_tag"; case BTF_KIND_TYPE_TAG: return "type_tag"; + case BTF_KIND_ENUM64: return "enum64"; default: return "unknown"; } } @@ -2153,23 +2493,81 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return true; } -static int build_map_pin_path(struct bpf_map *map, const char *path) +static bool get_map_field_long(const char *map_name, const struct btf *btf, + const struct btf_member *m, __u64 *res) { - char buf[PATH_MAX]; - int len; + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); + const char *name = btf__name_by_offset(btf, m->name_off); - if (!path) - path = "/sys/fs/bpf"; + if (btf_is_ptr(t)) { + __u32 res32; + bool ret; + + ret = get_map_field_int(map_name, btf, m, &res32); + if (ret) + *res = (__u64)res32; + return ret; + } + + if (!btf_is_enum(t) && !btf_is_enum64(t)) { + pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n", + map_name, name, btf_kind_str(t)); + return false; + } + + if (btf_vlen(t) != 1) { + pr_warn("map '%s': attr '%s': invalid __ulong\n", + map_name, name); + return false; + } + + if (btf_is_enum(t)) { + const struct btf_enum *e = btf_enum(t); + + *res = e->val; + } else { + const struct btf_enum64 *e = btf_enum64(t); - len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); + *res = btf_enum64_value(e); + } + return true; +} + +static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) +{ + int len; + + len = snprintf(buf, buf_sz, "%s/%s", path, name); if (len < 0) return -EINVAL; - else if (len >= PATH_MAX) + if (len >= buf_sz) return -ENAMETOOLONG; + return 0; +} + +static int build_map_pin_path(struct bpf_map *map, const char *path) +{ + char buf[PATH_MAX]; + int err; + + if (!path) + path = BPF_FS_DEFAULT_PATH; + + err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); + if (err) + return err; + return bpf_map__set_pin_path(map, buf); } +/* should match definition in bpf_helpers.h */ +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + int parse_btf_map_def(const char *map_name, struct btf *btf, const struct btf_type *def_t, bool strict, struct btf_map_def *map_def, struct btf_map_def *inner_def) @@ -2368,9 +2766,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; } else if (strcmp(name, "map_extra") == 0) { - __u32 map_extra; + __u64 map_extra; - if (!get_map_field_int(map_name, btf, m, &map_extra)) + if (!get_map_field_long(map_name, btf, m, &map_extra)) return -EINVAL; map_def->map_extra = map_extra; map_def->parts |= MAP_DEF_MAP_EXTRA; @@ -2391,6 +2789,43 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, return 0; } +static size_t adjust_ringbuf_sz(size_t sz) +{ + __u32 page_sz = sysconf(_SC_PAGE_SIZE); + __u32 mul; + + /* if user forgot to set any size, make sure they see error */ + if (sz == 0) + return 0; + /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be + * a power-of-2 multiple of kernel's page size. If user diligently + * satisified these conditions, pass the size through. + */ + if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) + return sz; + + /* Otherwise find closest (page_sz * power_of_2) product bigger than + * user-set size to satisfy both user size request and kernel + * requirements and substitute correct max_entries for map creation. + */ + for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { + if (mul * page_sz > sz) + return mul * page_sz; + } + + /* if it's impossible to satisfy the conditions (i.e., user size is + * very close to UINT_MAX but is not a power-of-2 multiple of + * page_size) then just return original size and let kernel reject it + */ + return sz; +} + +static bool map_is_ringbuf(const struct bpf_map *map) +{ + return map->def.type == BPF_MAP_TYPE_RINGBUF || + map->def.type == BPF_MAP_TYPE_USER_RINGBUF; +} + static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) { map->def.type = def->map_type; @@ -2404,6 +2839,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->btf_key_type_id = def->key_type_id; map->btf_value_type_id = def->value_type_id; + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map_is_ringbuf(map)) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + if (def->parts & MAP_DEF_MAP_TYPE) pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); @@ -2528,7 +2967,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, map->inner_map = calloc(1, sizeof(*map->inner_map)); if (!map->inner_map) return -ENOMEM; - map->inner_map->fd = -1; + map->inner_map->fd = create_placeholder_fd(); + if (map->inner_map->fd < 0) + return map->inner_map->fd; map->inner_map->sec_idx = sec_idx; map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); if (!map->inner_map->name) @@ -2538,6 +2979,36 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, fill_map_from_def(map->inner_map, &inner_def); } + err = map_fill_btf_type_info(obj, map); + if (err) + return err; + + return 0; +} + +static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, + const char *sec_name, int sec_idx, + void *data, size_t data_sz) +{ + const long page_sz = sysconf(_SC_PAGE_SIZE); + size_t mmap_sz; + + mmap_sz = bpf_map_mmap_sz(map); + if (roundup(data_sz, page_sz) > mmap_sz) { + pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", + sec_name, mmap_sz, data_sz); + return -E2BIG; + } + + obj->arena_data = malloc(data_sz); + if (!obj->arena_data) + return -ENOMEM; + memcpy(obj->arena_data, data, data_sz); + obj->arena_data_sz = data_sz; + + /* make bpf_map__init_value() work for ARENA maps */ + map->mmaped = obj->arena_data; + return 0; } @@ -2556,7 +3027,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); data = elf_sec_data(obj, scn); - if (!scn || !data) { + if (!data) { pr_warn("elf: failed to get %s map definitions for %s\n", MAPS_ELF_SEC, obj->path); return -EINVAL; @@ -2590,6 +3061,33 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return err; } + for (i = 0; i < obj->nr_maps; i++) { + struct bpf_map *map = &obj->maps[i]; + + if (map->def.type != BPF_MAP_TYPE_ARENA) + continue; + + if (obj->arena_map_idx >= 0) { + pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", + map->name, obj->maps[obj->arena_map_idx].name); + return -EINVAL; + } + obj->arena_map_idx = i; + + if (obj->efile.arena_data) { + err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, + obj->efile.arena_data->d_buf, + obj->efile.arena_data->d_size); + if (err) + return err; + } + } + if (obj->efile.arena_data && obj->arena_map_idx < 0) { + pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", + ARENA_SEC); + return -ENOENT; + } + return 0; } @@ -2598,16 +3096,15 @@ static int bpf_object__init_maps(struct bpf_object *obj, { const char *pin_root_path; bool strict; - int err; + int err = 0; strict = !OPTS_GET(opts, relaxed_maps, false); pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - err = bpf_object__init_user_maps(obj, strict); - err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); + err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); err = err ?: bpf_object__init_global_data_maps(obj); err = err ?: bpf_object__init_kconfig_map(obj); - err = err ?: bpf_object__init_struct_ops_maps(obj); + err = err ?: bpf_object_init_struct_ops(obj); return err; } @@ -2623,6 +3120,11 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) return sh->sh_flags & SHF_EXECINSTR; } +static bool starts_with_qmark(const char *s) +{ + return s && s[0] == '?'; +} + static bool btf_needs_sanitization(struct bpf_object *obj) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); @@ -2631,12 +3133,14 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); return !has_func || !has_datasec || !has_func_global || !has_float || - !has_decl_tag || !has_type_tag; + !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; } -static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) +static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); @@ -2644,6 +3148,9 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); + int enum64_placeholder_id = 0; struct btf_type *t; int i, j, vlen; @@ -2669,7 +3176,7 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) name = (char *)btf__name_by_offset(btf, t->name_off); while (*name) { - if (*name == '.') + if (*name == '.' || *name == '?') *name = '_'; name++; } @@ -2684,6 +3191,14 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) vt = (void *)btf__type_by_id(btf, v->type); m->name_off = vt->name_off; } + } else if (!has_qmark_datasec && btf_is_datasec(t) && + starts_with_qmark(btf__name_by_offset(btf, t->name_off))) { + /* replace '?' prefix with '_' for DATASEC names */ + char *name; + + name = (char *)btf__name_by_offset(btf, t->name_off); + if (name[0] == '?') + name[0] = '_'; } else if (!has_func && btf_is_func_proto(t)) { /* replace FUNC_PROTO with ENUM */ vlen = btf_vlen(t); @@ -2706,20 +3221,44 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) /* replace TYPE_TAG with a CONST */ t->name_off = 0; t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); + } else if (!has_enum64 && btf_is_enum(t)) { + /* clear the kflag */ + t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); + } else if (!has_enum64 && btf_is_enum64(t)) { + /* replace ENUM64 with a union */ + struct btf_member *m; + + if (enum64_placeholder_id == 0) { + enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); + if (enum64_placeholder_id < 0) + return enum64_placeholder_id; + + t = (struct btf_type *)btf__type_by_id(btf, i); + } + + m = btf_members(t); + vlen = btf_vlen(t); + t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); + for (j = 0; j < vlen; j++, m++) { + m->type = enum64_placeholder_id; + m->offset = 0; + } } } + + return 0; } static bool libbpf_needs_btf(const struct bpf_object *obj) { return obj->efile.btf_maps_shndx >= 0 || - obj->efile.st_ops_shndx >= 0 || + obj->efile.has_st_ops || obj->nr_extern > 0; } static bool kernel_needs_btf(const struct bpf_object *obj) { - return obj->efile.st_ops_shndx >= 0; + return obj->efile.has_st_ops; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -2733,13 +3272,16 @@ static int bpf_object__init_btf(struct bpf_object *obj, err = libbpf_get_error(obj->btf); if (err) { obj->btf = NULL; - pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err)); goto out; } /* enforce 8-byte pointers for BPF-targeted BTFs */ btf__set_pointer_size(obj->btf, 8); } if (btf_ext_data) { + struct btf_ext_info *ext_segs[3]; + int seg_num, sec_num; + if (!obj->btf) { pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", BTF_EXT_ELF_SEC, BTF_ELF_SEC); @@ -2748,11 +3290,48 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); err = libbpf_get_error(obj->btf_ext); if (err) { - pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n", - BTF_EXT_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n", + BTF_EXT_ELF_SEC, errstr(err)); obj->btf_ext = NULL; goto out; } + + /* setup .BTF.ext to ELF section mapping */ + ext_segs[0] = &obj->btf_ext->func_info; + ext_segs[1] = &obj->btf_ext->line_info; + ext_segs[2] = &obj->btf_ext->core_relo_info; + for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { + struct btf_ext_info *seg = ext_segs[seg_num]; + const struct btf_ext_info_sec *sec; + const char *sec_name; + Elf_Scn *scn; + + if (seg->sec_cnt == 0) + continue; + + seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); + if (!seg->sec_idxs) { + err = -ENOMEM; + goto out; + } + + sec_num = 0; + for_each_btf_ext_sec(seg, sec) { + /* preventively increment index to avoid doing + * this before every continue below + */ + sec_num++; + + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) + continue; + scn = elf_sec_by_name(obj, sec_name); + if (!scn) + continue; + + seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); + } + } } out: if (err && libbpf_needs_btf(obj)) { @@ -2773,57 +3352,89 @@ static int compare_vsi_off(const void *_a, const void *_b) static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, struct btf_type *t) { - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; + __u32 size = 0, i, vars = btf_vlen(t); + const char *sec_name = btf__name_by_offset(btf, t->name_off); struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; + bool fixup_offsets = false; + int err; - if (!name) { + if (!sec_name) { pr_debug("No name found in string section for DATASEC kind.\n"); return -ENOENT; } - /* .extern datasec size and var offsets were set correctly during - * extern collection step, so just skip straight to sorting variables + /* Extern-backing datasecs (.ksyms, .kconfig) have their size and + * variable offsets set at the previous step. Further, not every + * extern BTF VAR has corresponding ELF symbol preserved, so we skip + * all fixups altogether for such sections and go straight to sorting + * VARs within their DATASEC. */ - if (t->size) + if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) goto sort_vars; - ret = find_elf_sec_sz(obj, name, &size); - if (ret || !size || (t->size && t->size != size)) { - pr_debug("Invalid size for section %s: %u bytes\n", name, size); - return -ENOENT; - } + /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to + * fix this up. But BPF static linker already fixes this up and fills + * all the sizes and offsets during static linking. So this step has + * to be optional. But the STV_HIDDEN handling is non-optional for any + * non-extern DATASEC, so the variable fixup loop below handles both + * functions at the same time, paying the cost of BTF VAR <-> ELF + * symbol matching just once. + */ + if (t->size == 0) { + err = find_elf_sec_sz(obj, sec_name, &size); + if (err || !size) { + pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n", + sec_name, size, errstr(err)); + return -ENOENT; + } - t->size = size; + t->size = size; + fixup_offsets = true; + } for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + const struct btf_type *t_var; + struct btf_var *var; + const char *var_name; + Elf64_Sym *sym; + t_var = btf__type_by_id(btf, vsi->type); if (!t_var || !btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); + pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); return -EINVAL; } var = btf_var(t_var); - if (var->linkage == BTF_VAR_STATIC) + if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) continue; - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); + var_name = btf__name_by_offset(btf, t_var->name_off); + if (!var_name) { + pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", + sec_name, i); return -ENOENT; } - ret = find_elf_var_offset(obj, name, &off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); + sym = find_elf_var_sym(obj, var_name); + if (IS_ERR(sym)) { + pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", + sec_name, var_name); return -ENOENT; } - vsi->offset = off; + if (fixup_offsets) + vsi->offset = sym->st_value; + + /* if variable is a global/weak symbol, but has restricted + * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR + * as static. This follows similar logic for functions (BPF + * subprogs) and influences libbpf's further decisions about + * whether to make global data BPF array maps as + * BPF_F_MMAPABLE. + */ + if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) + var->linkage = BTF_VAR_STATIC; } sort_vars: @@ -2831,13 +3442,16 @@ sort_vars: return 0; } -static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) +static int bpf_object_fixup_btf(struct bpf_object *obj) { - int err = 0; - __u32 i, n = btf__type_cnt(btf); + int i, n, err = 0; + if (!obj->btf) + return 0; + + n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { - struct btf_type *t = btf_type_by_id(btf, i); + struct btf_type *t = btf_type_by_id(obj->btf, i); /* Loader needs to fix up some of the things compiler * couldn't get its hands on while emitting BTF. This @@ -2845,33 +3459,12 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) * the info from the ELF itself for this purpose. */ if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); + err = btf_fixup_datasec(obj, obj->btf, t); if (err) - break; + return err; } } - return libbpf_err(err); -} - -int btf__finalize_data(struct bpf_object *obj, struct btf *btf) -{ - return btf_finalize_data(obj, btf); -} - -static int bpf_object__finalize_btf(struct bpf_object *obj) -{ - int err; - - if (!obj->btf) - return 0; - - err = btf_finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - return err; - } - return 0; } @@ -2890,9 +3483,15 @@ static bool prog_needs_vmlinux_btf(struct bpf_program *prog) return false; } +static bool map_needs_vmlinux_btf(struct bpf_map *map) +{ + return bpf_map__is_struct_ops(map); +} + static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) { struct bpf_program *prog; + struct bpf_map *map; int i; /* CO-RE relocations need kernel BTF, only when btf_custom_path @@ -2911,12 +3510,17 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) } bpf_object__for_each_program(prog, obj) { - if (!prog->load) + if (!prog->autoload) continue; if (prog_needs_vmlinux_btf(prog)) return true; } + bpf_object__for_each_map(map, obj) { + if (map_needs_vmlinux_btf(map)) + return true; + } + return false; } @@ -2934,7 +3538,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) obj->btf_vmlinux = btf__load_vmlinux_btf(); err = libbpf_get_error(obj->btf_vmlinux); if (err) { - pr_warn("Error loading vmlinux BTF: %d\n", err); + pr_warn("Error loading vmlinux BTF: %s\n", errstr(err)); obj->btf_vmlinux = NULL; return err; } @@ -3005,7 +3609,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) /* enforce 8-byte pointers for BPF-targeted BTFs */ btf__set_pointer_size(obj->btf, 8); - bpf_object__sanitize_btf(obj, kern_btf); + err = bpf_object__sanitize_btf(obj, kern_btf); + if (err) + return err; } if (obj->gen_loader) { @@ -3022,7 +3628,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0); + obj->log_level ? 1 : 0, obj->token_fd); } if (sanitize) { if (!err) { @@ -3035,11 +3641,14 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) report: if (err) { btf_mandatory = kernel_needs_btf(obj); - pr_warn("Error loading .BTF into kernel: %d. %s\n", err, - btf_mandatory ? "BTF is mandatory, can't proceed." - : "BTF is optional, ignoring."); - if (!btf_mandatory) + if (btf_mandatory) { + pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n", + errstr(err)); + } else { + pr_info("Error loading .BTF into kernel: %s. BTF is optional, ignoring.\n", + errstr(err)); err = 0; + } } return err; } @@ -3242,10 +3851,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf64_Shdr *sh; /* ELF section indices are 0-based, but sec #0 is special "invalid" - * section. e_shnum does include sec #0, so e_shnum is the necessary - * size of an array to keep all the sections. + * section. Since section count retrieved by elf_getshdrnum() does + * include sec #0, it is already the necessary size of an array to keep + * all the sections. */ - obj->efile.sec_cnt = obj->efile.ehdr->e_shnum; + if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) { + pr_warn("elf: failed to get the number of sections for %s: %s\n", + obj->path, elf_errmsg(-1)); + return -LIBBPF_ERRNO__FORMAT; + } obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); if (!obj->efile.secs) return -ENOMEM; @@ -3317,7 +3931,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (err) return err; } else if (strcmp(name, "maps") == 0) { - obj->efile.maps_shndx = idx; + pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); + return -ENOTSUP; } else if (strcmp(name, MAPS_ELF_SEC) == 0) { obj->efile.btf_maps_shndx = idx; } else if (strcmp(name, BTF_ELF_SEC) == 0) { @@ -3347,9 +3962,24 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RODATA; sec_desc->shdr = sh; sec_desc->data = data; - } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { - obj->efile.st_ops_data = data; - obj->efile.st_ops_shndx = idx; + } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || + strcmp(name, STRUCT_OPS_LINK_SEC) == 0 || + strcmp(name, "?" STRUCT_OPS_SEC) == 0 || + strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) { + sec_desc->sec_type = SEC_ST_OPS; + sec_desc->shdr = sh; + sec_desc->data = data; + obj->efile.has_st_ops = true; + } else if (strcmp(name, ARENA_SEC) == 0) { + obj->efile.arena_data = data; + obj->efile.arena_data_shndx = idx; + } else if (strcmp(name, JUMPTABLES_SEC) == 0) { + obj->jumptables_data = malloc(data->d_size); + if (!obj->jumptables_data) + return -ENOMEM; + memcpy(obj->jumptables_data, data->d_buf, data->d_size); + obj->jumptables_data_sz = data->d_size; + obj->efile.jumptables_data_shndx = idx; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -3364,6 +3994,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) /* Only do relo for section with exec instructions */ if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && + strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && + strcmp(name, ".rel?" STRUCT_OPS_SEC) && + strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", idx, name, targ_sec_idx, @@ -3374,7 +4007,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RELO; sec_desc->shdr = sh; sec_desc->data = data; - } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { + } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || + str_has_pfx(name, BSS_SEC "."))) { sec_desc->sec_type = SEC_BSS; sec_desc->shdr = sh; sec_desc->data = data; @@ -3389,8 +4023,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return -LIBBPF_ERRNO__FORMAT; } + /* change BPF program insns to native endianness for introspection */ + if (!is_native_endianness(obj)) + bpf_object_bswap_progs(obj); + /* sort BPF programs by section name and in-section instruction offset - * for faster search */ + * for faster search + */ if (obj->nr_programs) qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); @@ -3420,7 +4059,7 @@ static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) return true; /* global function */ - return bind == STB_GLOBAL && type == STT_FUNC; + return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC; } static int find_extern_btf_id(const struct btf *btf, const char *ext_name) @@ -3512,6 +4151,10 @@ static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, if (strcmp(name, "libbpf_tristate")) return KCFG_UNKNOWN; return KCFG_TRISTATE; + case BTF_KIND_ENUM64: + if (strcmp(name, "libbpf_tristate")) + return KCFG_UNKNOWN; + return KCFG_TRISTATE; case BTF_KIND_ARRAY: if (btf_array(t)->nelems == 0) return KCFG_UNKNOWN; @@ -3606,6 +4249,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) struct extern_desc *ext; int i, n, off, dummy_var_btf_id; const char *ext_name, *sec_name; + size_t ext_essent_len; Elf_Scn *scn; Elf64_Shdr *sh; @@ -3651,10 +4295,20 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return ext->btf_id; } t = btf__type_by_id(obj->btf, ext->btf_id); - ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); + if (!ext->name) + return -ENOMEM; ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; + ext_essent_len = bpf_core_essential_name_len(ext->name); + ext->essent_name = NULL; + if (ext_essent_len != strlen(ext->name)) { + ext->essent_name = strndup(ext->name, ext_essent_len); + if (!ext->essent_name) + return -ENOMEM; + } + ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); if (ext->sec_btf_id <= 0) { pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", @@ -3685,9 +4339,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -EINVAL; } ext->kcfg.type = find_kcfg_type(obj->btf, t->type, - &ext->kcfg.is_signed); + &ext->kcfg.is_signed); if (ext->kcfg.type == KCFG_UNKNOWN) { - pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name); + pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { @@ -3809,35 +4463,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return 0; } -struct bpf_program * -bpf_object__find_program_by_title(const struct bpf_object *obj, - const char *title) +static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) { - struct bpf_program *pos; - - bpf_object__for_each_program(pos, obj) { - if (pos->sec_name && !strcmp(pos->sec_name, title)) - return pos; - } - return errno = ENOENT, NULL; -} - -static bool prog_is_subprog(const struct bpf_object *obj, - const struct bpf_program *prog) -{ - /* For legacy reasons, libbpf supports an entry-point BPF programs - * without SEC() attribute, i.e., those in the .text section. But if - * there are 2 or more such programs in the .text section, they all - * must be subprograms called from entry-point BPF programs in - * designated SEC()'tions, otherwise there is no way to distinguish - * which of those programs should be loaded vs which are a subprogram. - * Similarly, if there is a function/program in .text and at least one - * other BPF program with custom SEC() attribute, then we just assume - * .text programs are subprograms (even if they are not called from - * other programs), because libbpf never explicitly supported mixing - * SEC()-designated BPF programs and .text entry-point BPF programs. - */ - return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; + return prog->sec_idx == obj->efile.text_shndx; } struct bpf_program * @@ -3871,8 +4499,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj, static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, int shndx) { - return shndx == obj->efile.maps_shndx || - shndx == obj->efile.btf_maps_shndx; + return shndx == obj->efile.btf_maps_shndx; } static enum libbpf_map_type @@ -3893,6 +4520,44 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) } } +static int bpf_prog_compute_hash(struct bpf_program *prog) +{ + struct bpf_insn *purged; + int i, err = 0; + + purged = calloc(prog->insns_cnt, BPF_INSN_SZ); + if (!purged) + return -ENOMEM; + + /* If relocations have been done, the map_fd needs to be + * discarded for the digest calculation. + */ + for (i = 0; i < prog->insns_cnt; i++) { + purged[i] = prog->insns[i]; + if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) && + (purged[i].src_reg == BPF_PSEUDO_MAP_FD || + purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) { + purged[i].imm = 0; + i++; + if (i >= prog->insns_cnt || + prog->insns[i].code != 0 || + prog->insns[i].dst_reg != 0 || + prog->insns[i].src_reg != 0 || + prog->insns[i].off != 0) { + err = -EINVAL; + goto out; + } + purged[i] = prog->insns[i]; + purged[i].imm = 0; + } + } + libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn), + prog->hash); +out: + free(purged); + return err; +} + static int bpf_program__record_reloc(struct bpf_program *prog, struct reloc_desc *reloc_desc, __u32 insn_idx, const char *sym_name, @@ -3930,11 +4595,11 @@ static int bpf_program__record_reloc(struct bpf_program *prog, pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", prog->name, i, ext->name, ext->sym_idx, insn_idx); if (insn->code == (BPF_JMP | BPF_CALL)) - reloc_desc->type = RELO_EXTERN_FUNC; + reloc_desc->type = RELO_EXTERN_CALL; else - reloc_desc->type = RELO_EXTERN_VAR; + reloc_desc->type = RELO_EXTERN_LD64; reloc_desc->insn_idx = insn_idx; - reloc_desc->sym_off = i; /* sym_off stores extern index */ + reloc_desc->ext_idx = i; return 0; } @@ -3988,6 +4653,35 @@ static int bpf_program__record_reloc(struct bpf_program *prog, type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); + /* arena data relocation */ + if (shdr_idx == obj->efile.arena_data_shndx) { + if (obj->arena_map_idx < 0) { + pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n", + prog->name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } + reloc_desc->type = RELO_DATA; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = obj->arena_map_idx; + reloc_desc->sym_off = sym->st_value; + + map = &obj->maps[obj->arena_map_idx]; + pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n", + prog->name, obj->arena_map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); + return 0; + } + + /* jump table data relocation */ + if (shdr_idx == obj->efile.jumptables_data_shndx) { + reloc_desc->type = RELO_INSN_ARRAY; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = -1; + reloc_desc->sym_off = sym->st_value; + reloc_desc->sym_size = sym->st_size; + return 0; + } + /* generic map reference relocation */ if (type == LIBBPF_MAP_UNSPEC) { if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { @@ -4058,6 +4752,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, int l = 0, r = obj->nr_programs - 1, m; struct bpf_program *prog; + if (!obj->nr_programs) + return NULL; + while (l < r) { m = l + (r - l + 1) / 2; prog = &obj->programs[m]; @@ -4097,6 +4794,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); + if (!scn_data) + return -LIBBPF_ERRNO__FORMAT; relo_sec_name = elf_sec_str(obj, shdr->sh_name); sec_name = elf_sec_name(obj, scn); @@ -4175,37 +4874,33 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat return 0; } -static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) +static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) { - struct bpf_map_def *def = &map->def; - __u32 key_type_id = 0, value_type_id = 0; - int ret; + int id; + + if (!obj->btf) + return -ENOENT; /* if it's BTF-defined map, we don't need to search for type IDs. * For struct_ops map, it does not need btf_key_type_id and * btf_value_type_id. */ - if (map->sec_idx == obj->efile.btf_maps_shndx || - bpf_map__is_struct_ops(map)) + if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) return 0; - if (!bpf_map__is_internal(map)) { - ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, - def->value_size, &key_type_id, - &value_type_id); - } else { - /* - * LLVM annotates global data differently in BTF, that is, - * only as '.data', '.bss' or '.rodata'. - */ - ret = btf__find_by_name(obj->btf, map->real_name); - } - if (ret < 0) - return ret; + /* + * LLVM annotates global data differently in BTF, that is, + * only as '.data', '.bss' or '.rodata'. + */ + if (!bpf_map__is_internal(map)) + return -ENOENT; - map->btf_key_type_id = key_type_id; - map->btf_value_type_id = bpf_map__is_internal(map) ? - ret : value_type_id; + id = btf__find_by_name(obj->btf, map->real_name); + if (id < 0) + return id; + + map->btf_key_type_id = 0; + map->btf_value_type_id = id; return 0; } @@ -4219,11 +4914,11 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); memset(info, 0, sizeof(*info)); - fp = fopen(file, "r"); + fp = fopen(file, "re"); if (!fp) { err = -errno; - pr_warn("failed to open %s: %d. No procfs support?\n", file, - err); + pr_warn("failed to open %s: %s. No procfs support?\n", file, + errstr(err)); return err; } @@ -4245,43 +4940,79 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) return 0; } +static bool map_is_created(const struct bpf_map *map) +{ + return map->obj->state >= OBJ_PREPARED || map->reused; +} + +bool bpf_map__autocreate(const struct bpf_map *map) +{ + return map->autocreate; +} + +int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) +{ + if (map_is_created(map)) + return libbpf_err(-EBUSY); + + map->autocreate = autocreate; + return 0; +} + +int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach) +{ + if (!bpf_map__is_struct_ops(map)) + return libbpf_err(-EINVAL); + + map->autoattach = autoattach; + return 0; +} + +bool bpf_map__autoattach(const struct bpf_map *map) +{ + return map->autoattach; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { - struct bpf_map_info info = {}; - __u32 len = sizeof(info); + struct bpf_map_info info; + __u32 len = sizeof(info), name_len; int new_fd, err; char *new_name; - err = bpf_obj_get_info_by_fd(fd, &info, &len); + memset(&info, 0, len); + err = bpf_map_get_info_by_fd(fd, &info, &len); if (err && errno == EINVAL) err = bpf_get_map_info_from_fdinfo(fd, &info); if (err) return libbpf_err(err); - new_name = strdup(info.name); + name_len = strlen(info.name); + if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) + new_name = strdup(map->name); + else + new_name = strdup(info.name); + if (!new_name) return libbpf_err(-errno); - new_fd = open("/", O_RDONLY | O_CLOEXEC); + /* + * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. + * This is similar to what we do in ensure_good_fd(), but without + * closing original FD. + */ + new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); if (new_fd < 0) { err = -errno; goto err_free_new_name; } - new_fd = dup3(fd, new_fd, O_CLOEXEC); - if (new_fd < 0) { - err = -errno; - goto err_close_new_fd; - } + err = reuse_fd(map->fd, new_fd); + if (err) + goto err_free_new_name; - err = zclose(map->fd); - if (err) { - err = -errno; - goto err_close_new_fd; - } free(map->name); - map->fd = new_fd; map->name = new_name; map->def.type = info.type; map->def.key_size = info.key_size; @@ -4295,8 +5026,6 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) return 0; -err_close_new_fd: - close(new_fd); err_free_new_name: free(new_name); return libbpf_err(err); @@ -4317,48 +5046,99 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map) int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); + map->def.max_entries = max_entries; + + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map_is_ringbuf(map)) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + return 0; } -int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +static int bpf_object_prepare_token(struct bpf_object *obj) { - if (!map || !max_entries) - return libbpf_err(-EINVAL); + const char *bpffs_path; + int bpffs_fd = -1, token_fd, err; + bool mandatory; + enum libbpf_print_level level; + + /* token is explicitly prevented */ + if (obj->token_path && obj->token_path[0] == '\0') { + pr_debug("object '%s': token is prevented, skipping...\n", obj->name); + return 0; + } - return bpf_map__set_max_entries(map, max_entries); + mandatory = obj->token_path != NULL; + level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; + + bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; + bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); + if (bpffs_fd < 0) { + err = -errno; + __pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n", + obj->name, errstr(err), bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? err : 0; + } + + token_fd = bpf_token_create(bpffs_fd, 0); + close(bpffs_fd); + if (token_fd < 0) { + if (!mandatory && token_fd == -ENOENT) { + pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", + obj->name, bpffs_path); + return 0; + } + __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", + obj->name, token_fd, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? token_fd : 0; + } + + obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); + if (!obj->feat_cache) { + close(token_fd); + return -ENOMEM; + } + + obj->token_fd = token_fd; + obj->feat_cache->token_fd = token_fd; + + return 0; } static int bpf_object__probe_loading(struct bpf_object *obj) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = obj->token_fd, + .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0, + ); if (obj->gen_loader) return 0; ret = bump_rlimit_memlock(); if (ret) - pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); + pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n", + errstr(ret)); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " - "program. Make sure your kernel supports BPF " - "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " - "set to big enough value.\n", __func__, cp, ret); + pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n", + __func__, errstr(ret)); return -ret; } close(ret); @@ -4366,403 +5146,46 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, insn_cnt = ARRAY_SIZE(insns); - - /* make sure loading with name works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL); - return probe_fd(ret); -} - -static int probe_kern_global_data(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func_global(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_datasec(void) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_float(void) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_array_mmap(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); -} - -static int probe_prog_bind_map(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_obj_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -typedef int (*feature_probe_fn)(void); - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; - enum kern_feature_result res; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, -}; - bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - - if (obj && obj->gen_loader) + if (obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ return true; - if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { - ret = feat->probe(); - if (ret > 0) { - WRITE_ONCE(feat->res, FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(feat->res, FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(feat->res, FEAT_MISSING); - } - } + if (obj->token_fd) + return feat_supported(obj->feat_cache, feat_id); - return READ_ONCE(feat->res) == FEAT_SUPPORTED; + return feat_supported(NULL, feat_id); } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { - struct bpf_map_info map_info = {}; - char msg[STRERR_BUFSIZE]; - __u32 map_info_len; + struct bpf_map_info map_info; + __u32 map_info_len = sizeof(map_info); int err; - map_info_len = sizeof(map_info); - - err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); + memset(&map_info, 0, map_info_len); + err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len); if (err && errno == EINVAL) err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); if (err) { pr_warn("failed to get map info for map FD %d: %s\n", map_fd, - libbpf_strerror_r(errno, msg, sizeof(msg))); + errstr(err)); return false; } + /* + * bpf_get_map_info_by_fd() for DEVMAP will always return flags with + * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time. + * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from + * bpf_get_map_info_by_fd() when checking for compatibility with an + * existing DEVMAP. + */ + if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH) + map_info.map_flags &= ~BPF_F_RDONLY_PROG; + return (map_info.type == map->def.type && map_info.key_size == map->def.key_size && map_info.value_size == map->def.value_size && @@ -4774,7 +5197,6 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) static int bpf_object__reuse_map(struct bpf_map *map) { - char *cp, errmsg[STRERR_BUFSIZE]; int err, pin_fd; pin_fd = bpf_obj_get(map->pin_path); @@ -4786,9 +5208,8 @@ bpf_object__reuse_map(struct bpf_map *map) return 0; } - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); pr_warn("couldn't retrieve pinned map '%s': %s\n", - map->pin_path, cp); + map->pin_path, errstr(err)); return err; } @@ -4800,10 +5221,10 @@ bpf_object__reuse_map(struct bpf_map *map) } err = bpf_map__reuse_fd(map, pin_fd); - if (err) { - close(pin_fd); + close(pin_fd); + if (err) return err; - } + map->pinned = true; pr_debug("reused pinned map at '%s'\n", map->pin_path); @@ -4814,8 +5235,8 @@ static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { enum libbpf_map_type map_type = map->libbpf_type; - char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; + size_t mmap_sz; if (obj->gen_loader) { bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, @@ -4824,12 +5245,12 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); return 0; } + err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error setting initial map(%s) contents: %s\n", - map->name, cp); + pr_warn("map '%s': failed to set initial contents: %s\n", + bpf_map__name(map), errstr(err)); return err; } @@ -4838,12 +5259,43 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_freeze(map->fd); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error freezing map(%s) as read-only: %s\n", - map->name, cp); + pr_warn("map '%s': failed to freeze as read-only: %s\n", + bpf_map__name(map), errstr(err)); return err; } } + + /* Remap anonymous mmap()-ed "map initialization image" as + * a BPF map-backed mmap()-ed memory, but preserving the same + * memory address. This will cause kernel to change process' + * page table to point to a different piece of kernel memory, + * but from userspace point of view memory address (and its + * contents, being identical at this point) will stay the + * same. This mapping will be released by bpf_object__close() + * as per normal clean up procedure. + */ + mmap_sz = bpf_map_mmap_sz(map); + if (map->def.map_flags & BPF_F_MMAPABLE) { + void *mmaped; + int prot; + + if (map->def.map_flags & BPF_F_RDONLY_PROG) + prot = PROT_READ; + else + prot = PROT_READ | PROT_WRITE; + mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0); + if (mmaped == MAP_FAILED) { + err = -errno; + pr_warn("map '%s': failed to re-mmap() contents: %s\n", + bpf_map__name(map), errstr(err)); + return err; + } + map->mmaped = mmaped; + } else if (map->mmaped) { + munmap(map->mmaped, mmap_sz); + map->mmaped = NULL; + } + return 0; } @@ -4854,8 +5306,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b LIBBPF_OPTS(bpf_map_create_opts, create_attr); struct bpf_map_def *def = &map->def; const char *map_name = NULL; - __u32 max_entries; - int err = 0; + int err = 0, map_fd; if (kernel_supports(obj, FEAT_PROG_NAME)) map_name = map->name; @@ -4863,26 +5314,27 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; + create_attr.token_fd = obj->token_fd; + if (obj->token_fd) + create_attr.map_flags |= BPF_F_TOKEN_FD; + if (map->excl_prog) { + err = bpf_prog_compute_hash(map->excl_prog); + if (err) + return err; - if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { - int nr_cpus; - - nr_cpus = libbpf_num_possible_cpus(); - if (nr_cpus < 0) { - pr_warn("map '%s': failed to determine number of system CPUs: %d\n", - map->name, nr_cpus); - return nr_cpus; - } - pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); - max_entries = nr_cpus; - } else { - max_entries = def->max_entries; + create_attr.excl_prog_hash = map->excl_prog->hash; + create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH; } - if (bpf_map__is_struct_ops(map)) + if (bpf_map__is_struct_ops(map)) { create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; + if (map->mod_btf_fd >= 0) { + create_attr.value_type_btf_obj_fd = map->mod_btf_fd; + create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; + } + } - if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) { + if (obj->btf && btf__fd(obj->btf) >= 0) { create_attr.btf_fd = btf__fd(obj->btf); create_attr.btf_key_type_id = map->btf_key_type_id; create_attr.btf_value_type_id = map->btf_value_type_id; @@ -4890,13 +5342,16 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (bpf_map_type__is_map_in_map(def->type)) { if (map->inner_map) { + err = map_set_def_max_entries(map->inner_map); + if (err) + return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { - pr_warn("map '%s': failed to create inner map: %d\n", - map->name, err); + pr_warn("map '%s': failed to create inner map: %s\n", + map->name, errstr(err)); return err; } - map->inner_map_fd = bpf_map__fd(map->inner_map); + map->inner_map_fd = map->inner_map->fd; } if (map->inner_map_fd >= 0) create_attr.inner_map_fd = map->inner_map_fd; @@ -4916,49 +5371,50 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_QUEUE: case BPF_MAP_TYPE_STACK: - case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_ARENA: create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; + break; + case BPF_MAP_TYPE_STRUCT_OPS: + create_attr.btf_value_type_id = 0; + break; default: break; } if (obj->gen_loader) { bpf_gen__map_create(obj->gen_loader, def->type, map_name, - def->key_size, def->value_size, max_entries, + def->key_size, def->value_size, def->max_entries, &create_attr, is_inner ? -1 : map - obj->maps); - /* Pretend to have valid FD to pass various fd >= 0 checks. - * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. + /* We keep pretenting we have valid FD to pass various fd >= 0 + * checks by just keeping original placeholder FDs in place. + * See bpf_object__add_map() comment. + * This placeholder fd will not be used with any syscall and + * will be reset to -1 eventually. */ - map->fd = 0; + map_fd = map->fd; } else { - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - max_entries, &create_attr); + map_fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + def->max_entries, &create_attr); } - if (map->fd < 0 && (create_attr.btf_key_type_id || - create_attr.btf_value_type_id)) { - char *cp, errmsg[STRERR_BUFSIZE]; - + if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", - map->name, cp, err); + pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n", + map->name, errstr(err)); create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - max_entries, &create_attr); + map_fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + def->max_entries, &create_attr); } - err = map->fd < 0 ? -errno : 0; - if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { if (obj->gen_loader) map->inner_map->fd = -1; @@ -4966,7 +5422,19 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b zfree(&map->inner_map); } - return err; + if (map_fd < 0) + return map_fd; + + /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */ + if (map->fd == map_fd) + return 0; + + /* Keep placeholder FD value but now point it to the BPF map object. + * This way everything that relied on this map's FD (e.g., relocated + * ldimm64 instructions) will stay valid and won't need adjustments. + * map->fd stays valid but now point to what map_fd points to. + */ + return reuse_fd(map->fd, map_fd); } static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) @@ -4980,7 +5448,7 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) continue; targ_map = map->init_slots[i]; - fd = bpf_map__fd(targ_map); + fd = targ_map->fd; if (obj->gen_loader) { bpf_gen__populate_outer_map(obj->gen_loader, @@ -4991,8 +5459,8 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) } if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, i, targ_map->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n", + map->name, i, targ_map->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", @@ -5024,8 +5492,8 @@ static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_update_elem(map->fd, &i, &fd, 0); if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", - map->name, i, targ_prog->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n", + map->name, i, targ_prog->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", @@ -5050,11 +5518,27 @@ static int bpf_object_init_prog_arrays(struct bpf_object *obj) continue; err = init_prog_array_slots(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) return err; + } + return 0; +} + +static int map_set_def_max_entries(struct bpf_map *map) +{ + if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { + int nr_cpus; + + nr_cpus = libbpf_num_possible_cpus(); + if (nr_cpus < 0) { + pr_warn("map '%s': failed to determine number of system CPUs: %d\n", + map->name, nr_cpus); + return nr_cpus; } + pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); + map->def.max_entries = nr_cpus; } + return 0; } @@ -5062,7 +5546,6 @@ static int bpf_object__create_maps(struct bpf_object *obj) { struct bpf_map *map; - char *cp, errmsg[STRERR_BUFSIZE]; unsigned int i, j; int err; bool retried; @@ -5084,12 +5567,18 @@ bpf_object__create_maps(struct bpf_object *obj) * bpf_object loading will succeed just fine even on old * kernels. */ - if (bpf_map__is_internal(map) && - !kernel_supports(obj, FEAT_GLOBAL_DATA)) { - map->skipped = true; + if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) + map->autocreate = false; + + if (!map->autocreate) { + pr_debug("map '%s': skipped auto-creating...\n", map->name); continue; } + err = map_set_def_max_entries(map); + if (err) + goto err_out; + retried = false; retry: if (map->pin_path) { @@ -5107,7 +5596,7 @@ retry: } } - if (map->fd >= 0) { + if (map->reused) { pr_debug("map '%s': skipping creation (preset fd=%d)\n", map->name, map->fd); } else { @@ -5120,31 +5609,41 @@ retry: if (bpf_map__is_internal(map)) { err = bpf_object__populate_internal_map(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) goto err_out; + } else if (map->def.type == BPF_MAP_TYPE_ARENA) { + map->mmaped = mmap((void *)(long)map->map_extra, + bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, + map->fd, 0); + if (map->mmaped == MAP_FAILED) { + err = -errno; + map->mmaped = NULL; + pr_warn("map '%s': failed to mmap arena: %s\n", + map->name, errstr(err)); + return err; + } + if (obj->arena_data) { + memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); + zfree(&obj->arena_data); } } - if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { err = init_map_in_map_slots(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) goto err_out; - } } } if (map->pin_path && !map->pinned) { err = bpf_map__pin(map, NULL); if (err) { - zclose(map->fd); if (!retried && err == -EEXIST) { retried = true; goto retry; } - pr_warn("map '%s': failed to auto-pin at '%s': %d\n", - map->name, map->pin_path, err); + pr_warn("map '%s': failed to auto-pin at '%s': %s\n", + map->name, map->pin_path, errstr(err)); goto err_out; } } @@ -5153,8 +5652,7 @@ retry: return 0; err_out: - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); + pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err)); pr_perm_msg(err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); @@ -5185,18 +5683,21 @@ size_t bpf_core_essential_name_len(const char *name) return n; } -static void bpf_core_free_cands(struct bpf_core_cand_list *cands) +void bpf_core_free_cands(struct bpf_core_cand_list *cands) { + if (!cands) + return; + free(cands->cands); free(cands); } -static int bpf_core_add_cands(struct bpf_core_cand *local_cand, - size_t local_essent_len, - const struct btf *targ_btf, - const char *targ_btf_name, - int targ_start_id, - struct bpf_core_cand_list *cands) +int bpf_core_add_cands(struct bpf_core_cand *local_cand, + size_t local_essent_len, + const struct btf *targ_btf, + const char *targ_btf_name, + int targ_start_id, + struct bpf_core_cand_list *cands) { struct bpf_core_cand *new_cands, *cand; const struct btf_type *t, *local_t; @@ -5210,7 +5711,7 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, n = btf__type_cnt(targ_btf); for (i = targ_start_id; i < n; i++) { t = btf__type_by_id(targ_btf, i); - if (btf_kind(t) != btf_kind(local_t)) + if (!btf_kind_core_compat(t, local_t)) continue; targ_name = btf__name_by_offset(targ_btf, t->name_off); @@ -5269,9 +5770,13 @@ static int load_module_btfs(struct bpf_object *obj) err = bpf_btf_get_next_id(id, &id); if (err && errno == ENOENT) return 0; + if (err && errno == EPERM) { + pr_debug("skipping module BTFs loading, missing privileges\n"); + return 0; + } if (err) { err = -errno; - pr_warn("failed to iterate BTF objects: %d\n", err); + pr_warn("failed to iterate BTF objects: %s\n", errstr(err)); return err; } @@ -5280,7 +5785,7 @@ static int load_module_btfs(struct bpf_object *obj) if (errno == ENOENT) continue; /* expected race: BTF was unloaded */ err = -errno; - pr_warn("failed to get BTF object #%d FD: %d\n", id, err); + pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err)); return err; } @@ -5289,10 +5794,10 @@ static int load_module_btfs(struct bpf_object *obj) info.name = ptr_to_u64(name); info.name_len = sizeof(name); - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { err = -errno; - pr_warn("failed to get BTF object #%d info: %d\n", id, err); + pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); goto err_out; } @@ -5305,13 +5810,13 @@ static int load_module_btfs(struct bpf_object *obj) btf = btf_get_from_fd(fd, obj->btf_vmlinux); err = libbpf_get_error(btf); if (err) { - pr_warn("failed to load module [%s]'s BTF object #%d: %d\n", - name, id, err); + pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", + name, id, errstr(err)); goto err_out; } err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, - sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); + sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); if (err) goto err_out; @@ -5418,91 +5923,23 @@ err_out: int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) { - const struct btf_type *local_type, *targ_type; - int depth = 32; /* max recursion depth */ - - /* caller made sure that names match (ignoring flavor suffix) */ - local_type = btf__type_by_id(local_btf, local_id); - targ_type = btf__type_by_id(targ_btf, targ_id); - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - -recur: - depth--; - if (depth < 0) - return -EINVAL; - - local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!local_type || !targ_type) - return -EINVAL; - - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - - switch (btf_kind(local_type)) { - case BTF_KIND_UNKN: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - case BTF_KIND_FWD: - return 1; - case BTF_KIND_INT: - /* just reject deprecated bitfield-like integers; all other - * integers are by default compatible between each other - */ - return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; - case BTF_KIND_PTR: - local_id = local_type->type; - targ_id = targ_type->type; - goto recur; - case BTF_KIND_ARRAY: - local_id = btf_array(local_type)->type; - targ_id = btf_array(targ_type)->type; - goto recur; - case BTF_KIND_FUNC_PROTO: { - struct btf_param *local_p = btf_params(local_type); - struct btf_param *targ_p = btf_params(targ_type); - __u16 local_vlen = btf_vlen(local_type); - __u16 targ_vlen = btf_vlen(targ_type); - int i, err; - - if (local_vlen != targ_vlen) - return 0; - - for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { - skip_mods_and_typedefs(local_btf, local_p->type, &local_id); - skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); - err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id); - if (err <= 0) - return err; - } - - /* tail recurse for return type check */ - skip_mods_and_typedefs(local_btf, local_type->type, &local_id); - skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); - goto recur; - } - default: - pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", - btf_kind_str(local_type), local_id, targ_id); - return 0; - } + return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); } -static size_t bpf_core_hash_fn(const void *key, void *ctx) +int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id) { - return (size_t)key; + return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); } -static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) +static size_t bpf_core_hash_fn(const long key, void *ctx) { - return k1 == k2; + return key; } -static void *u32_as_hash_key(__u32 x) +static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx) { - return (void *)(uintptr_t)x; + return k1 == k2; } static int record_relo_core(struct bpf_program *prog, @@ -5523,33 +5960,36 @@ static int record_relo_core(struct bpf_program *prog, return 0; } -static int bpf_core_apply_relo(struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct btf *local_btf, - struct hashmap *cand_cache) +static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) +{ + struct reloc_desc *relo; + int i; + + for (i = 0; i < prog->nr_reloc; i++) { + relo = &prog->reloc_desc[i]; + if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) + continue; + + return relo->core_relo; + } + + return NULL; +} + +static int bpf_core_resolve_relo(struct bpf_program *prog, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + struct hashmap *cand_cache, + struct bpf_core_relo_res *targ_res) { struct bpf_core_spec specs_scratch[3] = {}; - const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_cand_list *cands = NULL; const char *prog_name = prog->name; const struct btf_type *local_type; const char *local_name; __u32 local_id = relo->type_id; - struct bpf_insn *insn; - int insn_idx, err; - - if (relo->insn_off % BPF_INSN_SZ) - return -EINVAL; - insn_idx = relo->insn_off / BPF_INSN_SZ; - /* adjust insn_idx from section frame of reference to the local - * program's frame of reference; (sub-)program code is not yet - * relocated, so it's enough to just subtract in-section offset - */ - insn_idx = insn_idx - prog->sec_insn_off; - if (insn_idx >= prog->insns_cnt) - return -EINVAL; - insn = &prog->insns[insn_idx]; + int err; local_type = btf__type_by_id(local_btf, local_id); if (!local_type) @@ -5559,17 +5999,8 @@ static int bpf_core_apply_relo(struct bpf_program *prog, if (!local_name) return -EINVAL; - if (prog->obj->gen_loader) { - const char *spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - - pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n", - prog - prog->obj->programs, relo->insn_off / 8, - btf_kind_str(local_type), local_name, spec_str, insn_idx); - return record_relo_core(prog, relo, insn_idx); - } - if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && - !hashmap__find(cand_cache, type_key, (void **)&cands)) { + !hashmap__find(cand_cache, local_id, &cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", @@ -5577,28 +6008,30 @@ static int bpf_core_apply_relo(struct bpf_program *prog, local_name, PTR_ERR(cands)); return PTR_ERR(cands); } - err = hashmap__set(cand_cache, type_key, cands, NULL, NULL); + err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); if (err) { bpf_core_free_cands(cands); return err; } } - return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, - relo_idx, local_btf, cands, specs_scratch); + return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, + targ_res); } static int bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) { const struct btf_ext_info_sec *sec; + struct bpf_core_relo_res targ_res; const struct bpf_core_relo *rec; const struct btf_ext_info *seg; struct hashmap_entry *entry; struct hashmap *cand_cache = NULL; struct bpf_program *prog; + struct bpf_insn *insn; const char *sec_name; - int i, err = 0, insn_idx, sec_idx; + int i, err = 0, insn_idx, sec_idx, sec_num; if (obj->btf_ext->core_relo_info.len == 0) return 0; @@ -5607,7 +6040,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); err = libbpf_get_error(obj->btf_vmlinux_override); if (err) { - pr_warn("failed to parse target BTF: %d\n", err); + pr_warn("failed to parse target BTF: %s\n", errstr(err)); return err; } } @@ -5619,52 +6052,73 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) } seg = &obj->btf_ext->core_relo_info; + sec_num = 0; for_each_btf_ext_sec(seg, sec) { + sec_idx = seg->sec_idxs[sec_num]; + sec_num++; + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { err = -EINVAL; goto out; } - /* bpf_object's ELF is gone by now so it's not easy to find - * section index by section name, but we can find *any* - * bpf_program within desired section name and use it's - * prog->sec_idx to do a proper search by section index and - * instruction offset - */ - prog = NULL; - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (strcmp(prog->sec_name, sec_name) == 0) - break; - } - if (!prog) { - pr_warn("sec '%s': failed to find a BPF program\n", sec_name); - return -ENOENT; - } - sec_idx = prog->sec_idx; - pr_debug("sec '%s': found %d CO-RE relocations\n", - sec_name, sec->num_info); + pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { + if (rec->insn_off % BPF_INSN_SZ) + return -EINVAL; insn_idx = rec->insn_off / BPF_INSN_SZ; prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { - pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n", - sec_name, insn_idx, i); - err = -EINVAL; - goto out; + /* When __weak subprog is "overridden" by another instance + * of the subprog from a different object file, linker still + * appends all the .BTF.ext info that used to belong to that + * eliminated subprogram. + * This is similar to what x86-64 linker does for relocations. + * So just ignore such relocations just like we ignore + * subprog instructions when discovering subprograms. + */ + pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", + sec_name, i, insn_idx); + continue; } /* no need to apply CO-RE relocation if the program is * not going to be loaded */ - if (!prog->load) + if (!prog->autoload) + continue; + + /* adjust insn_idx from section frame of reference to the local + * program's frame of reference; (sub-)program code is not yet + * relocated, so it's enough to just subtract in-section offset + */ + insn_idx = insn_idx - prog->sec_insn_off; + if (insn_idx >= prog->insns_cnt) + return -EINVAL; + insn = &prog->insns[insn_idx]; + + err = record_relo_core(prog, rec, insn_idx); + if (err) { + pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n", + prog->name, i, errstr(err)); + goto out; + } + + if (prog->obj->gen_loader) continue; - err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache); + err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); + if (err) { + pr_warn("prog '%s': relo #%d: failed to relocate: %s\n", + prog->name, i, errstr(err)); + goto out; + } + + err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); if (err) { - pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", - prog->name, i, err); + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n", + prog->name, i, insn_idx, errstr(err)); goto out; } } @@ -5677,13 +6131,218 @@ out: if (!IS_ERR_OR_NULL(cand_cache)) { hashmap__for_each_entry(cand_cache, entry, i) { - bpf_core_free_cands(entry->value); + bpf_core_free_cands(entry->pvalue); } hashmap__free(cand_cache); } return err; } +/* base map load ldimm64 special constant, used also for log fixup logic */ +#define POISON_LDIMM64_MAP_BASE 2001000000 +#define POISON_LDIMM64_MAP_PFX "200100" + +static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn, + int map_idx, const struct bpf_map *map) +{ + int i; + + pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", + prog->name, relo_idx, insn_idx, map_idx, map->name); + + /* we turn single ldimm64 into two identical invalid calls */ + for (i = 0; i < 2; i++) { + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with something like: + * invalid func unknown#2001000123 + * where lower 123 is map index into obj->maps[] array + */ + insn->imm = POISON_LDIMM64_MAP_BASE + map_idx; + + insn++; + } +} + +/* unresolved kfunc call special constant, used also for log fixup logic */ +#define POISON_CALL_KFUNC_BASE 2002000000 +#define POISON_CALL_KFUNC_PFX "2002" + +static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn, + int ext_idx, const struct extern_desc *ext) +{ + pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n", + prog->name, relo_idx, insn_idx, ext->name); + + /* we turn kfunc call into invalid helper call with identifiable constant */ + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with something like: + * invalid func unknown#2001000123 + * where lower 123 is extern index into obj->externs[] array + */ + insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; +} + +static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off) +{ + size_t i; + + for (i = 0; i < obj->jumptable_map_cnt; i++) { + /* + * This might happen that same offset is used for two different + * programs (as jump tables can be the same). However, for + * different programs different maps should be created. + */ + if (obj->jumptable_maps[i].sym_off == sym_off && + obj->jumptable_maps[i].prog == prog) + return obj->jumptable_maps[i].fd; + } + + return -ENOENT; +} + +static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd) +{ + size_t cnt = obj->jumptable_map_cnt; + size_t size = sizeof(obj->jumptable_maps[0]); + void *tmp; + + tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size); + if (!tmp) + return -ENOMEM; + + obj->jumptable_maps = tmp; + obj->jumptable_maps[cnt].prog = prog; + obj->jumptable_maps[cnt].sym_off = sym_off; + obj->jumptable_maps[cnt].fd = map_fd; + obj->jumptable_map_cnt++; + + return 0; +} + +static int find_subprog_idx(struct bpf_program *prog, int insn_idx) +{ + int i; + + for (i = prog->subprog_cnt - 1; i >= 0; i--) { + if (insn_idx >= prog->subprogs[i].sub_insn_off) + return i; + } + + return -1; +} + +static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo) +{ + const __u32 jt_entry_size = 8; + int sym_off = relo->sym_off; + int jt_size = relo->sym_size; + __u32 max_entries = jt_size / jt_entry_size; + __u32 value_size = sizeof(struct bpf_insn_array_value); + struct bpf_insn_array_value val = {}; + int subprog_idx; + int map_fd, err; + __u64 insn_off; + __u64 *jt; + __u32 i; + + map_fd = find_jt_map(obj, prog, sym_off); + if (map_fd >= 0) + return map_fd; + + if (sym_off % jt_entry_size) { + pr_warn("map '.jumptables': jumptable start %d should be multiple of %u\n", + sym_off, jt_entry_size); + return -EINVAL; + } + + if (jt_size % jt_entry_size) { + pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n", + jt_size, jt_entry_size); + return -EINVAL; + } + + map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables", + 4, value_size, max_entries, NULL); + if (map_fd < 0) + return map_fd; + + if (!obj->jumptables_data) { + pr_warn("map '.jumptables': ELF file is missing jump table data\n"); + err = -EINVAL; + goto err_close; + } + if (sym_off + jt_size > obj->jumptables_data_sz) { + pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n", + obj->jumptables_data_sz, sym_off + jt_size); + err = -EINVAL; + goto err_close; + } + + subprog_idx = -1; /* main program */ + if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) { + pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx); + err = -EINVAL; + goto err_close; + } + if (prog->subprogs) + subprog_idx = find_subprog_idx(prog, relo->insn_idx); + + jt = (__u64 *)(obj->jumptables_data + sym_off); + for (i = 0; i < max_entries; i++) { + /* + * The offset should be made to be relative to the beginning of + * the main function, not the subfunction. + */ + insn_off = jt[i]/sizeof(struct bpf_insn); + if (subprog_idx >= 0) { + insn_off -= prog->subprogs[subprog_idx].sec_insn_off; + insn_off += prog->subprogs[subprog_idx].sub_insn_off; + } else { + insn_off -= prog->sec_insn_off; + } + + /* + * LLVM-generated jump tables contain u64 records, however + * should contain values that fit in u32. + */ + if (insn_off > UINT32_MAX) { + pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %d\n", + (long long)jt[i], sym_off + i * jt_entry_size); + err = -EINVAL; + goto err_close; + } + + val.orig_off = insn_off; + err = bpf_map_update_elem(map_fd, &i, &val, 0); + if (err) + goto err_close; + } + + err = bpf_map_freeze(map_fd); + if (err) + goto err_close; + + err = add_jt_map(obj, prog, sym_off, map_fd); + if (err) + goto err_close; + + return map_fd; + +err_close: + close(map_fd); + return err; +} + /* Relocate data references within program code: * - map references; * - global variable references; @@ -5697,37 +6356,39 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + const struct bpf_map *map; struct extern_desc *ext; switch (relo->type) { case RELO_LD64: + map = &obj->maps[relo->map_idx]; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX; insn[0].imm = relo->map_idx; - } else { + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_FD; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_DATA: + map = &obj->maps[relo->map_idx]; insn[1].imm = insn[0].imm + relo->sym_off; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; - } else { - const struct bpf_map *map = &obj->maps[relo->map_idx]; - - if (map->skipped) { - pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", - prog->name, i); - return -ENOTSUP; - } + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; - case RELO_EXTERN_VAR: - ext = &obj->externs[relo->sym_off]; + case RELO_EXTERN_LD64: + ext = &obj->externs[relo->ext_idx]; if (ext->type == EXT_KCFG) { if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; @@ -5748,15 +6409,15 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) } } break; - case RELO_EXTERN_FUNC: - ext = &obj->externs[relo->sym_off]; + case RELO_EXTERN_CALL: + ext = &obj->externs[relo->ext_idx]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; if (ext->is_set) { insn[0].imm = ext->ksym.kernel_btf_id; insn[0].off = ext->ksym.btf_fd_idx; - } else { /* unresolved weak kfunc */ - insn[0].imm = 0; - insn[0].off = 0; + } else { /* unresolved weak kfunc call */ + poison_kfunc_call(prog, i, relo->insn_idx, insn, + relo->ext_idx, ext); } break; case RELO_SUBPROG_ADDR: @@ -5773,6 +6434,20 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_CORE: /* will be handled by bpf_program_record_relos() */ break; + case RELO_INSN_ARRAY: { + int map_fd; + + map_fd = create_jt_map(obj, prog, relo); + if (map_fd < 0) { + pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n", + prog->name, i, relo->sym_off); + return map_fd; + } + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn->imm = map_fd; + insn->off = 0; + } + break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", prog->name, i, relo->type); @@ -5793,14 +6468,13 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj, void *rec, *rec_end, *new_prog_info; const struct btf_ext_info_sec *sec; size_t old_sz, new_sz; - const char *sec_name; - int i, off_adj; + int i, sec_num, sec_idx, off_adj; + sec_num = 0; for_each_btf_ext_sec(ext_info, sec) { - sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); - if (!sec_name) - return -EINVAL; - if (strcmp(sec_name, prog->sec_name) != 0) + sec_idx = ext_info->sec_idxs[sec_num]; + sec_num++; + if (prog->sec_idx != sec_idx) continue; for_each_btf_ext_rec(ext_info, sec, i, rec) { @@ -5860,7 +6534,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, int err; /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't - * supprot func/line info + * support func/line info */ if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) return 0; @@ -5877,8 +6551,8 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, &main_prog->func_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n", + prog->name, errstr(err)); return err; } if (main_prog->func_info) { @@ -5905,8 +6579,8 @@ line_info: &main_prog->line_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n", + prog->name, errstr(err)); return err; } if (main_prog->line_info) { @@ -5951,7 +6625,11 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra if (main_prog == subprog) return 0; relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); - if (!relos) + /* if new count is zero, reallocarray can return a valid NULL result; + * in this case the previous pointer will be freed, so we *have to* + * reassign old pointer to the new value (even if it's NULL) + */ + if (!relos && new_cnt) return -ENOMEM; if (subprog->nr_reloc) memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, @@ -5967,14 +6645,72 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra return 0; } +static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog) +{ + size_t size = sizeof(main_prog->subprogs[0]); + int cnt = main_prog->subprog_cnt; + void *tmp; + + tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size); + if (!tmp) + return -ENOMEM; + + main_prog->subprogs = tmp; + main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off; + main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off; + main_prog->subprog_cnt++; + + return 0; +} + +static int +bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, + struct bpf_program *subprog) +{ + struct bpf_insn *insns; + size_t new_cnt; + int err; + + subprog->sub_insn_off = main_prog->insns_cnt; + + new_cnt = main_prog->insns_cnt + subprog->insns_cnt; + insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); + return -ENOMEM; + } + main_prog->insns = insns; + main_prog->insns_cnt = new_cnt; + + memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, + subprog->insns_cnt * sizeof(*insns)); + + pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", + main_prog->name, subprog->insns_cnt, subprog->name); + + /* The subprog insns are now appended. Append its relos too. */ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; + + err = save_subprog_offsets(main_prog, subprog); + if (err) { + pr_warn("prog '%s': failed to add subprog offsets: %s\n", + main_prog->name, errstr(err)); + return err; + } + + return 0; +} + static int bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *prog) { - size_t sub_insn_idx, insn_idx, new_cnt; + size_t sub_insn_idx, insn_idx; struct bpf_program *subprog; - struct bpf_insn *insns, *insn; struct reloc_desc *relo; + struct bpf_insn *insn; int err; err = reloc_prog_func_and_line_info(obj, main_prog, prog); @@ -5987,7 +6723,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, continue; relo = find_prog_insn_relo(prog, insn_idx); - if (relo && relo->type == RELO_EXTERN_FUNC) + if (relo && relo->type == RELO_EXTERN_CALL) /* kfunc relocations will be handled later * in bpf_object__relocate_data() */ @@ -6049,25 +6785,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * and relocate. */ if (subprog->sub_insn_off == 0) { - subprog->sub_insn_off = main_prog->insns_cnt; - - new_cnt = main_prog->insns_cnt + subprog->insns_cnt; - insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); - if (!insns) { - pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); - return -ENOMEM; - } - main_prog->insns = insns; - main_prog->insns_cnt = new_cnt; - - memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, - subprog->insns_cnt * sizeof(*insns)); - - pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", - main_prog->name, subprog->insns_cnt, subprog->name); - - /* The subprog insns are now appended. Append its relos too. */ - err = append_subprog_relos(main_prog, subprog); + err = bpf_object__append_subprog_code(obj, main_prog, subprog); if (err) return err; err = bpf_object__reloc_code(obj, main_prog, subprog); @@ -6083,7 +6801,8 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * prog; each main prog can have a different set of * subprograms appended (potentially in different order as * well), so position of any subprog can be different for - * different main programs */ + * different main programs + */ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", @@ -6195,7 +6914,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) if (err) return err; - return 0; } @@ -6242,8 +6960,408 @@ static void bpf_object__sort_relos(struct bpf_object *obj) } } -static int -bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) +static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog) +{ + const char *str = "exception_callback:"; + size_t pfx_len = strlen(str); + int i, j, n; + + if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG)) + return 0; + + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { + const char *name; + struct btf_type *t; + + t = btf_type_by_id(obj->btf, i); + if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) + continue; + + name = btf__str_by_offset(obj->btf, t->name_off); + if (strncmp(name, str, pfx_len) != 0) + continue; + + t = btf_type_by_id(obj->btf, t->type); + if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { + pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", + prog->name); + return -EINVAL; + } + if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0) + continue; + /* Multiple callbacks are specified for the same prog, + * the verifier will eventually return an error for this + * case, hence simply skip appending a subprog. + */ + if (prog->exception_cb_idx >= 0) { + prog->exception_cb_idx = -1; + break; + } + + name += pfx_len; + if (str_is_empty(name)) { + pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", + prog->name); + return -EINVAL; + } + + for (j = 0; j < obj->nr_programs; j++) { + struct bpf_program *subprog = &obj->programs[j]; + + if (!prog_is_subprog(obj, subprog)) + continue; + if (strcmp(name, subprog->name) != 0) + continue; + /* Enforce non-hidden, as from verifier point of + * view it expects global functions, whereas the + * mark_btf_static fixes up linkage as static. + */ + if (!subprog->sym_global || subprog->mark_btf_static) { + pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", + prog->name, subprog->name); + return -EINVAL; + } + /* Let's see if we already saw a static exception callback with the same name */ + if (prog->exception_cb_idx >= 0) { + pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", + prog->name, subprog->name); + return -EINVAL; + } + prog->exception_cb_idx = j; + break; + } + + if (prog->exception_cb_idx >= 0) + continue; + + pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); + return -ENOENT; + } + + return 0; +} + +static struct { + enum bpf_prog_type prog_type; + const char *ctx_name; +} global_ctx_map[] = { + { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" }, + { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" }, + { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" }, + { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" }, + { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" }, + { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" }, + { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" }, + { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" }, + { BPF_PROG_TYPE_LWT_IN, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" }, + { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" }, + { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" }, + { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" }, + { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" }, + { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" }, + { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" }, + { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" }, + { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" }, + { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" }, + { BPF_PROG_TYPE_SK_SKB, "__sk_buff" }, + { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" }, + { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" }, + { BPF_PROG_TYPE_XDP, "xdp_md" }, + /* all other program types don't have "named" context structs */ +}; + +/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef, + * for below __builtin_types_compatible_p() checks; + * with this approach we don't need any extra arch-specific #ifdef guards + */ +struct pt_regs; +struct user_pt_regs; +struct user_regs_struct; + +static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, + const char *subprog_name, int arg_idx, + int arg_type_id, const char *ctx_name) +{ + const struct btf_type *t; + const char *tname; + + /* check if existing parameter already matches verifier expectations */ + t = skip_mods_and_typedefs(btf, arg_type_id, NULL); + if (!btf_is_ptr(t)) + goto out_warn; + + /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe + * and perf_event programs, so check this case early on and forget + * about it for subsequent checks + */ + while (btf_is_mod(t)) + t = btf__type_by_id(btf, t->type); + if (btf_is_typedef(t) && + (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) { + tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; + if (strcmp(tname, "bpf_user_pt_regs_t") == 0) + return false; /* canonical type for kprobe/perf_event */ + } + + /* now we can ignore typedefs moving forward */ + t = skip_mods_and_typedefs(btf, t->type, NULL); + + /* if it's `void *`, definitely fix up BTF info */ + if (btf_is_void(t)) + return true; + + /* if it's already proper canonical type, no need to fix up */ + tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; + if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0) + return false; + + /* special cases */ + switch (prog->type) { + case BPF_PROG_TYPE_KPROBE: + /* `struct pt_regs *` is expected, but we need to fix up */ + if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return true; + break; + case BPF_PROG_TYPE_PERF_EVENT: + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && + btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return true; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && + btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) + return true; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && + btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) + return true; + break; + case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return true; + break; + default: + break; + } + +out_warn: + pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n", + prog->name, subprog_name, arg_idx, ctx_name); + return false; +} + +static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) +{ + int fn_id, fn_proto_id, ret_type_id, orig_proto_id; + int i, err, arg_cnt, fn_name_off, linkage; + struct btf_type *fn_t, *fn_proto_t, *t; + struct btf_param *p; + + /* caller already validated FUNC -> FUNC_PROTO validity */ + fn_t = btf_type_by_id(btf, orig_fn_id); + fn_proto_t = btf_type_by_id(btf, fn_t->type); + + /* Note that each btf__add_xxx() operation invalidates + * all btf_type and string pointers, so we need to be + * very careful when cloning BTF types. BTF type + * pointers have to be always refetched. And to avoid + * problems with invalidated string pointers, we + * add empty strings initially, then just fix up + * name_off offsets in place. Offsets are stable for + * existing strings, so that works out. + */ + fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */ + linkage = btf_func_linkage(fn_t); + orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */ + ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */ + arg_cnt = btf_vlen(fn_proto_t); + + /* clone FUNC_PROTO and its params */ + fn_proto_id = btf__add_func_proto(btf, ret_type_id); + if (fn_proto_id < 0) + return -EINVAL; + + for (i = 0; i < arg_cnt; i++) { + int name_off; + + /* copy original parameter data */ + t = btf_type_by_id(btf, orig_proto_id); + p = &btf_params(t)[i]; + name_off = p->name_off; + + err = btf__add_func_param(btf, "", p->type); + if (err) + return err; + + fn_proto_t = btf_type_by_id(btf, fn_proto_id); + p = &btf_params(fn_proto_t)[i]; + p->name_off = name_off; /* use remembered str offset */ + } + + /* clone FUNC now, btf__add_func() enforces non-empty name, so use + * entry program's name as a placeholder, which we replace immediately + * with original name_off + */ + fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id); + if (fn_id < 0) + return -EINVAL; + + fn_t = btf_type_by_id(btf, fn_id); + fn_t->name_off = fn_name_off; /* reuse original string */ + + return fn_id; +} + +/* Check if main program or global subprog's function prototype has `arg:ctx` + * argument tags, and, if necessary, substitute correct type to match what BPF + * verifier would expect, taking into account specific program type. This + * allows to support __arg_ctx tag transparently on old kernels that don't yet + * have a native support for it in the verifier, making user's life much + * easier. + */ +static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) +{ + const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name; + struct bpf_func_info_min *func_rec; + struct btf_type *fn_t, *fn_proto_t; + struct btf *btf = obj->btf; + const struct btf_type *t; + struct btf_param *p; + int ptr_id = 0, struct_id, tag_id, orig_fn_id; + int i, n, arg_idx, arg_cnt, err, rec_idx; + int *orig_ids; + + /* no .BTF.ext, no problem */ + if (!obj->btf_ext || !prog->func_info) + return 0; + + /* don't do any fix ups if kernel natively supports __arg_ctx */ + if (kernel_supports(obj, FEAT_ARG_CTX_TAG)) + return 0; + + /* some BPF program types just don't have named context structs, so + * this fallback mechanism doesn't work for them + */ + for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) { + if (global_ctx_map[i].prog_type != prog->type) + continue; + ctx_name = global_ctx_map[i].ctx_name; + break; + } + if (!ctx_name) + return 0; + + /* remember original func BTF IDs to detect if we already cloned them */ + orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids)); + if (!orig_ids) + return -ENOMEM; + for (i = 0; i < prog->func_info_cnt; i++) { + func_rec = prog->func_info + prog->func_info_rec_size * i; + orig_ids[i] = func_rec->type_id; + } + + /* go through each DECL_TAG with "arg:ctx" and see if it points to one + * of our subprogs; if yes and subprog is global and needs adjustment, + * clone and adjust FUNC -> FUNC_PROTO combo + */ + for (i = 1, n = btf__type_cnt(btf); i < n; i++) { + /* only DECL_TAG with "arg:ctx" value are interesting */ + t = btf__type_by_id(btf, i); + if (!btf_is_decl_tag(t)) + continue; + if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0) + continue; + + /* only global funcs need adjustment, if at all */ + orig_fn_id = t->type; + fn_t = btf_type_by_id(btf, orig_fn_id); + if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL) + continue; + + /* sanity check FUNC -> FUNC_PROTO chain, just in case */ + fn_proto_t = btf_type_by_id(btf, fn_t->type); + if (!fn_proto_t || !btf_is_func_proto(fn_proto_t)) + continue; + + /* find corresponding func_info record */ + func_rec = NULL; + for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) { + if (orig_ids[rec_idx] == t->type) { + func_rec = prog->func_info + prog->func_info_rec_size * rec_idx; + break; + } + } + /* current main program doesn't call into this subprog */ + if (!func_rec) + continue; + + /* some more sanity checking of DECL_TAG */ + arg_cnt = btf_vlen(fn_proto_t); + arg_idx = btf_decl_tag(t)->component_idx; + if (arg_idx < 0 || arg_idx >= arg_cnt) + continue; + + /* check if we should fix up argument type */ + p = &btf_params(fn_proto_t)[arg_idx]; + fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>"; + if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name)) + continue; + + /* clone fn/fn_proto, unless we already did it for another arg */ + if (func_rec->type_id == orig_fn_id) { + int fn_id; + + fn_id = clone_func_btf_info(btf, orig_fn_id, prog); + if (fn_id < 0) { + err = fn_id; + goto err_out; + } + + /* point func_info record to a cloned FUNC type */ + func_rec->type_id = fn_id; + } + + /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument; + * we do it just once per main BPF program, as all global + * funcs share the same program type, so need only PTR -> + * STRUCT type chain + */ + if (ptr_id == 0) { + struct_id = btf__add_struct(btf, ctx_name, 0); + ptr_id = btf__add_ptr(btf, struct_id); + if (ptr_id < 0 || struct_id < 0) { + err = -EINVAL; + goto err_out; + } + } + + /* for completeness, clone DECL_TAG and point it to cloned param */ + tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx); + if (tag_id < 0) { + err = -EINVAL; + goto err_out; + } + + /* all the BTF manipulations invalidated pointers, refetch them */ + fn_t = btf_type_by_id(btf, func_rec->type_id); + fn_proto_t = btf_type_by_id(btf, fn_t->type); + + /* fix up type ID pointed to by param */ + p = &btf_params(fn_proto_t)[arg_idx]; + p->type = ptr_id; + } + + free(orig_ids); + return 0; +err_out: + free(orig_ids); + return err; +} + +static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; size_t i, j; @@ -6252,12 +7370,11 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) if (obj->btf_ext) { err = bpf_object__relocate_core(obj, targ_btf_path); if (err) { - pr_warn("failed to perform CO-RE relocations: %d\n", - err); + pr_warn("failed to perform CO-RE relocations: %s\n", + errstr(err)); return err; } - if (obj->gen_loader) - bpf_object__sort_relos(obj); + bpf_object__sort_relos(obj); } /* Before relocating calls pre-process relocations and mark @@ -6293,32 +7410,62 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_calls(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate calls: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate calls: %s\n", + prog->name, errstr(err)); return err; } + + err = bpf_prog_assign_exc_cb(obj, prog); + if (err) + return err; + /* Now, also append exception callback if it has not been done already. */ + if (prog->exception_cb_idx >= 0) { + struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; + + /* Calling exception callback directly is disallowed, which the + * verifier will reject later. In case it was processed already, + * we can skip this step, otherwise for all other valid cases we + * have to append exception callback now. + */ + if (subprog->sub_insn_off == 0) { + err = bpf_object__append_subprog_code(obj, prog, subprog); + if (err) + return err; + err = bpf_object__reloc_code(obj, prog, subprog); + if (err) + return err; + } + } } - /* Process data relos for main programs */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; + + /* Process data relos for main programs */ err = bpf_object__relocate_data(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate data references: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate data references: %s\n", + prog->name, errstr(err)); + return err; + } + + /* Fix up .BTF.ext information, if necessary */ + err = bpf_program_fixup_func_info(obj, prog); + if (err) { + pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n", + prog->name, errstr(err)); return err; } } - if (!obj->gen_loader) - bpf_object__free_relocs(obj); + return 0; } @@ -6479,12 +7626,12 @@ static int bpf_object__collect_relos(struct bpf_object *obj) data = sec_desc->data; idx = shdr->sh_info; - if (shdr->sh_type != SHT_REL) { + if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) { pr_warn("internal error at %d\n", __LINE__); return -LIBBPF_ERRNO__INTERNAL; } - if (idx == obj->efile.st_ops_shndx) + if (obj->efile.secs[idx].sec_type == SEC_ST_OPS) err = bpf_object__collect_st_ops_relos(obj, shdr, data); else if (idx == obj->efile.btf_maps_shndx) err = bpf_object__collect_map_relos(obj, shdr, data); @@ -6549,9 +7696,9 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, int *btf_obj_fd, int *btf_type_id); -/* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ -static int libbpf_preload_prog(struct bpf_program *prog, - struct bpf_prog_load_opts *opts, long cookie) +/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ +static int libbpf_prepare_prog_load(struct bpf_program *prog, + struct bpf_prog_load_opts *opts, long cookie) { enum sec_def_flags def = cookie; @@ -6562,13 +7709,40 @@ static int libbpf_preload_prog(struct bpf_program *prog, if (def & SEC_SLEEPABLE) opts->prog_flags |= BPF_F_SLEEPABLE; - if ((prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_LSM || - prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { + if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) + opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; + + /* special check for usdt to use uprobe_multi link */ + if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) { + /* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type + * in prog, and expected_attach_type we set in kernel is from opts, so we + * update both. + */ + prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + } + + if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; const char *attach_name; - attach_name = strchr(prog->sec_name, '/') + 1; + attach_name = strchr(prog->sec_name, '/'); + if (!attach_name) { + /* if BPF program is annotated with just SEC("fentry") + * (or similar) without declaratively specifying + * target, then it is expected that target will be + * specified with bpf_program__set_attach_target() at + * runtime before BPF object load step. If not, then + * there is nothing to load into the kernel as BPF + * verifier won't be able to validate BPF program + * correctness anyways. + */ + pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", + prog->name); + return -EINVAL; + } + attach_name++; /* skip over / */ + err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); if (err) return err; @@ -6588,21 +7762,25 @@ static int libbpf_preload_prog(struct bpf_program *prog, return 0; } -static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, - struct bpf_insn *insns, int insns_cnt, - const char *license, __u32 kern_version, - int *prog_fd) +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); + +static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, + struct bpf_insn *insns, int insns_cnt, + const char *license, __u32 kern_version, int *prog_fd) { LIBBPF_OPTS(bpf_prog_load_opts, load_attr); const char *prog_name = NULL; - char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL, *tmp; - int btf_fd, ret, err; bool own_log_buf = true; __u32 log_level = prog->log_level; + int ret, err; - if (prog->type == BPF_PROG_TYPE_UNSPEC) { + /* Be more helpful by rejecting programs that can't be validated early + * with more meaningful and actionable error message. + */ + switch (prog->type) { + case BPF_PROG_TYPE_UNSPEC: /* * The program type must be set. Most likely we couldn't find a proper * section definition at load time, and thus we didn't infer the type. @@ -6610,12 +7788,20 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", prog->name, prog->sec_name); return -EINVAL; + case BPF_PROG_TYPE_STRUCT_OPS: + if (prog->attach_btf_id == 0) { + pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n", + prog->name); + return -EINVAL; + } + break; + default: + break; } if (!insns || !insns_cnt) return -EINVAL; - load_attr.expected_attach_type = prog->expected_attach_type; if (kernel_supports(obj, FEAT_PROG_NAME)) prog_name = prog->name; load_attr.attach_prog_fd = prog->attach_prog_fd; @@ -6623,11 +7809,11 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.expected_attach_type = prog->expected_attach_type; /* specify func_info/line_info only if kernel supports them */ - btf_fd = bpf_object__btf_fd(obj); - if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { - load_attr.prog_btf_fd = btf_fd; + if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { + load_attr.prog_btf_fd = btf__fd(obj->btf); load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; @@ -6639,14 +7825,20 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog load_attr.prog_flags = prog->prog_flags; load_attr.fd_array = obj->fd_array; + load_attr.token_fd = obj->token_fd; + if (obj->token_fd) + load_attr.prog_flags |= BPF_F_TOKEN_FD; + /* adjust load_attr if sec_def provides custom preload callback */ - if (prog->sec_def && prog->sec_def->preload_fn) { - err = prog->sec_def->preload_fn(prog, &load_attr, prog->sec_def->cookie); + if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { + err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to prepare load attributes: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to prepare load attributes: %s\n", + prog->name, errstr(err)); return err; } + insns = prog->insns; + insns_cnt = prog->insns_cnt; } if (obj->gen_loader) { @@ -6658,7 +7850,7 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog } retry_load: - /* if log_level is zero, we don't request logs initiallly even if + /* if log_level is zero, we don't request logs initially even if * custom log_buf is specified; if the program load fails, then we'll * bump log_level to 1 and use either custom log_buf or we'll allocate * our own and retry the load to get details on what failed @@ -6705,10 +7897,9 @@ retry_load: if (map->libbpf_type != LIBBPF_MAP_RODATA) continue; - if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + if (bpf_prog_bind_map(ret, map->fd, NULL)) { pr_warn("prog '%s': failed to bind map '%s': %s\n", - prog->name, map->real_name, cp); + prog->name, map->real_name, errstr(errno)); /* Don't fail hard if can't bind rodata. */ } } @@ -6734,18 +7925,17 @@ retry_load: goto retry_load; ret = -errno; - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); + + /* post-process verifier log to improve error descriptions */ + fixup_verifier_log(prog, log_buf, log_buf_size); + + pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno)); pr_perm_msg(ret); if (own_log_buf && log_buf && log_buf[0] != '\0') { pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", prog->name, log_buf); } - if (insns_cnt >= BPF_MAXINSNS) { - pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", - prog->name, insns_cnt, BPF_MAXINSNS); - } out: if (own_log_buf) @@ -6753,6 +7943,212 @@ out: return ret; } +static char *find_prev_line(char *buf, char *cur) +{ + char *p; + + if (cur == buf) /* end of a log buf */ + return NULL; + + p = cur - 1; + while (p - 1 >= buf && *(p - 1) != '\n') + p--; + + return p; +} + +static void patch_log(char *buf, size_t buf_sz, size_t log_sz, + char *orig, size_t orig_sz, const char *patch) +{ + /* size of the remaining log content to the right from the to-be-replaced part */ + size_t rem_sz = (buf + log_sz) - (orig + orig_sz); + size_t patch_sz = strlen(patch); + + if (patch_sz != orig_sz) { + /* If patch line(s) are longer than original piece of verifier log, + * shift log contents by (patch_sz - orig_sz) bytes to the right + * starting from after to-be-replaced part of the log. + * + * If patch line(s) are shorter than original piece of verifier log, + * shift log contents by (orig_sz - patch_sz) bytes to the left + * starting from after to-be-replaced part of the log + * + * We need to be careful about not overflowing available + * buf_sz capacity. If that's the case, we'll truncate the end + * of the original log, as necessary. + */ + if (patch_sz > orig_sz) { + if (orig + patch_sz >= buf + buf_sz) { + /* patch is big enough to cover remaining space completely */ + patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; + rem_sz = 0; + } else if (patch_sz - orig_sz > buf_sz - log_sz) { + /* patch causes part of remaining log to be truncated */ + rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); + } + } + /* shift remaining log to the right by calculated amount */ + memmove(orig + patch_sz, orig + orig_sz, rem_sz); + } + + memcpy(orig, patch, patch_sz); +} + +static void fixup_log_failed_core_relo(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#195896080 + * line2 -> invalid func unknown#195896080 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. We extract + * instruction index to find corresponding CO-RE relocation and + * replace this part of the log with more relevant information about + * failed CO-RE relocation. + */ + const struct bpf_core_relo *relo; + struct bpf_core_spec spec; + char patch[512], spec_buf[256]; + int insn_idx, err, spec_len; + + if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) + return; + + relo = find_relo_core(prog, insn_idx); + if (!relo) + return; + + err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); + if (err) + return; + + spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + snprintf(patch, sizeof(patch), + "%d: <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation %s%s\n", + insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_log_missing_map_load(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded map reference: + * line1 -> 123: (85) call unknown#2001000345 + * line2 -> invalid func unknown#2001000345 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. + * "345" in "2001000345" is a map index in obj->maps to fetch map name. + */ + struct bpf_object *obj = prog->obj; + const struct bpf_map *map; + int insn_idx, map_idx; + char patch[128]; + + if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) + return; + + map_idx -= POISON_LDIMM64_MAP_BASE; + if (map_idx < 0 || map_idx >= obj->nr_maps) + return; + map = &obj->maps[map_idx]; + + snprintf(patch, sizeof(patch), + "%d: <invalid BPF map reference>\n" + "BPF map '%s' is referenced but wasn't created\n", + insn_idx, map->name); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_log_missing_kfunc_call(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded kfunc call: + * line1 -> 123: (85) call unknown#2002000345 + * line2 -> invalid func unknown#2002000345 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. + * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name. + */ + struct bpf_object *obj = prog->obj; + const struct extern_desc *ext; + int insn_idx, ext_idx; + char patch[128]; + + if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2) + return; + + ext_idx -= POISON_CALL_KFUNC_BASE; + if (ext_idx < 0 || ext_idx >= obj->nr_extern) + return; + ext = &obj->externs[ext_idx]; + + snprintf(patch, sizeof(patch), + "%d: <invalid kfunc call>\n" + "kfunc '%s' is referenced but wasn't resolved\n", + insn_idx, ext->name); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) +{ + /* look for familiar error patterns in last N lines of the log */ + const size_t max_last_line_cnt = 10; + char *prev_line, *cur_line, *next_line; + size_t log_sz; + int i; + + if (!buf) + return; + + log_sz = strlen(buf) + 1; + next_line = buf + log_sz - 1; + + for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { + cur_line = find_prev_line(buf, next_line); + if (!cur_line) + return; + + if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + /* failed CO-RE relocation case */ + fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + /* reference to uncreated BPF map */ + fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + /* reference to unresolved kfunc */ + fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } + } +} + static int bpf_program_record_relos(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; @@ -6760,19 +8156,22 @@ static int bpf_program_record_relos(struct bpf_program *prog) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; - struct extern_desc *ext = &obj->externs[relo->sym_off]; + struct extern_desc *ext = &obj->externs[relo->ext_idx]; + int kind; switch (relo->type) { - case RELO_EXTERN_VAR: + case RELO_EXTERN_LD64: if (ext->type != EXT_KSYM) continue; + kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ? + BTF_KIND_VAR : BTF_KIND_FUNC; bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, !ext->ksym.type_id, - BTF_KIND_VAR, relo->insn_idx); + true, kind, relo->insn_idx); break; - case RELO_EXTERN_FUNC: + case RELO_EXTERN_CALL: bpf_gen__record_extern(obj->gen_loader, ext->name, - ext->is_weak, false, BTF_KIND_FUNC, + ext->is_weak, false, false, BTF_KIND_FUNC, relo->insn_idx); break; case RELO_CORE: { @@ -6793,95 +8192,39 @@ static int bpf_program_record_relos(struct bpf_program *prog) return 0; } -static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, - const char *license, __u32 kern_ver) +static int +bpf_object__load_progs(struct bpf_object *obj, int log_level) { - int err = 0, fd, i; - - if (obj->loaded) { - pr_warn("prog '%s': can't load after object was loaded\n", prog->name); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr < 0 || !prog->instances.fds) { - if (prog->preprocessor) { - pr_warn("Internal error: can't load program '%s'\n", - prog->name); - return libbpf_err(-LIBBPF_ERRNO__INTERNAL); - } + struct bpf_program *prog; + size_t i; + int err; - prog->instances.fds = malloc(sizeof(int)); - if (!prog->instances.fds) { - pr_warn("Not enough memory for BPF fds\n"); - return libbpf_err(-ENOMEM); + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + if (prog_is_subprog(obj, prog)) + continue; + if (!prog->autoload) { + pr_debug("prog '%s': skipped loading\n", prog->name); + continue; } - prog->instances.nr = 1; - prog->instances.fds[0] = -1; - } + prog->log_level |= log_level; - if (!prog->preprocessor) { - if (prog->instances.nr != 1) { - pr_warn("prog '%s': inconsistent nr(%d) != 1\n", - prog->name, prog->instances.nr); - } if (obj->gen_loader) bpf_program_record_relos(prog); - err = bpf_object_load_prog_instance(obj, prog, - prog->insns, prog->insns_cnt, - license, kern_ver, &fd); - if (!err) - prog->instances.fds[0] = fd; - goto out; - } - - for (i = 0; i < prog->instances.nr; i++) { - struct bpf_prog_prep_result result; - bpf_program_prep_t preprocessor = prog->preprocessor; - - memset(&result, 0, sizeof(result)); - err = preprocessor(prog, i, prog->insns, - prog->insns_cnt, &result); - if (err) { - pr_warn("Preprocessing the %dth instance of program '%s' failed\n", - i, prog->name); - goto out; - } - if (!result.new_insn_ptr || !result.new_insn_cnt) { - pr_debug("Skip loading the %dth instance of program '%s'\n", - i, prog->name); - prog->instances.fds[i] = -1; - if (result.pfd) - *result.pfd = -1; - continue; - } - - err = bpf_object_load_prog_instance(obj, prog, - result.new_insn_ptr, result.new_insn_cnt, - license, kern_ver, &fd); + err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, + obj->license, obj->kern_version, &prog->fd); if (err) { - pr_warn("Loading the %dth instance of program '%s' failed\n", - i, prog->name); - goto out; + pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err)); + return err; } - - if (result.pfd) - *result.pfd = fd; - prog->instances.fds[i] = fd; } -out: - if (err) - pr_warn("failed to load program '%s'\n", prog->name); - return libbpf_err(err); -} -int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver) -{ - return bpf_object_load_prog(prog->obj, prog, license, kern_ver); + bpf_object__free_relocs(obj); + return 0; } -static int -bpf_object__load_progs(struct bpf_object *obj, int log_level) +static int bpf_object_prepare_progs(struct bpf_object *obj) { struct bpf_program *prog; size_t i; @@ -6893,22 +8236,6 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (err) return err; } - - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (prog_is_subprog(obj, prog)) - continue; - if (!prog->load) { - pr_debug("prog '%s': skipped loading\n", prog->name); - continue; - } - prog->log_level |= log_level; - err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version); - if (err) - return err; - } - if (obj->gen_loader) - bpf_object__free_relocs(obj); return 0; } @@ -6928,24 +8255,17 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object continue; } - bpf_program__set_type(prog, prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || - prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) - prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); -#pragma GCC diagnostic pop + prog->type = prog->sec_def->prog_type; + prog->expected_attach_type = prog->sec_def->expected_attach_type; /* sec_def can have custom callback which should be called * after bpf_program is initialized to adjust its properties */ - if (prog->sec_def->init_fn) { - err = prog->sec_def->init_fn(prog, prog->sec_def->cookie); + if (prog->sec_def->prog_setup_fn) { + err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to initialize: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to initialize: %s\n", + prog->name, errstr(err)); return err; } } @@ -6955,16 +8275,19 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object } static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, + const char *obj_name, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path; + const char *kconfig, *btf_tmp_path, *token_path; struct bpf_object *obj; - char tmp_name[64]; int err; char *log_buf; size_t log_size; __u32 log_level; + if (obj_buf && !obj_name) + return ERR_PTR(-EINVAL); + if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("failed to init libelf for %s\n", path ? : "(mem buf)"); @@ -6974,16 +8297,12 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (!OPTS_VALID(opts, bpf_object_open_opts)) return ERR_PTR(-EINVAL); - obj_name = OPTS_GET(opts, object_name, NULL); + obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name; if (obj_buf) { - if (!obj_name) { - snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", - (unsigned long)obj_buf, - (unsigned long)obj_buf_sz); - obj_name = tmp_name; - } path = obj_name; pr_debug("loading object '%s' from buffer\n", obj_name); + } else { + pr_debug("loading object from %s\n", path); } log_buf = OPTS_GET(opts, kernel_log_buf, NULL); @@ -6994,6 +8313,16 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); + token_path = OPTS_GET(opts, bpf_token_path, NULL); + /* if user didn't specify bpf_token_path explicitly, check if + * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path + * option + */ + if (!token_path) + token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); + if (token_path && strlen(token_path) >= PATH_MAX) + return ERR_PTR(-ENAMETOOLONG); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7002,6 +8331,14 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; + if (token_path) { + obj->token_path = strdup(token_path); + if (!obj->token_path) { + err = -ENOMEM; + goto out; + } + } + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -7025,10 +8362,9 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, } err = bpf_object__elf_init(obj); - err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); - err = err ? : bpf_object__finalize_btf(obj); + err = err ? : bpf_object_fixup_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); err = err ? : bpf_object_init_progs(obj, opts); err = err ? : bpf_object__collect_relos(obj); @@ -7043,72 +8379,33 @@ out: return ERR_PTR(err); } -static struct bpf_object * -__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) -{ - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .relaxed_maps = flags & MAPS_RELAX_COMPAT, - ); - - /* param validation */ - if (!attr->file) - return NULL; - - pr_debug("loading %s\n", attr->file); - return bpf_object_open(attr->file, NULL, 0, &opts); -} - -struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) -{ - return libbpf_ptr(__bpf_object__open_xattr(attr, 0)); -} - -struct bpf_object *bpf_object__open(const char *path) -{ - struct bpf_object_open_attr attr = { - .file = path, - .prog_type = BPF_PROG_TYPE_UNSPEC, - }; - - return libbpf_ptr(__bpf_object__open_xattr(&attr, 0)); -} - struct bpf_object * bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) { if (!path) return libbpf_err_ptr(-EINVAL); - pr_debug("loading %s\n", path); + return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts)); +} - return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); +struct bpf_object *bpf_object__open(const char *path) +{ + return bpf_object__open_file(path, NULL); } struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { + char tmp_name[64]; + if (!obj_buf || obj_buf_sz == 0) return libbpf_err_ptr(-EINVAL); - return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); -} - -struct bpf_object * -bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, - const char *name) -{ - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .object_name = name, - /* wrong default, but backwards-compatible */ - .relaxed_maps = true, - ); - - /* returning NULL is wrong, but backwards-compatible */ - if (!obj_buf || obj_buf_sz == 0) - return errno = EINVAL, NULL; + /* create a (quite useless) default "name" for this memory buffer object */ + snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz); - return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts)); + return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts)); } static int bpf_object_unload(struct bpf_object *obj) @@ -7130,8 +8427,6 @@ static int bpf_object_unload(struct bpf_object *obj) return 0; } -int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); - static int bpf_object__sanitize_maps(struct bpf_object *obj) { struct bpf_map *m; @@ -7140,25 +8435,26 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) if (!bpf_map__is_internal(m)) continue; if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) - m->def.map_flags ^= BPF_F_MMAPABLE; + m->def.map_flags &= ~BPF_F_MMAPABLE; } return 0; } -static int bpf_object__read_kallsyms_file(struct bpf_object *obj) +typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type, + const char *sym_name, void *ctx); + +static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) { char sym_type, sym_name[500]; unsigned long long sym_addr; - const struct btf_type *t; - struct extern_desc *ext; int ret, err = 0; FILE *f; - f = fopen("/proc/kallsyms", "r"); + f = fopen("/proc/kallsyms", "re"); if (!f) { err = -errno; - pr_warn("failed to open /proc/kallsyms: %d\n", err); + pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err)); return err; } @@ -7170,33 +8466,54 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj) if (ret != 3) { pr_warn("failed to read kallsyms entry: %d\n", ret); err = -EINVAL; - goto out; + break; } + err = cb(sym_addr, sym_type, sym_name, ctx); + if (err) + break; + } + + fclose(f); + return err; +} + +static int kallsyms_cb(unsigned long long sym_addr, char sym_type, + const char *sym_name, void *ctx) +{ + struct bpf_object *obj = ctx; + const struct btf_type *t; + struct extern_desc *ext; + char *res; + + res = strstr(sym_name, ".llvm."); + if (sym_type == 'd' && res) + ext = find_extern_by_name_with_len(obj, sym_name, res - sym_name); + else ext = find_extern_by_name(obj, sym_name); - if (!ext || ext->type != EXT_KSYM) - continue; + if (!ext || ext->type != EXT_KSYM) + return 0; - t = btf__type_by_id(obj->btf, ext->btf_id); - if (!btf_is_var(t)) - continue; + t = btf__type_by_id(obj->btf, ext->btf_id); + if (!btf_is_var(t)) + return 0; - if (ext->is_set && ext->ksym.addr != sym_addr) { - pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", - sym_name, ext->ksym.addr, sym_addr); - err = -EINVAL; - goto out; - } - if (!ext->is_set) { - ext->is_set = true; - ext->ksym.addr = sym_addr; - pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); - } + if (ext->is_set && ext->ksym.addr != sym_addr) { + pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", + sym_name, ext->ksym.addr, sym_addr); + return -EINVAL; + } + if (!ext->is_set) { + ext->is_set = true; + ext->ksym.addr = sym_addr; + pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); } + return 0; +} -out: - fclose(f); - return err; +static int bpf_object__read_kallsyms_file(struct bpf_object *obj) +{ + return libbpf_kallsyms_parse(kallsyms_cb, obj); } static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, @@ -7297,7 +8614,8 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, local_func_proto_id = ext->ksym.type_id; - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); + kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, + &mod_btf); if (kfunc_id < 0) { if (kfunc_id == -ESRCH && ext->is_weak) return 0; @@ -7312,8 +8630,12 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, kern_btf, kfunc_proto_id); if (ret <= 0) { - pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n", - ext->name, local_func_proto_id, kfunc_proto_id); + if (ext->is_weak) + return 0; + + pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", + ext->name, local_func_proto_id, + mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id); return -EINVAL; } @@ -7341,8 +8663,14 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, ext->is_set = true; ext->ksym.kernel_btf_id = kfunc_id; ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; - pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", - ext->name, kfunc_id); + /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() + * populates FD into ld_imm64 insn when it's used to point to kfunc. + * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. + * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. + */ + ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; + pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n", + ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id); return 0; } @@ -7393,28 +8721,52 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (ext->type == EXT_KCFG && - strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { - void *ext_val = kcfg_data + ext->kcfg.data_off; - __u32 kver = get_kernel_version(); + if (ext->type == EXT_KSYM) { + if (ext->ksym.type_id) + need_vmlinux_btf = true; + else + need_kallsyms = true; + continue; + } else if (ext->type == EXT_KCFG) { + void *ext_ptr = kcfg_data + ext->kcfg.data_off; + __u64 value = 0; + + /* Kconfig externs need actual /proc/config.gz */ + if (str_has_pfx(ext->name, "CONFIG_")) { + need_config = true; + continue; + } - if (!kver) { - pr_warn("failed to get kernel version\n"); + /* Virtual kcfg externs are customly handled by libbpf */ + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + value = get_kernel_version(); + if (!value) { + pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); + return -EINVAL; + } + } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { + value = kernel_supports(obj, FEAT_BPF_COOKIE); + } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { + value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); + } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { + /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed + * __kconfig externs, where LINUX_ ones are virtual and filled out + * customly by libbpf (their values don't come from Kconfig). + * If LINUX_xxx variable is not recognized by libbpf, but is marked + * __weak, it defaults to zero value, just like for CONFIG_xxx + * externs. + */ + pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); return -EINVAL; } - err = set_kcfg_value_num(ext, ext_val, kver); + + err = set_kcfg_value_num(ext, ext_ptr, value); if (err) return err; - pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); - } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) { - need_config = true; - } else if (ext->type == EXT_KSYM) { - if (ext->ksym.type_id) - need_vmlinux_btf = true; - else - need_kallsyms = true; + pr_debug("extern (kcfg) '%s': set to 0x%llx\n", + ext->name, (long long)value); } else { - pr_warn("unrecognized extern '%s'\n", ext->name); + pr_warn("extern '%s': unrecognized extern kind\n", ext->name); return -EINVAL; } } @@ -7450,10 +8802,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, ext = &obj->externs[i]; if (!ext->is_set && !ext->is_weak) { - pr_warn("extern %s (strong) not resolved\n", ext->name); + pr_warn("extern '%s' (strong): not resolved\n", ext->name); return -ESRCH; } else if (!ext->is_set) { - pr_debug("extern %s (weak) not resolved, defaulting to zero\n", + pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", ext->name); } } @@ -7461,42 +8813,62 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, return 0; } -static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) +static void bpf_map_prepare_vdata(const struct bpf_map *map) { - int err, i; + const struct btf_type *type; + struct bpf_struct_ops *st_ops; + __u32 i; - if (!obj) - return libbpf_err(-EINVAL); + st_ops = map->st_ops; + type = btf__type_by_id(map->obj->btf, st_ops->type_id); + for (i = 0; i < btf_vlen(type); i++) { + struct bpf_program *prog = st_ops->progs[i]; + void *kern_data; + int prog_fd; - if (obj->loaded) { - pr_warn("object '%s': load can't be attempted twice\n", obj->name); - return libbpf_err(-EINVAL); + if (!prog) + continue; + + prog_fd = bpf_program__fd(prog); + kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; + *(unsigned long *)kern_data = prog_fd; } +} - if (obj->gen_loader) - bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); +static int bpf_object_prepare_struct_ops(struct bpf_object *obj) +{ + struct bpf_map *map; + int i; - err = bpf_object__probe_loading(obj); - err = err ? : bpf_object__load_vmlinux_btf(obj, false); - err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); - err = err ? : bpf_object__sanitize_and_load_btf(obj); - err = err ? : bpf_object__sanitize_maps(obj); - err = err ? : bpf_object__init_kern_struct_ops_maps(obj); - err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); - err = err ? : bpf_object__load_progs(obj, extra_log_level); - err = err ? : bpf_object_init_prog_arrays(obj); + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; - if (obj->gen_loader) { - /* reset FDs */ - if (obj->btf) - btf__set_fd(obj->btf, -1); - for (i = 0; i < obj->nr_maps; i++) - obj->maps[i].fd = -1; - if (!err) - err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); + if (!bpf_map__is_struct_ops(map)) + continue; + + if (!map->autocreate) + continue; + + bpf_map_prepare_vdata(map); } + return 0; +} + +static void bpf_object_unpin(struct bpf_object *obj) +{ + int i; + + /* unpin any maps that were auto-pinned during load */ + for (i = 0; i < obj->nr_maps; i++) + if (obj->maps[i].pinned && !obj->maps[i].reused) + bpf_map__unpin(&obj->maps[i], NULL); +} + +static void bpf_object_post_load_cleanup(struct bpf_object *obj) +{ + int i; + /* clean up fd_array */ zfree(&obj->fd_array); @@ -7506,32 +8878,101 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch btf__free(obj->btf_modules[i].btf); free(obj->btf_modules[i].name); } - free(obj->btf_modules); + obj->btf_module_cnt = 0; + zfree(&obj->btf_modules); /* clean up vmlinux BTF */ btf__free(obj->btf_vmlinux); obj->btf_vmlinux = NULL; +} - obj->loaded = true; /* doesn't matter if successfully or not */ +static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path) +{ + int err; - if (err) - goto out; + if (obj->state >= OBJ_PREPARED) { + pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name); + return -EINVAL; + } + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj, false); + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); + err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); + err = err ? : bpf_object_adjust_struct_ops_autoload(obj); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); + err = err ? : bpf_object__sanitize_and_load_btf(obj); + err = err ? : bpf_object__create_maps(obj); + err = err ? : bpf_object_prepare_progs(obj); + + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + obj->state = OBJ_LOADED; + return err; + } + + obj->state = OBJ_PREPARED; return 0; -out: - /* unpin any maps that were auto-pinned during load */ - for (i = 0; i < obj->nr_maps; i++) - if (obj->maps[i].pinned && !obj->maps[i].reused) - bpf_map__unpin(&obj->maps[i], NULL); +} - bpf_object_unload(obj); - pr_warn("failed to load object '%s'\n", obj->path); - return libbpf_err(err); +static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) +{ + int err; + + if (!obj) + return libbpf_err(-EINVAL); + + if (obj->state >= OBJ_LOADED) { + pr_warn("object '%s': load can't be attempted twice\n", obj->name); + return libbpf_err(-EINVAL); + } + + /* Disallow kernel loading programs of non-native endianness but + * permit cross-endian creation of "light skeleton". + */ + if (obj->gen_loader) { + bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + } else if (!is_native_endianness(obj)) { + pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name); + return libbpf_err(-LIBBPF_ERRNO__ENDIAN); + } + + if (obj->state < OBJ_PREPARED) { + err = bpf_object_prepare(obj, target_btf_path); + if (err) + return libbpf_err(err); + } + err = bpf_object__load_progs(obj, extra_log_level); + err = err ? : bpf_object_init_prog_arrays(obj); + err = err ? : bpf_object_prepare_struct_ops(obj); + + if (obj->gen_loader) { + /* reset FDs */ + if (obj->btf) + btf__set_fd(obj->btf, -1); + if (!err) + err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); + } + + bpf_object_post_load_cleanup(obj); + obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */ + + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + pr_warn("failed to load object '%s'\n", obj->path); + return libbpf_err(err); + } + + return 0; } -int bpf_object__load_xattr(struct bpf_object_load_attr *attr) +int bpf_object__prepare(struct bpf_object *obj) { - return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path); + return libbpf_err(bpf_object_prepare(obj, NULL)); } int bpf_object__load(struct bpf_object *obj) @@ -7541,7 +8982,6 @@ int bpf_object__load(struct bpf_object *obj) static int make_parent_dir(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; char *dname, *dir; int err = 0; @@ -7555,15 +8995,13 @@ static int make_parent_dir(const char *path) free(dname); if (err) { - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to mkdir %s: %s\n", path, cp); + pr_warn("failed to mkdir %s: %s\n", path, errstr(err)); } return err; } static int check_path(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; struct statfs st_fs; char *dname, *dir; int err = 0; @@ -7577,8 +9015,7 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("failed to statfs %s: %s\n", dir, cp); + pr_warn("failed to statfs %s: %s\n", dir, errstr(errno)); err = -errno; } free(dname); @@ -7591,11 +9028,15 @@ static int check_path(const char *path) return err; } -static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance) +int bpf_program__pin(struct bpf_program *prog, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; + if (prog->fd < 0) { + pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); + return libbpf_err(-EINVAL); + } + err = make_parent_dir(path); if (err) return libbpf_err(err); @@ -7604,176 +9045,39 @@ static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, if (err) return libbpf_err(err); - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (instance < 0 || instance >= prog->instances.nr) { - pr_warn("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->name, prog->instances.nr); - return libbpf_err(-EINVAL); - } - - if (bpf_obj_pin(prog->instances.fds[instance], path)) { + if (bpf_obj_pin(prog->fd, path)) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("failed to pin program: %s\n", cp); + pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err)); return libbpf_err(err); } - pr_debug("pinned program '%s'\n", path); + pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); return 0; } -static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance) +int bpf_program__unpin(struct bpf_program *prog, const char *path) { int err; - err = check_path(path); - if (err) - return libbpf_err(err); - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (instance < 0 || instance >= prog->instances.nr) { - pr_warn("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->name, prog->instances.nr); + if (prog->fd < 0) { + pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); return libbpf_err(-EINVAL); } - err = unlink(path); - if (err != 0) - return libbpf_err(-errno); - - pr_debug("unpinned program '%s'\n", path); - - return 0; -} - -__attribute__((alias("bpf_program_pin_instance"))) -int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance); - -__attribute__((alias("bpf_program_unpin_instance"))) -int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); - -int bpf_program__pin(struct bpf_program *prog, const char *path) -{ - int i, err; - - err = make_parent_dir(path); - if (err) - return libbpf_err(err); - err = check_path(path); if (err) return libbpf_err(err); - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", prog->name); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr == 1) { - /* don't create subdirs when pinning single instance */ - return bpf_program_pin_instance(prog, path, 0); - } - - for (i = 0; i < prog->instances.nr; i++) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) { - err = -EINVAL; - goto err_unpin; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; - goto err_unpin; - } - - err = bpf_program_pin_instance(prog, buf, i); - if (err) - goto err_unpin; - } - - return 0; - -err_unpin: - for (i = i - 1; i >= 0; i--) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) - continue; - else if (len >= PATH_MAX) - continue; - - bpf_program_unpin_instance(prog, buf, i); - } - - rmdir(path); - - return libbpf_err(err); -} - -int bpf_program__unpin(struct bpf_program *prog, const char *path) -{ - int i, err; - - err = check_path(path); - if (err) - return libbpf_err(err); - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", prog->name); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr == 1) { - /* don't create subdirs when pinning single instance */ - return bpf_program_unpin_instance(prog, path, 0); - } - - for (i = 0; i < prog->instances.nr; i++) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) - return libbpf_err(-EINVAL); - else if (len >= PATH_MAX) - return libbpf_err(-ENAMETOOLONG); - - err = bpf_program_unpin_instance(prog, buf, i); - if (err) - return err; - } - - err = rmdir(path); + err = unlink(path); if (err) return libbpf_err(-errno); + pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); return 0; } int bpf_map__pin(struct bpf_map *map, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; if (map == NULL) { @@ -7781,6 +9085,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path) return libbpf_err(-EINVAL); } + if (map->fd < 0) { + pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name); + return libbpf_err(-EINVAL); + } + if (map->pin_path) { if (path && strcmp(path, map->pin_path)) { pr_warn("map '%s' already has pin path '%s' different from '%s'\n", @@ -7827,8 +9136,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path) return 0; out_err: - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to pin map: %s\n", cp); + pr_warn("failed to pin map: %s\n", errstr(err)); return libbpf_err(err); } @@ -7883,10 +9191,8 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path) return 0; } -const char *bpf_map__get_pin_path(const struct bpf_map *map) -{ - return map->pin_path; -} +__alias(bpf_map__pin_path) +const char *bpf_map__get_pin_path(const struct bpf_map *map); const char *bpf_map__pin_path(const struct bpf_map *map) { @@ -7916,7 +9222,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_PREPARED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } @@ -7925,21 +9231,13 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; - if (map->skipped) + if (!map->autocreate) continue; if (path) { - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) { - err = -EINVAL; - goto err_unpin_maps; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; + err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); + if (err) goto err_unpin_maps; - } sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { @@ -7977,14 +9275,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) char buf[PATH_MAX]; if (path) { - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) - return libbpf_err(-EINVAL); - else if (len >= PATH_MAX) - return libbpf_err(-ENAMETOOLONG); + err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); + if (err) + return libbpf_err(err); sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { @@ -8002,29 +9295,21 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) int bpf_object__pin_programs(struct bpf_object *obj, const char *path) { struct bpf_program *prog; + char buf[PATH_MAX]; int err; if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_LOADED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } bpf_object__for_each_program(prog, obj) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); - if (len < 0) { - err = -EINVAL; - goto err_unpin_programs; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; + err = pathname_concat(buf, sizeof(buf), path, prog->name); + if (err) goto err_unpin_programs; - } err = bpf_program__pin(prog, buf); if (err) @@ -8035,14 +9320,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) err_unpin_programs: while ((prog = bpf_object__prev_program(obj, prog))) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); - if (len < 0) - continue; - else if (len >= PATH_MAX) + if (pathname_concat(buf, sizeof(buf), path, prog->name)) continue; bpf_program__unpin(prog, buf); @@ -8061,14 +9339,10 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; - int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); - if (len < 0) - return libbpf_err(-EINVAL); - else if (len >= PATH_MAX) - return libbpf_err(-ENAMETOOLONG); + err = pathname_concat(buf, sizeof(buf), path, prog->name); + if (err) + return libbpf_err(err); err = bpf_program__unpin(prog, buf); if (err) @@ -8095,13 +9369,23 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) return 0; } -static void bpf_map__destroy(struct bpf_map *map) +int bpf_object__unpin(struct bpf_object *obj, const char *path) { - if (map->clear_priv) - map->clear_priv(map, map->priv); - map->priv = NULL; - map->clear_priv = NULL; + int err; + err = bpf_object__unpin_programs(obj, path); + if (err) + return libbpf_err(err); + + err = bpf_object__unpin_maps(obj, path); + if (err) + return libbpf_err(err); + + return 0; +} + +static void bpf_map__destroy(struct bpf_map *map) +{ if (map->inner_map) { bpf_map__destroy(map->inner_map); zfree(&map->inner_map); @@ -8110,10 +9394,9 @@ static void bpf_map__destroy(struct bpf_map *map) zfree(&map->init_slots); map->init_slots_sz = 0; - if (map->mmaped) { + if (map->mmaped && map->mmaped != map->obj->arena_data) munmap(map->mmaped, bpf_map_mmap_sz(map)); - map->mmaped = NULL; - } + map->mmaped = NULL; if (map->st_ops) { zfree(&map->st_ops->data); @@ -8137,13 +9420,21 @@ void bpf_object__close(struct bpf_object *obj) if (IS_ERR_OR_NULL(obj)) return; - if (obj->clear_priv) - obj->clear_priv(obj, obj->priv); + /* + * if user called bpf_object__prepare() without ever getting to + * bpf_object__load(), we need to clean up stuff that is normally + * cleaned up at the end of loading step + */ + bpf_object_post_load_cleanup(obj); + + usdt_manager_free(obj->usdt_man); + obj->usdt_man = NULL; bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); bpf_object_unload(obj); btf__free(obj->btf); + btf__free(obj->btf_vmlinux); btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) @@ -8151,6 +9442,12 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); + + for (i = 0; i < obj->nr_extern; i++) { + zfree(&obj->externs[i].name); + zfree(&obj->externs[i].essent_name); + } + zfree(&obj->externs); obj->nr_extern = 0; @@ -8163,31 +9460,21 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); - list_del(&obj->list); - free(obj); -} + zfree(&obj->feat_cache); + zfree(&obj->token_path); + if (obj->token_fd > 0) + close(obj->token_fd); -struct bpf_object * -bpf_object__next(struct bpf_object *prev) -{ - struct bpf_object *next; - bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); - - if (strict) - return NULL; + zfree(&obj->arena_data); - if (!prev) - next = list_first_entry(&bpf_objects_list, - struct bpf_object, - list); - else - next = list_next_entry(prev, list); + zfree(&obj->jumptables_data); + obj->jumptables_data_sz = 0; - /* Empty list is noticed here so don't need checking on entry. */ - if (&next->list == &bpf_objects_list) - return NULL; + for (i = 0; i < obj->jumptable_map_cnt; i++) + close(obj->jumptable_maps[i].fd); + zfree(&obj->jumptable_maps); - return next; + free(obj); } const char *bpf_object__name(const struct bpf_object *obj) @@ -8200,6 +9487,11 @@ unsigned int bpf_object__kversion(const struct bpf_object *obj) return obj ? obj->kern_version : 0; } +int bpf_object__token_fd(const struct bpf_object *obj) +{ + return obj->token_fd ?: -1; +} + struct btf *bpf_object__btf(const struct bpf_object *obj) { return obj ? obj->btf : NULL; @@ -8212,7 +9504,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj) int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) { - if (obj->loaded) + if (obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); obj->kern_version = kern_version; @@ -8220,34 +9512,19 @@ int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) return 0; } -int bpf_object__set_priv(struct bpf_object *obj, void *priv, - bpf_object_clear_priv_t clear_priv) -{ - if (obj->priv && obj->clear_priv) - obj->clear_priv(obj, obj->priv); - - obj->priv = priv; - obj->clear_priv = clear_priv; - return 0; -} - -void *bpf_object__priv(const struct bpf_object *obj) -{ - return obj ? obj->priv : libbpf_err_ptr(-EINVAL); -} - int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) { struct bpf_gen *gen; if (!opts) - return -EFAULT; + return libbpf_err(-EFAULT); if (!OPTS_VALID(opts, gen_loader_opts)) - return -EINVAL; - gen = calloc(sizeof(*gen), 1); + return libbpf_err(-EINVAL); + gen = calloc(1, sizeof(*gen)); if (!gen) - return -ENOMEM; + return libbpf_err(-ENOMEM); gen->opts = opts; + gen->swapped_endian = !is_native_endianness(obj); obj->gen_loader = gen; return 0; } @@ -8279,12 +9556,6 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, } struct bpf_program * -bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) -{ - return bpf_object__next_program(obj, prev); -} - -struct bpf_program * bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) { struct bpf_program *prog = prev; @@ -8297,12 +9568,6 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) } struct bpf_program * -bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) -{ - return bpf_object__prev_program(obj, next); -} - -struct bpf_program * bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) { struct bpf_program *prog = next; @@ -8314,22 +9579,6 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) return prog; } -int bpf_program__set_priv(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv) -{ - if (prog->priv && prog->clear_priv) - prog->clear_priv(prog, prog->priv); - - prog->priv = priv; - prog->clear_priv = clear_priv; - return 0; -} - -void *bpf_program__priv(const struct bpf_program *prog) -{ - return prog ? prog->priv : libbpf_err_ptr(-EINVAL); -} - void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) { prog->prog_ifindex = ifindex; @@ -8345,46 +9594,28 @@ const char *bpf_program__section_name(const struct bpf_program *prog) return prog->sec_name; } -const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) -{ - const char *title; - - title = prog->sec_name; - if (needs_copy) { - title = strdup(title); - if (!title) { - pr_warn("failed to strdup program title\n"); - return libbpf_err_ptr(-ENOMEM); - } - } - - return title; -} - bool bpf_program__autoload(const struct bpf_program *prog) { - return prog->load; + return prog->autoload; } int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); - prog->load = autoload; + prog->autoload = autoload; return 0; } -static int bpf_program_nth_fd(const struct bpf_program *prog, int n); - -int bpf_program__fd(const struct bpf_program *prog) +bool bpf_program__autoattach(const struct bpf_program *prog) { - return bpf_program_nth_fd(prog, 0); + return prog->autoattach; } -size_t bpf_program__size(const struct bpf_program *prog) +void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) { - return prog->insns_cnt * BPF_INSN_SZ; + prog->autoattach = autoattach; } const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) @@ -8397,114 +9628,90 @@ size_t bpf_program__insn_cnt(const struct bpf_program *prog) return prog->insns_cnt; } -int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, - bpf_program_prep_t prep) +int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt) { - int *instances_fds; - - if (nr_instances <= 0 || !prep) - return libbpf_err(-EINVAL); + struct bpf_insn *insns; - if (prog->instances.nr > 0 || prog->instances.fds) { - pr_warn("Can't set pre-processor after loading\n"); - return libbpf_err(-EINVAL); - } + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); - instances_fds = malloc(sizeof(int) * nr_instances); - if (!instances_fds) { - pr_warn("alloc memory failed for fds\n"); + insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); + /* NULL is a valid return from reallocarray if the new count is zero */ + if (!insns && new_insn_cnt) { + pr_warn("prog '%s': failed to realloc prog code\n", prog->name); return libbpf_err(-ENOMEM); } + memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); - /* fill all fd with -1 */ - memset(instances_fds, -1, sizeof(int) * nr_instances); - - prog->instances.nr = nr_instances; - prog->instances.fds = instances_fds; - prog->preprocessor = prep; + prog->insns = insns; + prog->insns_cnt = new_insn_cnt; return 0; } -__attribute__((alias("bpf_program_nth_fd"))) -int bpf_program__nth_fd(const struct bpf_program *prog, int n); - -static int bpf_program_nth_fd(const struct bpf_program *prog, int n) +int bpf_program__fd(const struct bpf_program *prog) { - int fd; - if (!prog) return libbpf_err(-EINVAL); - if (n >= prog->instances.nr || n < 0) { - pr_warn("Can't get the %dth fd from program %s: only %d instances\n", - n, prog->name, prog->instances.nr); - return libbpf_err(-EINVAL); - } - - fd = prog->instances.fds[n]; - if (fd < 0) { - pr_warn("%dth instance of program '%s' is invalid\n", - n, prog->name); + if (prog->fd < 0) return libbpf_err(-ENOENT); - } - return fd; + return prog->fd; } -enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog) +__alias(bpf_program__type) +enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); + +enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) { return prog->type; } -void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) +static size_t custom_sec_def_cnt; +static struct bpf_sec_def *custom_sec_defs; +static struct bpf_sec_def custom_fallback_def; +static bool has_custom_fallback_def; +static int last_custom_sec_def_handler_id; + +int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); + + /* if type is not changed, do nothing */ + if (prog->type == type) + return 0; + prog->type = type; + + /* If a program type was changed, we need to reset associated SEC() + * handler, as it will be invalid now. The only exception is a generic + * fallback handler, which by definition is program type-agnostic and + * is a catch-all custom handler, optionally set by the application, + * so should be able to handle any type of BPF program. + */ + if (prog->sec_def != &custom_fallback_def) + prog->sec_def = NULL; + return 0; } -static bool bpf_program__is_type(const struct bpf_program *prog, - enum bpf_prog_type type) -{ - return prog ? (prog->type == type) : false; -} - -#define BPF_PROG_TYPE_FNS(NAME, TYPE) \ -int bpf_program__set_##NAME(struct bpf_program *prog) \ -{ \ - if (!prog) \ - return libbpf_err(-EINVAL); \ - bpf_program__set_type(prog, TYPE); \ - return 0; \ -} \ - \ -bool bpf_program__is_##NAME(const struct bpf_program *prog) \ -{ \ - return bpf_program__is_type(prog, TYPE); \ -} \ - -BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER); -BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM); -BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); -BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); -BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); -BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); -BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); -BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); -BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); -BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); -BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); -BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); -BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); - -enum bpf_attach_type -bpf_program__get_expected_attach_type(const struct bpf_program *prog) +__alias(bpf_program__expected_attach_type) +enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); + +enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) { return prog->expected_attach_type; } -void bpf_program__set_expected_attach_type(struct bpf_program *prog, +int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) { + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); + prog->expected_attach_type = type; + return 0; } __u32 bpf_program__flags(const struct bpf_program *prog) @@ -8514,7 +9721,7 @@ __u32 bpf_program__flags(const struct bpf_program *prog) int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->prog_flags = flags; @@ -8528,7 +9735,7 @@ __u32 bpf_program__log_level(const struct bpf_program *prog) int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->log_level = log_level; @@ -8544,160 +9751,313 @@ const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_siz int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) { if (log_size && !log_buf) - return -EINVAL; + return libbpf_err(-EINVAL); if (prog->log_size > UINT_MAX) - return -EINVAL; - if (prog->obj->loaded) - return -EBUSY; + return libbpf_err(-EINVAL); + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); prog->log_buf = log_buf; prog->log_size = log_size; return 0; } +struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog) +{ + if (prog->func_info_rec_size != sizeof(struct bpf_func_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->func_info; +} + +__u32 bpf_program__func_info_cnt(const struct bpf_program *prog) +{ + return prog->func_info_cnt; +} + +struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog) +{ + if (prog->line_info_rec_size != sizeof(struct bpf_line_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->line_info; +} + +__u32 bpf_program__line_info_cnt(const struct bpf_program *prog) +{ + return prog->line_info_cnt; +} + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ - .sec = sec_pfx, \ + .sec = (char *)sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ .expected_attach_type = atype, \ .cookie = (long)(flags), \ - .preload_fn = libbpf_preload_prog, \ + .prog_prepare_load_fn = libbpf_prepare_prog_load, \ __VA_ARGS__ \ } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie); +static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); static const struct bpf_sec_def section_defs[] = { - SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), - SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), - SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), - SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fexit/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry.s/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fmod_ret.s/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fexit.s/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), - SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), - SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), + SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), + SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), + SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), + SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), + SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session), + SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), + SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), + SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), + SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), + SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ + SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ + SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), + SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), + SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ + SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ + SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ + SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), + SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE), + SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), + SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), + SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), + SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), - SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), - SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), - SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), + SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), + SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), + SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), + SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), + SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), + SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), + SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), + SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), + SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), + SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), + SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), + SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), + SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), + SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), + SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), + SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), + SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), - SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), + SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), + SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), }; -#define MAX_TYPE_NAME_SIZE 32 +int libbpf_register_prog_handler(const char *sec, + enum bpf_prog_type prog_type, + enum bpf_attach_type exp_attach_type, + const struct libbpf_prog_handler_opts *opts) +{ + struct bpf_sec_def *sec_def; -static const struct bpf_sec_def *find_sec_def(const char *sec_name) + if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) + return libbpf_err(-EINVAL); + + if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ + return libbpf_err(-E2BIG); + + if (sec) { + sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, + sizeof(*sec_def)); + if (!sec_def) + return libbpf_err(-ENOMEM); + + custom_sec_defs = sec_def; + sec_def = &custom_sec_defs[custom_sec_def_cnt]; + } else { + if (has_custom_fallback_def) + return libbpf_err(-EBUSY); + + sec_def = &custom_fallback_def; + } + + sec_def->sec = sec ? strdup(sec) : NULL; + if (sec && !sec_def->sec) + return libbpf_err(-ENOMEM); + + sec_def->prog_type = prog_type; + sec_def->expected_attach_type = exp_attach_type; + sec_def->cookie = OPTS_GET(opts, cookie, 0); + + sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL); + sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL); + sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL); + + sec_def->handler_id = ++last_custom_sec_def_handler_id; + + if (sec) + custom_sec_def_cnt++; + else + has_custom_fallback_def = true; + + return sec_def->handler_id; +} + +int libbpf_unregister_prog_handler(int handler_id) { - const struct bpf_sec_def *sec_def; - enum sec_def_flags sec_flags; - int i, n = ARRAY_SIZE(section_defs), len; - bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME; + struct bpf_sec_def *sec_defs; + int i; - for (i = 0; i < n; i++) { - sec_def = §ion_defs[i]; - sec_flags = sec_def->cookie; - len = strlen(sec_def->sec); + if (handler_id <= 0) + return libbpf_err(-EINVAL); - /* "type/" always has to have proper SEC("type/extras") form */ - if (sec_def->sec[len - 1] == '/') { - if (str_has_pfx(sec_name, sec_def->sec)) - return sec_def; - continue; - } + if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) { + memset(&custom_fallback_def, 0, sizeof(custom_fallback_def)); + has_custom_fallback_def = false; + return 0; + } - /* "type+" means it can be either exact SEC("type") or - * well-formed SEC("type/extras") with proper '/' separator - */ - if (sec_def->sec[len - 1] == '+') { - len--; - /* not even a prefix */ - if (strncmp(sec_name, sec_def->sec, len) != 0) - continue; - /* exact match or has '/' separator */ - if (sec_name[len] == '\0' || sec_name[len] == '/') - return sec_def; - continue; - } + for (i = 0; i < custom_sec_def_cnt; i++) { + if (custom_sec_defs[i].handler_id == handler_id) + break; + } - /* SEC_SLOPPY_PFX definitions are allowed to be just prefix - * matches, unless strict section name mode - * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the - * match has to be exact. - */ - if ((sec_flags & SEC_SLOPPY_PFX) && !strict) { - if (str_has_pfx(sec_name, sec_def->sec)) - return sec_def; - continue; - } + if (i == custom_sec_def_cnt) + return libbpf_err(-ENOENT); - /* Definitions not marked SEC_SLOPPY_PFX (e.g., - * SEC("syscall")) are exact matches in both modes. - */ - if (strcmp(sec_name, sec_def->sec) == 0) + free(custom_sec_defs[i].sec); + for (i = i + 1; i < custom_sec_def_cnt; i++) + custom_sec_defs[i - 1] = custom_sec_defs[i]; + custom_sec_def_cnt--; + + /* try to shrink the array, but it's ok if we couldn't */ + sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs)); + /* if new count is zero, reallocarray can return a valid NULL result; + * in this case the previous pointer will be freed, so we *have to* + * reassign old pointer to the new value (even if it's NULL) + */ + if (sec_defs || custom_sec_def_cnt == 0) + custom_sec_defs = sec_defs; + + return 0; +} + +static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name) +{ + size_t len = strlen(sec_def->sec); + + /* "type/" always has to have proper SEC("type/extras") form */ + if (sec_def->sec[len - 1] == '/') { + if (str_has_pfx(sec_name, sec_def->sec)) + return true; + return false; + } + + /* "type+" means it can be either exact SEC("type") or + * well-formed SEC("type/extras") with proper '/' separator + */ + if (sec_def->sec[len - 1] == '+') { + len--; + /* not even a prefix */ + if (strncmp(sec_name, sec_def->sec, len) != 0) + return false; + /* exact match or has '/' separator */ + if (sec_name[len] == '\0' || sec_name[len] == '/') + return true; + return false; + } + + return strcmp(sec_name, sec_def->sec) == 0; +} + +static const struct bpf_sec_def *find_sec_def(const char *sec_name) +{ + const struct bpf_sec_def *sec_def; + int i, n; + + n = custom_sec_def_cnt; + for (i = 0; i < n; i++) { + sec_def = &custom_sec_defs[i]; + if (sec_def_matches(sec_def, sec_name)) + return sec_def; + } + + n = ARRAY_SIZE(section_defs); + for (i = 0; i < n; i++) { + sec_def = §ion_defs[i]; + if (sec_def_matches(sec_def, sec_name)) return sec_def; } + + if (has_custom_fallback_def) + return &custom_fallback_def; + return NULL; } +#define MAX_TYPE_NAME_SIZE 32 + static char *libbpf_get_type_names(bool attach_type) { int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; @@ -8713,7 +10073,7 @@ static char *libbpf_get_type_names(bool attach_type) const struct bpf_sec_def *sec_def = §ion_defs[i]; if (attach_type) { - if (sec_def->preload_fn != libbpf_preload_prog) + if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) continue; if (!(sec_def->cookie & SEC_ATTACHABLE)) @@ -8757,7 +10117,40 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, return libbpf_err(-ESRCH); } +const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) + return NULL; + + return attach_type_name[t]; +} + +const char *libbpf_bpf_link_type_str(enum bpf_link_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(link_type_name)) + return NULL; + + return link_type_name[t]; +} + +const char *libbpf_bpf_map_type_str(enum bpf_map_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(map_type_name)) + return NULL; + + return map_type_name[t]; +} + +const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) + return NULL; + + return prog_type_name[t]; +} + static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, + int sec_idx, size_t offset) { struct bpf_map *map; @@ -8767,7 +10160,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, map = &obj->maps[i]; if (!bpf_map__is_struct_ops(map)) continue; - if (map->sec_offset <= offset && + if (map->sec_idx == sec_idx && + map->sec_offset <= offset && offset - map->sec_offset < map->def.value_size) return map; } @@ -8775,10 +10169,13 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, return NULL; } -/* Collect the reloc from ELF and populate the st_ops->progs[] */ +/* Collect the reloc from ELF, populate the st_ops->progs[], and update + * st_ops->data for shadow type. + */ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { + const struct btf_type *type; const struct btf_member *member; struct bpf_struct_ops *st_ops; struct bpf_program *prog; @@ -8809,7 +10206,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } name = elf_sym_str(obj, sym->st_name) ?: "<?>"; - map = find_struct_ops_map_by_offset(obj, rel->r_offset); + map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset); if (!map) { pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", (size_t)rel->r_offset); @@ -8838,13 +10235,14 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } insn_idx = sym->st_value / BPF_INSN_SZ; - member = find_member_by_offset(st_ops->type, moff * 8); + type = btf__type_by_id(btf, st_ops->type_id); + member = find_member_by_offset(type, moff * 8); if (!member) { pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", map->name, moff); return -EINVAL; } - member_idx = member - btf_members(st_ops->type); + member_idx = member - btf_members(type); name = btf__name_by_offset(btf, member->name_off); if (!resolve_func_ptr(btf, member->type, NULL)) { @@ -8867,27 +10265,15 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -EINVAL; } - /* if we haven't yet processed this BPF program, record proper - * attach_btf_id and member_idx - */ - if (!prog->attach_btf_id) { - prog->attach_btf_id = st_ops->type_id; - prog->expected_attach_type = member_idx; - } + st_ops->progs[member_idx] = prog; - /* struct_ops BPF prog can be re-used between multiple - * .struct_ops as long as it's the same struct_ops struct - * definition and the same function pointer field + /* st_ops->data will be exposed to users, being returned by + * bpf_map__initial_value() as a pointer to the shadow + * type. All function pointers in the original struct type + * should be converted to a pointer to struct bpf_program + * in the shadow type. */ - if (prog->attach_btf_id != st_ops->type_id || - prog->expected_attach_type != member_idx) { - pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", - map->name, prog->name, prog->sec_name, prog->type, - prog->attach_btf_id, prog->expected_attach_type, name); - return -EINVAL; - } - - st_ops->progs[member_idx] = prog; + *((struct bpf_program **)(st_ops->data + moff)) = prog; } return 0; @@ -8907,6 +10293,7 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, *kind = BTF_KIND_TYPEDEF; break; case BPF_LSM_MAC: + case BPF_LSM_CGROUP: *prefix = BTF_LSM_PREFIX; *kind = BTF_KIND_FUNC; break; @@ -8968,17 +10355,18 @@ int libbpf_find_vmlinux_btf_id(const char *name, return libbpf_err(err); } -static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) +static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd) { - struct bpf_prog_info info = {}; + struct bpf_prog_info info; __u32 info_len = sizeof(info); struct btf *btf; int err; - err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); + memset(&info, 0, info_len); + err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { - pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n", - attach_prog_fd, err); + pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n", + attach_prog_fd, errstr(err)); return err; } @@ -8987,10 +10375,10 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) pr_warn("The target program doesn't have BTF\n"); goto out; } - btf = btf__load_from_kernel_by_id(info.btf_id); + btf = btf_load_from_kernel(info.btf_id, NULL, token_fd); err = libbpf_get_error(btf); if (err) { - pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); + pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err)); goto out; } err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -9007,16 +10395,28 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, enum bpf_attach_type attach_type, int *btf_obj_fd, int *btf_type_id) { - int ret, i; + int ret, i, mod_len = 0; + const char *fn_name, *mod_name = NULL; - ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type); - if (ret > 0) { - *btf_obj_fd = 0; /* vmlinux BTF */ - *btf_type_id = ret; - return 0; + fn_name = strchr(attach_name, ':'); + if (fn_name) { + mod_name = attach_name; + mod_len = fn_name - mod_name; + fn_name++; + } + + if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) { + ret = find_attach_btf_id(obj->btf_vmlinux, + mod_name ? fn_name : attach_name, + attach_type); + if (ret > 0) { + *btf_obj_fd = 0; /* vmlinux BTF */ + *btf_type_id = ret; + return 0; + } + if (ret != -ENOENT) + return ret; } - if (ret != -ENOENT) - return ret; ret = load_module_btfs(obj); if (ret) @@ -9025,7 +10425,12 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, for (i = 0; i < obj->btf_module_cnt; i++) { const struct module_btf *mod = &obj->btf_modules[i]; - ret = find_attach_btf_id(mod->btf, attach_name, attach_type); + if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0) + continue; + + ret = find_attach_btf_id(mod->btf, + mod_name ? fn_name : attach_name, + attach_type); if (ret > 0) { *btf_obj_fd = mod->fd; *btf_type_id = ret; @@ -9048,11 +10453,15 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac int err = 0; /* BPF program's BTF ID */ - if (attach_prog_fd) { - err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); + if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) { + if (!attach_prog_fd) { + pr_warn("prog '%s': attach program FD is not set\n", prog->name); + return -EINVAL; + } + err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd); if (err < 0) { - pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n", - attach_prog_fd, attach_name, err); + pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n", + prog->name, attach_prog_fd, attach_name, errstr(err)); return err; } *btf_obj_fd = 0; @@ -9066,10 +10475,13 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac *btf_obj_fd = 0; *btf_type_id = 1; } else { - err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + err = find_kernel_btf_id(prog->obj, attach_name, + attach_type, btf_obj_fd, + btf_type_id); } if (err) { - pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err); + pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n", + prog->name, attach_name, errstr(err)); return err; } return 0; @@ -9096,7 +10508,7 @@ int libbpf_attach_type_by_name(const char *name, return libbpf_err(-EINVAL); } - if (sec_def->preload_fn != libbpf_preload_prog) + if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) return libbpf_err(-EINVAL); if (!(sec_def->cookie & SEC_ATTACHABLE)) return libbpf_err(-EINVAL); @@ -9107,12 +10519,11 @@ int libbpf_attach_type_by_name(const char *name, int bpf_map__fd(const struct bpf_map *map) { - return map ? map->fd : libbpf_err(-EINVAL); -} - -const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) -{ - return map ? &map->def : libbpf_err_ptr(-EINVAL); + if (!map) + return libbpf_err(-EINVAL); + if (!map_is_created(map)) + return -1; + return map->fd; } static bool map_uses_real_name(const struct bpf_map *map) @@ -9148,7 +10559,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map) int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.type = type; return 0; @@ -9161,7 +10572,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map) int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.map_flags = flags; return 0; @@ -9174,7 +10585,7 @@ __u64 bpf_map__map_extra(const struct bpf_map *map) int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->map_extra = map_extra; return 0; @@ -9187,7 +10598,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map) int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->numa_node = numa_node; return 0; @@ -9200,7 +10611,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map) int bpf_map__set_key_size(struct bpf_map *map, __u32 size) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.key_size = size; return 0; @@ -9211,10 +10622,106 @@ __u32 bpf_map__value_size(const struct bpf_map *map) return map->def.value_size; } +static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) +{ + struct btf *btf; + struct btf_type *datasec_type, *var_type; + struct btf_var_secinfo *var; + const struct btf_type *array_type; + const struct btf_array *array; + int vlen, element_sz, new_array_id; + __u32 nr_elements; + + /* check btf existence */ + btf = bpf_object__btf(map->obj); + if (!btf) + return -ENOENT; + + /* verify map is datasec */ + datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); + if (!btf_is_datasec(datasec_type)) { + pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify datasec has at least one var */ + vlen = btf_vlen(datasec_type); + if (vlen == 0) { + pr_warn("map '%s': cannot be resized, map value datasec is empty\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify last var in the datasec is an array */ + var = &btf_var_secinfos(datasec_type)[vlen - 1]; + var_type = btf_type_by_id(btf, var->type); + array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); + if (!btf_is_array(array_type)) { + pr_warn("map '%s': cannot be resized, last var must be an array\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify request size aligns with array */ + array = btf_array(array_type); + element_sz = btf__resolve_size(btf, array->type); + if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { + pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", + bpf_map__name(map), element_sz, size); + return -EINVAL; + } + + /* create a new array based on the existing array, but with new length */ + nr_elements = (size - var->offset) / element_sz; + new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); + if (new_array_id < 0) + return new_array_id; + + /* adding a new btf type invalidates existing pointers to btf objects, + * so refresh pointers before proceeding + */ + datasec_type = btf_type_by_id(btf, map->btf_value_type_id); + var = &btf_var_secinfos(datasec_type)[vlen - 1]; + var_type = btf_type_by_id(btf, var->type); + + /* finally update btf info */ + datasec_type->size = size; + var->size = size - var->offset; + var_type->type = new_array_id; + + return 0; +} + int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); + + if (map->mmaped) { + size_t mmap_old_sz, mmap_new_sz; + int err; + + if (map->def.type != BPF_MAP_TYPE_ARRAY) + return libbpf_err(-EOPNOTSUPP); + + mmap_old_sz = bpf_map_mmap_sz(map); + mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); + err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); + if (err) { + pr_warn("map '%s': failed to resize memory-mapped region: %s\n", + bpf_map__name(map), errstr(err)); + return libbpf_err(err); + } + err = map_btf_datasec_resize(map, size); + if (err && err != -ENOENT) { + pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n", + bpf_map__name(map), errstr(err)); + map->btf_value_type_id = 0; + map->btf_key_type_id = 0; + } + } + map->def.value_size = size; return 0; } @@ -9229,49 +10736,45 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) return map ? map->btf_value_type_id : 0; } -int bpf_map__set_priv(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv) +int bpf_map__set_initial_value(struct bpf_map *map, + const void *data, size_t size) { - if (!map) - return libbpf_err(-EINVAL); + size_t actual_sz; - if (map->priv) { - if (map->clear_priv) - map->clear_priv(map, map->priv); - } - - map->priv = priv; - map->clear_priv = clear_priv; - return 0; -} + if (map_is_created(map)) + return libbpf_err(-EBUSY); -void *bpf_map__priv(const struct bpf_map *map) -{ - return map ? map->priv : libbpf_err_ptr(-EINVAL); -} + if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) + return libbpf_err(-EINVAL); -int bpf_map__set_initial_value(struct bpf_map *map, - const void *data, size_t size) -{ - if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || - size != map->def.value_size || map->fd >= 0) + if (map->def.type == BPF_MAP_TYPE_ARENA) + actual_sz = map->obj->arena_data_sz; + else + actual_sz = map->def.value_size; + if (size != actual_sz) return libbpf_err(-EINVAL); memcpy(map->mmaped, data, size); return 0; } -const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) +void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize) { + if (bpf_map__is_struct_ops(map)) { + if (psize) + *psize = map->def.value_size; + return map->st_ops->data; + } + if (!map->mmaped) return NULL; - *psize = map->def.value_size; - return map->mmaped; -} -bool bpf_map__is_offload_neutral(const struct bpf_map *map) -{ - return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; + if (map->def.type == BPF_MAP_TYPE_ARENA) + *psize = map->obj->arena_data_sz; + else + *psize = map->def.value_size; + + return map->mmaped; } bool bpf_map__is_internal(const struct bpf_map *map) @@ -9286,7 +10789,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map) int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->map_ifindex = ifindex; return 0; @@ -9310,6 +10813,27 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) return 0; } +int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog) +{ + if (map_is_created(map)) { + pr_warn("exclusive programs must be set before map creation\n"); + return libbpf_err(-EINVAL); + } + + if (map->obj != prog->obj) { + pr_warn("excl_prog and map must be from the same bpf object\n"); + return libbpf_err(-EINVAL); + } + + map->excl_prog = prog; + return 0; +} + +struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map) +{ + return map->excl_prog; +} + static struct bpf_map * __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) { @@ -9335,30 +10859,18 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) } struct bpf_map * -bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) -{ - return bpf_object__next_map(obj, prev); -} - -struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { - if (prev == NULL) + if (prev == NULL && obj != NULL) return obj->maps; return __bpf_map__iter(prev, obj, 1); } struct bpf_map * -bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) -{ - return bpf_object__prev_map(obj, next); -} - -struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) { - if (next == NULL) { + if (next == NULL && obj != NULL) { if (!obj->nr_maps) return NULL; return obj->maps + obj->nr_maps - 1; @@ -9400,132 +10912,144 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); } -struct bpf_map * -bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) +static int validate_map_op(const struct bpf_map *map, size_t key_sz, + size_t value_sz, bool check_value_sz) { - return libbpf_err_ptr(-ENOTSUP); + if (!map_is_created(map)) /* map is not yet created */ + return -ENOENT; + + if (map->def.key_size != key_sz) { + pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", + map->name, key_sz, map->def.key_size); + return -EINVAL; + } + + if (map->fd < 0) { + pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); + return -EINVAL; + } + + if (!check_value_sz) + return 0; + + switch (map->def.type) { + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { + int num_cpu = libbpf_num_possible_cpus(); + size_t elem_sz = roundup(map->def.value_size, 8); + + if (value_sz != num_cpu * elem_sz) { + pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", + map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); + return -EINVAL; + } + break; + } + default: + if (map->def.value_size != value_sz) { + pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", + map->name, value_sz, map->def.value_size); + return -EINVAL; + } + break; + } + return 0; } -long libbpf_get_error(const void *ptr) +int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) { - if (!IS_ERR_OR_NULL(ptr)) - return 0; + int err; - if (IS_ERR(ptr)) - errno = -PTR_ERR(ptr); + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); - /* If ptr == NULL, then errno should be already set by the failing - * API, because libbpf never returns NULL on success and it now always - * sets errno on error. So no extra errno handling for ptr == NULL - * case. - */ - return -errno; + return bpf_map_lookup_elem_flags(map->fd, key, value, flags); } -__attribute__((alias("bpf_prog_load_xattr2"))) -int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd); - -static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd) +int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags) { - struct bpf_object_open_attr open_attr = {}; - struct bpf_program *prog, *first_prog = NULL; - struct bpf_object *obj; - struct bpf_map *map; int err; - if (!attr) - return libbpf_err(-EINVAL); - if (!attr->file) - return libbpf_err(-EINVAL); + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); - open_attr.file = attr->file; - open_attr.prog_type = attr->prog_type; + return bpf_map_update_elem(map->fd, key, value, flags); +} + +int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags) +{ + int err; - obj = bpf_object__open_xattr(&open_attr); - err = libbpf_get_error(obj); + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); if (err) - return libbpf_err(-ENOENT); + return libbpf_err(err); - bpf_object__for_each_program(prog, obj) { - enum bpf_attach_type attach_type = attr->expected_attach_type; - /* - * to preserve backwards compatibility, bpf_prog_load treats - * attr->prog_type, if specified, as an override to whatever - * bpf_object__open guessed - */ - if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { - bpf_program__set_type(prog, attr->prog_type); - bpf_program__set_expected_attach_type(prog, - attach_type); - } - if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { - /* - * we haven't guessed from section name and user - * didn't provide a fallback type, too bad... - */ - bpf_object__close(obj); - return libbpf_err(-EINVAL); - } + return bpf_map_delete_elem_flags(map->fd, key, flags); +} - prog->prog_ifindex = attr->ifindex; - prog->log_level = attr->log_level; - prog->prog_flags |= attr->prog_flags; - if (!first_prog) - first_prog = prog; - } +int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; - bpf_object__for_each_map(map, obj) { - if (!bpf_map__is_offload_neutral(map)) - map->map_ifindex = attr->ifindex; - } + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); - if (!first_prog) { - pr_warn("object file doesn't contain bpf program\n"); - bpf_object__close(obj); - return libbpf_err(-ENOENT); - } + return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); +} - err = bpf_object__load(obj); - if (err) { - bpf_object__close(obj); +int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) return libbpf_err(err); - } - *pobj = obj; - *prog_fd = bpf_program__fd(first_prog); - return 0; + return bpf_map_get_next_key(map->fd, cur_key, next_key); } -COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) -int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) +long libbpf_get_error(const void *ptr) { - struct bpf_prog_load_attr attr; + if (!IS_ERR_OR_NULL(ptr)) + return 0; - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = type; - attr.expected_attach_type = 0; + if (IS_ERR(ptr)) + errno = -PTR_ERR(ptr); - return bpf_prog_load_xattr2(&attr, pobj, prog_fd); + /* If ptr == NULL, then errno should be already set by the failing + * API, because libbpf never returns NULL on success and it now always + * sets errno on error. So no extra errno handling for ptr == NULL + * case. + */ + return -errno; } -struct bpf_link { - int (*detach)(struct bpf_link *link); - void (*dealloc)(struct bpf_link *link); - char *pin_path; /* NULL, if not pinned */ - int fd; /* hook FD, -1 if not applicable */ - bool disconnected; -}; - /* Replace link's underlying BPF program with the new one */ int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { int ret; + int prog_fd = bpf_program__fd(prog); + + if (prog_fd < 0) { + pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err(-EINVAL); + } - ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); + ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL); return libbpf_err_errno(ret); } @@ -9704,9 +11228,9 @@ static void bpf_link_perf_dealloc(struct bpf_link *link) struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts) { - char errmsg[STRERR_BUFSIZE]; struct bpf_link_perf *link; int prog_fd, link_fd = -1, err; + bool force_ioctl_attach; if (!OPTS_VALID(opts, bpf_perf_event_opts)) return libbpf_err_ptr(-EINVAL); @@ -9718,7 +11242,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p } prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", prog->name); return libbpf_err_ptr(-EINVAL); } @@ -9730,16 +11254,16 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p link->link.dealloc = &bpf_link_perf_dealloc; link->perf_event_fd = pfd; - if (kernel_supports(prog->obj, FEAT_PERF_LINK)) { + force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false); + if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); if (link_fd < 0) { err = -errno; - pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", - prog->name, pfd, - err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); goto err_out; } link->link.fd = link_fd; @@ -9753,7 +11277,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { err = -errno; pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, pfd, errstr(err)); if (err == -EPROTO) pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", prog->name, pfd); @@ -9761,11 +11285,14 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p } link->link.fd = pfd; } - if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { - err = -errno; - pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + + if (!OPTS_GET(opts, dont_enable, false)) { + if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { + err = -errno; + pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); + goto err_out; + } } return &link->link; @@ -9788,22 +11315,19 @@ struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, */ static int parse_uint_from_file(const char *file, const char *fmt) { - char buf[STRERR_BUFSIZE]; int err, ret; FILE *f; - f = fopen(file, "r"); + f = fopen(file, "re"); if (!f) { err = -errno; - pr_debug("failed to open '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to open '%s': %s\n", file, errstr(err)); return err; } err = fscanf(f, fmt, &ret); if (err != 1) { err = err == EOF ? -EIO : -errno; - pr_debug("failed to parse '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to parse '%s': %s\n", file, errstr(err)); fclose(f); return err; } @@ -9845,19 +11369,21 @@ static int determine_uprobe_retprobe_bit(void) static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, uint64_t offset, int pid, size_t ref_ctr_off) { - struct perf_event_attr attr = {}; - char errmsg[STRERR_BUFSIZE]; - int type, pfd, err; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; + int type, pfd; - if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) + if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) return -EINVAL; + memset(&attr, 0, attr_sz); + type = uprobe ? determine_uprobe_perf_type() : determine_kprobe_perf_type(); if (type < 0) { pr_warn("failed to determine %s perf type: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + errstr(type)); return type; } if (retprobe) { @@ -9867,12 +11393,12 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, if (bit < 0) { pr_warn("failed to determine %s retprobe bit: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); + errstr(bit)); return bit; } attr.config |= 1 << bit; } - attr.size = sizeof(attr); + attr.size = attr_sz; attr.type = type; attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ @@ -9883,51 +11409,93 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, pid < 0 ? -1 : pid /* pid */, pid == -1 ? 0 : -1 /* cpu */, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); - if (pfd < 0) { - err = -errno; - pr_warn("%s perf_event_open() failed: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; - } - return pfd; + return pfd >= 0 ? pfd : -errno; } static int append_to_file(const char *file, const char *fmt, ...) { int fd, n, err = 0; va_list ap; + char buf[1024]; + + va_start(ap, fmt); + n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (n < 0 || n >= sizeof(buf)) + return -EINVAL; fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); if (fd < 0) return -errno; - va_start(ap, fmt); - n = vdprintf(fd, fmt, ap); - va_end(ap); - - if (n < 0) + if (write(fd, buf, n) < 0) err = -errno; close(fd); return err; } -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *kfunc_name, size_t offset) +#define DEBUGFS "/sys/kernel/debug/tracing" +#define TRACEFS "/sys/kernel/tracing" + +static bool use_debugfs(void) +{ + static int has_debugfs = -1; + + if (has_debugfs < 0) + has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0; + + return has_debugfs == 1; +} + +static const char *tracefs_path(void) +{ + return use_debugfs() ? DEBUGFS : TRACEFS; +} + +static const char *tracefs_kprobe_events(void) +{ + return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; +} + +static const char *tracefs_uprobe_events(void) +{ + return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; +} + +static const char *tracefs_available_filter_functions(void) +{ + return use_debugfs() ? DEBUGFS"/available_filter_functions" + : TRACEFS"/available_filter_functions"; +} + +static const char *tracefs_available_filter_functions_addrs(void) +{ + return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs" + : TRACEFS"/available_filter_functions_addrs"; +} + +static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, + const char *name, size_t offset) { static int index = 0; + int i; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, - __sync_fetch_and_add(&index, 1)); + snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), + __sync_fetch_and_add(&index, 1), name, offset); + + /* sanitize name in the probe name */ + for (i = 0; buf[i]; i++) { + if (!isalnum(buf[i])) + buf[i] = '_'; + } } static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, const char *kfunc_name, size_t offset) { - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "%c:%s/%s %s+0x%zx", + return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", retprobe ? 'r' : 'p', retprobe ? "kretprobes" : "kprobes", probe_name, kfunc_name, offset); @@ -9935,18 +11503,16 @@ static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) { - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? "kretprobes" : "kprobes", probe_name); + return append_to_file(tracefs_kprobe_events(), "-:%s/%s", + retprobe ? "kretprobes" : "kprobes", probe_name); } static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) { char file[256]; - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? "kretprobes" : "kprobes", probe_name); + snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name); return parse_uint_from_file(file, "%d\n"); } @@ -9954,25 +11520,28 @@ static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retpro static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, const char *kfunc_name, size_t offset, int pid) { - struct perf_event_attr attr = {}; - char errmsg[STRERR_BUFSIZE]; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; int type, pfd, err; err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); if (err < 0) { pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } type = determine_kprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { + err = type; pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); - return type; + errstr(err)); + goto err_clean_legacy; } - attr.size = sizeof(attr); + + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = type; attr.type = PERF_TYPE_TRACEPOINT; @@ -9983,10 +11552,71 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, if (pfd < 0) { err = -errno; pr_warn("legacy kprobe perf_event_open() failed: %s\n", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; + errstr(err)); + goto err_clean_legacy; } return pfd; + +err_clean_legacy: + /* Clear the newly added legacy kprobe_event */ + remove_kprobe_event_legacy(probe_name, retprobe); + return err; +} + +static const char *arch_specific_syscall_pfx(void) +{ +#if defined(__x86_64__) + return "x64"; +#elif defined(__i386__) + return "ia32"; +#elif defined(__s390x__) + return "s390x"; +#elif defined(__arm__) + return "arm"; +#elif defined(__aarch64__) + return "arm64"; +#elif defined(__mips__) + return "mips"; +#elif defined(__riscv) + return "riscv"; +#elif defined(__powerpc__) + return "powerpc"; +#elif defined(__powerpc64__) + return "powerpc64"; +#else + return NULL; +#endif +} + +int probe_kern_syscall_wrapper(int token_fd) +{ + char syscall_name[64]; + const char *ksys_pfx; + + ksys_pfx = arch_specific_syscall_pfx(); + if (!ksys_pfx) + return 0; + + snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); + + if (determine_kprobe_perf_type() >= 0) { + int pfd; + + pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); + if (pfd >= 0) + close(pfd); + + return pfd >= 0 ? 1 : 0; + } else { /* legacy mode */ + char probe_name[MAX_EVENT_NAME_LEN]; + + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) + return 0; + + (void)remove_kprobe_event_legacy(probe_name, false); + return 1; + } } struct bpf_link * @@ -9995,7 +11625,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const struct bpf_kprobe_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE]; + enum probe_attach_mode attach_mode; char *legacy_probe = NULL; struct bpf_link *link; size_t offset; @@ -10005,20 +11635,41 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_kprobe_opts)) return libbpf_err_ptr(-EINVAL); + attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); retprobe = OPTS_GET(opts, retprobe, false); offset = OPTS_GET(opts, offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); legacy = determine_kprobe_perf_type() < 0; + switch (attach_mode) { + case PROBE_ATTACH_MODE_LEGACY: + legacy = true; + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_PERF: + if (legacy) + return libbpf_err_ptr(-ENOTSUP); + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_LINK: + if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) + return libbpf_err_ptr(-ENOTSUP); + break; + case PROBE_ATTACH_MODE_DEFAULT: + break; + default: + return libbpf_err_ptr(-EINVAL); + } + if (!legacy) { pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, offset, -1 /* pid */, 0 /* ref_ctr_off */); } else { - char probe_name[256]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), - func_name, offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -10032,7 +11683,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -10042,8 +11693,8 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + errstr(err)); + goto err_clean_legacy; } if (legacy) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -10054,6 +11705,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, } return link; + +err_clean_legacy: + if (legacy) + remove_kprobe_event_legacy(legacy_probe, retprobe); err_out: free(legacy_probe); return libbpf_err_ptr(err); @@ -10070,14 +11725,359 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie) +struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, + const char *syscall_name, + const struct bpf_ksyscall_opts *opts) +{ + LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + char func_name[128]; + + if (!OPTS_VALID(opts, bpf_ksyscall_opts)) + return libbpf_err_ptr(-EINVAL); + + if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { + /* arch_specific_syscall_pfx() should never return NULL here + * because it is guarded by kernel_supports(). However, since + * compiler does not know that we have an explicit conditional + * as well. + */ + snprintf(func_name, sizeof(func_name), "__%s_sys_%s", + arch_specific_syscall_pfx() ? : "", syscall_name); + } else { + snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); + } + + kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); + kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + + return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); +} + +/* Adapted from perf/util/string.c */ +bool glob_match(const char *str, const char *pat) +{ + while (*str && *pat && *pat != '*') { + if (*pat == '?') { /* Matches any single character */ + str++; + pat++; + continue; + } + if (*str != *pat) + return false; + str++; + pat++; + } + /* Check wild card */ + if (*pat == '*') { + while (*pat == '*') + pat++; + if (!*pat) /* Tail wild card matches all */ + return true; + while (*str) + if (glob_match(str++, pat)) + return true; + } + return !*str && !*pat; +} + +struct kprobe_multi_resolve { + const char *pattern; + unsigned long *addrs; + size_t cap; + size_t cnt; +}; + +struct avail_kallsyms_data { + char **syms; + size_t cnt; + struct kprobe_multi_resolve *res; +}; + +static int avail_func_cmp(const void *a, const void *b) +{ + return strcmp(*(const char **)a, *(const char **)b); +} + +static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, + const char *sym_name, void *ctx) +{ + struct avail_kallsyms_data *data = ctx; + struct kprobe_multi_resolve *res = data->res; + int err; + + if (!glob_match(sym_name, res->pattern)) + return 0; + + if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) { + /* Some versions of kernel strip out .llvm.<hash> suffix from + * function names reported in available_filter_functions, but + * don't do so for kallsyms. While this is clearly a kernel + * bug (fixed by [0]) we try to accommodate that in libbpf to + * make multi-kprobe usability a bit better: if no match is + * found, we will strip .llvm. suffix and try one more time. + * + * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") + */ + char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; + + if (!(sym_sfx = strstr(sym_name, ".llvm."))) + return 0; + + /* psym_trim vs sym_trim dance is done to avoid pointer vs array + * coercion differences and get proper `const char **` pointer + * which avail_func_cmp() expects + */ + snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name); + if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) + return 0; + } + + err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); + if (err) + return err; + + res->addrs[res->cnt++] = (unsigned long)sym_addr; + return 0; +} + +static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) +{ + const char *available_functions_file = tracefs_available_filter_functions(); + struct avail_kallsyms_data data; + char sym_name[500]; + FILE *f; + int err = 0, ret, i; + char **syms = NULL; + size_t cap = 0, cnt = 0; + + f = fopen(available_functions_file, "re"); + if (!f) { + err = -errno; + pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err)); + return err; + } + + while (true) { + char *name; + + ret = fscanf(f, "%499s%*[^\n]\n", sym_name); + if (ret == EOF && feof(f)) + break; + + if (ret != 1) { + pr_warn("failed to parse available_filter_functions entry: %d\n", ret); + err = -EINVAL; + goto cleanup; + } + + if (!glob_match(sym_name, res->pattern)) + continue; + + err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1); + if (err) + goto cleanup; + + name = strdup(sym_name); + if (!name) { + err = -errno; + goto cleanup; + } + + syms[cnt++] = name; + } + + /* no entries found, bail out */ + if (cnt == 0) { + err = -ENOENT; + goto cleanup; + } + + /* sort available functions */ + qsort(syms, cnt, sizeof(*syms), avail_func_cmp); + + data.syms = syms; + data.res = res; + data.cnt = cnt; + libbpf_kallsyms_parse(avail_kallsyms_cb, &data); + + if (res->cnt == 0) + err = -ENOENT; + +cleanup: + for (i = 0; i < cnt; i++) + free((char *)syms[i]); + free(syms); + + fclose(f); + return err; +} + +static bool has_available_filter_functions_addrs(void) +{ + return access(tracefs_available_filter_functions_addrs(), R_OK) != -1; +} + +static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) +{ + const char *available_path = tracefs_available_filter_functions_addrs(); + char sym_name[500]; + FILE *f; + int ret, err = 0; + unsigned long long sym_addr; + + f = fopen(available_path, "re"); + if (!f) { + err = -errno; + pr_warn("failed to open %s: %s\n", available_path, errstr(err)); + return err; + } + + while (true) { + ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name); + if (ret == EOF && feof(f)) + break; + + if (ret != 2) { + pr_warn("failed to parse available_filter_functions_addrs entry: %d\n", + ret); + err = -EINVAL; + goto cleanup; + } + + if (!glob_match(sym_name, res->pattern)) + continue; + + err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, + sizeof(*res->addrs), res->cnt + 1); + if (err) + goto cleanup; + + res->addrs[res->cnt++] = (unsigned long)sym_addr; + } + + if (res->cnt == 0) + err = -ENOENT; + +cleanup: + fclose(f); + return err; +} + +struct bpf_link * +bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, + const char *pattern, + const struct bpf_kprobe_multi_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, lopts); + struct kprobe_multi_resolve res = { + .pattern = pattern, + }; + enum bpf_attach_type attach_type; + struct bpf_link *link = NULL; + const unsigned long *addrs; + int err, link_fd, prog_fd; + bool retprobe, session, unique_match; + const __u64 *cookies; + const char **syms; + size_t cnt; + + if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) + return libbpf_err_ptr(-EINVAL); + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + syms = OPTS_GET(opts, syms, false); + addrs = OPTS_GET(opts, addrs, false); + cnt = OPTS_GET(opts, cnt, false); + cookies = OPTS_GET(opts, cookies, false); + unique_match = OPTS_GET(opts, unique_match, false); + + if (!pattern && !addrs && !syms) + return libbpf_err_ptr(-EINVAL); + if (pattern && (addrs || syms || cookies || cnt)) + return libbpf_err_ptr(-EINVAL); + if (!pattern && !cnt) + return libbpf_err_ptr(-EINVAL); + if (!pattern && unique_match) + return libbpf_err_ptr(-EINVAL); + if (addrs && syms) + return libbpf_err_ptr(-EINVAL); + + if (pattern) { + if (has_available_filter_functions_addrs()) + err = libbpf_available_kprobes_parse(&res); + else + err = libbpf_available_kallsyms_parse(&res); + if (err) + goto error; + + if (unique_match && res.cnt != 1) { + pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n", + prog->name, pattern, res.cnt); + err = -EINVAL; + goto error; + } + + addrs = res.addrs; + cnt = res.cnt; + } + + retprobe = OPTS_GET(opts, retprobe, false); + session = OPTS_GET(opts, session, false); + + if (retprobe && session) + return libbpf_err_ptr(-EINVAL); + + attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI; + + lopts.kprobe_multi.syms = syms; + lopts.kprobe_multi.addrs = addrs; + lopts.kprobe_multi.cookies = cookies; + lopts.kprobe_multi.cnt = cnt; + lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0; + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto error; + } + link->detach = &bpf_link__detach_fd; + + link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); + if (link_fd < 0) { + err = -errno; + pr_warn("prog '%s': failed to attach: %s\n", + prog->name, errstr(err)); + goto error; + } + link->fd = link_fd; + free(res.addrs); + return link; + +error: + free(link); + free(res.addrs); + return libbpf_err_ptr(err); +} + +static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); unsigned long offset = 0; - struct bpf_link *link; const char *func_name; char *func; - int n, err; + int n; + + *link = NULL; + + /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ + if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) + return 0; opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); if (opts.retprobe) @@ -10087,43 +12087,136 @@ static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cooki n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); if (n < 1) { - err = -EINVAL; pr_warn("kprobe name is invalid: %s\n", func_name); - return libbpf_err_ptr(err); + return -EINVAL; } if (opts.retprobe && offset != 0) { free(func); - err = -EINVAL; pr_warn("kretprobes do not support offset specification\n"); - return libbpf_err_ptr(err); + return -EINVAL; } opts.offset = offset; - link = bpf_program__attach_kprobe_opts(prog, func, &opts); + *link = bpf_program__attach_kprobe_opts(prog, func, &opts); free(func); - return link; + return libbpf_get_error(*link); } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *binary_path, uint64_t offset) +static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) { - int i; + LIBBPF_OPTS(bpf_ksyscall_opts, opts); + const char *syscall_name; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); + *link = NULL; - /* sanitize binary_path in the probe name */ - for (i = 0; buf[i]; i++) { - if (!isalnum(buf[i])) - buf[i] = '_'; + /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ + if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) + return 0; + + opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); + if (opts.retprobe) + syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; + else + syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; + + *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); + return *link ? 0 : -errno; +} + +static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + const char *spec; + char *pattern; + int n; + + *link = NULL; + + /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ + if (strcmp(prog->sec_name, "kprobe.multi") == 0 || + strcmp(prog->sec_name, "kretprobe.multi") == 0) + return 0; + + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); + if (opts.retprobe) + spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; + else + spec = prog->sec_name + sizeof("kprobe.multi/") - 1; + + n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); + if (n < 1) { + pr_warn("kprobe multi pattern is invalid: %s\n", spec); + return -EINVAL; + } + + *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); + free(pattern); + return libbpf_get_error(*link); +} + +static int attach_kprobe_session(const struct bpf_program *prog, long cookie, + struct bpf_link **link) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true); + const char *spec; + char *pattern; + int n; + + *link = NULL; + + /* no auto-attach for SEC("kprobe.session") */ + if (strcmp(prog->sec_name, "kprobe.session") == 0) + return 0; + + spec = prog->sec_name + sizeof("kprobe.session/") - 1; + n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); + if (n < 1) { + pr_warn("kprobe session pattern is invalid: %s\n", spec); + return -EINVAL; } + + *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); + free(pattern); + return *link ? 0 : -errno; +} + +static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + int n, ret = -EINVAL; + + *link = NULL; + + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", + &probe_type, &binary_path, &func_name); + switch (n) { + case 1: + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + ret = 0; + break; + case 3: + opts.session = str_has_pfx(probe_type, "uprobe.session"); + opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi"); + + *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); + ret = libbpf_get_error(*link); + break; + default: + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, + prog->sec_name); + break; + } + free(probe_type); + free(binary_path); + free(func_name); + return ret; } static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "%c:%s/%s %s:0x%zx", + return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", retprobe ? 'r' : 'p', retprobe ? "uretprobes" : "uprobes", probe_name, binary_path, offset); @@ -10131,18 +12224,16 @@ static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) { - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? "uretprobes" : "uprobes", probe_name); + return append_to_file(tracefs_uprobe_events(), "-:%s/%s", + retprobe ? "uretprobes" : "uprobes", probe_name); } static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) { char file[512]; - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? "uretprobes" : "uprobes", probe_name); + snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name); return parse_uint_from_file(file, "%d\n"); } @@ -10150,24 +12241,26 @@ static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retpro static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset, int pid) { + const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; int type, pfd, err; err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); if (err < 0) { - pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", - binary_path, (size_t)offset, err); + pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n", + binary_path, (size_t)offset, errstr(err)); return err; } type = determine_uprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { - pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", - binary_path, offset, err); - return type; + err = type; + pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n", + binary_path, offset, errstr(err)); + goto err_clean_legacy; } - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(attr); + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = type; attr.type = PERF_TYPE_TRACEPOINT; @@ -10177,10 +12270,283 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); if (pfd < 0) { err = -errno; - pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); - return err; + pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err)); + goto err_clean_legacy; } return pfd; + +err_clean_legacy: + /* Clear the newly added legacy uprobe_event */ + remove_uprobe_event_legacy(probe_name, retprobe); + return err; +} + +/* Find offset of function name in archive specified by path. Currently + * supported are .zip files that do not compress their contents, as used on + * Android in the form of APKs, for example. "file_name" is the name of the ELF + * file inside the archive. "func_name" matches symbol name or name@@LIB for + * library functions. + * + * An overview of the APK format specifically provided here: + * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents + */ +static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, + const char *func_name) +{ + struct zip_archive *archive; + struct zip_entry entry; + long ret; + Elf *elf; + + archive = zip_archive_open(archive_path); + if (IS_ERR(archive)) { + ret = PTR_ERR(archive); + pr_warn("zip: failed to open %s: %ld\n", archive_path, ret); + return ret; + } + + ret = zip_archive_find_entry(archive, file_name, &entry); + if (ret) { + pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name, + archive_path, ret); + goto out; + } + pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path, + (unsigned long)entry.data_offset); + + if (entry.compression) { + pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, + archive_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + + elf = elf_memory((void *)entry.data, entry.data_length); + if (!elf) { + pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, + elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__LIBELF; + goto out; + } + + ret = elf_find_func_offset(elf, file_name, func_name); + if (ret > 0) { + pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n", + func_name, file_name, archive_path, entry.data_offset, ret, + ret + entry.data_offset); + ret += entry.data_offset; + } + elf_end(elf); + +out: + zip_archive_close(archive); + return ret; +} + +static const char *arch_specific_lib_paths(void) +{ + /* + * Based on https://packages.debian.org/sid/libc6. + * + * Assume that the traced program is built for the same architecture + * as libbpf, which should cover the vast majority of cases. + */ +#if defined(__x86_64__) + return "/lib/x86_64-linux-gnu"; +#elif defined(__i386__) + return "/lib/i386-linux-gnu"; +#elif defined(__s390x__) + return "/lib/s390x-linux-gnu"; +#elif defined(__arm__) && defined(__SOFTFP__) + return "/lib/arm-linux-gnueabi"; +#elif defined(__arm__) && !defined(__SOFTFP__) + return "/lib/arm-linux-gnueabihf"; +#elif defined(__aarch64__) + return "/lib/aarch64-linux-gnu"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 + return "/lib/mips64el-linux-gnuabi64"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 + return "/lib/mipsel-linux-gnu"; +#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return "/lib/powerpc64le-linux-gnu"; +#elif defined(__sparc__) && defined(__arch64__) + return "/lib/sparc64-linux-gnu"; +#elif defined(__riscv) && __riscv_xlen == 64 + return "/lib/riscv64-linux-gnu"; +#else + return NULL; +#endif +} + +/* Get full path to program/shared library. */ +static int resolve_full_path(const char *file, char *result, size_t result_sz) +{ + const char *search_paths[3] = {}; + int i, perm; + + if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { + search_paths[0] = getenv("LD_LIBRARY_PATH"); + search_paths[1] = "/usr/lib64:/usr/lib"; + search_paths[2] = arch_specific_lib_paths(); + perm = R_OK; + } else { + search_paths[0] = getenv("PATH"); + search_paths[1] = "/usr/bin:/usr/sbin"; + perm = R_OK | X_OK; + } + + for (i = 0; i < ARRAY_SIZE(search_paths); i++) { + const char *s; + + if (!search_paths[i]) + continue; + for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { + char *next_path; + int seg_len; + + if (s[0] == ':') + s++; + next_path = strchr(s, ':'); + seg_len = next_path ? next_path - s : strlen(s); + if (!seg_len) + continue; + snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); + /* ensure it has required permissions */ + if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0) + continue; + pr_debug("resolved '%s' to '%s'\n", file, result); + return 0; + } + } + return -ENOENT; +} + +struct bpf_link * +bpf_program__attach_uprobe_multi(const struct bpf_program *prog, + pid_t pid, + const char *path, + const char *func_pattern, + const struct bpf_uprobe_multi_opts *opts) +{ + const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; + LIBBPF_OPTS(bpf_link_create_opts, lopts); + unsigned long *resolved_offsets = NULL; + enum bpf_attach_type attach_type; + int err = 0, link_fd, prog_fd; + struct bpf_link *link = NULL; + char full_path[PATH_MAX]; + bool retprobe, session; + const __u64 *cookies; + const char **syms; + size_t cnt; + + if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) + return libbpf_err_ptr(-EINVAL); + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + syms = OPTS_GET(opts, syms, NULL); + offsets = OPTS_GET(opts, offsets, NULL); + ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); + cookies = OPTS_GET(opts, cookies, NULL); + cnt = OPTS_GET(opts, cnt, 0); + retprobe = OPTS_GET(opts, retprobe, false); + session = OPTS_GET(opts, session, false); + + /* + * User can specify 2 mutually exclusive set of inputs: + * + * 1) use only path/func_pattern/pid arguments + * + * 2) use path/pid with allowed combinations of: + * syms/offsets/ref_ctr_offsets/cookies/cnt + * + * - syms and offsets are mutually exclusive + * - ref_ctr_offsets and cookies are optional + * + * Any other usage results in error. + */ + + if (!path) + return libbpf_err_ptr(-EINVAL); + if (!func_pattern && cnt == 0) + return libbpf_err_ptr(-EINVAL); + + if (func_pattern) { + if (syms || offsets || ref_ctr_offsets || cookies || cnt) + return libbpf_err_ptr(-EINVAL); + } else { + if (!!syms == !!offsets) + return libbpf_err_ptr(-EINVAL); + } + + if (retprobe && session) + return libbpf_err_ptr(-EINVAL); + + if (func_pattern) { + if (!strchr(path, '/')) { + err = resolve_full_path(path, full_path, sizeof(full_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, path, errstr(err)); + return libbpf_err_ptr(err); + } + path = full_path; + } + + err = elf_resolve_pattern_offsets(path, func_pattern, + &resolved_offsets, &cnt); + if (err < 0) + return libbpf_err_ptr(err); + offsets = resolved_offsets; + } else if (syms) { + err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); + if (err < 0) + return libbpf_err_ptr(err); + offsets = resolved_offsets; + } + + attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI; + + lopts.uprobe_multi.path = path; + lopts.uprobe_multi.offsets = offsets; + lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; + lopts.uprobe_multi.cookies = cookies; + lopts.uprobe_multi.cnt = cnt; + lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0; + + if (pid == 0) + pid = getpid(); + if (pid > 0) + lopts.uprobe_multi.pid = pid; + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto error; + } + link->detach = &bpf_link__detach_fd; + + link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); + if (link_fd < 0) { + err = -errno; + pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", + prog->name, errstr(err)); + goto error; + } + link->fd = link_fd; + free(resolved_offsets); + return link; + +error: + free(resolved_offsets); + free(link); + return libbpf_err_ptr(err); } LIBBPF_API struct bpf_link * @@ -10188,32 +12554,94 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts) { + const char *archive_path = NULL, *archive_sep = NULL; + char *legacy_probe = NULL; DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + enum probe_attach_mode attach_mode; + char full_path[PATH_MAX]; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; bool retprobe, legacy; + const char *func_name; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); + attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); retprobe = OPTS_GET(opts, retprobe, false); ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + if (!binary_path) + return libbpf_err_ptr(-EINVAL); + + /* Check if "binary_path" refers to an archive. */ + archive_sep = strstr(binary_path, "!/"); + if (archive_sep) { + full_path[0] = '\0'; + libbpf_strlcpy(full_path, binary_path, + min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1))); + archive_path = full_path; + binary_path = archive_sep + 2; + } else if (!strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, full_path, sizeof(full_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); + return libbpf_err_ptr(err); + } + binary_path = full_path; + } + func_name = OPTS_GET(opts, func_name, NULL); + if (func_name) { + long sym_off; + + if (archive_path) { + sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, + func_name); + binary_path = archive_path; + } else { + sym_off = elf_find_func_offset_from_file(binary_path, func_name); + } + if (sym_off < 0) + return libbpf_err_ptr(sym_off); + func_offset += sym_off; + } + legacy = determine_uprobe_perf_type() < 0; + switch (attach_mode) { + case PROBE_ATTACH_MODE_LEGACY: + legacy = true; + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_PERF: + if (legacy) + return libbpf_err_ptr(-ENOTSUP); + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_LINK: + if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) + return libbpf_err_ptr(-ENOTSUP); + break; + case PROBE_ATTACH_MODE_DEFAULT: + break; + default: + return libbpf_err_ptr(-EINVAL); + } + if (!legacy) { pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[512]; + char probe_name[MAX_EVENT_NAME_LEN]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); - gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), - binary_path, func_offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + strrchr(binary_path, '/') ? : binary_path, + func_offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -10227,7 +12655,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } @@ -10238,8 +12666,8 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + errstr(err)); + goto err_clean_legacy; } if (legacy) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -10249,10 +12677,78 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, perf_link->legacy_is_retprobe = retprobe; } return link; + +err_clean_legacy: + if (legacy) + remove_uprobe_event_legacy(legacy_probe, retprobe); err_out: free(legacy_probe); return libbpf_err_ptr(err); +} + +/* Format of u[ret]probe section definition supporting auto-attach: + * u[ret]probe/binary:function[+offset] + * + * binary can be an absolute/relative path or a filename; the latter is resolved to a + * full binary path via bpf_program__attach_uprobe_opts. + * + * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be + * specified (and auto-attach is not possible) or the above format is specified for + * auto-attach. + */ +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; + int n, c, ret = -EINVAL; + long offset = 0; + *link = NULL; + + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", + &probe_type, &binary_path, &func_name); + switch (n) { + case 1: + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + ret = 0; + break; + case 2: + pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", + prog->name, prog->sec_name); + break; + case 3: + /* check if user specifies `+offset`, if yes, this should be + * the last part of the string, make sure sscanf read to EOL + */ + func_off = strrchr(func_name, '+'); + if (func_off) { + n = sscanf(func_off, "+%li%n", &offset, &c); + if (n == 1 && *(func_off + c) == '\0') + func_off[0] = '\0'; + else + offset = 0; + } + opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || + strcmp(probe_type, "uretprobe.s") == 0; + if (opts.retprobe && offset != 0) { + pr_warn("prog '%s': uretprobes do not support offset specification\n", + prog->name); + break; + } + opts.func_name = func_name; + *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); + ret = libbpf_get_error(*link); + break; + default: + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, + prog->sec_name); + break; + } + free(probe_type); + free(binary_path); + free(func_name); + + return ret; } struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, @@ -10265,15 +12761,96 @@ struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); } +struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, + pid_t pid, const char *binary_path, + const char *usdt_provider, const char *usdt_name, + const struct bpf_usdt_opts *opts) +{ + char resolved_path[512]; + struct bpf_object *obj = prog->obj; + struct bpf_link *link; + __u64 usdt_cookie; + int err; + + if (!OPTS_VALID(opts, bpf_uprobe_opts)) + return libbpf_err_ptr(-EINVAL); + + if (bpf_program__fd(prog) < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + if (!binary_path) + return libbpf_err_ptr(-EINVAL); + + if (!strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); + return libbpf_err_ptr(err); + } + binary_path = resolved_path; + } + + /* USDT manager is instantiated lazily on first USDT attach. It will + * be destroyed together with BPF object in bpf_object__close(). + */ + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + if (!obj->usdt_man) { + obj->usdt_man = usdt_manager_new(obj); + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + } + + usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); + link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, + usdt_provider, usdt_name, usdt_cookie); + err = libbpf_get_error(link); + if (err) + return libbpf_err_ptr(err); + return link; +} + +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + char *path = NULL, *provider = NULL, *name = NULL; + const char *sec_name; + int n, err; + + sec_name = bpf_program__section_name(prog); + if (strcmp(sec_name, "usdt") == 0) { + /* no auto-attach for just SEC("usdt") */ + *link = NULL; + return 0; + } + + n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); + if (n != 3) { + pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n", + sec_name); + err = -EINVAL; + } else { + *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, + provider, name, NULL); + err = libbpf_get_error(*link); + } + free(path); + free(provider); + free(name); + return err; +} + static int determine_tracepoint_id(const char *tp_category, const char *tp_name) { char file[PATH_MAX]; int ret; - ret = snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - tp_category, tp_name); + ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), tp_category, tp_name); if (ret < 0) return -errno; if (ret >= sizeof(file)) { @@ -10287,20 +12864,21 @@ static int determine_tracepoint_id(const char *tp_category, static int perf_event_open_tracepoint(const char *tp_category, const char *tp_name) { - struct perf_event_attr attr = {}; - char errmsg[STRERR_BUFSIZE]; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; int tp_id, pfd, err; tp_id = determine_tracepoint_id(tp_category, tp_name); if (tp_id < 0) { pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", tp_category, tp_name, - libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); + errstr(tp_id)); return tp_id; } + memset(&attr, 0, attr_sz); attr.type = PERF_TYPE_TRACEPOINT; - attr.size = sizeof(attr); + attr.size = attr_sz; attr.config = tp_id; pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, @@ -10309,7 +12887,7 @@ static int perf_event_open_tracepoint(const char *tp_category, err = -errno; pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } return pfd; @@ -10321,7 +12899,6 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p const struct bpf_tracepoint_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int pfd, err; @@ -10334,7 +12911,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p if (pfd < 0) { pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + errstr(pfd)); return libbpf_err_ptr(pfd); } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -10343,7 +12920,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p close(pfd); pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return libbpf_err_ptr(err); } return link; @@ -10356,14 +12933,19 @@ struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); } -static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) +static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { char *sec_name, *tp_cat, *tp_name; - struct bpf_link *link; + + *link = NULL; + + /* no auto-attach for SEC("tp") or SEC("tracepoint") */ + if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) + return 0; sec_name = strdup(prog->sec_name); if (!sec_name) - return libbpf_err_ptr(-ENOMEM); + return -ENOMEM; /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ if (str_has_pfx(prog->sec_name, "tp/")) @@ -10373,23 +12955,28 @@ static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) tp_name = strchr(tp_cat, '/'); if (!tp_name) { free(sec_name); - return libbpf_err_ptr(-EINVAL); + return -EINVAL; } *tp_name = '\0'; tp_name++; - link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); + *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); free(sec_name); - return link; + return libbpf_get_error(*link); } -struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, - const char *tp_name) +struct bpf_link * +bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, + const char *tp_name, + struct bpf_raw_tracepoint_opts *opts) { - char errmsg[STRERR_BUFSIZE]; + LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts); struct bpf_link *link; int prog_fd, pfd; + if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts)) + return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); @@ -10401,51 +12988,78 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); + raw_opts.tp_name = tp_name; + raw_opts.cookie = OPTS_GET(opts, cookie, 0); + pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts); if (pfd < 0) { pfd = -errno; free(link); pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", - prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + prog->name, tp_name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; return link; } -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) +struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, + const char *tp_name) +{ + return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL); +} + +static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { static const char *const prefixes[] = { - "raw_tp/", - "raw_tracepoint/", - "raw_tp.w/", - "raw_tracepoint.w/", + "raw_tp", + "raw_tracepoint", + "raw_tp.w", + "raw_tracepoint.w", }; size_t i; const char *tp_name = NULL; + *link = NULL; + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { - if (str_has_pfx(prog->sec_name, prefixes[i])) { - tp_name = prog->sec_name + strlen(prefixes[i]); - break; - } + size_t pfx_len; + + if (!str_has_pfx(prog->sec_name, prefixes[i])) + continue; + + pfx_len = strlen(prefixes[i]); + /* no auto-attach case of, e.g., SEC("raw_tp") */ + if (prog->sec_name[pfx_len] == '\0') + return 0; + + if (prog->sec_name[pfx_len] != '/') + continue; + + tp_name = prog->sec_name + pfx_len + 1; + break; } + if (!tp_name) { pr_warn("prog '%s': invalid section name '%s'\n", prog->name, prog->sec_name); - return libbpf_err_ptr(-EINVAL); + return -EINVAL; } - return bpf_program__attach_raw_tracepoint(prog, tp_name); + *link = bpf_program__attach_raw_tracepoint(prog, tp_name); + return libbpf_get_error(*link); } /* Common logic for all BPF program types that attach to a btf_id */ -static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog) +static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) { - char errmsg[STRERR_BUFSIZE]; + LIBBPF_OPTS(bpf_link_create_opts, link_opts); struct bpf_link *link; int prog_fd, pfd; + if (!OPTS_VALID(opts, bpf_trace_opts)) + return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); @@ -10457,46 +13071,54 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - pfd = bpf_raw_tracepoint_open(NULL, prog_fd); + /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ + link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); + pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); if (pfd < 0) { pfd = -errno; free(link); pr_warn("prog '%s': failed to attach: %s\n", - prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + prog->name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; - return (struct bpf_link *)link; + return link; } struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return bpf_program__attach_btf_id(prog, NULL); +} + +struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) +{ + return bpf_program__attach_btf_id(prog, opts); } struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return bpf_program__attach_btf_id(prog, NULL); } -static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie) +static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link) { - return bpf_program__attach_trace(prog); + *link = bpf_program__attach_trace(prog); + return libbpf_get_error(*link); } -static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie) +static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link) { - return bpf_program__attach_lsm(prog); + *link = bpf_program__attach_lsm(prog); + return libbpf_get_error(*link); } static struct bpf_link * -bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id, - const char *target_name) +bpf_program_attach_fd(const struct bpf_program *prog, + int target_fd, const char *target_name, + const struct bpf_link_create_opts *opts) { - DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, - .target_btf_id = btf_id); enum bpf_attach_type attach_type; - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; @@ -10511,14 +13133,14 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - attach_type = bpf_program__get_expected_attach_type(prog); - link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts); + attach_type = bpf_program__expected_attach_type(prog); + link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts); if (link_fd < 0) { link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to %s: %s\n", prog->name, target_name, - libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -10528,19 +13150,122 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id struct bpf_link * bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) { - return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup"); + return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL); } struct bpf_link * bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) { - return bpf_program__attach_fd(prog, netns_fd, 0, "netns"); + return bpf_program_attach_fd(prog, netns_fd, "netns", NULL); +} + +struct bpf_link * +bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd) +{ + return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL); } struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) { /* target_fd/target_ifindex use the same field in LINK_CREATE */ - return bpf_program__attach_fd(prog, ifindex, 0, "xdp"); + return bpf_program_attach_fd(prog, ifindex, "xdp", NULL); +} + +struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, + const struct bpf_cgroup_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_cgroup_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.cgroup.relative_fd = relative_fd; + link_create_opts.cgroup.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts); +} + +struct bpf_link * +bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, + const struct bpf_tcx_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_tcx_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + /* validate we don't have unexpected combinations of non-zero fields */ + if (!ifindex) { + pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.tcx.relative_fd = relative_fd; + link_create_opts.tcx.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + /* target_fd/target_ifindex use the same field in LINK_CREATE */ + return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts); +} + +struct bpf_link * +bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, + const struct bpf_netkit_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_netkit_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + /* validate we don't have unexpected combinations of non-zero fields */ + if (!ifindex) { + pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.netkit.relative_fd = relative_fd; + link_create_opts.netkit.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts); } struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, @@ -10556,17 +13281,22 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, } if (prog->type != BPF_PROG_TYPE_EXT) { - pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", + pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n", prog->name); return libbpf_err_ptr(-EINVAL); } if (target_fd) { - btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); + LIBBPF_OPTS(bpf_link_create_opts, target_opts); + + btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd); if (btf_id < 0) return libbpf_err_ptr(btf_id); - return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace"); + target_opts.target_btf_id = btf_id; + + return bpf_program_attach_fd(prog, target_fd, "freplace", + &target_opts); } else { /* no target, so use raw_tracepoint_open for compatibility * with old kernels @@ -10580,7 +13310,6 @@ bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; __u32 target_fd = 0; @@ -10608,77 +13337,203 @@ bpf_program__attach_iter(const struct bpf_program *prog, link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to iterator: %s\n", - prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + prog->name, errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; return link; } -static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie) +static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link) { - return bpf_program__attach_iter(prog, NULL); + *link = bpf_program__attach_iter(prog, NULL); + return libbpf_get_error(*link); +} + +struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, + const struct bpf_netfilter_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, lopts); + struct bpf_link *link; + int prog_fd, link_fd; + + if (!OPTS_VALID(opts, bpf_netfilter_opts)) + return libbpf_err_ptr(-EINVAL); + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't attach before loaded\n", prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link = calloc(1, sizeof(*link)); + if (!link) + return libbpf_err_ptr(-ENOMEM); + + link->detach = &bpf_link__detach_fd; + + lopts.netfilter.pf = OPTS_GET(opts, pf, 0); + lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0); + lopts.netfilter.priority = OPTS_GET(opts, priority, 0); + lopts.netfilter.flags = OPTS_GET(opts, flags, 0); + + link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); + if (link_fd < 0) { + link_fd = -errno; + free(link); + pr_warn("prog '%s': failed to attach to netfilter: %s\n", + prog->name, errstr(link_fd)); + return libbpf_err_ptr(link_fd); + } + link->fd = link_fd; + + return link; } struct bpf_link *bpf_program__attach(const struct bpf_program *prog) { - if (!prog->sec_def || !prog->sec_def->attach_fn) - return libbpf_err_ptr(-ESRCH); + struct bpf_link *link = NULL; + int err; + + if (!prog->sec_def || !prog->sec_def->prog_attach_fn) + return libbpf_err_ptr(-EOPNOTSUPP); - return prog->sec_def->attach_fn(prog, prog->sec_def->cookie); + if (bpf_program__fd(prog) < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); + if (err) + return libbpf_err_ptr(err); + + /* When calling bpf_program__attach() explicitly, auto-attach support + * is expected to work, so NULL returned link is considered an error. + * This is different for skeleton's attach, see comment in + * bpf_object__attach_skeleton(). + */ + if (!link) + return libbpf_err_ptr(-EOPNOTSUPP); + + return link; } +struct bpf_link_struct_ops { + struct bpf_link link; + int map_fd; +}; + static int bpf_link__detach_struct_ops(struct bpf_link *link) { + struct bpf_link_struct_ops *st_link; __u32 zero = 0; - if (bpf_map_delete_elem(link->fd, &zero)) - return -errno; + st_link = container_of(link, struct bpf_link_struct_ops, link); - return 0; + if (st_link->map_fd < 0) + /* w/o a real link */ + return bpf_map_delete_elem(link->fd, &zero); + + return close(link->fd); } struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) { - struct bpf_struct_ops *st_ops; - struct bpf_link *link; - __u32 i, zero = 0; - int err; + struct bpf_link_struct_ops *link; + __u32 zero = 0; + int err, fd; + + if (!bpf_map__is_struct_ops(map)) { + pr_warn("map '%s': can't attach non-struct_ops map\n", map->name); + return libbpf_err_ptr(-EINVAL); + } - if (!bpf_map__is_struct_ops(map) || map->fd == -1) + if (map->fd < 0) { + pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name); return libbpf_err_ptr(-EINVAL); + } link = calloc(1, sizeof(*link)); if (!link) return libbpf_err_ptr(-EINVAL); - st_ops = map->st_ops; - for (i = 0; i < btf_vlen(st_ops->type); i++) { - struct bpf_program *prog = st_ops->progs[i]; - void *kern_data; - int prog_fd; + /* kern_vdata should be prepared during the loading phase. */ + err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); + /* It can be EBUSY if the map has been used to create or + * update a link before. We don't allow updating the value of + * a struct_ops once it is set. That ensures that the value + * never changed. So, it is safe to skip EBUSY. + */ + if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) { + free(link); + return libbpf_err_ptr(err); + } - if (!prog) - continue; + link->link.detach = bpf_link__detach_struct_ops; - prog_fd = bpf_program__fd(prog); - kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; - *(unsigned long *)kern_data = prog_fd; + if (!(map->def.map_flags & BPF_F_LINK)) { + /* w/o a real link */ + link->link.fd = map->fd; + link->map_fd = -1; + return &link->link; } - err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0); - if (err) { - err = -errno; + fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL); + if (fd < 0) { free(link); - return libbpf_err_ptr(err); + return libbpf_err_ptr(fd); } - link->detach = bpf_link__detach_struct_ops; - link->fd = map->fd; + link->link.fd = fd; + link->map_fd = map->fd; - return link; + return &link->link; } +/* + * Swap the back struct_ops of a link with a new struct_ops map. + */ +int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map) +{ + struct bpf_link_struct_ops *st_ops_link; + __u32 zero = 0; + int err; + + if (!bpf_map__is_struct_ops(map)) + return libbpf_err(-EINVAL); + + if (map->fd < 0) { + pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); + return libbpf_err(-EINVAL); + } + + st_ops_link = container_of(link, struct bpf_link_struct_ops, link); + /* Ensure the type of a link is correct */ + if (st_ops_link->map_fd < 0) + return libbpf_err(-EINVAL); + + err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); + /* It can be EBUSY if the map has been used to create or + * update a link before. We don't allow updating the value of + * a struct_ops once it is set. That ensures that the value + * never changed. So, it is safe to skip EBUSY. + */ + if (err && err != -EBUSY) + return err; + + err = bpf_link_update(link->fd, map->fd, NULL); + if (err < 0) + return err; + + st_ops_link->map_fd = map->fd; + + return 0; +} + +typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, + void *private_data); + static enum bpf_perf_event_ret perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -10727,12 +13582,6 @@ perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, return libbpf_err(ret); } -__attribute__((alias("perf_event_read_simple"))) -enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data); - struct perf_buffer; struct perf_buffer_params { @@ -10818,7 +13667,6 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, int cpu, int map_key) { struct perf_cpu_buf *cpu_buf; - char msg[STRERR_BUFSIZE]; int err; cpu_buf = calloc(1, sizeof(*cpu_buf)); @@ -10834,7 +13682,7 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, if (cpu_buf->fd < 0) { err = -errno; pr_warn("failed to open perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } @@ -10845,14 +13693,14 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, cpu_buf->base = NULL; err = -errno; pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } @@ -10866,24 +13714,30 @@ error: static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p); -DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0) -struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, - perf_buffer_sample_fn sample_cb, - perf_buffer_lost_fn lost_cb, - void *ctx, - const struct perf_buffer_opts *opts) +struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, + perf_buffer_sample_fn sample_cb, + perf_buffer_lost_fn lost_cb, + void *ctx, + const struct perf_buffer_opts *opts) { + const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_buffer_params p = {}; - struct perf_event_attr attr = {}; + struct perf_event_attr attr; + __u32 sample_period; if (!OPTS_VALID(opts, perf_buffer_opts)) return libbpf_err_ptr(-EINVAL); + sample_period = OPTS_GET(opts, sample_period, 1); + if (!sample_period) + sample_period = 1; + + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = 1; - attr.wakeup_events = 1; + attr.wakeup_events = sample_period; p.attr = &attr; p.sample_cb = sample_cb; @@ -10893,26 +13747,14 @@ struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4) -struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts) -{ - return perf_buffer__new_v0_6_0(map_fd, page_cnt, - opts ? opts->sample_cb : NULL, - opts ? opts->lost_cb : NULL, - opts ? opts->ctx : NULL, - NULL); -} - -DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0) -struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, - struct perf_event_attr *attr, - perf_buffer_event_fn event_cb, void *ctx, - const struct perf_buffer_raw_opts *opts) +struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt, + struct perf_event_attr *attr, + perf_buffer_event_fn event_cb, void *ctx, + const struct perf_buffer_raw_opts *opts) { struct perf_buffer_params p = {}; - if (page_cnt == 0 || !attr) + if (!attr) return libbpf_err_ptr(-EINVAL); if (!OPTS_VALID(opts, perf_buffer_raw_opts)) @@ -10928,32 +13770,17 @@ struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4) -struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts) -{ - LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts, - .cpu_cnt = opts->cpu_cnt, - .cpus = opts->cpus, - .map_keys = opts->map_keys, - ); - - return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr, - opts->event_cb, opts->ctx, &inner_opts); -} - static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map; - char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; bool *online = NULL; __u32 map_info_len; int err, i, j, n; - if (page_cnt & (page_cnt - 1)) { + if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) { pr_warn("page count should be power of two, but is %zu\n", page_cnt); return ERR_PTR(-EINVAL); @@ -10962,7 +13789,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, /* best-effort sanity checks */ memset(&map, 0, sizeof(map)); map_info_len = sizeof(map); - err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len); + err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len); if (err) { err = -errno; /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return @@ -10970,7 +13797,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, */ if (err != -EINVAL) { pr_warn("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); + map_fd, errstr(err)); return ERR_PTR(err); } pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n", @@ -11000,7 +13827,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, if (pb->epoll_fd < 0) { err = -errno; pr_warn("failed to create epoll instance: %s\n", - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -11031,7 +13858,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = parse_cpu_mask_file(online_cpus_file, &online, &n); if (err) { - pr_warn("failed to get online CPU mask: %d\n", err); + pr_warn("failed to get online CPU mask: %s\n", errstr(err)); goto error; } @@ -11062,7 +13889,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", cpu, map_key, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -11073,7 +13900,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } j++; @@ -11168,7 +13995,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("error while processing records: %d\n", err); + pr_warn("error while processing records: %s\n", errstr(err)); return libbpf_err(err); } } @@ -11202,6 +14029,22 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) return cpu_buf->fd; } +int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size) +{ + struct perf_cpu_buf *cpu_buf; + + if (buf_idx >= pb->cpu_cnt) + return libbpf_err(-EINVAL); + + cpu_buf = pb->cpu_bufs[buf_idx]; + if (!cpu_buf) + return libbpf_err(-ENOENT); + + *buf = cpu_buf->base; + *buf_size = pb->mmap_size; + return 0; +} + /* * Consume data from perf ring buffer corresponding to slot *buf_idx* in * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to @@ -11236,261 +14079,14 @@ int perf_buffer__consume(struct perf_buffer *pb) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); + pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n", + i, errstr(err)); return libbpf_err(err); } } return 0; } -struct bpf_prog_info_array_desc { - int array_offset; /* e.g. offset of jited_prog_insns */ - int count_offset; /* e.g. offset of jited_prog_len */ - int size_offset; /* > 0: offset of rec size, - * < 0: fix size of -size_offset - */ -}; - -static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = { - [BPF_PROG_INFO_JITED_INSNS] = { - offsetof(struct bpf_prog_info, jited_prog_insns), - offsetof(struct bpf_prog_info, jited_prog_len), - -1, - }, - [BPF_PROG_INFO_XLATED_INSNS] = { - offsetof(struct bpf_prog_info, xlated_prog_insns), - offsetof(struct bpf_prog_info, xlated_prog_len), - -1, - }, - [BPF_PROG_INFO_MAP_IDS] = { - offsetof(struct bpf_prog_info, map_ids), - offsetof(struct bpf_prog_info, nr_map_ids), - -(int)sizeof(__u32), - }, - [BPF_PROG_INFO_JITED_KSYMS] = { - offsetof(struct bpf_prog_info, jited_ksyms), - offsetof(struct bpf_prog_info, nr_jited_ksyms), - -(int)sizeof(__u64), - }, - [BPF_PROG_INFO_JITED_FUNC_LENS] = { - offsetof(struct bpf_prog_info, jited_func_lens), - offsetof(struct bpf_prog_info, nr_jited_func_lens), - -(int)sizeof(__u32), - }, - [BPF_PROG_INFO_FUNC_INFO] = { - offsetof(struct bpf_prog_info, func_info), - offsetof(struct bpf_prog_info, nr_func_info), - offsetof(struct bpf_prog_info, func_info_rec_size), - }, - [BPF_PROG_INFO_LINE_INFO] = { - offsetof(struct bpf_prog_info, line_info), - offsetof(struct bpf_prog_info, nr_line_info), - offsetof(struct bpf_prog_info, line_info_rec_size), - }, - [BPF_PROG_INFO_JITED_LINE_INFO] = { - offsetof(struct bpf_prog_info, jited_line_info), - offsetof(struct bpf_prog_info, nr_jited_line_info), - offsetof(struct bpf_prog_info, jited_line_info_rec_size), - }, - [BPF_PROG_INFO_PROG_TAGS] = { - offsetof(struct bpf_prog_info, prog_tags), - offsetof(struct bpf_prog_info, nr_prog_tags), - -(int)sizeof(__u8) * BPF_TAG_SIZE, - }, - -}; - -static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, - int offset) -{ - __u32 *array = (__u32 *)info; - - if (offset >= 0) - return array[offset / sizeof(__u32)]; - return -(int)offset; -} - -static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, - int offset) -{ - __u64 *array = (__u64 *)info; - - if (offset >= 0) - return array[offset / sizeof(__u64)]; - return -(int)offset; -} - -static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, - __u32 val) -{ - __u32 *array = (__u32 *)info; - - if (offset >= 0) - array[offset / sizeof(__u32)] = val; -} - -static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, - __u64 val) -{ - __u64 *array = (__u64 *)info; - - if (offset >= 0) - array[offset / sizeof(__u64)] = val; -} - -struct bpf_prog_info_linear * -bpf_program__get_prog_info_linear(int fd, __u64 arrays) -{ - struct bpf_prog_info_linear *info_linear; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 data_len = 0; - int i, err; - void *ptr; - - if (arrays >> BPF_PROG_INFO_LAST_ARRAY) - return libbpf_err_ptr(-EINVAL); - - /* step 1: get array dimensions */ - err = bpf_obj_get_info_by_fd(fd, &info, &info_len); - if (err) { - pr_debug("can't get prog info: %s", strerror(errno)); - return libbpf_err_ptr(-EFAULT); - } - - /* step 2: calculate total size of all arrays */ - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - bool include_array = (arrays & (1UL << i)) > 0; - struct bpf_prog_info_array_desc *desc; - __u32 count, size; - - desc = bpf_prog_info_array_desc + i; - - /* kernel is too old to support this field */ - if (info_len < desc->array_offset + sizeof(__u32) || - info_len < desc->count_offset + sizeof(__u32) || - (desc->size_offset > 0 && info_len < desc->size_offset)) - include_array = false; - - if (!include_array) { - arrays &= ~(1UL << i); /* clear the bit */ - continue; - } - - count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - - data_len += count * size; - } - - /* step 3: allocate continuous memory */ - data_len = roundup(data_len, sizeof(__u64)); - info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); - if (!info_linear) - return libbpf_err_ptr(-ENOMEM); - - /* step 4: fill data to info_linear->info */ - info_linear->arrays = arrays; - memset(&info_linear->info, 0, sizeof(info)); - ptr = info_linear->data; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u32 count, size; - - if ((arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - bpf_prog_info_set_offset_u32(&info_linear->info, - desc->count_offset, count); - bpf_prog_info_set_offset_u32(&info_linear->info, - desc->size_offset, size); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, - ptr_to_u64(ptr)); - ptr += count * size; - } - - /* step 5: call syscall again to get required arrays */ - err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); - if (err) { - pr_debug("can't get prog info: %s", strerror(errno)); - free(info_linear); - return libbpf_err_ptr(-EFAULT); - } - - /* step 6: verify the data */ - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u32 v1, v2; - - if ((arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, - desc->count_offset); - if (v1 != v2) - pr_warn("%s: mismatch in element count\n", __func__); - - v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, - desc->size_offset); - if (v1 != v2) - pr_warn("%s: mismatch in rec size\n", __func__); - } - - /* step 7: update info_len and data_len */ - info_linear->info_len = sizeof(struct bpf_prog_info); - info_linear->data_len = data_len; - - return info_linear; -} - -void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear) -{ - int i; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u64 addr, offs; - - if ((info_linear->arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - addr = bpf_prog_info_read_offset_u64(&info_linear->info, - desc->array_offset); - offs = addr - ptr_to_u64(info_linear->data); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, offs); - } -} - -void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) -{ - int i; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u64 addr, offs; - - if ((info_linear->arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - offs = bpf_prog_info_read_offset_u64(&info_linear->info, - desc->array_offset); - addr = offs + ptr_to_u64(info_linear->data); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, addr); - } -} - int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name) @@ -11500,12 +14096,12 @@ int bpf_program__set_attach_target(struct bpf_program *prog, if (!prog || attach_prog_fd < 0) return libbpf_err(-EINVAL); - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); if (attach_prog_fd && !attach_func_name) { - /* remember attach_prog_fd and let bpf_program__load() find - * BTF ID during the program load + /* Store attach_prog_fd. The BTF ID will be resolved later during + * the normal object/program load phase. */ prog->attach_prog_fd = attach_prog_fd; return 0; @@ -11513,7 +14109,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog, if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, - attach_prog_fd); + attach_prog_fd, prog->obj->token_fd); if (btf_id < 0) return libbpf_err(btf_id); } else { @@ -11595,14 +14191,14 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) fd = open(fcpu, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; - pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err)); return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { err = len ? -errno : -EINVAL; - pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err)); return err; } if (len >= sizeof(buf)) { @@ -11640,115 +14236,169 @@ int libbpf_num_possible_cpus(void) return tmp_cpus; } -int bpf_object__open_skeleton(struct bpf_object_skeleton *s, - const struct bpf_object_open_opts *opts) +static int populate_skeleton_maps(const struct bpf_object *obj, + struct bpf_map_skeleton *maps, + size_t map_cnt, size_t map_skel_sz) { - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, - .object_name = s->name, - ); - struct bpf_object *obj; - int i, err; - - /* Attempt to preserve opts->object_name, unless overriden by user - * explicitly. Overwriting object name for skeletons is discouraged, - * as it breaks global data maps, because they contain object name - * prefix as their own map name prefix. When skeleton is generated, - * bpftool is making an assumption that this name will stay the same. - */ - if (opts) { - memcpy(&skel_opts, opts, sizeof(*opts)); - if (!opts->object_name) - skel_opts.object_name = s->name; - } - - obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); - err = libbpf_get_error(obj); - if (err) { - pr_warn("failed to initialize skeleton BPF object '%s': %d\n", - s->name, err); - return libbpf_err(err); - } - - *s->obj = obj; + int i; - for (i = 0; i < s->map_cnt; i++) { - struct bpf_map **map = s->maps[i].map; - const char *name = s->maps[i].name; - void **mmaped = s->maps[i].mmaped; + for (i = 0; i < map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz; + struct bpf_map **map = map_skel->map; + const char *name = map_skel->name; + void **mmaped = map_skel->mmaped; *map = bpf_object__find_map_by_name(obj, name); if (!*map) { pr_warn("failed to find skeleton map '%s'\n", name); - return libbpf_err(-ESRCH); + return -ESRCH; } /* externs shouldn't be pre-setup from user code */ if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) *mmaped = (*map)->mmaped; } + return 0; +} - for (i = 0; i < s->prog_cnt; i++) { - struct bpf_program **prog = s->progs[i].prog; - const char *name = s->progs[i].name; +static int populate_skeleton_progs(const struct bpf_object *obj, + struct bpf_prog_skeleton *progs, + size_t prog_cnt, size_t prog_skel_sz) +{ + int i; + + for (i = 0; i < prog_cnt; i++) { + struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz; + struct bpf_program **prog = prog_skel->prog; + const char *name = prog_skel->name; *prog = bpf_object__find_program_by_name(obj, name); if (!*prog) { pr_warn("failed to find skeleton program '%s'\n", name); - return libbpf_err(-ESRCH); + return -ESRCH; } } + return 0; +} + +int bpf_object__open_skeleton(struct bpf_object_skeleton *s, + const struct bpf_object_open_opts *opts) +{ + struct bpf_object *obj; + int err; + + obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_warn("failed to initialize skeleton BPF object '%s': %s\n", + s->name, errstr(err)); + return libbpf_err(err); + } + + *s->obj = obj; + err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz); + if (err) { + pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err)); + return libbpf_err(err); + } + + err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz); + if (err) { + pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err)); + return libbpf_err(err); + } return 0; } +int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) +{ + int err, len, var_idx, i; + const char *var_name; + const struct bpf_map *map; + struct btf *btf; + __u32 map_type_id; + const struct btf_type *map_type, *var_type; + const struct bpf_var_skeleton *var_skel; + struct btf_var_secinfo *var; + + if (!s->obj) + return libbpf_err(-EINVAL); + + btf = bpf_object__btf(s->obj); + if (!btf) { + pr_warn("subskeletons require BTF at runtime (object %s)\n", + bpf_object__name(s->obj)); + return libbpf_err(-errno); + } + + err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz); + if (err) { + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); + return libbpf_err(err); + } + + err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz); + if (err) { + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); + return libbpf_err(err); + } + + for (var_idx = 0; var_idx < s->var_cnt; var_idx++) { + var_skel = (void *)s->vars + var_idx * s->var_skel_sz; + map = *var_skel->map; + map_type_id = bpf_map__btf_value_type_id(map); + map_type = btf__type_by_id(btf, map_type_id); + + if (!btf_is_datasec(map_type)) { + pr_warn("type for map '%1$s' is not a datasec: %2$s\n", + bpf_map__name(map), + __btf_kind_str(btf_kind(map_type))); + return libbpf_err(-EINVAL); + } + + len = btf_vlen(map_type); + var = btf_var_secinfos(map_type); + for (i = 0; i < len; i++, var++) { + var_type = btf__type_by_id(btf, var->type); + var_name = btf__name_by_offset(btf, var_type->name_off); + if (strcmp(var_name, var_skel->name) == 0) { + *var_skel->addr = map->mmaped + var->offset; + break; + } + } + } + return 0; +} + +void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s) +{ + if (!s) + return; + free(s->maps); + free(s->progs); + free(s->vars); + free(s); +} + int bpf_object__load_skeleton(struct bpf_object_skeleton *s) { int i, err; err = bpf_object__load(*s->obj); if (err) { - pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); + pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } for (i = 0; i < s->map_cnt; i++) { - struct bpf_map *map = *s->maps[i].map; - size_t mmap_sz = bpf_map_mmap_sz(map); - int prot, map_fd = bpf_map__fd(map); - void **mmaped = s->maps[i].mmaped; + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; - if (!mmaped) + if (!map_skel->mmaped) continue; - if (!(map->def.map_flags & BPF_F_MMAPABLE)) { - *mmaped = NULL; - continue; - } - - if (map->def.map_flags & BPF_F_RDONLY_PROG) - prot = PROT_READ; - else - prot = PROT_READ | PROT_WRITE; - - /* Remap anonymous mmap()-ed "map initialization image" as - * a BPF map-backed mmap()-ed memory, but preserving the same - * memory address. This will cause kernel to change process' - * page table to point to a different piece of kernel memory, - * but from userspace point of view memory address (and its - * contents, being identical at this point) will stay the - * same. This mapping will be released by bpf_object__close() - * as per normal clean up procedure, so we don't need to worry - * about it from skeleton's clean up perspective. - */ - *mmaped = mmap(map->mmaped, mmap_sz, prot, - MAP_SHARED | MAP_FIXED, map_fd, 0); - if (*mmaped == MAP_FAILED) { - err = -errno; - *mmaped = NULL; - pr_warn("failed to re-mmap() map '%s': %d\n", - bpf_map__name(map), err); - return libbpf_err(err); - } + *map_skel->mmaped = map->mmaped; } return 0; @@ -11759,21 +14409,75 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) int i, err; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_program *prog = *s->progs[i].prog; - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_program *prog = *prog_skel->prog; + struct bpf_link **link = prog_skel->link; - if (!prog->load) + if (!prog->autoload || !prog->autoattach) continue; /* auto-attaching not supported for this program */ - if (!prog->sec_def || !prog->sec_def->attach_fn) + if (!prog->sec_def || !prog->sec_def->prog_attach_fn) continue; - *link = bpf_program__attach(prog); - err = libbpf_get_error(*link); + /* if user already set the link manually, don't attempt auto-attach */ + if (*link) + continue; + + err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); if (err) { - pr_warn("failed to auto-attach program '%s': %d\n", - bpf_program__name(prog), err); + pr_warn("prog '%s': failed to auto-attach: %s\n", + bpf_program__name(prog), errstr(err)); + return libbpf_err(err); + } + + /* It's possible that for some SEC() definitions auto-attach + * is supported in some cases (e.g., if definition completely + * specifies target information), but is not in other cases. + * SEC("uprobe") is one such case. If user specified target + * binary and function name, such BPF program can be + * auto-attached. But if not, it shouldn't trigger skeleton's + * attach to fail. It should just be skipped. + * attach_fn signals such case with returning 0 (no error) and + * setting link to NULL. + */ + } + + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; + struct bpf_link **link; + + if (!map->autocreate || !map->autoattach) + continue; + + /* only struct_ops maps can be attached */ + if (!bpf_map__is_struct_ops(map)) + continue; + + /* skeleton is created with earlier version of bpftool, notify user */ + if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) { + pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n", + bpf_map__name(map)); + continue; + } + + link = map_skel->link; + if (!link) { + pr_warn("map '%s': BPF map skeleton link is uninitialized\n", + bpf_map__name(map)); + continue; + } + + if (*link) + continue; + + *link = bpf_map__attach_struct_ops(map); + if (!*link) { + err = -errno; + pr_warn("map '%s': failed to auto-attach: %s\n", + bpf_map__name(map), errstr(err)); return libbpf_err(err); } } @@ -11786,17 +14490,33 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) int i; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_link **link = prog_skel->link; bpf_link__destroy(*link); *link = NULL; } + + if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) + return; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_link **link = map_skel->link; + + if (link) { + bpf_link__destroy(*link); + *link = NULL; + } + } } void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) { - if (s->progs) - bpf_object__detach_skeleton(s); + if (!s) + return; + + bpf_object__detach_skeleton(s); if (s->obj) bpf_object__close(*s->obj); free(s->maps); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 8b9bc5e90c2b..65e68e964b89 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -24,8 +24,25 @@ extern "C" { #endif +/** + * @brief **libbpf_major_version()** provides the major version of libbpf. + * @return An integer, the major version number + */ LIBBPF_API __u32 libbpf_major_version(void); + +/** + * @brief **libbpf_minor_version()** provides the minor version of libbpf. + * @return An integer, the minor version number + */ LIBBPF_API __u32 libbpf_minor_version(void); + +/** + * @brief **libbpf_version_string()** provides the version of libbpf in a + * human-readable form, e.g., "v1.7". + * @return Pointer to a static string containing the version + * + * The format is *not* a part of a stable API and may change in the future. + */ LIBBPF_API const char *libbpf_version_string(void); enum libbpf_errno { @@ -49,8 +66,52 @@ enum libbpf_errno { __LIBBPF_ERRNO__END, }; +/** + * @brief **libbpf_strerror()** converts the provided error code into a + * human-readable string. + * @param err The error code to convert + * @param buf Pointer to a buffer where the error message will be stored + * @param size The number of bytes in the buffer + * @return 0, on success; negative error code, otherwise + */ LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); +/** + * @brief **libbpf_bpf_attach_type_str()** converts the provided attach type + * value into a textual representation. + * @param t The attach type. + * @return Pointer to a static string identifying the attach type. NULL is + * returned for unknown **bpf_attach_type** values. + */ +LIBBPF_API const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t); + +/** + * @brief **libbpf_bpf_link_type_str()** converts the provided link type value + * into a textual representation. + * @param t The link type. + * @return Pointer to a static string identifying the link type. NULL is + * returned for unknown **bpf_link_type** values. + */ +LIBBPF_API const char *libbpf_bpf_link_type_str(enum bpf_link_type t); + +/** + * @brief **libbpf_bpf_map_type_str()** converts the provided map type value + * into a textual representation. + * @param t The map type. + * @return Pointer to a static string identifying the map type. NULL is + * returned for unknown **bpf_map_type** values. + */ +LIBBPF_API const char *libbpf_bpf_map_type_str(enum bpf_map_type t); + +/** + * @brief **libbpf_bpf_prog_type_str()** converts the provided program type + * value into a textual representation. + * @param t The program type. + * @return Pointer to a static string identifying the program type. NULL is + * returned for unknown **bpf_prog_type** values. + */ +LIBBPF_API const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t); + enum libbpf_print_level { LIBBPF_WARN, LIBBPF_INFO, @@ -60,18 +121,24 @@ enum libbpf_print_level { typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, const char *, va_list ap); +/** + * @brief **libbpf_set_print()** sets user-provided log callback function to + * be used for libbpf warnings and informational messages. If the user callback + * is not set, messages are logged to stderr by default. The verbosity of these + * messages can be controlled by setting the environment variable + * LIBBPF_LOG_LEVEL to either warn, info, or debug. + * @param fn The log print function. If NULL, libbpf won't print anything. + * @return Pointer to old print function. + * + * This function is thread-safe. + */ LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn); /* Hide internal to user */ struct bpf_object; -struct bpf_object_open_attr { - const char *file; - enum bpf_prog_type prog_type; -}; - struct bpf_object_open_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* object name override, if provided: * - for object open from file, this will override setting object @@ -82,21 +149,14 @@ struct bpf_object_open_opts { const char *object_name; /* parse map definitions non-strictly, allowing extra attributes/data */ bool relaxed_maps; - /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures. - * Value is ignored. Relocations always are processed non-strictly. - * Non-relocatable instructions are replaced with invalid ones to - * prevent accidental errors. - * */ - LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect") - bool relaxed_core_relocs; /* maps that set the 'pinning' attribute in their definition will have * their pin_path attribute set to a file in this directory, and be * auto-pinned to that path on load; defaults to "/sys/fs/bpf". */ const char *pin_root_path; - LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__set_attach_target() on each individual bpf_program") - __u32 attach_prog_fd; + __u32 :32; /* stub out now removed attach_prog_fd */ + /* Additional kernel config content that augments and overrides * system Kconfig for CONFIG_xxx externs. */ @@ -117,7 +177,7 @@ struct bpf_object_open_opts { * log_buf and log_level settings. * * If specified, this log buffer will be passed for: - * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden + * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overridden * with bpf_program__set_log() on per-program level, to get * BPF verifier log output. * - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get @@ -145,11 +205,38 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; + /* Path to BPF FS mount point to derive BPF token from. + * + * Created BPF token will be used for all bpf() syscall operations + * that accept BPF token (e.g., map creation, BTF and program loads, + * etc) automatically within instantiated BPF object. + * + * If bpf_token_path is not specified, libbpf will consult + * LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will be + * taken as a value of bpf_token_path option and will force libbpf to + * either create BPF token from provided custom BPF FS path, or will + * disable implicit BPF token creation, if envvar value is an empty + * string. bpf_token_path overrides LIBBPF_BPF_TOKEN_PATH, if both are + * set at the same time. + * + * Setting bpf_token_path option to empty string disables libbpf's + * automatic attempt to create BPF token from default BPF FS mount + * point (/sys/fs/bpf), in case this default behavior is undesirable. + */ + const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field kernel_log_level +#define bpf_object_open_opts__last_field bpf_token_path +/** + * @brief **bpf_object__open()** creates a bpf_object by opening + * the BPF ELF object file pointed to by the passed path and loading it + * into memory. + * @param path BPF object file path. + * @return pointer to the new bpf_object; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_object *bpf_object__open(const char *path); /** @@ -179,24 +266,59 @@ LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); -/* deprecated bpf_object__open variants */ -LIBBPF_API struct bpf_object * -bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, - const char *name); -LIBBPF_API struct bpf_object * -bpf_object__open_xattr(struct bpf_object_open_attr *attr); +/** + * @brief **bpf_object__prepare()** prepares BPF object for loading: + * performs ELF processing, relocations, prepares final state of BPF program + * instructions (accessible with bpf_program__insns()), creates and + * (potentially) pins maps. Leaves BPF object in the state ready for program + * loading. + * @param obj Pointer to a valid BPF object instance returned by + * **bpf_object__open*()** API + * @return 0, on success; negative error code, otherwise, error code is + * stored in errno + */ +LIBBPF_API int bpf_object__prepare(struct bpf_object *obj); -enum libbpf_pin_type { - LIBBPF_PIN_NONE, - /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ - LIBBPF_PIN_BY_NAME, -}; +/** + * @brief **bpf_object__load()** loads BPF object into kernel. + * @param obj Pointer to a valid BPF object instance returned by + * **bpf_object__open*()** APIs + * @return 0, on success; negative error code, otherwise, error code is + * stored in errno + */ +LIBBPF_API int bpf_object__load(struct bpf_object *obj); + +/** + * @brief **bpf_object__close()** closes a BPF object and releases all + * resources. + * @param obj Pointer to a valid BPF object + */ +LIBBPF_API void bpf_object__close(struct bpf_object *obj); -/* pin_maps and unpin_maps can both be called with a NULL path, in which case - * they will use the pin_path attribute of each map (and ignore all maps that - * don't have a pin_path set). +/** + * @brief **bpf_object__pin_maps()** pins each map contained within + * the BPF object at the passed directory. + * @param obj Pointer to a valid BPF object + * @param path A directory where maps should be pinned. + * @return 0, on success; negative error code, otherwise + * + * If `path` is NULL `bpf_map__pin` (which is being used on each map) + * will use the pin_path attribute of each map. In this case, maps that + * don't have a pin_path set will be ignored. */ LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); + +/** + * @brief **bpf_object__unpin_maps()** unpins each map contained within + * the BPF object found in the passed directory. + * @param obj Pointer to a valid BPF object + * @param path A directory where pinned maps should be searched for. + * @return 0, on success; negative error code, otherwise + * + * If `path` is NULL `bpf_map__unpin` (which is being used on each map) + * will use the pin_path attribute of each map. In this case, maps that + * don't have a pin_path set will be ignored. + */ LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, const char *path); LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, @@ -204,50 +326,28 @@ LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, const char *path); LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); -LIBBPF_API void bpf_object__close(struct bpf_object *object); - -struct bpf_object_load_attr { - struct bpf_object *obj; - int log_level; - const char *target_btf_path; -}; - -/* Load/unload object into/from kernel */ -LIBBPF_API int bpf_object__load(struct bpf_object *obj); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead") -LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); -LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead") -LIBBPF_API int bpf_object__unload(struct bpf_object *obj); +LIBBPF_API int bpf_object__unpin(struct bpf_object *object, const char *path); LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version); +/** + * @brief **bpf_object__token_fd** is an accessor for BPF token FD associated + * with BPF object. + * @param obj Pointer to a valid BPF object + * @return BPF token FD or -1, if it wasn't set + */ +LIBBPF_API int bpf_object__token_fd(const struct bpf_object *obj); + struct btf; LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead") -LIBBPF_API struct bpf_program * -bpf_object__find_program_by_title(const struct bpf_object *obj, - const char *title); LIBBPF_API struct bpf_program * bpf_object__find_program_by_name(const struct bpf_object *obj, const char *name); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "track bpf_objects in application code instead") -struct bpf_object *bpf_object__next(struct bpf_object *prev); -#define bpf_object__for_each_safe(pos, tmp) \ - for ((pos) = bpf_object__next(NULL), \ - (tmp) = bpf_object__next(pos); \ - (pos) != NULL; \ - (pos) = (tmp), (tmp) = bpf_object__next(tmp)) - -typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); -LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, - bpf_object_clear_priv_t clear_priv); -LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog); - LIBBPF_API int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type); @@ -258,9 +358,7 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, /* Accessors of bpf_program */ struct bpf_program; -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead") -struct bpf_program *bpf_program__next(struct bpf_program *prog, - const struct bpf_object *obj); + LIBBPF_API struct bpf_program * bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog); @@ -269,31 +367,18 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog) (pos) != NULL; \ (pos) = bpf_object__next_program((obj), (pos))) -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead") -struct bpf_program *bpf_program__prev(struct bpf_program *prog, - const struct bpf_object *obj); LIBBPF_API struct bpf_program * bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog); -typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); - -LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv); - -LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog); -LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead") -const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); - -/* returns program size in bytes */ -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insn_cnt() instead") -LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__autoattach(const struct bpf_program *prog); +LIBBPF_API void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach); struct bpf_insn; @@ -318,6 +403,24 @@ struct bpf_insn; * different. */ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); + +/** + * @brief **bpf_program__set_insns()** can set BPF program's underlying + * BPF instructions. + * + * WARNING: This is a very advanced libbpf API and users need to know + * what they are doing. This should be used from prog_prepare_load_fn + * callback only. + * + * @param prog BPF program for which to return instructions + * @param new_insns a pointer to an array of BPF instructions + * @param new_insn_cnt number of `struct bpf_insn`'s that form + * specified BPF program + * @return 0, on success; negative error code, otherwise + */ +LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt); + /** * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s * that form specified BPF program. @@ -329,17 +432,7 @@ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *p */ LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead") -LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); -LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") -LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, - const char *path, - int instance); -LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") -LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, - const char *path, - int instance); /** * @brief **bpf_program__pin()** pins the BPF program to a file @@ -355,7 +448,7 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); /** * @brief **bpf_program__unpin()** unpins the BPF program from a file - * in the BPFFS specified by a path. This decrements the programs + * in the BPFFS specified by a path. This decrements program's in-kernel * reference count. * * The file pinning the BPF program can also be unlinked by a different @@ -373,7 +466,29 @@ struct bpf_link; LIBBPF_API struct bpf_link *bpf_link__open(const char *path); LIBBPF_API int bpf_link__fd(const struct bpf_link *link); LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); +/** + * @brief **bpf_link__pin()** pins the BPF link to a file + * in the BPF FS specified by a path. This increments the links + * reference count, allowing it to stay loaded after the process + * which loaded it has exited. + * + * @param link BPF link to pin, must already be loaded + * @param path file path in a BPF file system + * @return 0, on success; negative error code, otherwise + */ + LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); + +/** + * @brief **bpf_link__unpin()** unpins the BPF link from a file + * in the BPFFS. This decrements link's in-kernel reference count. + * + * The file pinning the BPF link can also be unlinked by a different + * process in which case this function will return an error. + * + * @param link BPF link to unpin + * @return 0, on success; negative error code, otherwise + */ LIBBPF_API int bpf_link__unpin(struct bpf_link *link); LIBBPF_API int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog); @@ -381,16 +496,37 @@ LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API int bpf_link__detach(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); +/** + * @brief **bpf_program__attach()** is a generic function for attaching + * a BPF program based on auto-detection of program type, attach type, + * and extra parameters, where applicable. + * + * @param prog BPF program to attach + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + * + * This is supported for: + * - kprobe/kretprobe (depends on SEC() definition) + * - uprobe/uretprobe (depends on SEC() definition) + * - tracepoint + * - raw tracepoint + * - tracing programs (typed raw TP/fentry/fexit/fmod_ret) + */ LIBBPF_API struct bpf_link * bpf_program__attach(const struct bpf_program *prog); struct bpf_perf_event_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; + /* don't use BPF link when attach BPF program */ + bool force_ioctl_attach; + /* don't automatically enable the event */ + bool dont_enable; + size_t :0; }; -#define bpf_perf_event_opts__last_field bpf_cookie +#define bpf_perf_event_opts__last_field dont_enable LIBBPF_API struct bpf_link * bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); @@ -399,8 +535,25 @@ LIBBPF_API struct bpf_link * bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts); +/** + * enum probe_attach_mode - the mode to attach kprobe/uprobe + * + * force libbpf to attach kprobe/uprobe in specific mode, -ENOTSUP will + * be returned if it is not supported by the kernel. + */ +enum probe_attach_mode { + /* attach probe in latest supported mode by kernel */ + PROBE_ATTACH_MODE_DEFAULT = 0, + /* attach probe in legacy mode, using debugfs/tracefs */ + PROBE_ATTACH_MODE_LEGACY, + /* create perf event with perf_event_open() syscall */ + PROBE_ATTACH_MODE_PERF, + /* attach probe with BPF link */ + PROBE_ATTACH_MODE_LINK, +}; + struct bpf_kprobe_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; @@ -408,9 +561,11 @@ struct bpf_kprobe_opts { size_t offset; /* kprobe is return probe */ bool retprobe; + /* kprobe attach mode */ + enum probe_attach_mode attach_mode; size_t :0; }; -#define bpf_kprobe_opts__last_field retprobe +#define bpf_kprobe_opts__last_field attach_mode LIBBPF_API struct bpf_link * bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, @@ -420,8 +575,134 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const char *func_name, const struct bpf_kprobe_opts *opts); +struct bpf_kprobe_multi_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* array of function symbols to attach */ + const char **syms; + /* array of function addresses to attach */ + const unsigned long *addrs; + /* array of user-provided values fetchable through bpf_get_attach_cookie */ + const __u64 *cookies; + /* number of elements in syms/addrs/cookies arrays */ + size_t cnt; + /* create return kprobes */ + bool retprobe; + /* create session kprobes */ + bool session; + /* enforce unique match */ + bool unique_match; + size_t :0; +}; + +#define bpf_kprobe_multi_opts__last_field unique_match + +LIBBPF_API struct bpf_link * +bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, + const char *pattern, + const struct bpf_kprobe_multi_opts *opts); + +struct bpf_uprobe_multi_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* array of function symbols to attach to */ + const char **syms; + /* array of function addresses to attach to */ + const unsigned long *offsets; + /* optional, array of associated ref counter offsets */ + const unsigned long *ref_ctr_offsets; + /* optional, array of associated BPF cookies */ + const __u64 *cookies; + /* number of elements in syms/addrs/cookies arrays */ + size_t cnt; + /* create return uprobes */ + bool retprobe; + /* create session kprobes */ + bool session; + size_t :0; +}; + +#define bpf_uprobe_multi_opts__last_field session + +/** + * @brief **bpf_program__attach_uprobe_multi()** attaches a BPF program + * to multiple uprobes with uprobe_multi link. + * + * User can specify 2 mutually exclusive set of inputs: + * + * 1) use only path/func_pattern/pid arguments + * + * 2) use path/pid with allowed combinations of + * syms/offsets/ref_ctr_offsets/cookies/cnt + * + * - syms and offsets are mutually exclusive + * - ref_ctr_offsets and cookies are optional + * + * + * @param prog BPF program to attach + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary + * @param func_pattern Regular expression to specify functions to attach + * BPF program to + * @param opts Additional options (see **struct bpf_uprobe_multi_opts**) + * @return 0, on success; negative error code, otherwise + */ +LIBBPF_API struct bpf_link * +bpf_program__attach_uprobe_multi(const struct bpf_program *prog, + pid_t pid, + const char *binary_path, + const char *func_pattern, + const struct bpf_uprobe_multi_opts *opts); + +struct bpf_ksyscall_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value fetchable through bpf_get_attach_cookie() */ + __u64 bpf_cookie; + /* attach as return probe? */ + bool retprobe; + size_t :0; +}; +#define bpf_ksyscall_opts__last_field retprobe + +/** + * @brief **bpf_program__attach_ksyscall()** attaches a BPF program + * to kernel syscall handler of a specified syscall. Optionally it's possible + * to request to install retprobe that will be triggered at syscall exit. It's + * also possible to associate BPF cookie (though options). + * + * Libbpf automatically will determine correct full kernel function name, + * which depending on system architecture and kernel version/configuration + * could be of the form __<arch>_sys_<syscall> or __se_sys_<syscall>, and will + * attach specified program using kprobe/kretprobe mechanism. + * + * **bpf_program__attach_ksyscall()** is an API counterpart of declarative + * **SEC("ksyscall/<syscall>")** annotation of BPF programs. + * + * At the moment **SEC("ksyscall")** and **bpf_program__attach_ksyscall()** do + * not handle all the calling convention quirks for mmap(), clone() and compat + * syscalls. It also only attaches to "native" syscall interfaces. If host + * system supports compat syscalls or defines 32-bit syscalls in 64-bit + * kernel, such syscall interfaces won't be attached to by libbpf. + * + * These limitations may or may not change in the future. Therefore it is + * recommended to use SEC("kprobe") for these syscalls or if working with + * compat and 32-bit interfaces is required. + * + * @param prog BPF program to attach + * @param syscall_name Symbolic name of the syscall (e.g., "bpf") + * @param opts Additional options (see **struct bpf_ksyscall_opts**) + * @return Reference to the newly created BPF link; or NULL is returned on + * error, error code is stored in errno + */ +LIBBPF_API struct bpf_link * +bpf_program__attach_ksyscall(const struct bpf_program *prog, + const char *syscall_name, + const struct bpf_ksyscall_opts *opts); + struct bpf_uprobe_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* offset of kernel reference counted USDT semaphore, added in * a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") @@ -431,14 +712,24 @@ struct bpf_uprobe_opts { __u64 bpf_cookie; /* uprobe is return probe, invoked at function return time */ bool retprobe; + /* Function name to attach to. Could be an unqualified ("abc") or library-qualified + * "abc@LIBXYZ" name. To specify function entry, func_name should be set while + * func_offset argument to bpf_prog__attach_uprobe_opts() should be 0. To trace an + * offset within a function, specify func_name and use func_offset argument to specify + * offset within the function. Shared library functions must specify the shared library + * binary_path. + */ + const char *func_name; + /* uprobe attach mode */ + enum probe_attach_mode attach_mode; size_t :0; }; -#define bpf_uprobe_opts__last_field retprobe +#define bpf_uprobe_opts__last_field attach_mode /** * @brief **bpf_program__attach_uprobe()** attaches a BPF program * to the userspace function which is found by binary path and - * offset. You can optionally specify a particular proccess to attach + * offset. You can optionally specify a particular process to attach * to. You can also optionally attach the program to the function * exit instead of entry. * @@ -475,8 +766,39 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts); +struct bpf_usdt_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value accessible through usdt_cookie() */ + __u64 usdt_cookie; + size_t :0; +}; +#define bpf_usdt_opts__last_field usdt_cookie + +/** + * @brief **bpf_program__attach_usdt()** is just like + * bpf_program__attach_uprobe_opts() except it covers USDT (User-space + * Statically Defined Tracepoint) attachment, instead of attaching to + * user-space function entry or exit. + * + * @param prog BPF program to attach + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains provided USDT probe + * @param usdt_provider USDT provider name + * @param usdt_name USDT probe name + * @param opts Options for altering program attachment + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */ +LIBBPF_API struct bpf_link * +bpf_program__attach_usdt(const struct bpf_program *prog, + pid_t pid, const char *binary_path, + const char *usdt_provider, const char *usdt_name, + const struct bpf_usdt_opts *opts); + struct bpf_tracepoint_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; @@ -493,26 +815,112 @@ bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, const char *tp_name, const struct bpf_tracepoint_opts *opts); +struct bpf_raw_tracepoint_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u64 cookie; + size_t :0; +}; +#define bpf_raw_tracepoint_opts__last_field cookie + LIBBPF_API struct bpf_link * bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name); LIBBPF_API struct bpf_link * +bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, + const char *tp_name, + struct bpf_raw_tracepoint_opts *opts); + +struct bpf_trace_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value fetchable through bpf_get_attach_cookie() */ + __u64 cookie; +}; +#define bpf_trace_opts__last_field cookie + +LIBBPF_API struct bpf_link * bpf_program__attach_trace(const struct bpf_program *prog); LIBBPF_API struct bpf_link * +bpf_program__attach_trace_opts(const struct bpf_program *prog, const struct bpf_trace_opts *opts); + +LIBBPF_API struct bpf_link * bpf_program__attach_lsm(const struct bpf_program *prog); LIBBPF_API struct bpf_link * bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd); LIBBPF_API struct bpf_link * bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd); LIBBPF_API struct bpf_link * +bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd); +LIBBPF_API struct bpf_link * bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex); LIBBPF_API struct bpf_link * bpf_program__attach_freplace(const struct bpf_program *prog, int target_fd, const char *attach_func_name); +struct bpf_netfilter_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; +}; +#define bpf_netfilter_opts__last_field flags + +LIBBPF_API struct bpf_link * +bpf_program__attach_netfilter(const struct bpf_program *prog, + const struct bpf_netfilter_opts *opts); + +struct bpf_tcx_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + __u32 flags; + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_tcx_opts__last_field expected_revision + +LIBBPF_API struct bpf_link * +bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, + const struct bpf_tcx_opts *opts); + +struct bpf_netkit_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + __u32 flags; + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_netkit_opts__last_field expected_revision + +LIBBPF_API struct bpf_link * +bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, + const struct bpf_netkit_opts *opts); + +struct bpf_cgroup_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + __u32 flags; + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_cgroup_opts__last_field expected_revision + +LIBBPF_API struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, + const struct bpf_cgroup_opts *opts); + struct bpf_map; LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); +LIBBPF_API int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -525,93 +933,38 @@ LIBBPF_API struct bpf_link * bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts); -/* - * Libbpf allows callers to adjust BPF programs before being loaded - * into kernel. One program in an object file can be transformed into - * multiple variants to be attached to different hooks. - * - * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd - * form an API for this purpose. - * - * - bpf_program_prep_t: - * Defines a 'preprocessor', which is a caller defined function - * passed to libbpf through bpf_program__set_prep(), and will be - * called before program is loaded. The processor should adjust - * the program one time for each instance according to the instance id - * passed to it. - * - * - bpf_program__set_prep: - * Attaches a preprocessor to a BPF program. The number of instances - * that should be created is also passed through this function. - * - * - bpf_program__nth_fd: - * After the program is loaded, get resulting FD of a given instance - * of the BPF program. - * - * If bpf_program__set_prep() is not used, the program would be loaded - * without adjustment during bpf_object__load(). The program has only - * one instance. In this case bpf_program__fd(prog) is equal to - * bpf_program__nth_fd(prog, 0). - */ -struct bpf_prog_prep_result { - /* - * If not NULL, load new instruction array. - * If set to NULL, don't load this instance. - */ - struct bpf_insn *new_insn_ptr; - int new_insn_cnt; - - /* If not NULL, result FD is written to it. */ - int *pfd; -}; +LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog); -/* - * Parameters of bpf_program_prep_t: - * - prog: The bpf_program being loaded. - * - n: Index of instance being generated. - * - insns: BPF instructions array. - * - insns_cnt:Number of instructions in insns. - * - res: Output parameter, result of transformation. +/** + * @brief **bpf_program__set_type()** sets the program + * type of the passed BPF program. + * @param prog BPF program to set the program type for + * @param type program type to set the BPF map to have + * @return error code; or 0 if no error. An error occurs + * if the object is already loaded. * - * Return value: - * - Zero: pre-processing success. - * - Non-zero: pre-processing error, stop loading. + * This must be called before the BPF object is loaded, + * otherwise it has no effect and an error is returned. */ -typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, - struct bpf_insn *insns, int insns_cnt, - struct bpf_prog_prep_result *res); - -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") -LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, - bpf_program_prep_t prep); - -LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") -LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); - -/* - * Adjust type of BPF program. Default is kprobe. - */ -LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); -LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); - -LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); -LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, - enum bpf_prog_type type); +LIBBPF_API int bpf_program__set_type(struct bpf_program *prog, + enum bpf_prog_type type); LIBBPF_API enum bpf_attach_type -bpf_program__get_expected_attach_type(const struct bpf_program *prog); -LIBBPF_API void +bpf_program__expected_attach_type(const struct bpf_program *prog); + +/** + * @brief **bpf_program__set_expected_attach_type()** sets the + * attach type of the passed BPF program. This is used for + * auto-detection of attachment when programs are loaded. + * @param prog BPF program to set the attach type for + * @param type attach type to set the BPF map to have + * @return error code; or 0 if no error. An error occurs + * if the object is already loaded. + * + * This must be called before the BPF object is loaded, + * otherwise it has no effect and an error is returned. + */ +LIBBPF_API int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); @@ -627,38 +980,32 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); +LIBBPF_API struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog); +LIBBPF_API __u32 bpf_program__func_info_cnt(const struct bpf_program *prog); + +LIBBPF_API struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog); +LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog); + +/** + * @brief **bpf_program__set_attach_target()** sets BTF-based attach target + * for supported BPF program types: + * - BTF-aware raw tracepoints (tp_btf); + * - fentry/fexit/fmod_ret; + * - lsm; + * - freplace. + * @param prog BPF program to configure; must be not yet loaded. + * @param attach_prog_fd FD of target BPF program (for freplace/extension). + * If >0 and func name omitted, defers BTF ID resolution. + * @param attach_func_name Target function name. Used either with + * attach_prog_fd to find destination BTF type ID in that BPF program, or + * alone (no attach_prog_fd) to resolve kernel (vmlinux/module) BTF ID. + * Must be provided if attach_prog_fd is 0. + * @return error code; or 0 if no error occurred. + */ LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); -LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); -LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog); - -/* - * No need for __attribute__((packed)), all members of 'bpf_map_def' - * are all aligned. In addition, using __attribute__((packed)) - * would trigger a -Wpacked warning message, and lead to an error - * if -Werror is set. - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; -}; - /** * @brief **bpf_object__find_map_by_name()** returns BPF map of * the given name, if it exists within the passed BPF object @@ -673,16 +1020,6 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); LIBBPF_API int bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); -/* - * Get bpf_map through the offset of corresponding struct bpf_map_def - * in the BPF object file. - */ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead") -struct bpf_map * -bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") -struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); @@ -692,12 +1029,49 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); (pos) = bpf_object__next_map((obj), (pos))) #define bpf_map__for_each bpf_object__for_each_map -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead") -struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); /** + * @brief **bpf_map__set_autocreate()** sets whether libbpf has to auto-create + * BPF map during BPF object load phase. + * @param map the BPF map instance + * @param autocreate whether to create BPF map during BPF object load + * @return 0 on success; -EBUSY if BPF object was already loaded + * + * **bpf_map__set_autocreate()** allows to opt-out from libbpf auto-creating + * BPF map. By default, libbpf will attempt to create every single BPF map + * defined in BPF object file using BPF_MAP_CREATE command of bpf() syscall + * and fill in map FD in BPF instructions. + * + * This API allows to opt-out of this process for specific map instance. This + * can be useful if host kernel doesn't support such BPF map type or used + * combination of flags and user application wants to avoid creating such + * a map in the first place. User is still responsible to make sure that their + * BPF-side code that expects to use such missing BPF map is recognized by BPF + * verifier as dead code, otherwise BPF verifier will reject such BPF program. + */ +LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); +LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); + +/** + * @brief **bpf_map__set_autoattach()** sets whether libbpf has to auto-attach + * map during BPF skeleton attach phase. + * @param map the BPF map instance + * @param autoattach whether to attach map during BPF skeleton attach phase + * @return 0 on success; negative error code, otherwise + */ +LIBBPF_API int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach); + +/** + * @brief **bpf_map__autoattach()** returns whether BPF map is configured to + * auto-attach during BPF skeleton attach phase. + * @param map the BPF map instance + * @return true if map is set to auto-attach during skeleton attach phase; false, otherwise + */ +LIBBPF_API bool bpf_map__autoattach(const struct bpf_map *map); + +/** * @brief **bpf_map__fd()** gets the file descriptor of the passed * BPF map * @param map the BPF map instance @@ -705,8 +1079,6 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); */ LIBBPF_API int bpf_map__fd(const struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); -/* get map definition */ -LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); /* get map name */ LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); /* get/set map type */ @@ -715,7 +1087,6 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type); /* get/set map size (max_entries) */ LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map); LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries); -LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); /* get/set map flags */ LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map); LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags); @@ -725,8 +1096,23 @@ LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node); /* get/set map key size */ LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map); LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); -/* get/set map value size */ +/* get map value size */ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); +/** + * @brief **bpf_map__set_value_size()** sets map value size. + * @param map the BPF map instance + * @param size the new value size + * @return 0, on success; negative error, otherwise + * + * There is a special case for maps with associated memory-mapped regions, like + * the global data section maps (bss, data, rodata). When this function is used + * on such a map, the mapped region is resized. Afterward, an attempt is made to + * adjust the corresponding BTF info. This attempt is best-effort and can only + * succeed if the last variable of the data section map is an array. The array + * BTF type is replaced by a new BTF array type with a different length. + * Any previously existing pointers returned from bpf_map__initial_value() or + * corresponding data section skeleton pointer must be reinitialized. + */ LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size); /* get map key/value BTF type IDs */ LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); @@ -738,15 +1124,9 @@ LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); -typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); -LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv); -LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); -LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead") -LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); +LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize); /** * @brief **bpf_map__is_internal()** tells the caller whether or not the @@ -756,74 +1136,187 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); * @return true, if the map is an internal map; false, otherwise */ LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); + +/** + * @brief **bpf_map__set_pin_path()** sets the path attribute that tells where the + * BPF map should be pinned. This does not actually create the 'pin'. + * @param map The bpf_map + * @param path The path + * @return 0, on success; negative error, otherwise + */ LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); -LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); + +/** + * @brief **bpf_map__pin_path()** gets the path attribute that tells where the + * BPF map should be pinned. + * @param map The bpf_map + * @return The path string; which can be NULL + */ LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map); + +/** + * @brief **bpf_map__is_pinned()** tells the caller whether or not the + * passed map has been pinned via a 'pin' file. + * @param map The bpf_map + * @return true, if the map is pinned; false, otherwise + */ LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); + +/** + * @brief **bpf_map__pin()** creates a file that serves as a 'pin' + * for the BPF map. This increments the reference count on the + * BPF map which will keep the BPF map loaded even after the + * userspace process which loaded it has exited. + * @param map The bpf_map to pin + * @param path A file path for the 'pin' + * @return 0, on success; negative error, otherwise + * + * If `path` is NULL the maps `pin_path` attribute will be used. If this is + * also NULL, an error will be returned and the map will not be pinned. + */ LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); + +/** + * @brief **bpf_map__unpin()** removes the file that serves as a + * 'pin' for the BPF map. + * @param map The bpf_map to unpin + * @param path A file path for the 'pin' + * @return 0, on success; negative error, otherwise + * + * The `path` parameter can be NULL, in which case the `pin_path` + * map attribute is unpinned. If both the `path` parameter and + * `pin_path` map attribute are set, they must be equal. + */ LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); /** - * @brief **libbpf_get_error()** extracts the error code from the passed - * pointer - * @param ptr pointer returned from libbpf API function - * @return error code; or 0 if no error occured + * @brief **bpf_map__lookup_elem()** allows to lookup BPF map value + * corresponding to provided key. + * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @param flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__lookup_elem()** is high-level equivalent of + * **bpf_map_lookup_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__update_elem()** allows to insert or update value in BPF + * map that corresponds to provided key. + * @param map BPF map to insert to or update element in + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory containing bytes of the value + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @param flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise * - * Many libbpf API functions which return pointers have logic to encode error - * codes as pointers, and do not return NULL. Meaning **libbpf_get_error()** - * should be used on the return value from these functions immediately after - * calling the API function, with no intervening calls that could clobber the - * `errno` variable. Consult the individual functions documentation to verify - * if this logic applies should be used. + * **bpf_map__update_elem()** is high-level equivalent of + * **bpf_map_update_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that + * corresponds to provided key. + * @param map BPF map to delete element from + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise * - * For these API functions, if `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` - * is enabled, NULL is returned on error instead. + * **bpf_map__delete_elem()** is high-level equivalent of + * **bpf_map_delete_elem()** API with added check for key size. + */ +LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags); + +/** + * @brief **bpf_map__lookup_and_delete_elem()** allows to lookup BPF map value + * corresponding to provided key and atomically delete it afterwards. + * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @param flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise * - * If ptr is NULL, then errno should be already set by the failing - * API, because libbpf never returns NULL on success and it now always - * sets errno on error. + * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of + * **bpf_map_lookup_and_delete_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__get_next_key()** allows to iterate BPF map keys by + * fetching next key that follows current key. + * @param map BPF map to fetch next key from + * @param cur_key pointer to memory containing bytes of current key or NULL to + * fetch the first key + * @param next_key pointer to memory to write next key into + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @return 0, on success; -ENOENT if **cur_key** is the last key in BPF map; + * negative error, otherwise * - * Example usage: + * **bpf_map__get_next_key()** is high-level equivalent of + * **bpf_map_get_next_key()** API with added check for key size. + */ +LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz); +/** + * @brief **bpf_map__set_exclusive_program()** sets a map to be exclusive to the + * specified program. This must be called *before* the map is created. * - * struct perf_buffer *pb; + * @param map BPF map to make exclusive. + * @param prog BPF program to be the exclusive user of the map. Must belong + * to the same bpf_object as the map. + * @return 0 on success; a negative error code otherwise. * - * pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &opts); - * err = libbpf_get_error(pb); - * if (err) { - * pb = NULL; - * fprintf(stderr, "failed to open perf buffer: %d\n", err); - * goto cleanup; - * } + * This function must be called after the BPF object is opened but before + * it is loaded. Once the object is loaded, only the specified program + * will be able to access the map's contents. */ -LIBBPF_API long libbpf_get_error(const void *ptr); - -struct bpf_prog_load_attr { - const char *file; - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - int ifindex; - int log_level; - int prog_flags; -}; - -LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead") -LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead") -LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd); +LIBBPF_API int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog); -/* XDP related API */ -struct xdp_link_info { - __u32 prog_id; - __u32 drv_prog_id; - __u32 hw_prog_id; - __u32 skb_prog_id; - __u8 attach_mode; -}; +/** + * @brief **bpf_map__exclusive_program()** returns the exclusive program + * that is registered with the map (if any). + * @param map BPF map to which the exclusive program is registered. + * @return the registered exclusive program. + */ +LIBBPF_API struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map); struct bpf_xdp_set_link_opts { size_t sz; @@ -832,18 +1325,39 @@ struct bpf_xdp_set_link_opts { }; #define bpf_xdp_set_link_opts__last_field old_fd -LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); -LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, - const struct bpf_xdp_set_link_opts *opts); -LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); -LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags); +struct bpf_xdp_attach_opts { + size_t sz; + int old_prog_fd; + size_t :0; +}; +#define bpf_xdp_attach_opts__last_field old_prog_fd + +struct bpf_xdp_query_opts { + size_t sz; + __u32 prog_id; /* output */ + __u32 drv_prog_id; /* output */ + __u32 hw_prog_id; /* output */ + __u32 skb_prog_id; /* output */ + __u8 attach_mode; /* output */ + __u64 feature_flags; /* output */ + __u32 xdp_zc_max_segs; /* output */ + size_t :0; +}; +#define bpf_xdp_query_opts__last_field xdp_zc_max_segs + +LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, + const struct bpf_xdp_attach_opts *opts); +LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags, + const struct bpf_xdp_attach_opts *opts); +LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts); +LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id); /* TC related API */ enum bpf_tc_attach_point { BPF_TC_INGRESS = 1 << 0, BPF_TC_EGRESS = 1 << 1, BPF_TC_CUSTOM = 1 << 2, + BPF_TC_QDISC = 1 << 3, }; #define BPF_TC_PARENT(a, b) \ @@ -858,9 +1372,11 @@ struct bpf_tc_hook { int ifindex; enum bpf_tc_attach_point attach_point; __u32 parent; + __u32 handle; + const char *qdisc; size_t :0; }; -#define bpf_tc_hook__last_field parent +#define bpf_tc_hook__last_field qdisc struct bpf_tc_opts { size_t sz; @@ -884,11 +1400,13 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, /* Ring buffer APIs */ struct ring_buffer; +struct ring; +struct user_ring_buffer; typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); struct ring_buffer_opts { - size_t sz; /* size of this struct, for forward/backward compatiblity */ + size_t sz; /* size of this struct, for forward/backward compatibility */ }; #define ring_buffer_opts__last_field sz @@ -901,8 +1419,204 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd, ring_buffer_sample_fn sample_cb, void *ctx); LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); +LIBBPF_API int ring_buffer__consume_n(struct ring_buffer *rb, size_t n); LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); +/** + * @brief **ring_buffer__ring()** returns the ringbuffer object inside a given + * ringbuffer manager representing a single BPF_MAP_TYPE_RINGBUF map instance. + * + * @param rb A ringbuffer manager object. + * @param idx An index into the ringbuffers contained within the ringbuffer + * manager object. The index is 0-based and corresponds to the order in which + * ring_buffer__add was called. + * @return A ringbuffer object on success; NULL and errno set if the index is + * invalid. + */ +LIBBPF_API struct ring *ring_buffer__ring(struct ring_buffer *rb, + unsigned int idx); + +/** + * @brief **ring__consumer_pos()** returns the current consumer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current consumer position. + */ +LIBBPF_API unsigned long ring__consumer_pos(const struct ring *r); + +/** + * @brief **ring__producer_pos()** returns the current producer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current producer position. + */ +LIBBPF_API unsigned long ring__producer_pos(const struct ring *r); + +/** + * @brief **ring__avail_data_size()** returns the number of bytes in the + * ringbuffer not yet consumed. This has no locking associated with it, so it + * can be inaccurate if operations are ongoing while this is called. However, it + * should still show the correct trend over the long-term. + * + * @param r A ringbuffer object. + * @return The number of bytes not yet consumed. + */ +LIBBPF_API size_t ring__avail_data_size(const struct ring *r); + +/** + * @brief **ring__size()** returns the total size of the ringbuffer's map data + * area (excluding special producer/consumer pages). Effectively this gives the + * amount of usable bytes of data inside the ringbuffer. + * + * @param r A ringbuffer object. + * @return The total size of the ringbuffer map data area. + */ +LIBBPF_API size_t ring__size(const struct ring *r); + +/** + * @brief **ring__map_fd()** returns the file descriptor underlying the given + * ringbuffer. + * + * @param r A ringbuffer object. + * @return The underlying ringbuffer file descriptor + */ +LIBBPF_API int ring__map_fd(const struct ring *r); + +/** + * @brief **ring__consume()** consumes available ringbuffer data without event + * polling. + * + * @param r A ringbuffer object. + * @return The number of records consumed (or INT_MAX, whichever is less), or + * a negative number if any of the callbacks return an error. + */ +LIBBPF_API int ring__consume(struct ring *r); + +/** + * @brief **ring__consume_n()** consumes up to a requested amount of items from + * a ringbuffer without event polling. + * + * @param r A ringbuffer object. + * @param n Maximum amount of items to consume. + * @return The number of items consumed, or a negative number if any of the + * callbacks return an error. + */ +LIBBPF_API int ring__consume_n(struct ring *r, size_t n); + +struct user_ring_buffer_opts { + size_t sz; /* size of this struct, for forward/backward compatibility */ +}; + +#define user_ring_buffer_opts__last_field sz + +/** + * @brief **user_ring_buffer__new()** creates a new instance of a user ring + * buffer. + * + * @param map_fd A file descriptor to a BPF_MAP_TYPE_USER_RINGBUF map. + * @param opts Options for how the ring buffer should be created. + * @return A user ring buffer on success; NULL and errno being set on a + * failure. + */ +LIBBPF_API struct user_ring_buffer * +user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts); + +/** + * @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the + * user ring buffer. + * @param rb A pointer to a user ring buffer. + * @param size The size of the sample, in bytes. + * @return A pointer to an 8-byte aligned reserved region of the user ring + * buffer; NULL, and errno being set if a sample could not be reserved. + * + * This function is *not* thread safe, and callers must synchronize accessing + * this function if there are multiple producers. If a size is requested that + * is larger than the size of the entire ring buffer, errno will be set to + * E2BIG and NULL is returned. If the ring buffer could accommodate the size, + * but currently does not have enough space, errno is set to ENOSPC and NULL is + * returned. + * + * After initializing the sample, callers must invoke + * **user_ring_buffer__submit()** to post the sample to the kernel. Otherwise, + * the sample must be freed with **user_ring_buffer__discard()**. + */ +LIBBPF_API void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size); + +/** + * @brief **user_ring_buffer__reserve_blocking()** reserves a record in the + * ring buffer, possibly blocking for up to @timeout_ms until a sample becomes + * available. + * @param rb The user ring buffer. + * @param size The size of the sample, in bytes. + * @param timeout_ms The amount of time, in milliseconds, for which the caller + * should block when waiting for a sample. -1 causes the caller to block + * indefinitely. + * @return A pointer to an 8-byte aligned reserved region of the user ring + * buffer; NULL, and errno being set if a sample could not be reserved. + * + * This function is *not* thread safe, and callers must synchronize + * accessing this function if there are multiple producers + * + * If **timeout_ms** is -1, the function will block indefinitely until a sample + * becomes available. Otherwise, **timeout_ms** must be non-negative, or errno + * is set to EINVAL, and NULL is returned. If **timeout_ms** is 0, no blocking + * will occur and the function will return immediately after attempting to + * reserve a sample. + * + * If **size** is larger than the size of the entire ring buffer, errno is set + * to E2BIG and NULL is returned. If the ring buffer could accommodate + * **size**, but currently does not have enough space, the caller will block + * until at most **timeout_ms** has elapsed. If insufficient space is available + * at that time, errno is set to ENOSPC, and NULL is returned. + * + * The kernel guarantees that it will wake up this thread to check if + * sufficient space is available in the ring buffer at least once per + * invocation of the **bpf_ringbuf_drain()** helper function, provided that at + * least one sample is consumed, and the BPF program did not invoke the + * function with BPF_RB_NO_WAKEUP. A wakeup may occur sooner than that, but the + * kernel does not guarantee this. If the helper function is invoked with + * BPF_RB_FORCE_WAKEUP, a wakeup event will be sent even if no sample is + * consumed. + * + * When a sample of size **size** is found within **timeout_ms**, a pointer to + * the sample is returned. After initializing the sample, callers must invoke + * **user_ring_buffer__submit()** to post the sample to the ring buffer. + * Otherwise, the sample must be freed with **user_ring_buffer__discard()**. + */ +LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, + __u32 size, + int timeout_ms); + +/** + * @brief **user_ring_buffer__submit()** submits a previously reserved sample + * into the ring buffer. + * @param rb The user ring buffer. + * @param sample A reserved sample. + * + * It is not necessary to synchronize amongst multiple producers when invoking + * this function. + */ +LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample); + +/** + * @brief **user_ring_buffer__discard()** discards a previously reserved sample. + * @param rb The user ring buffer. + * @param sample A reserved sample. + * + * It is not necessary to synchronize amongst multiple producers when invoking + * this function. + */ +LIBBPF_API void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample); + +/** + * @brief **user_ring_buffer__free()** frees a ring buffer that was previously + * created with **user_ring_buffer__new()**. + * @param rb The user ring buffer being freed. + */ +LIBBPF_API void user_ring_buffer__free(struct user_ring_buffer *rb); + /* Perf buffer APIs */ struct perf_buffer; @@ -912,19 +1626,11 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt); /* common use perf buffer options */ struct perf_buffer_opts { - union { - size_t sz; - struct { /* DEPRECATED: will be removed in v1.0 */ - /* if specified, sample_cb is called for each sample */ - perf_buffer_sample_fn sample_cb; - /* if specified, lost_cb is called for each batch of lost samples */ - perf_buffer_lost_fn lost_cb; - /* ctx is provided to sample_cb and lost_cb */ - void *ctx; - }; - }; + size_t sz; + __u32 sample_period; + size_t :0; }; -#define perf_buffer_opts__last_field sz +#define perf_buffer_opts__last_field sample_period /** * @brief **perf_buffer__new()** creates BPF perfbuf manager for a specified @@ -935,6 +1641,7 @@ struct perf_buffer_opts { * @param sample_cb function called on each received data record * @param lost_cb function called when record loss has occurred * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb* + * @param opts optional parameters for the perf buffer, can be null * @return a new instance of struct perf_buffer on success, NULL on error with * *errno* containing an error code */ @@ -943,21 +1650,6 @@ perf_buffer__new(int map_fd, size_t page_cnt, perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, const struct perf_buffer_opts *opts); -LIBBPF_API struct perf_buffer * -perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, - perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, - const struct perf_buffer_opts *opts); - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead") -struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts); - -#define perf_buffer__new(...) ___libbpf_overload(___perf_buffer_new, __VA_ARGS__) -#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \ - perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) -#define ___perf_buffer_new3(map_fd, page_cnt, opts) \ - perf_buffer__new_deprecated(map_fd, page_cnt, opts) - enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, LIBBPF_PERF_EVENT_ERROR = -1, @@ -971,21 +1663,9 @@ typedef enum bpf_perf_event_ret /* raw perf buffer options, giving most power and control */ struct perf_buffer_raw_opts { - union { - struct { - size_t sz; - long :0; - long :0; - }; - struct { /* DEPRECATED: will be removed in v1.0 */ - /* perf event attrs passed directly into perf_event_open() */ - struct perf_event_attr *attr; - /* raw event callback */ - perf_buffer_event_fn event_cb; - /* ctx is provided to event_cb */ - void *ctx; - }; - }; + size_t sz; + long :0; + long :0; /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of * max_entries of given PERF_EVENT_ARRAY map) */ @@ -997,26 +1677,13 @@ struct perf_buffer_raw_opts { }; #define perf_buffer_raw_opts__last_field map_keys +struct perf_event_attr; + LIBBPF_API struct perf_buffer * perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr, perf_buffer_event_fn event_cb, void *ctx, const struct perf_buffer_raw_opts *opts); -LIBBPF_API struct perf_buffer * -perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr, - perf_buffer_event_fn event_cb, void *ctx, - const struct perf_buffer_raw_opts *opts); - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead") -struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts); - -#define perf_buffer__new_raw(...) ___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__) -#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \ - perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts) -#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \ - perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts) - LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); @@ -1024,15 +1691,22 @@ LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb); LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx); LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx); - -typedef enum bpf_perf_event_ret - (*bpf_perf_event_print_t)(struct perf_event_header *hdr, - void *private_data); -LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead") -LIBBPF_API enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data); +/** + * @brief **perf_buffer__buffer()** returns the per-cpu raw mmap()'ed underlying + * memory region of the ring buffer. + * This ring buffer can be used to implement a custom events consumer. + * The ring buffer starts with the *struct perf_event_mmap_page*, which + * holds the ring buffer management fields, when accessing the header + * structure it's important to be SMP aware. + * You can refer to *perf_event_read_simple* for a simple example. + * @param pb the perf buffer structure + * @param buf_idx the buffer index to retrieve + * @param buf (out) gets the base pointer of the mmap()'ed memory + * @param buf_size (out) gets the size of the mmap()'ed region + * @return 0 on success, negative error code for failure + */ +LIBBPF_API int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, + size_t *buf_size); struct bpf_prog_linfo; struct bpf_prog_info; @@ -1055,14 +1729,6 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, * user, causing subsequent probes to fail. In this case, the caller may want * to adjust that limit with setrlimit(). */ -LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead") -LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex); -LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead") -LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); -LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead") -LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); -LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection") -LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); /** * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports @@ -1106,72 +1772,6 @@ LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, const void *opts); -/* - * Get bpf_prog_info in continuous memory - * - * struct bpf_prog_info has multiple arrays. The user has option to choose - * arrays to fetch from kernel. The following APIs provide an uniform way to - * fetch these data. All arrays in bpf_prog_info are stored in a single - * continuous memory region. This makes it easy to store the info in a - * file. - * - * Before writing bpf_prog_info_linear to files, it is necessary to - * translate pointers in bpf_prog_info to offsets. Helper functions - * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr() - * are introduced to switch between pointers and offsets. - * - * Examples: - * # To fetch map_ids and prog_tags: - * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) | - * (1UL << BPF_PROG_INFO_PROG_TAGS); - * struct bpf_prog_info_linear *info_linear = - * bpf_program__get_prog_info_linear(fd, arrays); - * - * # To save data in file - * bpf_program__bpil_addr_to_offs(info_linear); - * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); - * - * # To read data from file - * read(f, info_linear, <proper_size>); - * bpf_program__bpil_offs_to_addr(info_linear); - */ -enum bpf_prog_info_array { - BPF_PROG_INFO_FIRST_ARRAY = 0, - BPF_PROG_INFO_JITED_INSNS = 0, - BPF_PROG_INFO_XLATED_INSNS, - BPF_PROG_INFO_MAP_IDS, - BPF_PROG_INFO_JITED_KSYMS, - BPF_PROG_INFO_JITED_FUNC_LENS, - BPF_PROG_INFO_FUNC_INFO, - BPF_PROG_INFO_LINE_INFO, - BPF_PROG_INFO_JITED_LINE_INFO, - BPF_PROG_INFO_PROG_TAGS, - BPF_PROG_INFO_LAST_ARRAY, -}; - -struct bpf_prog_info_linear { - /* size of struct bpf_prog_info, when the tool is compiled */ - __u32 info_len; - /* total bytes allocated for data, round up to 8 bytes */ - __u32 data_len; - /* which arrays are included in data */ - __u64 arrays; - struct bpf_prog_info info; - __u8 data[]; -}; - -LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") -LIBBPF_API struct bpf_prog_info_linear * -bpf_program__get_prog_info_linear(int fd, __u64 arrays); - -LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") -LIBBPF_API void -bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); - -LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") -LIBBPF_API void -bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); - /** * @brief **libbpf_num_possible_cpus()** is a helper function to get the * number of possible CPUs that the host kernel supports and expects. @@ -1192,6 +1792,7 @@ struct bpf_map_skeleton { const char *name; struct bpf_map **map; void **mmaped; + struct bpf_link **link; }; struct bpf_prog_skeleton { @@ -1226,15 +1827,45 @@ LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s); LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s); LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s); +struct bpf_var_skeleton { + const char *name; + struct bpf_map **map; + void **addr; +}; + +struct bpf_object_subskeleton { + size_t sz; /* size of this struct, for forward/backward compatibility */ + + const struct bpf_object *obj; + + int map_cnt; + int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */ + struct bpf_map_skeleton *maps; + + int prog_cnt; + int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */ + struct bpf_prog_skeleton *progs; + + int var_cnt; + int var_skel_sz; /* sizeof(struct bpf_var_skeleton) */ + struct bpf_var_skeleton *vars; +}; + +LIBBPF_API int +bpf_object__open_subskeleton(struct bpf_object_subskeleton *s); +LIBBPF_API void +bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s); + struct gen_loader_opts { - size_t sz; /* size of this struct, for forward/backward compatiblity */ + size_t sz; /* size of this struct, for forward/backward compatibility */ const char *data; const char *insns; __u32 data_sz; __u32 insns_sz; + bool gen_hash; }; -#define gen_loader_opts__last_field insns_sz +#define gen_loader_opts__last_field gen_hash LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts); @@ -1245,13 +1876,13 @@ enum libbpf_tristate { }; struct bpf_linker_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; }; #define bpf_linker_opts__last_field sz struct bpf_linker_file_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; }; #define bpf_linker_file_opts__last_field sz @@ -1259,12 +1890,126 @@ struct bpf_linker_file_opts { struct bpf_linker; LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts); +LIBBPF_API struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts); LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts); +LIBBPF_API int bpf_linker__add_fd(struct bpf_linker *linker, int fd, + const struct bpf_linker_file_opts *opts); +LIBBPF_API int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, + const struct bpf_linker_file_opts *opts); LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); +/* + * Custom handling of BPF program's SEC() definitions + */ + +struct bpf_prog_load_opts; /* defined in bpf.h */ + +/* Called during bpf_object__open() for each recognized BPF program. Callback + * can use various bpf_program__set_*() setters to adjust whatever properties + * are necessary. + */ +typedef int (*libbpf_prog_setup_fn_t)(struct bpf_program *prog, long cookie); + +/* Called right before libbpf performs bpf_prog_load() to load BPF program + * into the kernel. Callback can adjust opts as necessary. + */ +typedef int (*libbpf_prog_prepare_load_fn_t)(struct bpf_program *prog, + struct bpf_prog_load_opts *opts, long cookie); + +/* Called during skeleton attach or through bpf_program__attach(). If + * auto-attach is not supported, callback should return 0 and set link to + * NULL (it's not considered an error during skeleton attach, but it will be + * an error for bpf_program__attach() calls). On error, error should be + * returned directly and link set to NULL. On success, return 0 and set link + * to a valid struct bpf_link. + */ +typedef int (*libbpf_prog_attach_fn_t)(const struct bpf_program *prog, long cookie, + struct bpf_link **link); + +struct libbpf_prog_handler_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* User-provided value that is passed to prog_setup_fn, + * prog_prepare_load_fn, and prog_attach_fn callbacks. Allows user to + * register one set of callbacks for multiple SEC() definitions and + * still be able to distinguish them, if necessary. For example, + * libbpf itself is using this to pass necessary flags (e.g., + * sleepable flag) to a common internal SEC() handler. + */ + long cookie; + /* BPF program initialization callback (see libbpf_prog_setup_fn_t). + * Callback is optional, pass NULL if it's not necessary. + */ + libbpf_prog_setup_fn_t prog_setup_fn; + /* BPF program loading callback (see libbpf_prog_prepare_load_fn_t). + * Callback is optional, pass NULL if it's not necessary. + */ + libbpf_prog_prepare_load_fn_t prog_prepare_load_fn; + /* BPF program attach callback (see libbpf_prog_attach_fn_t). + * Callback is optional, pass NULL if it's not necessary. + */ + libbpf_prog_attach_fn_t prog_attach_fn; +}; +#define libbpf_prog_handler_opts__last_field prog_attach_fn + +/** + * @brief **libbpf_register_prog_handler()** registers a custom BPF program + * SEC() handler. + * @param sec section prefix for which custom handler is registered + * @param prog_type BPF program type associated with specified section + * @param exp_attach_type Expected BPF attach type associated with specified section + * @param opts optional cookie, callbacks, and other extra options + * @return Non-negative handler ID is returned on success. This handler ID has + * to be passed to *libbpf_unregister_prog_handler()* to unregister such + * custom handler. Negative error code is returned on error. + * + * *sec* defines which SEC() definitions are handled by this custom handler + * registration. *sec* can have few different forms: + * - if *sec* is just a plain string (e.g., "abc"), it will match only + * SEC("abc"). If BPF program specifies SEC("abc/whatever") it will result + * in an error; + * - if *sec* is of the form "abc/", proper SEC() form is + * SEC("abc/something"), where acceptable "something" should be checked by + * *prog_init_fn* callback, if there are additional restrictions; + * - if *sec* is of the form "abc+", it will successfully match both + * SEC("abc") and SEC("abc/whatever") forms; + * - if *sec* is NULL, custom handler is registered for any BPF program that + * doesn't match any of the registered (custom or libbpf's own) SEC() + * handlers. There could be only one such generic custom handler registered + * at any given time. + * + * All custom handlers (except the one with *sec* == NULL) are processed + * before libbpf's own SEC() handlers. It is allowed to "override" libbpf's + * SEC() handlers by registering custom ones for the same section prefix + * (i.e., it's possible to have custom SEC("perf_event/LLC-load-misses") + * handler). + * + * Note, like much of global libbpf APIs (e.g., libbpf_set_print(), + * libbpf_set_strict_mode(), etc)) these APIs are not thread-safe. User needs + * to ensure synchronization if there is a risk of running this API from + * multiple threads simultaneously. + */ +LIBBPF_API int libbpf_register_prog_handler(const char *sec, + enum bpf_prog_type prog_type, + enum bpf_attach_type exp_attach_type, + const struct libbpf_prog_handler_opts *opts); +/** + * @brief *libbpf_unregister_prog_handler()* unregisters previously registered + * custom BPF program SEC() handler. + * @param handler_id handler ID returned by *libbpf_register_prog_handler()* + * after successful registration + * @return 0 on success, negative error code if handler isn't found + * + * Note, like much of global libbpf APIs (e.g., libbpf_set_print(), + * libbpf_set_strict_mode(), etc)) these APIs are not thread-safe. User needs + * to ensure synchronization if there is a risk of running this API from + * multiple threads simultaneously. + */ +LIBBPF_API int libbpf_unregister_prog_handler(int handler_id); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 529783967793..8ed8749907d4 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -1,29 +1,14 @@ LIBBPF_0.0.1 { global: bpf_btf_get_fd_by_id; - bpf_create_map; - bpf_create_map_in_map; - bpf_create_map_in_map_node; - bpf_create_map_name; - bpf_create_map_node; - bpf_create_map_xattr; - bpf_load_btf; - bpf_load_program; - bpf_load_program_xattr; bpf_map__btf_key_type_id; bpf_map__btf_value_type_id; - bpf_map__def; bpf_map__fd; - bpf_map__is_offload_neutral; bpf_map__name; - bpf_map__next; bpf_map__pin; - bpf_map__prev; - bpf_map__priv; bpf_map__reuse_fd; bpf_map__set_ifindex; bpf_map__set_inner_map_fd; - bpf_map__set_priv; bpf_map__unpin; bpf_map_delete_elem; bpf_map_get_fd_by_id; @@ -38,79 +23,37 @@ LIBBPF_0.0.1 { bpf_object__btf_fd; bpf_object__close; bpf_object__find_map_by_name; - bpf_object__find_map_by_offset; - bpf_object__find_program_by_title; bpf_object__kversion; bpf_object__load; bpf_object__name; - bpf_object__next; bpf_object__open; - bpf_object__open_buffer; - bpf_object__open_xattr; bpf_object__pin; bpf_object__pin_maps; bpf_object__pin_programs; - bpf_object__priv; - bpf_object__set_priv; - bpf_object__unload; bpf_object__unpin_maps; bpf_object__unpin_programs; - bpf_perf_event_read_simple; bpf_prog_attach; bpf_prog_detach; bpf_prog_detach2; bpf_prog_get_fd_by_id; bpf_prog_get_next_id; - bpf_prog_load; - bpf_prog_load_xattr; bpf_prog_query; - bpf_prog_test_run; - bpf_prog_test_run_xattr; bpf_program__fd; - bpf_program__is_kprobe; - bpf_program__is_perf_event; - bpf_program__is_raw_tracepoint; - bpf_program__is_sched_act; - bpf_program__is_sched_cls; - bpf_program__is_socket_filter; - bpf_program__is_tracepoint; - bpf_program__is_xdp; - bpf_program__load; - bpf_program__next; - bpf_program__nth_fd; bpf_program__pin; - bpf_program__pin_instance; - bpf_program__prev; - bpf_program__priv; bpf_program__set_expected_attach_type; bpf_program__set_ifindex; - bpf_program__set_kprobe; - bpf_program__set_perf_event; - bpf_program__set_prep; - bpf_program__set_priv; - bpf_program__set_raw_tracepoint; - bpf_program__set_sched_act; - bpf_program__set_sched_cls; - bpf_program__set_socket_filter; - bpf_program__set_tracepoint; bpf_program__set_type; - bpf_program__set_xdp; - bpf_program__title; bpf_program__unload; bpf_program__unpin; - bpf_program__unpin_instance; bpf_prog_linfo__free; bpf_prog_linfo__new; bpf_prog_linfo__lfind_addr_func; bpf_prog_linfo__lfind; bpf_raw_tracepoint_open; - bpf_set_link_xdp_fd; bpf_task_fd_query; - bpf_verify_program; btf__fd; btf__find_by_name; btf__free; - btf__get_from_id; btf__name_by_offset; btf__new; btf__resolve_size; @@ -127,48 +70,24 @@ LIBBPF_0.0.1 { LIBBPF_0.0.2 { global: - bpf_probe_helper; - bpf_probe_map_type; - bpf_probe_prog_type; - bpf_map__resize; bpf_map_lookup_elem_flags; bpf_object__btf; bpf_object__find_map_fd_by_name; - bpf_get_link_xdp_id; - btf__dedup; - btf__get_map_kv_tids; - btf__get_nr_types; btf__get_raw_data; - btf__load; btf_ext__free; - btf_ext__func_info_rec_size; btf_ext__get_raw_data; - btf_ext__line_info_rec_size; btf_ext__new; - btf_ext__reloc_func_info; - btf_ext__reloc_line_info; - xsk_umem__create; - xsk_socket__create; - xsk_umem__delete; - xsk_socket__delete; - xsk_umem__fd; - xsk_socket__fd; - bpf_program__get_prog_info_linear; - bpf_program__bpil_addr_to_offs; - bpf_program__bpil_offs_to_addr; } LIBBPF_0.0.1; LIBBPF_0.0.3 { global: bpf_map__is_internal; bpf_map_freeze; - btf__finalize_data; } LIBBPF_0.0.2; LIBBPF_0.0.4 { global: bpf_link__destroy; - bpf_object__load_xattr; bpf_program__attach_kprobe; bpf_program__attach_perf_event; bpf_program__attach_raw_tracepoint; @@ -176,14 +95,10 @@ LIBBPF_0.0.4 { bpf_program__attach_uprobe; btf_dump__dump_type; btf_dump__free; - btf_dump__new; btf__parse_elf; libbpf_num_possible_cpus; perf_buffer__free; - perf_buffer__new; - perf_buffer__new_raw; perf_buffer__poll; - xsk_umem__create; } LIBBPF_0.0.3; LIBBPF_0.0.5 { @@ -193,7 +108,6 @@ LIBBPF_0.0.5 { LIBBPF_0.0.6 { global: - bpf_get_link_xdp_info; bpf_map__get_pin_path; bpf_map__is_pinned; bpf_map__set_pin_path; @@ -202,9 +116,6 @@ LIBBPF_0.0.6 { bpf_program__attach_trace; bpf_program__get_expected_attach_type; bpf_program__get_type; - bpf_program__is_tracing; - bpf_program__set_tracing; - bpf_program__size; btf__find_by_name_kind; libbpf_find_vmlinux_btf_id; } LIBBPF_0.0.5; @@ -224,14 +135,8 @@ LIBBPF_0.0.7 { bpf_object__detach_skeleton; bpf_object__load_skeleton; bpf_object__open_skeleton; - bpf_probe_large_insn_limit; - bpf_prog_attach_xattr; bpf_program__attach; bpf_program__name; - bpf_program__is_extension; - bpf_program__is_struct_ops; - bpf_program__set_extension; - bpf_program__set_struct_ops; btf__align_of; libbpf_find_kernel_btf; } LIBBPF_0.0.6; @@ -247,12 +152,10 @@ LIBBPF_0.0.8 { bpf_link_create; bpf_link_update; bpf_map__set_initial_value; + bpf_prog_attach_opts; bpf_program__attach_cgroup; bpf_program__attach_lsm; - bpf_program__is_lsm; bpf_program__set_attach_target; - bpf_program__set_lsm; - bpf_set_link_xdp_fd_opts; } LIBBPF_0.0.7; LIBBPF_0.0.9 { @@ -290,9 +193,7 @@ LIBBPF_0.1.0 { bpf_map__value_size; bpf_program__attach_xdp; bpf_program__autoload; - bpf_program__is_sk_lookup; bpf_program__set_autoload; - bpf_program__set_sk_lookup; btf__parse; btf__parse_raw; btf__pointer_size; @@ -335,7 +236,6 @@ LIBBPF_0.2.0 { perf_buffer__buffer_fd; perf_buffer__epoll_fd; perf_buffer__consume_buffer; - xsk_socket__create_shared; } LIBBPF_0.1.0; LIBBPF_0.3.0 { @@ -345,10 +245,7 @@ LIBBPF_0.3.0 { btf__parse_raw_split; btf__parse_split; btf__new_empty_split; - btf__new_split; ring_buffer__epoll_fd; - xsk_setup_xdp_prog; - xsk_socket__update_xskmap; } LIBBPF_0.2.0; LIBBPF_0.4.0 { @@ -396,7 +293,6 @@ LIBBPF_0.6.0 { bpf_object__next_program; bpf_object__prev_map; bpf_object__prev_program; - bpf_prog_load_deprecated; bpf_prog_load; bpf_program__flags; bpf_program__insn_cnt; @@ -406,29 +302,153 @@ LIBBPF_0.6.0 { btf__add_decl_tag; btf__add_type_tag; btf__dedup; - btf__dedup_deprecated; btf__raw_data; btf__type_cnt; btf_dump__new; - btf_dump__new_deprecated; libbpf_major_version; libbpf_minor_version; libbpf_version_string; perf_buffer__new; - perf_buffer__new_deprecated; perf_buffer__new_raw; - perf_buffer__new_raw_deprecated; } LIBBPF_0.5.0; LIBBPF_0.7.0 { global: bpf_btf_load; + bpf_program__expected_attach_type; bpf_program__log_buf; bpf_program__log_level; bpf_program__set_log_buf; bpf_program__set_log_level; + bpf_program__type; + bpf_xdp_attach; + bpf_xdp_detach; + bpf_xdp_query; + bpf_xdp_query_id; libbpf_probe_bpf_helper; libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; - libbpf_set_memlock_rlim_max; -}; + libbpf_set_memlock_rlim; +} LIBBPF_0.6.0; + +LIBBPF_0.8.0 { + global: + bpf_map__autocreate; + bpf_map__get_next_key; + bpf_map__delete_elem; + bpf_map__lookup_and_delete_elem; + bpf_map__lookup_elem; + bpf_map__set_autocreate; + bpf_map__update_elem; + bpf_map_delete_elem_flags; + bpf_object__destroy_subskeleton; + bpf_object__open_subskeleton; + bpf_program__attach_kprobe_multi_opts; + bpf_program__attach_trace_opts; + bpf_program__attach_usdt; + bpf_program__set_insns; + libbpf_register_prog_handler; + libbpf_unregister_prog_handler; +} LIBBPF_0.7.0; + +LIBBPF_1.0.0 { + global: + bpf_obj_get_opts; + bpf_prog_query_opts; + bpf_program__attach_ksyscall; + bpf_program__autoattach; + bpf_program__set_autoattach; + btf__add_enum64; + btf__add_enum64_value; + libbpf_bpf_attach_type_str; + libbpf_bpf_link_type_str; + libbpf_bpf_map_type_str; + libbpf_bpf_prog_type_str; + perf_buffer__buffer; +} LIBBPF_0.8.0; + +LIBBPF_1.1.0 { + global: + bpf_btf_get_fd_by_id_opts; + bpf_link_get_fd_by_id_opts; + bpf_map_get_fd_by_id_opts; + bpf_prog_get_fd_by_id_opts; + user_ring_buffer__discard; + user_ring_buffer__free; + user_ring_buffer__new; + user_ring_buffer__reserve; + user_ring_buffer__reserve_blocking; + user_ring_buffer__submit; +} LIBBPF_1.0.0; + +LIBBPF_1.2.0 { + global: + bpf_btf_get_info_by_fd; + bpf_link__update_map; + bpf_link_get_info_by_fd; + bpf_map_get_info_by_fd; + bpf_prog_get_info_by_fd; +} LIBBPF_1.1.0; + +LIBBPF_1.3.0 { + global: + bpf_obj_pin_opts; + bpf_object__unpin; + bpf_prog_detach_opts; + bpf_program__attach_netfilter; + bpf_program__attach_netkit; + bpf_program__attach_tcx; + bpf_program__attach_uprobe_multi; + ring__avail_data_size; + ring__consume; + ring__consumer_pos; + ring__map_fd; + ring__producer_pos; + ring__size; + ring_buffer__ring; +} LIBBPF_1.2.0; + +LIBBPF_1.4.0 { + global: + bpf_program__attach_raw_tracepoint_opts; + bpf_raw_tracepoint_open_opts; + bpf_token_create; + btf__new_split; + btf_ext__raw_data; +} LIBBPF_1.3.0; + +LIBBPF_1.5.0 { + global: + btf__distill_base; + btf__relocate; + btf_ext__endianness; + btf_ext__set_endianness; + bpf_map__autoattach; + bpf_map__set_autoattach; + bpf_object__token_fd; + bpf_program__attach_sockmap; + ring__consume_n; + ring_buffer__consume_n; +} LIBBPF_1.4.0; + +LIBBPF_1.6.0 { + global: + bpf_linker__add_buf; + bpf_linker__add_fd; + bpf_linker__new_fd; + bpf_object__prepare; + bpf_prog_stream_read; + bpf_program__attach_cgroup_opts; + bpf_program__func_info; + bpf_program__func_info_cnt; + bpf_program__line_info; + bpf_program__line_info_cnt; + btf__add_decl_attr; + btf__add_type_attr; +} LIBBPF_1.5.0; + +LIBBPF_1.7.0 { + global: + bpf_map__set_exclusive_program; + bpf_map__exclusive_program; +} LIBBPF_1.6.0; diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index 000e37798ff2..8fe248e14eb6 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -30,20 +30,10 @@ /* Add checks for other versions below when planning deprecation of API symbols * with the LIBBPF_DEPRECATED_SINCE macro. */ -#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6) -#define __LIBBPF_MARK_DEPRECATED_0_6(X) X +#if __LIBBPF_CURRENT_VERSION_GEQ(1, 0) +#define __LIBBPF_MARK_DEPRECATED_1_0(X) X #else -#define __LIBBPF_MARK_DEPRECATED_0_6(X) -#endif -#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7) -#define __LIBBPF_MARK_DEPRECATED_0_7(X) X -#else -#define __LIBBPF_MARK_DEPRECATED_0_7(X) -#endif -#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8) -#define __LIBBPF_MARK_DEPRECATED_0_8(X) X -#else -#define __LIBBPF_MARK_DEPRECATED_0_8(X) +#define __LIBBPF_MARK_DEPRECATED_1_0(X) #endif /* This set of internal macros allows to do "function overloading" based on @@ -80,4 +70,23 @@ }; \ }) +/* Helper macro to clear and optionally reinitialize libbpf options struct + * + * Small helper macro to reset all fields and to reinitialize the common + * structure size member. Values provided by users in struct initializer- + * syntax as varargs can be provided as well to reinitialize options struct + * specific members. + */ +#define LIBBPF_OPTS_RESET(NAME, ...) \ + do { \ + typeof(NAME) ___##NAME = ({ \ + memset(&___##NAME, 0, sizeof(NAME)); \ + (typeof(NAME)) { \ + .sz = sizeof(NAME), \ + __VA_ARGS__ \ + }; \ + }); \ + memcpy(&NAME, &___##NAME, sizeof(NAME)); \ + } while (0) + #endif /* __LIBBPF_LIBBPF_COMMON_H */ diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c deleted file mode 100644 index 96f67a772a1b..000000000000 --- a/tools/lib/bpf/libbpf_errno.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. - * Copyright (C) 2017 Nicira, Inc. - */ - -#undef _GNU_SOURCE -#include <stdio.h> -#include <string.h> - -#include "libbpf.h" -#include "libbpf_internal.h" - -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - -#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) -#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) -#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) - -static const char *libbpf_strerror_table[NR_ERRNO] = { - [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", - [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", - [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", - [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", - [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", - [ERRCODE_OFFSET(RELOC)] = "Relocation failed", - [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", - [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", - [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", - [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", - [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", - [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", - [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", -}; - -int libbpf_strerror(int err, char *buf, size_t size) -{ - if (!buf || !size) - return libbpf_err(-EINVAL); - - err = err > 0 ? err : -err; - - if (err < __LIBBPF_ERRNO__START) { - int ret; - - ret = strerror_r(err, buf, size); - buf[size - 1] = '\0'; - return libbpf_err_errno(ret); - } - - if (err < __LIBBPF_ERRNO__END) { - const char *msg; - - msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; - snprintf(buf, size, "%s", msg); - buf[size - 1] = '\0'; - return 0; - } - - snprintf(buf, size, "Unknown libbpf error %d", err); - buf[size - 1] = '\0'; - return libbpf_err(-ENOENT); -} diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 1565679eb432..fc59b21b51b5 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -10,14 +10,30 @@ #define __LIBBPF_LIBBPF_INTERNAL_H #include <stdlib.h> +#include <byteswap.h> #include <limits.h> #include <errno.h> #include <linux/err.h> #include <fcntl.h> #include <unistd.h> -#include "libbpf_legacy.h" +#include <sys/syscall.h> +#include <libelf.h> #include "relo_core.h" +/* Android's libc doesn't support AT_EACCESS in faccessat() implementation + * ([0]), and just returns -EINVAL even if file exists and is accessible. + * See [1] for issues caused by this. + * + * So just redefine it to 0 on Android. + * + * [0] https://android.googlesource.com/platform/bionic/+/refs/heads/android13-release/libc/bionic/faccessat.cpp#50 + * [1] https://github.com/libbpf/libbpf-bootstrap/issues/250#issuecomment-1911324250 + */ +#ifdef __ANDROID__ +#undef AT_EACCESS +#define AT_EACCESS 0 +#endif + /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -58,6 +74,8 @@ #define ELF64_ST_VISIBILITY(o) ((o) & 0x03) #endif +#define JUMPTABLES_SEC ".jumptables" + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) @@ -92,6 +110,9 @@ # define offsetofend(TYPE, FIELD) \ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif +#ifndef __alias +#define __alias(symbol) __attribute__((alias(#symbol))) +#endif /* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is * a string literal known at compilation time or char * pointer known only at @@ -100,6 +121,17 @@ #define str_has_pfx(str, pfx) \ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) +/* suffix check */ +static inline bool str_has_sfx(const char *str, const char *sfx) +{ + size_t str_len = strlen(str); + size_t sfx_len = strlen(sfx); + + if (sfx_len > str_len) + return false; + return strcmp(str + str_len - sfx_len, sfx) == 0; +} + /* Symbol versioning is different between static and shared library. * Properly versioned symbols are needed for shared library, but * only the symbol of the new version is needed for static library. @@ -142,9 +174,28 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +/** + * @brief **libbpf_errstr()** returns string corresponding to numeric errno + * @param err negative numeric errno + * @return pointer to string representation of the errno, that is invalidated + * upon the next call. + */ +const char *libbpf_errstr(int err); + +#define errstr(err) libbpf_errstr(err) + #ifndef __has_builtin #define __has_builtin(x) 0 #endif + +struct bpf_link { + int (*detach)(struct bpf_link *link); + void (*dealloc)(struct bpf_link *link); + char *pin_path; /* NULL, if not pinned */ + int fd; /* hook FD, -1 if not applicable */ + bool disconnected; +}; + /* * Re-implement glibc's reallocarray() for libbpf internal-only use. * reallocarray(), unfortunately, is not available in all versions of glibc, @@ -196,6 +247,9 @@ struct btf_type; struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id); const char *btf_kind_str(const struct btf_type *t); const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); +const struct btf_header *btf_header(const struct btf *btf); +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf); +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map); static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t) { @@ -326,18 +380,48 @@ enum kern_feature_id { FEAT_BTF_TYPE_TAG, /* memcg-based accounting for BPF maps and progs */ FEAT_MEMCG_ACCOUNT, + /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */ + FEAT_BPF_COOKIE, + /* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */ + FEAT_BTF_ENUM64, + /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */ + FEAT_SYSCALL_WRAPPER, + /* BPF multi-uprobe link support */ + FEAT_UPROBE_MULTI_LINK, + /* Kernel supports arg:ctx tag (__arg_ctx) for global subprogs natively */ + FEAT_ARG_CTX_TAG, + /* Kernel supports '?' at the front of datasec names */ + FEAT_BTF_QMARK_DATASEC, __FEAT_CNT, }; -int probe_memcg_account(void); +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; + int token_fd; +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); + +int probe_kern_syscall_wrapper(int token_fd); +int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); + const char *str_sec, size_t str_len, + int token_fd); +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd); +struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -351,6 +435,13 @@ struct btf_ext_info { void *info; __u32 rec_size; __u32 len; + /* optional (maintained internally by libbpf) mapping between .BTF.ext + * section and corresponding ELF section. This is used to join + * information like CO-RE relocation records with corresponding BPF + * programs defined in ELF sections + */ + __u32 *sec_idxs; + int sec_cnt; }; #define for_each_btf_ext_sec(seg, sec) \ @@ -371,11 +462,11 @@ struct btf_ext_info { * * The func_info subsection layout: * record size for struct bpf_func_info in the func_info subsection - * struct btf_sec_func_info for section #1 + * struct btf_ext_info_sec for section #1 * a list of bpf_func_info records for section #1 * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h * but may not be identical - * struct btf_sec_func_info for section #2 + * struct btf_ext_info_sec for section #2 * a list of bpf_func_info records for section #2 * ...... * @@ -407,6 +498,8 @@ struct btf_ext { struct btf_ext_header *hdr; void *data; }; + void *data_swapped; + bool swapped_endian; struct btf_ext_info func_info; struct btf_ext_info line_info; struct btf_ext_info core_relo_info; @@ -434,18 +527,64 @@ struct bpf_line_info_min { __u32 line_col; }; +/* Functions to byte-swap info records */ + +typedef void (*info_rec_bswap_fn)(void *); + +static inline void bpf_func_info_bswap(struct bpf_func_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); +} + +static inline void bpf_line_info_bswap(struct bpf_line_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->file_name_off = bswap_32(i->file_name_off); + i->line_off = bswap_32(i->line_off); + i->line_col = bswap_32(i->line_col); +} + +static inline void bpf_core_relo_bswap(struct bpf_core_relo *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); + i->access_str_off = bswap_32(i->access_str_off); + i->kind = bswap_32(i->kind); +} + +enum btf_field_iter_kind { + BTF_FIELD_ITER_IDS, + BTF_FIELD_ITER_STRS, +}; + +struct btf_field_desc { + /* once-per-type offsets */ + int t_off_cnt, t_offs[2]; + /* member struct size, or zero, if no members */ + int m_sz; + /* repeated per-member offsets */ + int m_off_cnt, m_offs[1]; +}; + +struct btf_field_iter { + struct btf_field_desc desc; + void *p; + int m_idx; + int off_idx; + int vlen; +}; + +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, enum btf_field_iter_kind iter_kind); +__u32 *btf_field_iter_next(struct btf_field_iter *it); typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx); -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, __u32 kind); -extern enum libbpf_strict_mode libbpf_mode; - /* handle direct returned errors */ static inline int libbpf_err(int ret) { @@ -459,12 +598,8 @@ static inline int libbpf_err(int ret) */ static inline int libbpf_err_errno(int ret) { - if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS) - /* errno is already assumed to be set on error */ - return ret < 0 ? -errno : ret; - - /* legacy: on error return -1 directly and don't touch errno */ - return ret; + /* errno is already assumed to be set on error */ + return ret < 0 ? -errno : ret; } /* handle error for pointer-returning APIs, err is assumed to be < 0 always */ @@ -472,12 +607,7 @@ static inline void *libbpf_err_ptr(int err) { /* set errno on error, this doesn't break anything */ errno = -err; - - if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS) - return NULL; - - /* legacy: encode err as ptr */ - return ERR_PTR(err); + return NULL; } /* handle pointer-returning APIs' error handling */ @@ -487,11 +617,7 @@ static inline void *libbpf_ptr(void *ret) if (IS_ERR(ret)) errno = -PTR_ERR(ret); - if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS) - return IS_ERR(ret) ? NULL : ret; - - /* legacy: pass-through original pointer */ - return ret; + return IS_ERR(ret) ? NULL : ret; } static inline bool str_is_empty(const char *s) @@ -504,6 +630,27 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +static inline void bpf_insn_bswap(struct bpf_insn *insn) +{ + __u8 tmp_reg = insn->dst_reg; + + insn->dst_reg = insn->src_reg; + insn->src_reg = tmp_reg; + insn->off = bswap_16(insn->off); + insn->imm = bswap_32(insn->imm); +} + +/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. + * Original FD is not closed or altered in any other way. + * Preserves original FD value, if it's invalid (negative). + */ +static inline int dup_good_fd(int fd) +{ + if (fd < 0) + return fd; + return fcntl(fd, F_DUPFD_CLOEXEC, 3); +} + /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -515,9 +662,10 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + fd = dup_good_fd(fd); saved_errno = errno; close(old_fd); + errno = saved_errno; if (fd < 0) { pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno); errno = saved_errno; @@ -526,4 +674,87 @@ static inline int ensure_good_fd(int fd) return fd; } +static inline int sys_dup3(int oldfd, int newfd, int flags) +{ + return syscall(__NR_dup3, oldfd, newfd, flags); +} + +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static inline int sys_memfd_create(const char *name, unsigned flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + +/* Point *fixed_fd* to the same file that *tmp_fd* points to. + * Regardless of success, *tmp_fd* is closed. + * Whatever *fixed_fd* pointed to is closed silently. + */ +static inline int reuse_fd(int fixed_fd, int tmp_fd) +{ + int err; + + err = sys_dup3(tmp_fd, fixed_fd, O_CLOEXEC); + err = err < 0 ? -errno : 0; + close(tmp_fd); /* clean up temporary FD */ + return err; +} + +/* The following two functions are exposed to bpftool */ +int bpf_core_add_cands(struct bpf_core_cand *local_cand, + size_t local_essent_len, + const struct btf *targ_btf, + const char *targ_btf_name, + int targ_start_id, + struct bpf_core_cand_list *cands); +void bpf_core_free_cands(struct bpf_core_cand_list *cands); + +struct usdt_manager *usdt_manager_new(struct bpf_object *obj); +void usdt_manager_free(struct usdt_manager *man); +struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man, + const struct bpf_program *prog, + pid_t pid, const char *path, + const char *usdt_provider, const char *usdt_name, + __u64 usdt_cookie); + +static inline bool is_pow_of_2(size_t x) +{ + return x && (x & (x - 1)) == 0; +} + +static inline __u32 ror32(__u32 v, int bits) +{ + return (v >> bits) | (v << (32 - bits)); +} + +#define PROG_LOAD_ATTEMPTS 5 +int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts); + +bool glob_match(const char *str, const char *pat); + +long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name); +long elf_find_func_offset_from_file(const char *binary_path, const char *name); + +struct elf_fd { + Elf *elf; + int fd; +}; + +int elf_open(const char *binary_path, struct elf_fd *elf_fd); +void elf_close(struct elf_fd *elf_fd); + +int elf_resolve_syms_offsets(const char *binary_path, int cnt, + const char **syms, unsigned long **poffsets, + int st_type); +int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, + unsigned long **poffsets, size_t *pcnt); + +int probe_fd(int fd); + +#define SHA256_DIGEST_LENGTH 32 +#define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64) + +void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]); #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 79131f761a27..60b2600be88a 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -20,6 +20,11 @@ extern "C" { #endif +/* As of libbpf 1.0 libbpf_set_strict_mode() and enum libbpf_struct_mode have + * no effect. But they are left in libbpf_legacy.h so that applications that + * prepared for libbpf 1.0 before final release by using + * libbpf_set_strict_mode() still work with libbpf 1.0+ without any changes. + */ enum libbpf_strict_mode { /* Turn on all supported strict features of libbpf to simulate libbpf * v1.0 behavior. @@ -54,6 +59,10 @@ enum libbpf_strict_mode { * * Note, in this mode the program pin path will be based on the * function name instead of section name. + * + * Additionally, routines in the .text section are always considered + * sub-programs. Legacy behavior allows for a single routine in .text + * to be a program. */ LIBBPF_STRICT_SEC_NAME = 0x04, /* @@ -67,20 +76,63 @@ enum libbpf_strict_mode { * first BPF program or map creation operation. This is done only if * kernel is too old to support memcg-based memory accounting for BPF * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY, - * but it can be overriden with libbpf_set_memlock_rlim_max() API. - * Note that libbpf_set_memlock_rlim_max() needs to be called before + * but it can be overridden with libbpf_set_memlock_rlim() API. + * Note that libbpf_set_memlock_rlim() needs to be called before * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load() * operation. */ LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10, + /* + * Error out on any SEC("maps") map definition, which are deprecated + * in favor of BTF-defined map definitions in SEC(".maps"). + */ + LIBBPF_STRICT_MAP_DEFINITIONS = 0x20, __LIBBPF_STRICT_LAST, }; LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); +/** + * @brief **libbpf_get_error()** extracts the error code from the passed + * pointer + * @param ptr pointer returned from libbpf API function + * @return error code; or 0 if no error occurred + * + * Note, as of libbpf 1.0 this function is not necessary and not recommended + * to be used. Libbpf doesn't return error code embedded into the pointer + * itself. Instead, NULL is returned on error and error code is passed through + * thread-local errno variable. **libbpf_get_error()** is just returning -errno + * value if it receives NULL, which is correct only if errno hasn't been + * modified between libbpf API call and corresponding **libbpf_get_error()** + * call. Prefer to check return for NULL and use errno directly. + * + * This API is left in libbpf 1.0 to allow applications that were 1.0-ready + * before final libbpf 1.0 without needing to change them. + */ +LIBBPF_API long libbpf_get_error(const void *ptr); + #define DECLARE_LIBBPF_OPTS LIBBPF_OPTS +/* "Discouraged" APIs which don't follow consistent libbpf naming patterns. + * They are normally a trivial aliases or wrappers for proper APIs and are + * left to minimize unnecessary disruption for users of libbpf. But they + * shouldn't be used going forward. + */ + +struct bpf_program; +struct bpf_map; +struct btf; +struct btf_ext; + +LIBBPF_API struct btf *libbpf_find_kernel_btf(void); + +LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); +LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); +LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); +LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 97b06cede56f..bccf4bb747e1 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -12,52 +12,102 @@ #include <linux/btf.h> #include <linux/filter.h> #include <linux/kernel.h> +#include <linux/version.h> #include "bpf.h" #include "libbpf.h" #include "libbpf_internal.h" -static bool grep(const char *buffer, const char *pattern) +/* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, + * but Ubuntu provides /proc/version_signature file, as described at + * https://ubuntu.com/kernel, with an example contents below, which we + * can use to get a proper LINUX_VERSION_CODE. + * + * Ubuntu 5.4.0-12.15-generic 5.4.8 + * + * In the above, 5.4.8 is what kernel is actually expecting, while + * uname() call will return 5.4.0 in info.release. + */ +static __u32 get_ubuntu_kernel_version(void) +{ + const char *ubuntu_kver_file = "/proc/version_signature"; + __u32 major, minor, patch; + int ret; + FILE *f; + + if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0) + return 0; + + f = fopen(ubuntu_kver_file, "re"); + if (!f) + return 0; + + ret = fscanf(f, "%*s %*s %u.%u.%u\n", &major, &minor, &patch); + fclose(f); + if (ret != 3) + return 0; + + return KERNEL_VERSION(major, minor, patch); +} + +/* On Debian LINUX_VERSION_CODE doesn't correspond to info.release. + * Instead, it is provided in info.version. An example content of + * Debian 10 looks like the below. + * + * utsname::release 4.19.0-22-amd64 + * utsname::version #1 SMP Debian 4.19.260-1 (2022-09-29) + * + * In the above, 4.19.260 is what kernel is actually expecting, while + * uname() call will return 4.19.0 in info.release. + */ +static __u32 get_debian_kernel_version(struct utsname *info) { - return !!strstr(buffer, pattern); + __u32 major, minor, patch; + char *p; + + p = strstr(info->version, "Debian "); + if (!p) { + /* This is not a Debian kernel. */ + return 0; + } + + if (sscanf(p, "Debian %u.%u.%u", &major, &minor, &patch) != 3) + return 0; + + return KERNEL_VERSION(major, minor, patch); } -static int get_vendor_id(int ifindex) +__u32 get_kernel_version(void) { - char ifname[IF_NAMESIZE], path[64], buf[8]; - ssize_t len; - int fd; + __u32 major, minor, patch, version; + struct utsname info; - if (!if_indextoname(ifindex, ifname)) - return -1; + /* Check if this is an Ubuntu kernel. */ + version = get_ubuntu_kernel_version(); + if (version != 0) + return version; - snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); + uname(&info); - fd = open(path, O_RDONLY | O_CLOEXEC); - if (fd < 0) - return -1; + /* Check if this is a Debian kernel. */ + version = get_debian_kernel_version(&info); + if (version != 0) + return version; - len = read(fd, buf, sizeof(buf)); - close(fd); - if (len < 0) - return -1; - if (len >= (ssize_t)sizeof(buf)) - return -1; - buf[len] = '\0'; + if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) + return 0; - return strtol(buf, NULL, 0); + return KERNEL_VERSION(major, minor, patch); } static int probe_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insns_cnt, - char *log_buf, size_t log_buf_sz, - __u32 ifindex) + char *log_buf, size_t log_buf_sz) { LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_buf = log_buf, .log_size = log_buf_sz, .log_level = log_buf ? 1 : 0, - .prog_ifindex = ifindex, ); int fd, err, exp_err = 0; const char *exp_msg = NULL; @@ -131,6 +181,9 @@ static int probe_prog_load(enum bpf_prog_type prog_type, case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: break; + case BPF_PROG_TYPE_NETFILTER: + opts.expected_attach_type = BPF_NETFILTER; + break; default: return -EOPNOTSUPP; } @@ -161,33 +214,13 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) if (opts) return libbpf_err(-EINVAL); - ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0); + ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0); return libbpf_err(ret); } -bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) -{ - struct bpf_insn insns[2] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN() - }; - - /* prefer libbpf_probe_bpf_prog_type() unless offload is requested */ - if (ifindex == 0) - return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1; - - if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) - /* nfp returns -EINVAL on exit(0) with TC offload */ - insns[0].imm = 2; - - errno = 0; - probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); - - return errno != EINVAL && errno != EOPNOTSUPP; -} - int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len) + const char *str_sec, size_t str_len, + int token_fd) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -197,6 +230,10 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; + LIBBPF_OPTS(bpf_btf_load_opts, opts, + .token_fd = token_fd, + .btf_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int btf_fd, btf_len; __u8 *raw_btf; @@ -209,7 +246,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); free(raw_btf); return btf_fd; @@ -239,17 +276,15 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); + strs, sizeof(strs), 0); } -static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) +static int probe_map_create(enum bpf_map_type map_type) { LIBBPF_OPTS(bpf_map_create_opts, opts); int key_size, value_size, max_entries; __u32 btf_key_type_id = 0, btf_value_type_id = 0; - int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; - - opts.map_ifindex = ifindex; + int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err = 0; key_size = sizeof(__u32); value_size = sizeof(__u32); @@ -277,6 +312,7 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_SK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: btf_key_type_id = 1; btf_value_type_id = 3; value_size = 8; @@ -287,19 +323,28 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) return btf_fd; break; case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_USER_RINGBUF: key_size = 0; value_size = 0; - max_entries = 4096; + max_entries = sysconf(_SC_PAGE_SIZE); break; case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ opts.btf_vmlinux_value_type_id = 1; + opts.value_type_btf_obj_fd = -1; exp_err = -524; /* -ENOTSUPP */ break; case BPF_MAP_TYPE_BLOOM_FILTER: key_size = 0; max_entries = 1; break; + case BPF_MAP_TYPE_ARENA: + key_size = 0; + value_size = 0; + max_entries = 1; /* one page */ + opts.map_extra = 0; /* can mmap() at any address */ + opts.map_flags = BPF_F_MMAPABLE; + break; case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PROG_ARRAY: @@ -319,6 +364,10 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: break; + case BPF_MAP_TYPE_INSN_ARRAY: + key_size = sizeof(__u32); + value_size = sizeof(struct bpf_insn_array_value); + break; case BPF_MAP_TYPE_UNSPEC: default: return -EOPNOTSUPP; @@ -326,12 +375,6 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { - /* TODO: probe for device, once libbpf has a function to create - * map-in-map for offload - */ - if (ifindex) - goto cleanup; - fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(__u32), sizeof(__u32), 1, NULL); if (fd_inner < 0) @@ -370,15 +413,10 @@ int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts) if (opts) return libbpf_err(-EINVAL); - ret = probe_map_create(map_type, 0); + ret = probe_map_create(map_type); return libbpf_err(ret); } -bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) -{ - return probe_map_create(map_type, ifindex) == 1; -} - int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, const void *opts) { @@ -407,14 +445,15 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe } buf[0] = '\0'; - ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0); + ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf)); if (ret < 0) return libbpf_err(ret); /* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id) * at all, it will emit something like "invalid func unknown#181". * If BPF verifier recognizes BPF helper but it's not supported for - * given BPF program type, it will emit "unknown func bpf_sys_bpf#166". + * given BPF program type, it will emit "unknown func bpf_sys_bpf#166" + * or "program of this type cannot use helper bpf_sys_bpf#166". * In both cases, provided combination of BPF program type and BPF * helper is not supported by the kernel. * In all other cases, probe_prog_load() above will either succeed (e.g., @@ -423,55 +462,8 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe * that), or we'll get some more specific BPF verifier error about * some unsatisfied conditions. */ - if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func "))) + if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func ") || + strstr(buf, "program of this type cannot use helper "))) return 0; return 1; /* assume supported */ } - -bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, - __u32 ifindex) -{ - struct bpf_insn insns[2] = { - BPF_EMIT_CALL(id), - BPF_EXIT_INSN() - }; - char buf[4096] = {}; - bool res; - - probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex); - res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); - - if (ifindex) { - switch (get_vendor_id(ifindex)) { - case 0x19ee: /* Netronome specific */ - res = res && !grep(buf, "not supported by FW") && - !grep(buf, "unsupported function id"); - break; - default: - break; - } - } - - return res; -} - -/* - * Probe for availability of kernel commit (5.3): - * - * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") - */ -bool bpf_probe_large_insn_limit(__u32 ifindex) -{ - struct bpf_insn insns[BPF_MAXINSNS + 1]; - int i; - - for (i = 0; i < BPF_MAXINSNS; i++) - insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); - insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); - - errno = 0; - probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, - ifindex); - - return errno != E2BIG && errno != EINVAL; -} diff --git a/tools/lib/bpf/libbpf_utils.c b/tools/lib/bpf/libbpf_utils.c new file mode 100644 index 000000000000..ac3beae54cf6 --- /dev/null +++ b/tools/lib/bpf/libbpf_utils.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * Copyright (C) 2017 Nicira, Inc. + */ + +#undef _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <inttypes.h> +#include <linux/kernel.h> + +#include "libbpf.h" +#include "libbpf_internal.h" + +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) +#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) +#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) + +static const char *libbpf_strerror_table[NR_ERRNO] = { + [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", + [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", + [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", + [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", + [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", + [ERRCODE_OFFSET(RELOC)] = "Relocation failed", + [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", + [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", + [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", + [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", + [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", + [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", + [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", +}; + +int libbpf_strerror(int err, char *buf, size_t size) +{ + int ret; + + if (!buf || !size) + return libbpf_err(-EINVAL); + + err = err > 0 ? err : -err; + + if (err < __LIBBPF_ERRNO__START) { + ret = strerror_r(err, buf, size); + buf[size - 1] = '\0'; + return libbpf_err_errno(ret); + } + + if (err < __LIBBPF_ERRNO__END) { + const char *msg; + + msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; + ret = snprintf(buf, size, "%s", msg); + buf[size - 1] = '\0'; + /* The length of the buf and msg is positive. + * A negative number may be returned only when the + * size exceeds INT_MAX. Not likely to appear. + */ + if (ret >= size) + return libbpf_err(-ERANGE); + return 0; + } + + ret = snprintf(buf, size, "Unknown libbpf error %d", err); + buf[size - 1] = '\0'; + if (ret >= size) + return libbpf_err(-ERANGE); + return libbpf_err(-ENOENT); +} + +const char *libbpf_errstr(int err) +{ + static __thread char buf[12]; + + if (err > 0) + err = -err; + + switch (err) { + case -E2BIG: return "-E2BIG"; + case -EACCES: return "-EACCES"; + case -EADDRINUSE: return "-EADDRINUSE"; + case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL"; + case -EAGAIN: return "-EAGAIN"; + case -EALREADY: return "-EALREADY"; + case -EBADF: return "-EBADF"; + case -EBADFD: return "-EBADFD"; + case -EBUSY: return "-EBUSY"; + case -ECANCELED: return "-ECANCELED"; + case -ECHILD: return "-ECHILD"; + case -EDEADLK: return "-EDEADLK"; + case -EDOM: return "-EDOM"; + case -EEXIST: return "-EEXIST"; + case -EFAULT: return "-EFAULT"; + case -EFBIG: return "-EFBIG"; + case -EILSEQ: return "-EILSEQ"; + case -EINPROGRESS: return "-EINPROGRESS"; + case -EINTR: return "-EINTR"; + case -EINVAL: return "-EINVAL"; + case -EIO: return "-EIO"; + case -EISDIR: return "-EISDIR"; + case -ELOOP: return "-ELOOP"; + case -EMFILE: return "-EMFILE"; + case -EMLINK: return "-EMLINK"; + case -EMSGSIZE: return "-EMSGSIZE"; + case -ENAMETOOLONG: return "-ENAMETOOLONG"; + case -ENFILE: return "-ENFILE"; + case -ENODATA: return "-ENODATA"; + case -ENODEV: return "-ENODEV"; + case -ENOENT: return "-ENOENT"; + case -ENOEXEC: return "-ENOEXEC"; + case -ENOLINK: return "-ENOLINK"; + case -ENOMEM: return "-ENOMEM"; + case -ENOSPC: return "-ENOSPC"; + case -ENOTBLK: return "-ENOTBLK"; + case -ENOTDIR: return "-ENOTDIR"; + case -ENOTSUPP: return "-ENOTSUPP"; + case -ENOTTY: return "-ENOTTY"; + case -ENXIO: return "-ENXIO"; + case -EOPNOTSUPP: return "-EOPNOTSUPP"; + case -EOVERFLOW: return "-EOVERFLOW"; + case -EPERM: return "-EPERM"; + case -EPIPE: return "-EPIPE"; + case -EPROTO: return "-EPROTO"; + case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT"; + case -ERANGE: return "-ERANGE"; + case -EROFS: return "-EROFS"; + case -ESPIPE: return "-ESPIPE"; + case -ESRCH: return "-ESRCH"; + case -ETXTBSY: return "-ETXTBSY"; + case -EUCLEAN: return "-EUCLEAN"; + case -EXDEV: return "-EXDEV"; + default: + snprintf(buf, sizeof(buf), "%d", err); + return buf; + } +} + +static inline __u32 get_unaligned_be32(const void *p) +{ + __be32 val; + + memcpy(&val, p, sizeof(val)); + return be32_to_cpu(val); +} + +static inline void put_unaligned_be32(__u32 val, void *p) +{ + __be32 be_val = cpu_to_be32(val); + + memcpy(p, &be_val, sizeof(be_val)); +} + +#define SHA256_BLOCK_LENGTH 64 +#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z))) +#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define Sigma_0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22)) +#define Sigma_1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25)) +#define sigma_0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3)) +#define sigma_1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10)) + +static const __u32 sha256_K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, + 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + +#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) \ + { \ + __u32 tmp = h + Sigma_1(e) + Ch(e, f, g) + sha256_K[i] + w[i]; \ + d += tmp; \ + h = tmp + Sigma_0(a) + Maj(a, b, c); \ + } + +static void sha256_blocks(__u32 state[8], const __u8 *data, size_t nblocks) +{ + while (nblocks--) { + __u32 a = state[0]; + __u32 b = state[1]; + __u32 c = state[2]; + __u32 d = state[3]; + __u32 e = state[4]; + __u32 f = state[5]; + __u32 g = state[6]; + __u32 h = state[7]; + __u32 w[64]; + int i; + + for (i = 0; i < 16; i++) + w[i] = get_unaligned_be32(&data[4 * i]); + for (; i < ARRAY_SIZE(w); i++) + w[i] = sigma_1(w[i - 2]) + w[i - 7] + + sigma_0(w[i - 15]) + w[i - 16]; + for (i = 0; i < ARRAY_SIZE(w); i += 8) { + SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); + SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); + SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); + SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); + SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); + SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); + SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); + SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); + } + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; + data += SHA256_BLOCK_LENGTH; + } +} + +void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]) +{ + __u32 state[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; + const __be64 bitcount = cpu_to_be64((__u64)len * 8); + __u8 final_data[2 * SHA256_BLOCK_LENGTH] = { 0 }; + size_t final_len = len % SHA256_BLOCK_LENGTH; + int i; + + sha256_blocks(state, data, len / SHA256_BLOCK_LENGTH); + + memcpy(final_data, data + len - final_len, final_len); + final_data[final_len] = 0x80; + final_len = roundup(final_len + 9, SHA256_BLOCK_LENGTH); + memcpy(&final_data[final_len - 8], &bitcount, 8); + + sha256_blocks(state, final_data, final_len / SHA256_BLOCK_LENGTH); + + for (i = 0; i < ARRAY_SIZE(state); i++) + put_unaligned_be32(state[i], &out[4 * i]); +} diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 0fefefc3500b..99331e317dee 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -3,7 +3,7 @@ #ifndef __LIBBPF_VERSION_H #define __LIBBPF_VERSION_H -#define LIBBPF_MAJOR_VERSION 0 +#define LIBBPF_MAJOR_VERSION 1 #define LIBBPF_MINOR_VERSION 7 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 9aa016fb55aa..f4403e3cf994 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -4,6 +4,10 @@ * * Copyright (c) 2021 Facebook */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + #include <stdbool.h> #include <stddef.h> #include <stdio.h> @@ -16,6 +20,7 @@ #include <elf.h> #include <libelf.h> #include <fcntl.h> +#include <sys/mman.h> #include "libbpf.h" #include "btf.h" #include "libbpf_internal.h" @@ -135,6 +140,7 @@ struct bpf_linker { int fd; Elf *elf; Elf64_Ehdr *elf_hdr; + bool swapped_endian; /* Output sections metadata */ struct dst_sec *secs; @@ -150,15 +156,19 @@ struct bpf_linker { /* global (including extern) ELF symbols */ int glob_sym_cnt; struct glob_sym *glob_syms; + + bool fd_is_owned; }; #define pr_warn_elf(fmt, ...) \ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)) -static int init_output_elf(struct bpf_linker *linker, const char *file); +static int init_output_elf(struct bpf_linker *linker); + +static int bpf_linker_add_file(struct bpf_linker *linker, int fd, + const char *filename); -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts, +static int linker_load_obj_file(struct bpf_linker *linker, struct src_obj *obj); static int linker_sanity_check_elf(struct src_obj *obj); static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec); @@ -189,7 +199,7 @@ void bpf_linker__free(struct bpf_linker *linker) if (linker->elf) elf_end(linker->elf); - if (linker->fd >= 0) + if (linker->fd >= 0 && linker->fd_is_owned) close(linker->fd); strset__free(linker->strtab_strs); @@ -231,9 +241,63 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts if (!linker) return errno = ENOMEM, NULL; - linker->fd = -1; + linker->filename = strdup(filename); + if (!linker->filename) { + err = -ENOMEM; + goto err_out; + } - err = init_output_elf(linker, filename); + linker->fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); + if (linker->fd < 0) { + err = -errno; + pr_warn("failed to create '%s': %d\n", filename, err); + goto err_out; + } + linker->fd_is_owned = true; + + err = init_output_elf(linker); + if (err) + goto err_out; + + return linker; + +err_out: + bpf_linker__free(linker); + return errno = -err, NULL; +} + +struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts) +{ + struct bpf_linker *linker; + char filename[32]; + int err; + + if (fd < 0) + return errno = EINVAL, NULL; + + if (!OPTS_VALID(opts, bpf_linker_opts)) + return errno = EINVAL, NULL; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn_elf("libelf initialization failed"); + return errno = EINVAL, NULL; + } + + linker = calloc(1, sizeof(*linker)); + if (!linker) + return errno = ENOMEM, NULL; + + snprintf(filename, sizeof(filename), "fd:%d", fd); + linker->filename = strdup(filename); + if (!linker->filename) { + err = -ENOMEM; + goto err_out; + } + + linker->fd = fd; + linker->fd_is_owned = false; + + err = init_output_elf(linker); if (err) goto err_out; @@ -292,23 +356,12 @@ static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx) return sym; } -static int init_output_elf(struct bpf_linker *linker, const char *file) +static int init_output_elf(struct bpf_linker *linker) { int err, str_off; Elf64_Sym *init_sym; struct dst_sec *sec; - linker->filename = strdup(file); - if (!linker->filename) - return -ENOMEM; - - linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); - if (linker->fd < 0) { - err = -errno; - pr_warn("failed to create '%s': %d\n", file, err); - return err; - } - linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL); if (!linker->elf) { pr_warn_elf("failed to create ELF object"); @@ -324,13 +377,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->elf_hdr->e_machine = EM_BPF; linker->elf_hdr->e_type = ET_REL; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif + /* Set unknown ELF endianness, assign later from input files */ + linker->elf_hdr->e_ident[EI_DATA] = ELFDATANONE; /* STRTAB */ /* initialize strset with an empty string to conform to ELF */ @@ -396,6 +444,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) pr_warn_elf("failed to create SYMTAB data"); return -EINVAL; } + /* Ensure libelf translates byte-order of symbol records */ + sec->data->d_type = ELF_T_SYM; str_off = strset__add_str(linker->strtab_strs, sec->sec_name); if (str_off < 0) @@ -437,19 +487,16 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) return 0; } -int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts) +static int bpf_linker_add_file(struct bpf_linker *linker, int fd, + const char *filename) { struct src_obj obj = {}; int err = 0; - if (!OPTS_VALID(opts, bpf_linker_file_opts)) - return libbpf_err(-EINVAL); - - if (!linker->elf) - return libbpf_err(-EINVAL); + obj.filename = filename; + obj.fd = fd; - err = err ?: linker_load_obj_file(linker, filename, opts, &obj); + err = err ?: linker_load_obj_file(linker, &obj); err = err ?: linker_append_sec_data(linker, &obj); err = err ?: linker_append_elf_syms(linker, &obj); err = err ?: linker_append_elf_relos(linker, &obj); @@ -464,12 +511,91 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, free(obj.sym_map); if (obj.elf) elf_end(obj.elf); - if (obj.fd >= 0) - close(obj.fd); + return err; +} + +int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts) +{ + int fd, err; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + fd = open(filename, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = -errno; + pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); + return libbpf_err(err); + } + + err = bpf_linker_add_file(linker, fd, filename); + close(fd); return libbpf_err(err); } +int bpf_linker__add_fd(struct bpf_linker *linker, int fd, + const struct bpf_linker_file_opts *opts) +{ + char filename[32]; + int err; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + if (fd < 0) + return libbpf_err(-EINVAL); + + snprintf(filename, sizeof(filename), "fd:%d", fd); + err = bpf_linker_add_file(linker, fd, filename); + return libbpf_err(err); +} + +int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, + const struct bpf_linker_file_opts *opts) +{ + char filename[32]; + int fd, written, ret; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz); + + fd = sys_memfd_create(filename, 0); + if (fd < 0) { + ret = -errno; + pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret)); + return libbpf_err(ret); + } + + written = 0; + while (written < buf_sz) { + ret = write(fd, buf, buf_sz); + if (ret < 0) { + ret = -errno; + pr_warn("failed to write '%s': %s\n", filename, errstr(ret)); + goto err_out; + } + written += ret; + } + + ret = bpf_linker_add_file(linker, fd, filename); +err_out: + close(fd); + return libbpf_err(ret); +} + static bool is_dwarf_sec_name(const char *name) { /* approximation, but the actual list is too long */ @@ -535,65 +661,69 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name) return sec; } -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts, +static int linker_load_obj_file(struct bpf_linker *linker, struct src_obj *obj) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const int host_endianness = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - const int host_endianness = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif int err = 0; Elf_Scn *scn; Elf_Data *data; Elf64_Ehdr *ehdr; Elf64_Shdr *shdr; struct src_sec *sec; + unsigned char obj_byteorder; + unsigned char link_byteorder = linker->elf_hdr->e_ident[EI_DATA]; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2MSB; +#else +#error "Unknown __BYTE_ORDER__" +#endif - pr_debug("linker: adding object file '%s'...\n", filename); - - obj->filename = filename; + pr_debug("linker: adding object file '%s'...\n", obj->filename); - obj->fd = open(filename, O_RDONLY | O_CLOEXEC); - if (obj->fd < 0) { - err = -errno; - pr_warn("failed to open file '%s': %d\n", filename, err); - return err; - } obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL); if (!obj->elf) { - err = -errno; - pr_warn_elf("failed to parse ELF file '%s'", filename); - return err; + pr_warn_elf("failed to parse ELF file '%s'", obj->filename); + return -EINVAL; } /* Sanity check ELF file high-level properties */ ehdr = elf64_getehdr(obj->elf); if (!ehdr) { - err = -errno; - pr_warn_elf("failed to get ELF header for %s", filename); + pr_warn_elf("failed to get ELF header for %s", obj->filename); + return -EINVAL; + } + + /* Linker output endianness set by first input object */ + obj_byteorder = ehdr->e_ident[EI_DATA]; + if (obj_byteorder != ELFDATA2LSB && obj_byteorder != ELFDATA2MSB) { + err = -EOPNOTSUPP; + pr_warn("unknown byte order of ELF file %s\n", obj->filename); return err; } - if (ehdr->e_ident[EI_DATA] != host_endianness) { + if (link_byteorder == ELFDATANONE) { + linker->elf_hdr->e_ident[EI_DATA] = obj_byteorder; + linker->swapped_endian = obj_byteorder != host_byteorder; + pr_debug("linker: set %s-endian output byte order\n", + obj_byteorder == ELFDATA2MSB ? "big" : "little"); + } else if (link_byteorder != obj_byteorder) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported byte order of ELF file %s", filename); + pr_warn("byte order mismatch with ELF file %s\n", obj->filename); return err; } + if (ehdr->e_type != ET_REL || ehdr->e_machine != EM_BPF || ehdr->e_ident[EI_CLASS] != ELFCLASS64) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported kind of ELF file %s", filename); + pr_warn_elf("unsupported kind of ELF file %s", obj->filename); return err; } if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) { - err = -errno; - pr_warn_elf("failed to get SHSTRTAB section index for %s", filename); - return err; + pr_warn_elf("failed to get SHSTRTAB section index for %s", obj->filename); + return -EINVAL; } scn = NULL; @@ -603,26 +733,23 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, shdr = elf64_getshdr(scn); if (!shdr) { - err = -errno; pr_warn_elf("failed to get section #%zu header for %s", - sec_idx, filename); - return err; + sec_idx, obj->filename); + return -EINVAL; } sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name); if (!sec_name) { - err = -errno; pr_warn_elf("failed to get section #%zu name for %s", - sec_idx, filename); - return err; + sec_idx, obj->filename); + return -EINVAL; } data = elf_getdata(scn, 0); if (!data) { - err = -errno; pr_warn_elf("failed to get section #%zu (%s) data from %s", - sec_idx, sec_name, filename); - return err; + sec_idx, sec_name, obj->filename); + return -EINVAL; } sec = add_src_sec(obj, sec_name); @@ -656,7 +783,8 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->btf = btf__new(data->d_buf, shdr->sh_size); err = libbpf_get_error(obj->btf); if (err) { - pr_warn("failed to parse .BTF from %s: %d\n", filename, err); + pr_warn("failed to parse .BTF from %s: %s\n", + obj->filename, errstr(err)); return err; } sec->skipped = true; @@ -666,7 +794,8 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->btf_ext = btf_ext__new(data->d_buf, shdr->sh_size); err = libbpf_get_error(obj->btf_ext); if (err) { - pr_warn("failed to parse .BTF.ext from '%s': %d\n", filename, err); + pr_warn("failed to parse .BTF.ext from '%s': %s\n", + obj->filename, errstr(err)); return err; } sec->skipped = true; @@ -683,7 +812,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, break; default: pr_warn("unrecognized section #%zu (%s) in %s\n", - sec_idx, sec_name, filename); + sec_idx, sec_name, obj->filename); err = -EINVAL; return err; } @@ -697,11 +826,6 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, return err; } -static bool is_pow_of_2(size_t x) -{ - return x && (x & (x - 1)) == 0; -} - static int linker_sanity_check_elf(struct src_obj *obj) { struct src_sec *sec; @@ -724,13 +848,28 @@ static int linker_sanity_check_elf(struct src_obj *obj) return -EINVAL; } - if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) + if (is_dwarf_sec_name(sec->sec_name)) + continue; + + if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) { + pr_warn("ELF section #%zu alignment %llu is non pow-of-2 alignment in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, + obj->filename); return -EINVAL; - if (sec->shdr->sh_addralign != sec->data->d_align) + } + if (sec->shdr->sh_addralign != sec->data->d_align) { + pr_warn("ELF section #%zu has inconsistent alignment addr=%llu != d=%llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, + (long long unsigned)sec->data->d_align, obj->filename); return -EINVAL; + } - if (sec->shdr->sh_size != sec->data->d_size) + if (sec->shdr->sh_size != sec->data->d_size) { + pr_warn("ELF section #%zu has inconsistent section size sh=%llu != d=%llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_size, + (long long unsigned)sec->data->d_size, obj->filename); return -EINVAL; + } switch (sec->shdr->sh_type) { case SHT_SYMTAB: @@ -742,8 +881,12 @@ static int linker_sanity_check_elf(struct src_obj *obj) break; case SHT_PROGBITS: if (sec->shdr->sh_flags & SHF_EXECINSTR) { - if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) + if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) { + pr_warn("ELF section #%zu has unexpected size alignment %llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_size, + obj->filename); return -EINVAL; + } } break; case SHT_NOBITS: @@ -943,19 +1086,33 @@ static int check_btf_str_off(__u32 *str_off, void *ctx) static int linker_sanity_check_btf(struct src_obj *obj) { struct btf_type *t; - int i, n, err = 0; + int i, n, err; if (!obj->btf) return 0; n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + t = btf_type_by_id(obj->btf, i); - err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf); - err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (err) return err; + while ((type_id = btf_field_iter_next(&it))) { + if (*type_id >= n) + return -EINVAL; + } + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); + if (err) + return err; + while ((str_off = btf_field_iter_next(&it))) { + if (!btf__str_by_offset(obj->btf, *str_off)) + return -EINVAL; + } } return 0; @@ -1081,6 +1238,24 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec return true; } +static bool is_exec_sec(struct dst_sec *sec) +{ + if (!sec || sec->ephemeral) + return false; + return (sec->shdr->sh_type == SHT_PROGBITS) && + (sec->shdr->sh_flags & SHF_EXECINSTR); +} + +static void exec_sec_bswap(void *raw_data, int size) +{ + const int insn_cnt = size / sizeof(struct bpf_insn); + struct bpf_insn *insn = raw_data; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); +} + static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src) { void *tmp; @@ -1120,7 +1295,19 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src if (src->shdr->sh_type != SHT_NOBITS) { tmp = realloc(dst->raw_data, dst_final_sz); - if (!tmp) + /* If dst_align_sz == 0, realloc() behaves in a special way: + * 1. When dst->raw_data is NULL it returns: + * "either NULL or a pointer suitable to be passed to free()" [1]. + * 2. When dst->raw_data is not-NULL it frees dst->raw_data and returns NULL, + * thus invalidating any "pointer suitable to be passed to free()" obtained + * at step (1). + * + * The dst_align_sz > 0 check avoids error exit after (2), otherwise + * dst->raw_data would be freed again in bpf_linker__free(). + * + * [1] man 3 realloc + */ + if (!tmp && dst_align_sz > 0) return -ENOMEM; dst->raw_data = tmp; @@ -1128,6 +1315,10 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz); /* now copy src data at a properly aligned offset */ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size); + + /* convert added bpf insns to native byte-order */ + if (linker->swapped_endian && is_exec_sec(dst)) + exec_sec_bswap(dst->raw_data + dst_align_sz, src->shdr->sh_size); } dst->sec_sz = dst_final_sz; @@ -1184,7 +1375,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj } else { if (!secs_match(dst_sec, src_sec)) { pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* "license" and "version" sections are deduped */ @@ -1340,6 +1531,7 @@ recur: case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: case BTF_KIND_FWD: case BTF_KIND_FUNC: case BTF_KIND_VAR: @@ -1362,6 +1554,7 @@ recur: case BTF_KIND_INT: case BTF_KIND_FLOAT: case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: /* ignore encoding for int and enum values for enum */ if (t1->size != t2->size) { pr_warn("global '%s': incompatible %s '%s' size %u and %u\n", @@ -1371,7 +1564,7 @@ recur: return true; case BTF_KIND_PTR: /* just validate overall shape of the referenced type, so no - * contents comparison for struct/union, and allowd fwd vs + * contents comparison for struct/union, and allowed fwd vs * struct/union */ exact = false; @@ -1832,6 +2025,9 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx; return 0; } + + if (strcmp(src_sec->sec_name, JUMPTABLES_SEC) == 0) + goto add_sym; } if (sym_bind == STB_LOCAL) @@ -1920,7 +2116,7 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, /* If existing symbol is a strong resolved symbol, bail out, * because we lost resolution battle have nothing to - * contribute. We already checked abover that there is no + * contribute. We already checked above that there is no * strong-strong conflict. We also already tightened binding * and visibility, so nothing else to contribute at that point. */ @@ -1969,7 +2165,7 @@ add_sym: obj->sym_map[src_sym_idx] = dst_sym_idx; - if (sym_type == STT_SECTION && dst_sym) { + if (sym_type == STT_SECTION && dst_sec) { dst_sec->sec_sym_idx = dst_sym_idx; dst_sym->st_value = 0; } @@ -2000,7 +2196,6 @@ add_sym: static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj) { struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx]; - struct dst_sec *dst_symtab; int i, err; for (i = 1; i < obj->sec_cnt; i++) { @@ -2030,12 +2225,9 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob } } else if (!secs_match(dst_sec, src_sec)) { pr_warn("sections %s are not compatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } - /* add_dst_sec() above could have invalidated linker->secs */ - dst_symtab = &linker->secs[linker->symtab_sec_idx]; - /* shdr->sh_link points to SYMTAB */ dst_sec->shdr->sh_link = linker->symtab_sec_idx; @@ -2052,16 +2244,13 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob dst_rel = dst_sec->raw_data + src_sec->dst_off; n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize; for (j = 0; j < n; j++, src_rel++, dst_rel++) { - size_t src_sym_idx = ELF64_R_SYM(src_rel->r_info); - size_t sym_type = ELF64_R_TYPE(src_rel->r_info); - Elf64_Sym *src_sym, *dst_sym; - size_t dst_sym_idx; + size_t src_sym_idx, dst_sym_idx, sym_type; + Elf64_Sym *src_sym; src_sym_idx = ELF64_R_SYM(src_rel->r_info); src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx; dst_sym_idx = obj->sym_map[src_sym_idx]; - dst_sym = dst_symtab->raw_data + sizeof(*dst_sym) * dst_sym_idx; dst_rel->r_offset += src_linked_sec->dst_off; sym_type = ELF64_R_TYPE(src_rel->r_info); dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type); @@ -2192,10 +2381,17 @@ static int linker_fixup_btf(struct src_obj *obj) vi = btf_var_secinfos(t); for (j = 0, m = btf_vlen(t); j < m; j++, vi++) { const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type); - const char *var_name = btf__str_by_offset(obj->btf, vt->name_off); - int var_linkage = btf_var(vt)->linkage; + const char *var_name; + int var_linkage; Elf64_Sym *sym; + /* could be a variable or function */ + if (!btf_is_var(vt)) + continue; + + var_name = btf__str_by_offset(obj->btf, vt->name_off); + var_linkage = btf_var(vt)->linkage; + /* no need to patch up static or extern vars */ if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED) continue; @@ -2213,26 +2409,10 @@ static int linker_fixup_btf(struct src_obj *obj) return 0; } -static int remap_type_id(__u32 *type_id, void *ctx) -{ - int *id_map = ctx; - int new_id = id_map[*type_id]; - - /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */ - if (new_id == 0 && *type_id != 0) { - pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id); - return -EINVAL; - } - - *type_id = id_map[*type_id]; - - return 0; -} - static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) { const struct btf_type *t; - int i, j, n, start_id, id; + int i, j, n, start_id, id, err; const char *name; if (!obj->btf) @@ -2303,9 +2483,25 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) n = btf__type_cnt(linker->btf); for (i = start_id; i < n; i++) { struct btf_type *dst_t = btf_type_by_id(linker->btf, i); + struct btf_field_iter it; + __u32 *type_id; - if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map)) - return -EINVAL; + err = btf_field_iter_init(&it, dst_t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((type_id = btf_field_iter_next(&it))) { + int new_id = obj->btf_type_map[*type_id]; + + /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */ + if (new_id == 0 && *type_id != 0) { + pr_warn("failed to find new ID mapping for original BTF type ID %u\n", + *type_id); + return -EINVAL; + } + + *type_id = obj->btf_type_map[*type_id]; + } } /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's @@ -2373,6 +2569,10 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) if (glob_sym && glob_sym->var_idx >= 0) { __s64 sz; + /* FUNCs don't have size, nothing to update */ + if (btf_is_func(t)) + continue; + dst_var = &dst_sec->sec_vars[glob_sym->var_idx]; /* Because underlying BTF type might have * changed, so might its size have changed, so @@ -2586,27 +2786,32 @@ int bpf_linker__finalize(struct bpf_linker *linker) if (!sec->scn) continue; + /* restore sections with bpf insns to target byte-order */ + if (linker->swapped_endian && is_exec_sec(sec)) + exec_sec_bswap(sec->raw_data, sec->sec_sz); + sec->data->d_buf = sec->raw_data; } /* Finalize ELF layout */ if (elf_update(linker->elf, ELF_C_NULL) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to finalize ELF layout"); return libbpf_err(err); } /* Write out final ELF contents */ if (elf_update(linker->elf, ELF_C_WRITE) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to write ELF contents"); return libbpf_err(err); } elf_end(linker->elf); - close(linker->fd); - linker->elf = NULL; + + if (linker->fd_is_owned) + close(linker->fd); linker->fd = -1; return 0; @@ -2654,6 +2859,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name, static int finalize_btf(struct bpf_linker *linker) { + enum btf_endianness link_endianness; LIBBPF_OPTS(btf_dedup_opts, opts); struct btf *btf = linker->btf; const void *raw_data; @@ -2687,17 +2893,24 @@ static int finalize_btf(struct bpf_linker *linker) err = finalize_btf_ext(linker); if (err) { - pr_warn(".BTF.ext generation failed: %d\n", err); + pr_warn(".BTF.ext generation failed: %s\n", errstr(err)); return err; } opts.btf_ext = linker->btf_ext; err = btf__dedup(linker->btf, &opts); if (err) { - pr_warn("BTF dedup failed: %d\n", err); + pr_warn("BTF dedup failed: %s\n", errstr(err)); return err; } + /* Set .BTF and .BTF.ext output byte order */ + link_endianness = linker->elf_hdr->e_ident[EI_DATA] == ELFDATA2MSB ? + BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; + btf__set_endianness(linker->btf, link_endianness); + if (linker->btf_ext) + btf_ext__set_endianness(linker->btf_ext, link_endianness); + /* Emit .BTF section */ raw_data = btf__raw_data(linker->btf, &raw_sz); if (!raw_data) @@ -2705,19 +2918,19 @@ static int finalize_btf(struct bpf_linker *linker) err = emit_elf_data_sec(linker, BTF_ELF_SEC, 8, raw_data, raw_sz); if (err) { - pr_warn("failed to write out .BTF ELF section: %d\n", err); + pr_warn("failed to write out .BTF ELF section: %s\n", errstr(err)); return err; } /* Emit .BTF.ext section */ if (linker->btf_ext) { - raw_data = btf_ext__get_raw_data(linker->btf_ext, &raw_sz); + raw_data = btf_ext__raw_data(linker->btf_ext, &raw_sz); if (!raw_data) return -ENOMEM; err = emit_elf_data_sec(linker, BTF_EXT_ELF_SEC, 8, raw_data, raw_sz); if (err) { - pr_warn("failed to write out .BTF.ext ELF section: %d\n", err); + pr_warn("failed to write out .BTF.ext ELF section: %s\n", errstr(err)); return err; } } @@ -2893,7 +3106,7 @@ static int finalize_btf_ext(struct bpf_linker *linker) err = libbpf_get_error(linker->btf_ext); if (err) { linker->btf_ext = NULL; - pr_warn("failed to parse final .BTF.ext data: %d\n", err); + pr_warn("failed to parse final .BTF.ext data: %s\n", errstr(err)); goto out; } diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 39f25e09b51e..c997e69d507f 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -9,6 +9,7 @@ #include <linux/if_ether.h> #include <linux/pkt_cls.h> #include <linux/rtnetlink.h> +#include <linux/netdev.h> #include <sys/socket.h> #include <errno.h> #include <time.h> @@ -27,13 +28,28 @@ typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, void *cookie); +struct xdp_link_info { + __u32 prog_id; + __u32 drv_prog_id; + __u32 hw_prog_id; + __u32 skb_prog_id; + __u8 attach_mode; +}; + struct xdp_id_md { int ifindex; __u32 flags; struct xdp_link_info info; + __u64 feature_flags; +}; + +struct xdp_features_md { + int ifindex; + __u32 xdp_zc_max_segs; + __u64 flags; }; -static int libbpf_netlink_open(__u32 *nl_pid) +static int libbpf_netlink_open(__u32 *nl_pid, int proto) { struct sockaddr_nl sa; socklen_t addrlen; @@ -43,7 +59,7 @@ static int libbpf_netlink_open(__u32 *nl_pid) memset(&sa, 0, sizeof(sa)); sa.nl_family = AF_NETLINK; - sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); + sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto); if (sock < 0) return -errno; @@ -87,29 +103,75 @@ enum { NL_DONE, }; +static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags) +{ + int len; + + do { + len = recvmsg(sock, mhdr, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + + if (len < 0) + return -errno; + return len; +} + +static int alloc_iov(struct iovec *iov, int len) +{ + void *nbuf; + + nbuf = realloc(iov->iov_base, len); + if (!nbuf) + return -ENOMEM; + + iov->iov_base = nbuf; + iov->iov_len = len; + return 0; +} + static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, void *cookie) { + struct iovec iov = {}; + struct msghdr mhdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; bool multipart = true; struct nlmsgerr *err; struct nlmsghdr *nh; - char buf[4096]; int len, ret; + ret = alloc_iov(&iov, 4096); + if (ret) + goto done; + while (multipart) { start: multipart = false; - len = recv(sock, buf, sizeof(buf), 0); + len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC); if (len < 0) { - ret = -errno; + ret = len; + goto done; + } + + if (len > iov.iov_len) { + ret = alloc_iov(&iov, len); + if (ret) + goto done; + } + + len = netlink_recvmsg(sock, &mhdr, 0); + if (len < 0) { + ret = len; goto done; } if (len == 0) break; - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) { if (nh->nlmsg_pid != nl_pid) { ret = -LIBBPF_ERRNO__WRNGPID; @@ -130,7 +192,8 @@ start: libbpf_nla_dump_errormsg(nh); goto done; case NLMSG_DONE: - return 0; + ret = 0; + goto done; default: break; } @@ -142,27 +205,29 @@ start: case NL_NEXT: goto start; case NL_DONE: - return 0; + ret = 0; + goto done; default: - return ret; + goto done; } } } } ret = 0; done: + free(iov.iov_base); return ret; } static int libbpf_netlink_send_recv(struct libbpf_nla_req *req, - __dump_nlmsg_t parse_msg, + int proto, __dump_nlmsg_t parse_msg, libbpf_dump_nlmsg_t parse_attr, void *cookie) { __u32 nl_pid = 0; int sock, ret; - sock = libbpf_netlink_open(&nl_pid); + sock = libbpf_netlink_open(&nl_pid, proto); if (sock < 0) return sock; @@ -181,6 +246,43 @@ out: return ret; } +static int parse_genl_family_id(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, + void *cookie) +{ + struct genlmsghdr *gnl = NLMSG_DATA(nh); + struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN); + struct nlattr *tb[CTRL_ATTR_FAMILY_ID + 1]; + __u16 *id = cookie; + + libbpf_nla_parse(tb, CTRL_ATTR_FAMILY_ID, na, + NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL); + if (!tb[CTRL_ATTR_FAMILY_ID]) + return NL_CONT; + + *id = libbpf_nla_getattr_u16(tb[CTRL_ATTR_FAMILY_ID]); + return NL_DONE; +} + +static int libbpf_netlink_resolve_genl_family_id(const char *name, + __u16 len, __u16 *id) +{ + struct libbpf_nla_req req = { + .nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN), + .nh.nlmsg_type = GENL_ID_CTRL, + .nh.nlmsg_flags = NLM_F_REQUEST, + .gnl.cmd = CTRL_CMD_GETFAMILY, + .gnl.version = 2, + }; + int err; + + err = nlattr_add(&req, CTRL_ATTR_FAMILY_NAME, name, len); + if (err < 0) + return err; + + return libbpf_netlink_send_recv(&req, NETLINK_GENERIC, + parse_genl_family_id, NULL, id); +} + static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, __u32 flags) { @@ -214,32 +316,29 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, } nlattr_end_nested(&req, nla); - return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); + return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL); } -int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, - const struct bpf_xdp_set_link_opts *opts) +int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts) { - int old_fd = -1, ret; + int old_prog_fd, err; - if (!OPTS_VALID(opts, bpf_xdp_set_link_opts)) + if (!OPTS_VALID(opts, bpf_xdp_attach_opts)) return libbpf_err(-EINVAL); - if (OPTS_HAS(opts, old_fd)) { - old_fd = OPTS_GET(opts, old_fd, -1); + old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + if (old_prog_fd) flags |= XDP_FLAGS_REPLACE; - } + else + old_prog_fd = -1; - ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags); - return libbpf_err(ret); + err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags); + return libbpf_err(err); } -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts) { - int ret; - - ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags); - return libbpf_err(ret); + return bpf_xdp_attach(ifindex, -1, flags, opts); } static int __dump_link_nlmsg(struct nlmsghdr *nlh, @@ -303,74 +402,136 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) return 0; } -int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags) +static int parse_xdp_features(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, + void *cookie) +{ + struct genlmsghdr *gnl = NLMSG_DATA(nh); + struct nlattr *na = (struct nlattr *)((void *)gnl + GENL_HDRLEN); + struct nlattr *tb[NETDEV_CMD_MAX + 1]; + struct xdp_features_md *md = cookie; + __u32 ifindex; + + libbpf_nla_parse(tb, NETDEV_CMD_MAX, na, + NLMSG_PAYLOAD(nh, sizeof(*gnl)), NULL); + + if (!tb[NETDEV_A_DEV_IFINDEX] || !tb[NETDEV_A_DEV_XDP_FEATURES]) + return NL_CONT; + + ifindex = libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_IFINDEX]); + if (ifindex != md->ifindex) + return NL_CONT; + + md->flags = libbpf_nla_getattr_u64(tb[NETDEV_A_DEV_XDP_FEATURES]); + if (tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS]) + md->xdp_zc_max_segs = + libbpf_nla_getattr_u32(tb[NETDEV_A_DEV_XDP_ZC_MAX_SEGS]); + return NL_DONE; +} + +int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) { - struct xdp_id_md xdp_id = {}; - __u32 mask; - int ret; struct libbpf_nla_req req = { .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), .nh.nlmsg_type = RTM_GETLINK, .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, .ifinfo.ifi_family = AF_PACKET, }; + struct xdp_id_md xdp_id = {}; + struct xdp_features_md md = { + .ifindex = ifindex, + }; + __u16 id; + int err; - if (flags & ~XDP_FLAGS_MASK || !info_size) + if (!OPTS_VALID(opts, bpf_xdp_query_opts)) + return libbpf_err(-EINVAL); + + if (xdp_flags & ~XDP_FLAGS_MASK) return libbpf_err(-EINVAL); /* Check whether the single {HW,DRV,SKB} mode is set */ - flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); - mask = flags - 1; - if (flags && flags & mask) + xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE; + if (xdp_flags & (xdp_flags - 1)) return libbpf_err(-EINVAL); xdp_id.ifindex = ifindex; - xdp_id.flags = flags; + xdp_id.flags = xdp_flags; - ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, + err = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, __dump_link_nlmsg, get_xdp_info, &xdp_id); - if (!ret) { - size_t sz = min(info_size, sizeof(xdp_id.info)); + if (err) + return libbpf_err(err); + + OPTS_SET(opts, prog_id, xdp_id.info.prog_id); + OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id); + OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id); + OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id); + OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode); - memcpy(info, &xdp_id.info, sz); - memset((void *) info + sz, 0, info_size - sz); + if (!OPTS_HAS(opts, feature_flags)) + return 0; + + err = libbpf_netlink_resolve_genl_family_id("netdev", sizeof("netdev"), &id); + if (err < 0) { + if (err == -ENOENT) { + opts->feature_flags = 0; + goto skip_feature_flags; + } + return libbpf_err(err); } - return libbpf_err(ret); -} + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_type = id; + req.gnl.cmd = NETDEV_CMD_DEV_GET; + req.gnl.version = 2; -static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) -{ - flags &= XDP_FLAGS_MODES; + err = nlattr_add(&req, NETDEV_A_DEV_IFINDEX, &ifindex, sizeof(ifindex)); + if (err < 0) + return libbpf_err(err); - if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) - return info->prog_id; - if (flags & XDP_FLAGS_DRV_MODE) - return info->drv_prog_id; - if (flags & XDP_FLAGS_HW_MODE) - return info->hw_prog_id; - if (flags & XDP_FLAGS_SKB_MODE) - return info->skb_prog_id; + err = libbpf_netlink_send_recv(&req, NETLINK_GENERIC, + parse_xdp_features, NULL, &md); + if (err) + return libbpf_err(err); + OPTS_SET(opts, feature_flags, md.flags); + OPTS_SET(opts, xdp_zc_max_segs, md.xdp_zc_max_segs); + +skip_feature_flags: return 0; } -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) { - struct xdp_link_info info; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); int ret; - ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); - if (!ret) - *prog_id = get_xdp_id(&info, flags); + ret = bpf_xdp_query(ifindex, flags, &opts); + if (ret) + return libbpf_err(ret); - return libbpf_err(ret); + flags &= XDP_FLAGS_MODES; + + if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags) + *prog_id = opts.prog_id; + else if (flags & XDP_FLAGS_DRV_MODE) + *prog_id = opts.drv_prog_id; + else if (flags & XDP_FLAGS_HW_MODE) + *prog_id = opts.hw_prog_id; + else if (flags & XDP_FLAGS_SKB_MODE) + *prog_id = opts.skb_prog_id; + else + *prog_id = 0; + + return 0; } -typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); -static int clsact_config(struct libbpf_nla_req *req) +typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook); + +static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) { req->tc.tcm_parent = TC_H_CLSACT; req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); @@ -378,6 +539,16 @@ static int clsact_config(struct libbpf_nla_req *req) return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact")); } +static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) +{ + const char *qdisc = OPTS_GET(hook, qdisc, NULL); + + req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT); + req->tc.tcm_handle = OPTS_GET(hook, handle, 0); + + return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1); +} + static int attach_point_to_config(struct bpf_tc_hook *hook, qdisc_config_t *config) { @@ -391,6 +562,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook, return 0; case BPF_TC_CUSTOM: return -EOPNOTSUPP; + case BPF_TC_QDISC: + *config = &qdisc_config; + return 0; default: return -EINVAL; } @@ -435,11 +609,11 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags) req.tc.tcm_family = AF_UNSPEC; req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0); - ret = config(&req); + ret = config(&req, hook); if (ret < 0) return ret; - return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); + return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL); } static int tc_qdisc_create_excl(struct bpf_tc_hook *hook) @@ -478,6 +652,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook) case BPF_TC_INGRESS: case BPF_TC_EGRESS: return libbpf_err(__bpf_tc_detach(hook, NULL, true)); + case BPF_TC_QDISC: case BPF_TC_INGRESS | BPF_TC_EGRESS: return libbpf_err(tc_qdisc_delete(hook)); case BPF_TC_CUSTOM: @@ -533,12 +708,13 @@ static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd) { - struct bpf_prog_info info = {}; + struct bpf_prog_info info; __u32 info_len = sizeof(info); char name[256]; int len, ret; - ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); + memset(&info, 0, info_len); + ret = bpf_prog_get_info_by_fd(fd, &info, &info_len); if (ret < 0) return ret; @@ -618,7 +794,8 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts) info.opts = opts; - ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info); + ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL, + &info); if (ret < 0) return libbpf_err(ret); if (!info.processed) @@ -684,7 +861,7 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook, return ret; } - return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); + return libbpf_netlink_send_recv(&req, NETLINK_ROUTE, NULL, NULL, NULL); } int bpf_tc_detach(const struct bpf_tc_hook *hook, @@ -749,7 +926,8 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts) info.opts = opts; - ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info); + ret = libbpf_netlink_send_recv(&req, NETLINK_ROUTE, get_tc_info, NULL, + &info); if (ret < 0) return libbpf_err(ret); if (!info.processed) diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index f57e77a6e40f..06663f9ea581 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -32,7 +32,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) static int nla_ok(const struct nlattr *nla, int remaining) { - return remaining >= sizeof(*nla) && + return remaining >= (int)sizeof(*nla) && nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } @@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype, minlen = nla_attr_minlen[pt->type]; if (libbpf_nla_len(nla) < minlen) - return -1; + return -EINVAL; if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) - return -1; + return -EINVAL; if (pt->type == LIBBPF_NLA_STRING) { char *data = libbpf_nla_data(nla); if (data[libbpf_nla_len(nla) - 1] != '\0') - return -1; + return -EINVAL; } return 0; @@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) - goto errout; + return err; } - if (tb[type]) + if (tb[type]) { pr_warn("Attribute of type %#x found multiple times in message, " "previous attribute is being ignored.\n", type); + } tb[type] = nla; } - err = 0; -errout: - return err; + return 0; } /** @@ -178,7 +177,7 @@ int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh) hlen += nlmsg_len(&err->msg); attr = (struct nlattr *) ((void *) err + hlen); - alen = nlh->nlmsg_len - hlen; + alen = (void *)nlh + nlh->nlmsg_len - (void *)attr; if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) { diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h index 4d15ae2ff812..d92d1c1de700 100644 --- a/tools/lib/bpf/nlattr.h +++ b/tools/lib/bpf/nlattr.h @@ -14,6 +14,7 @@ #include <errno.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> +#include <linux/genetlink.h> /* avoid multiple definition of netlink features */ #define __LINUX_NETLINK_H @@ -58,6 +59,7 @@ struct libbpf_nla_req { union { struct ifinfomsg ifinfo; struct tcmsg tc; + struct genlmsghdr gnl; }; char buf[128]; }; @@ -89,11 +91,21 @@ static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla) return *(uint8_t *)libbpf_nla_data(nla); } +static inline uint16_t libbpf_nla_getattr_u16(const struct nlattr *nla) +{ + return *(uint16_t *)libbpf_nla_data(nla); +} + static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla) { return *(uint32_t *)libbpf_nla_data(nla); } +static inline uint64_t libbpf_nla_getattr_u64(const struct nlattr *nla) +{ + return *(uint64_t *)libbpf_nla_data(nla); +} + static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla) { return (const char *)libbpf_nla_data(nla); diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 910865e29edc..6eea5edba58a 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -64,7 +64,6 @@ enum libbpf_print_level { #include "libbpf.h" #include "bpf.h" #include "btf.h" -#include "str_error.h" #include "libbpf_internal.h" #endif @@ -95,6 +94,7 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id"; case BPF_CORE_TYPE_ID_TARGET: return "target_type_id"; case BPF_CORE_TYPE_EXISTS: return "type_exists"; + case BPF_CORE_TYPE_MATCHES: return "type_matches"; case BPF_CORE_TYPE_SIZE: return "type_size"; case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists"; case BPF_CORE_ENUMVAL_VALUE: return "enumval_value"; @@ -123,6 +123,7 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) case BPF_CORE_TYPE_ID_LOCAL: case BPF_CORE_TYPE_ID_TARGET: case BPF_CORE_TYPE_EXISTS: + case BPF_CORE_TYPE_MATCHES: case BPF_CORE_TYPE_SIZE: return true; default: @@ -141,6 +142,86 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) } } +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id, int level) +{ + const struct btf_type *local_type, *targ_type; + int depth = 32; /* max recursion depth */ + + /* caller made sure that names match (ignoring flavor suffix) */ + local_type = btf_type_by_id(local_btf, local_id); + targ_type = btf_type_by_id(targ_btf, targ_id); + if (!btf_kind_core_compat(local_type, targ_type)) + return 0; + +recur: + depth--; + if (depth < 0) + return -EINVAL; + + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (!btf_kind_core_compat(local_type, targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_UNKN: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + case BTF_KIND_ENUM64: + return 1; + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible between each other + */ + return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; + case BTF_KIND_PTR: + local_id = local_type->type; + targ_id = targ_type->type; + goto recur; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + case BTF_KIND_FUNC_PROTO: { + struct btf_param *local_p = btf_params(local_type); + struct btf_param *targ_p = btf_params(targ_type); + __u16 local_vlen = btf_vlen(local_type); + __u16 targ_vlen = btf_vlen(targ_type); + int i, err; + + if (local_vlen != targ_vlen) + return 0; + + for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { + if (level <= 0) + return -EINVAL; + + skip_mods_and_typedefs(local_btf, local_p->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); + err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, + level - 1); + if (err <= 0) + return err; + } + + /* tail recurse for return type check */ + skip_mods_and_typedefs(local_btf, local_type->type, &local_id); + skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); + goto recur; + } + default: + pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", + btf_kind_str(local_type), local_id, targ_id); + return 0; + } +} + /* * Turn bpf_core_relo into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -167,40 +248,39 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * just a parsed access string representation): [0, 1, 2, 3]. * * High-level spec will capture only 3 points: - * - intial zero-index access by pointer (&s->... is the same as &s[0]...); + * - initial zero-index access by pointer (&s->... is the same as &s[0]...); * - field 'a' access (corresponds to '2' in low-level spec); * - array element #3 access (corresponds to '3' in low-level spec). * - * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, + * Type-based relocations (TYPE_EXISTS/TYPE_MATCHES/TYPE_SIZE, * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their * spec and raw_spec are kept empty. * * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access * string to specify enumerator's value index that need to be relocated. */ -static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, - __u32 type_id, - const char *spec_str, - enum bpf_core_relo_kind relo_kind, - struct bpf_core_spec *spec) +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec) { int access_idx, parsed_len, i; struct bpf_core_accessor *acc; const struct btf_type *t; - const char *name; - __u32 id; + const char *name, *spec_str; + __u32 id, name_off; __s64 sz; + spec_str = btf__name_by_offset(btf, relo->access_str_off); if (str_is_empty(spec_str) || *spec_str == ':') return -EINVAL; memset(spec, 0, sizeof(*spec)); spec->btf = btf; - spec->root_type_id = type_id; - spec->relo_kind = relo_kind; + spec->root_type_id = relo->type_id; + spec->relo_kind = relo->kind; /* type-based relocations don't have a field access string */ - if (core_relo_is_type_based(relo_kind)) { + if (core_relo_is_type_based(relo->kind)) { if (strcmp(spec_str, "0")) return -EINVAL; return 0; @@ -221,7 +301,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, if (spec->raw_len == 0) return -EINVAL; - t = skip_mods_and_typedefs(btf, type_id, &id); + t = skip_mods_and_typedefs(btf, relo->type_id, &id); if (!t) return -EINVAL; @@ -231,16 +311,18 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, acc->idx = access_idx; spec->len++; - if (core_relo_is_enumval_based(relo_kind)) { - if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) + if (core_relo_is_enumval_based(relo->kind)) { + if (!btf_is_any_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) return -EINVAL; /* record enumerator name in a first accessor */ - acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); + name_off = btf_is_enum(t) ? btf_enum(t)[access_idx].name_off + : btf_enum64(t)[access_idx].name_off; + acc->name = btf__name_by_offset(btf, name_off); return 0; } - if (!core_relo_is_field_based(relo_kind)) + if (!core_relo_is_field_based(relo->kind)) return -EINVAL; sz = btf__resolve_size(btf, id); @@ -301,7 +383,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, spec->bit_offset += access_idx * sz * 8; } else { pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - prog_name, type_id, spec_str, i, id, btf_kind_str(t)); + prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } @@ -341,7 +423,7 @@ recur: if (btf_is_composite(local_type) && btf_is_composite(targ_type)) return 1; - if (btf_kind(local_type) != btf_kind(targ_type)) + if (!btf_kind_core_compat(local_type, targ_type)) return 0; switch (btf_kind(local_type)) { @@ -349,6 +431,7 @@ recur: case BTF_KIND_FLOAT: return 1; case BTF_KIND_FWD: + case BTF_KIND_ENUM64: case BTF_KIND_ENUM: { const char *local_name, *targ_name; size_t local_len, targ_len; @@ -478,6 +561,7 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, const struct bpf_core_accessor *local_acc; struct bpf_core_accessor *targ_acc; int i, sz, matched; + __u32 name_off; memset(targ_spec, 0, sizeof(*targ_spec)); targ_spec->btf = targ_btf; @@ -485,9 +569,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, targ_spec->relo_kind = local_spec->relo_kind; if (core_relo_is_type_based(local_spec->relo_kind)) { - return bpf_core_types_are_compat(local_spec->btf, - local_spec->root_type_id, - targ_btf, targ_id); + if (local_spec->relo_kind == BPF_CORE_TYPE_MATCHES) + return bpf_core_types_match(local_spec->btf, + local_spec->root_type_id, + targ_btf, targ_id); + else + return bpf_core_types_are_compat(local_spec->btf, + local_spec->root_type_id, + targ_btf, targ_id); } local_acc = &local_spec->spec[0]; @@ -495,18 +584,22 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, if (core_relo_is_enumval_based(local_spec->relo_kind)) { size_t local_essent_len, targ_essent_len; - const struct btf_enum *e; const char *targ_name; /* has to resolve to an enum */ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); - if (!btf_is_enum(targ_type)) + if (!btf_is_any_enum(targ_type)) return 0; local_essent_len = bpf_core_essential_name_len(local_acc->name); - for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { - targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); + for (i = 0; i < btf_vlen(targ_type); i++) { + if (btf_is_enum(targ_type)) + name_off = btf_enum(targ_type)[i].name_off; + else + name_off = btf_enum64(targ_type)[i].name_off; + + targ_name = btf__name_by_offset(targ_spec->btf, name_off); targ_essent_len = bpf_core_essential_name_len(targ_name); if (targ_essent_len != local_essent_len) continue; @@ -584,12 +677,12 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, static int bpf_core_calc_field_relo(const char *prog_name, const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val, __u32 *field_sz, __u32 *type_id, + __u64 *val, __u32 *field_sz, __u32 *type_id, bool *validate) { const struct bpf_core_accessor *acc; const struct btf_type *t; - __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; + __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id; const struct btf_member *m; const struct btf_type *mt; bool bitfield; @@ -612,8 +705,14 @@ static int bpf_core_calc_field_relo(const char *prog_name, if (!acc->name) { if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { *val = spec->bit_offset / 8; - /* remember field size for load/store mem size */ - sz = btf__resolve_size(spec->btf, acc->type_id); + /* remember field size for load/store mem size; + * note, for arrays we care about individual element + * sizes, not the overall array size + */ + t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id); + while (btf_is_array(t)) + t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id); + sz = btf__resolve_size(spec->btf, elem_id); if (sz < 0) return -EINVAL; *field_sz = sz; @@ -673,7 +772,17 @@ static int bpf_core_calc_field_relo(const char *prog_name, case BPF_CORE_FIELD_BYTE_OFFSET: *val = byte_off; if (!bitfield) { - *field_sz = byte_sz; + /* remember field size for load/store mem size; + * note, for arrays we care about individual element + * sizes, not the overall array size + */ + t = skip_mods_and_typedefs(spec->btf, field_type_id, &elem_id); + while (btf_is_array(t)) + t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id); + sz = btf__resolve_size(spec->btf, elem_id); + if (sz < 0) + return -EINVAL; + *field_sz = sz; *type_id = field_type_id; } break; @@ -681,9 +790,8 @@ static int bpf_core_calc_field_relo(const char *prog_name, *val = byte_sz; break; case BPF_CORE_FIELD_SIGNED: - /* enums will be assumed unsigned */ - *val = btf_is_enum(mt) || - (btf_int_encoding(mt) & BTF_INT_SIGNED); + *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) || + (btf_is_int(mt) && (btf_int_encoding(mt) & BTF_INT_SIGNED)); if (validate) *validate = true; /* signedness is never ambiguous */ break; @@ -709,7 +817,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val, bool *validate) + __u64 *val, bool *validate) { __s64 sz; @@ -733,6 +841,7 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, *validate = false; break; case BPF_CORE_TYPE_EXISTS: + case BPF_CORE_TYPE_MATCHES: *val = 1; break; case BPF_CORE_TYPE_SIZE: @@ -752,10 +861,9 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val) + __u64 *val) { const struct btf_type *t; - const struct btf_enum *e; switch (relo->kind) { case BPF_CORE_ENUMVAL_EXISTS: @@ -765,8 +873,10 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, if (!spec) return -EUCLEAN; /* request instruction poisoning */ t = btf_type_by_id(spec->btf, spec->spec[0].type_id); - e = btf_enum(t) + spec->spec[0].idx; - *val = e->val; + if (btf_is_enum(t)) + *val = btf_enum(t)[spec->spec[0].idx].val; + else + *val = btf_enum64_value(btf_enum64(t) + spec->spec[0].idx); break; default: return -EOPNOTSUPP; @@ -775,31 +885,6 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, return 0; } -struct bpf_core_relo_res -{ - /* expected value in the instruction, unless validate == false */ - __u32 orig_val; - /* new value that needs to be patched up to */ - __u32 new_val; - /* relocation unsuccessful, poison instruction, but don't fail load */ - bool poison; - /* some relocations can't be validated against orig_val */ - bool validate; - /* for field byte offset relocations or the forms: - * *(T *)(rX + <off>) = rY - * rX = *(T *)(rY + <off>), - * we remember original and resolved field size to adjust direct - * memory loads of pointers and integers; this is necessary for 32-bit - * host kernel architectures, but also allows to automatically - * relocate fields that were resized from, e.g., u32 to u64, etc. - */ - bool fail_memsz_adjust; - __u32 orig_sz; - __u32 orig_type_id; - __u32 new_sz; - __u32 new_type_id; -}; - /* Calculate original and target relocation values, given local and target * specs and relocation kind. These values are calculated for each candidate. * If there are multiple candidates, resulting values should all be consistent @@ -951,11 +1036,11 @@ static int insn_bytes_to_bpf_size(__u32 sz) * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. */ -static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, - int insn_idx, const struct bpf_core_relo *relo, - int relo_idx, const struct bpf_core_relo_res *res) +int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, const struct bpf_core_relo *relo, + int relo_idx, const struct bpf_core_relo_res *res) { - __u32 orig_val, new_val; + __u64 orig_val, new_val; __u8 class; class = BPF_CLASS(insn->code); @@ -980,28 +1065,30 @@ poison: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; if (res->validate && insn->imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", + pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %llu -> %llu\n", prog_name, relo_idx, - insn_idx, insn->imm, orig_val, new_val); + insn_idx, insn->imm, (unsigned long long)orig_val, + (unsigned long long)new_val); return -EINVAL; } orig_val = insn->imm; insn->imm = new_val; - pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %llu -> %llu\n", prog_name, relo_idx, insn_idx, - orig_val, new_val); + (unsigned long long)orig_val, (unsigned long long)new_val); break; case BPF_LDX: case BPF_ST: case BPF_STX: if (res->validate && insn->off != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", - prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val); + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %llu -> %llu\n", + prog_name, relo_idx, insn_idx, insn->off, (unsigned long long)orig_val, + (unsigned long long)new_val); return -EINVAL; } if (new_val > SHRT_MAX) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", - prog_name, relo_idx, insn_idx, new_val); + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %llu\n", + prog_name, relo_idx, insn_idx, (unsigned long long)new_val); return -ERANGE; } if (res->fail_memsz_adjust) { @@ -1013,8 +1100,9 @@ poison: orig_val = insn->off; insn->off = new_val; - pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", - prog_name, relo_idx, insn_idx, orig_val, new_val); + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %llu -> %llu\n", + prog_name, relo_idx, insn_idx, (unsigned long long)orig_val, + (unsigned long long)new_val); if (res->new_sz != res->orig_sz) { int insn_bytes_sz, insn_bpf_sz; @@ -1050,20 +1138,20 @@ poison: return -EINVAL; } - imm = insn[0].imm + ((__u64)insn[1].imm << 32); + imm = (__u32)insn[0].imm | ((__u64)insn[1].imm << 32); if (res->validate && imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %llu -> %llu\n", prog_name, relo_idx, insn_idx, (unsigned long long)imm, - orig_val, new_val); + (unsigned long long)orig_val, (unsigned long long)new_val); return -EINVAL; } insn[0].imm = new_val; - insn[1].imm = 0; /* currently only 32-bit values are supported */ - pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", + insn[1].imm = new_val >> 32; + pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %llu\n", prog_name, relo_idx, insn_idx, - (unsigned long long)imm, new_val); + (unsigned long long)imm, (unsigned long long)new_val); break; } default: @@ -1080,55 +1168,82 @@ poison: * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b */ -static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) { const struct btf_type *t; - const struct btf_enum *e; const char *s; __u32 type_id; - int i; + int i, len = 0; + +#define append_buf(fmt, args...) \ + ({ \ + int r; \ + r = snprintf(buf, buf_sz, fmt, ##args); \ + len += r; \ + if (r >= buf_sz) \ + r = buf_sz; \ + buf += r; \ + buf_sz -= r; \ + }) type_id = spec->root_type_id; t = btf_type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); - libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); + append_buf("<%s> [%u] %s %s", + core_relo_kind_str(spec->relo_kind), + type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); if (core_relo_is_type_based(spec->relo_kind)) - return; + return len; if (core_relo_is_enumval_based(spec->relo_kind)) { t = skip_mods_and_typedefs(spec->btf, type_id, NULL); - e = btf_enum(t) + spec->raw_spec[0]; - s = btf__name_by_offset(spec->btf, e->name_off); + if (btf_is_enum(t)) { + const struct btf_enum *e; + const char *fmt_str; + + e = btf_enum(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %d" : "::%s = %u"; + append_buf(fmt_str, s, e->val); + } else { + const struct btf_enum64 *e; + const char *fmt_str; - libbpf_print(level, "::%s = %u", s, e->val); - return; + e = btf_enum64(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + fmt_str = BTF_INFO_KFLAG(t->info) ? "::%s = %lld" : "::%s = %llu"; + append_buf(fmt_str, s, (unsigned long long)btf_enum64_value(e)); + } + return len; } if (core_relo_is_field_based(spec->relo_kind)) { for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); + append_buf(".%s", spec->spec[i].name); else if (i > 0 || spec->spec[i].idx > 0) - libbpf_print(level, "[%u]", spec->spec[i].idx); + append_buf("[%u]", spec->spec[i].idx); } - libbpf_print(level, " ("); + append_buf(" ("); for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); + append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); if (spec->bit_offset % 8) - libbpf_print(level, " @ offset %u.%u)", - spec->bit_offset / 8, spec->bit_offset % 8); + append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8); else - libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); - return; + append_buf(" @ offset %u)", spec->bit_offset / 8); + return len; } + + return len; +#undef append_buf } /* - * CO-RE relocate single instruction. + * Calculate CO-RE relocation target result. * * The outline and important points of the algorithm: * 1. For given local type, find corresponding candidate target types. @@ -1159,11 +1274,11 @@ static void bpf_core_dump_spec(const char *prog_name, int level, const struct bp * 3. It is supported and expected that there might be multiple flavors * matching the spec. As long as all the specs resolve to the same set of * offsets across all candidates, there is no error. If there is any - * ambiguity, CO-RE relocation will fail. This is necessary to accomodate - * imprefection of BTF deduplication, which can cause slight duplication of + * ambiguity, CO-RE relocation will fail. This is necessary to accommodate + * imperfection of BTF deduplication, which can cause slight duplication of * the same BTF type, if some directly or indirectly referenced (by * pointer) type gets resolved to different actual types in different - * object files. If such situation occurs, deduplicated BTF will end up + * object files. If such a situation occurs, deduplicated BTF will end up * with two (or more) structurally identical types, which differ only in * types they refer to through pointer. This should be OK in most cases and * is not an error. @@ -1177,22 +1292,22 @@ static void bpf_core_dump_spec(const char *prog_name, int level, const struct bp * between multiple relocations for the same type ID and is updated as some * of the candidates are pruned due to structural incompatibility. */ -int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, - int insn_idx, - const struct bpf_core_relo *relo, - int relo_idx, - const struct btf *local_btf, - struct bpf_core_cand_list *cands, - struct bpf_core_spec *specs_scratch) +int bpf_core_calc_relo_insn(const char *prog_name, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch, + struct bpf_core_relo_res *targ_res) { struct bpf_core_spec *local_spec = &specs_scratch[0]; struct bpf_core_spec *cand_spec = &specs_scratch[1]; struct bpf_core_spec *targ_spec = &specs_scratch[2]; - struct bpf_core_relo_res cand_res, targ_res; + struct bpf_core_relo_res cand_res; const struct btf_type *local_type; const char *local_name; __u32 local_id; - const char *spec_str; + char spec_buf[256]; int i, j, err; local_id = relo->type_id; @@ -1201,38 +1316,34 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, if (!local_name) return -EINVAL; - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - if (str_is_empty(spec_str)) - return -EINVAL; - - err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, - relo->kind, local_spec); + err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec); if (err) { + const char *spec_str; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), str_is_empty(local_name) ? "<anon>" : local_name, - spec_str, err); + spec_str ?: "<?>", err); return -EINVAL; } - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, - relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec); + pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf); /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { /* bpf_insn's imm value could get out of sync during linking */ - memset(&targ_res, 0, sizeof(targ_res)); - targ_res.validate = false; - targ_res.poison = false; - targ_res.orig_val = local_spec->root_type_id; - targ_res.new_val = local_spec->root_type_id; - goto patch_insn; + memset(targ_res, 0, sizeof(*targ_res)); + targ_res->validate = false; + targ_res->poison = false; + targ_res->orig_val = local_spec->root_type_id; + targ_res->new_val = local_spec->root_type_id; + return 0; } /* libbpf doesn't support candidate search for anonymous types */ - if (str_is_empty(spec_str)) { + if (str_is_empty(local_name)) { pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); return -EOPNOTSUPP; @@ -1242,17 +1353,15 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, err = bpf_core_spec_match(local_spec, cands->cands[i].btf, cands->cands[i].id, cand_spec); if (err < 0) { - pr_warn("prog '%s': relo #%d: error matching candidate #%d ", - prog_name, relo_idx, i); - bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); - libbpf_print(LIBBPF_WARN, ": %d\n", err); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n", + prog_name, relo_idx, i, spec_buf, err); return err; } - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, - relo_idx, err == 0 ? "non-matching" : "matching", i); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf); if (err == 0) continue; @@ -1262,7 +1371,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, return err; if (j == 0) { - targ_res = cand_res; + *targ_res = cand_res; *targ_spec = *cand_spec; } else if (cand_spec->bit_offset != targ_spec->bit_offset) { /* if there are many field relo candidates, they @@ -1272,15 +1381,18 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, prog_name, relo_idx, cand_spec->bit_offset, targ_spec->bit_offset); return -EINVAL; - } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { + } else if (cand_res.poison != targ_res->poison || + cand_res.new_val != targ_res->new_val) { /* all candidates should result in the same relocation * decision and value, otherwise it's dangerous to * proceed due to ambiguity */ - pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", + pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %llu != %s %llu\n", prog_name, relo_idx, - cand_res.poison ? "failure" : "success", cand_res.new_val, - targ_res.poison ? "failure" : "success", targ_res.new_val); + cand_res.poison ? "failure" : "success", + (unsigned long long)cand_res.new_val, + targ_res->poison ? "failure" : "success", + (unsigned long long)targ_res->new_val); return -EINVAL; } @@ -1314,19 +1426,277 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, prog_name, relo_idx); /* calculate single target relo result explicitly */ - err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res); + err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, targ_res); if (err) return err; } -patch_insn: - /* bpf_core_patch_insn() should know how to handle missing targ_spec */ - err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res); - if (err) { - pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", - prog_name, relo_idx, relo->insn_off / 8, err); + return 0; +} + +static bool bpf_core_names_match(const struct btf *local_btf, size_t local_name_off, + const struct btf *targ_btf, size_t targ_name_off) +{ + const char *local_n, *targ_n; + size_t local_len, targ_len; + + local_n = btf__name_by_offset(local_btf, local_name_off); + targ_n = btf__name_by_offset(targ_btf, targ_name_off); + + if (str_is_empty(targ_n)) + return str_is_empty(local_n); + + targ_len = bpf_core_essential_name_len(targ_n); + local_len = bpf_core_essential_name_len(local_n); + + return targ_len == local_len && strncmp(local_n, targ_n, local_len) == 0; +} + +static int bpf_core_enums_match(const struct btf *local_btf, const struct btf_type *local_t, + const struct btf *targ_btf, const struct btf_type *targ_t) +{ + __u16 local_vlen = btf_vlen(local_t); + __u16 targ_vlen = btf_vlen(targ_t); + int i, j; + + if (local_t->size != targ_t->size) + return 0; + + if (local_vlen > targ_vlen) + return 0; + + /* iterate over the local enum's variants and make sure each has + * a symbolic name correspondent in the target + */ + for (i = 0; i < local_vlen; i++) { + bool matched = false; + __u32 local_n_off, targ_n_off; + + local_n_off = btf_is_enum(local_t) ? btf_enum(local_t)[i].name_off : + btf_enum64(local_t)[i].name_off; + + for (j = 0; j < targ_vlen; j++) { + targ_n_off = btf_is_enum(targ_t) ? btf_enum(targ_t)[j].name_off : + btf_enum64(targ_t)[j].name_off; + + if (bpf_core_names_match(local_btf, local_n_off, targ_btf, targ_n_off)) { + matched = true; + break; + } + } + + if (!matched) + return 0; + } + return 1; +} + +static int bpf_core_composites_match(const struct btf *local_btf, const struct btf_type *local_t, + const struct btf *targ_btf, const struct btf_type *targ_t, + bool behind_ptr, int level) +{ + const struct btf_member *local_m = btf_members(local_t); + __u16 local_vlen = btf_vlen(local_t); + __u16 targ_vlen = btf_vlen(targ_t); + int i, j, err; + + if (local_vlen > targ_vlen) + return 0; + + /* check that all local members have a match in the target */ + for (i = 0; i < local_vlen; i++, local_m++) { + const struct btf_member *targ_m = btf_members(targ_t); + bool matched = false; + + for (j = 0; j < targ_vlen; j++, targ_m++) { + if (!bpf_core_names_match(local_btf, local_m->name_off, + targ_btf, targ_m->name_off)) + continue; + + err = __bpf_core_types_match(local_btf, local_m->type, targ_btf, + targ_m->type, behind_ptr, level - 1); + if (err < 0) + return err; + if (err > 0) { + matched = true; + break; + } + } + + if (!matched) + return 0; + } + return 1; +} + +/* Check that two types "match". This function assumes that root types were + * already checked for name match. + * + * The matching relation is defined as follows: + * - modifiers and typedefs are stripped (and, hence, effectively ignored) + * - generally speaking types need to be of same kind (struct vs. struct, union + * vs. union, etc.) + * - exceptions are struct/union behind a pointer which could also match a + * forward declaration of a struct or union, respectively, and enum vs. + * enum64 (see below) + * Then, depending on type: + * - integers: + * - match if size and signedness match + * - arrays & pointers: + * - target types are recursively matched + * - structs & unions: + * - local members need to exist in target with the same name + * - for each member we recursively check match unless it is already behind a + * pointer, in which case we only check matching names and compatible kind + * - enums: + * - local variants have to have a match in target by symbolic name (but not + * numeric value) + * - size has to match (but enum may match enum64 and vice versa) + * - function pointers: + * - number and position of arguments in local type has to match target + * - for each argument and the return value we recursively check match + */ +int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, + __u32 targ_id, bool behind_ptr, int level) +{ + const struct btf_type *local_t, *targ_t; + int depth = 32; /* max recursion depth */ + __u16 local_k, targ_k; + + if (level <= 0) + return -EINVAL; + +recur: + depth--; + if (depth < 0) + return -EINVAL; + + local_t = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_t = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_t || !targ_t) return -EINVAL; + + /* While the name check happens after typedefs are skipped, root-level + * typedefs would still be name-matched as that's the contract with + * callers. + */ + if (!bpf_core_names_match(local_btf, local_t->name_off, targ_btf, targ_t->name_off)) + return 0; + + local_k = btf_kind(local_t); + targ_k = btf_kind(targ_t); + + switch (local_k) { + case BTF_KIND_UNKN: + return local_k == targ_k; + case BTF_KIND_FWD: { + bool local_f = BTF_INFO_KFLAG(local_t->info); + + if (behind_ptr) { + if (local_k == targ_k) + return local_f == BTF_INFO_KFLAG(targ_t->info); + + /* for forward declarations kflag dictates whether the + * target is a struct (0) or union (1) + */ + return (targ_k == BTF_KIND_STRUCT && !local_f) || + (targ_k == BTF_KIND_UNION && local_f); + } else { + if (local_k != targ_k) + return 0; + + /* match if the forward declaration is for the same kind */ + return local_f == BTF_INFO_KFLAG(targ_t->info); + } } + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + if (!btf_is_any_enum(targ_t)) + return 0; - return 0; + return bpf_core_enums_match(local_btf, local_t, targ_btf, targ_t); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + if (behind_ptr) { + bool targ_f = BTF_INFO_KFLAG(targ_t->info); + + if (local_k == targ_k) + return 1; + + if (targ_k != BTF_KIND_FWD) + return 0; + + return (local_k == BTF_KIND_UNION) == targ_f; + } else { + if (local_k != targ_k) + return 0; + + return bpf_core_composites_match(local_btf, local_t, targ_btf, targ_t, + behind_ptr, level); + } + case BTF_KIND_INT: { + __u8 local_sgn; + __u8 targ_sgn; + + if (local_k != targ_k) + return 0; + + local_sgn = btf_int_encoding(local_t) & BTF_INT_SIGNED; + targ_sgn = btf_int_encoding(targ_t) & BTF_INT_SIGNED; + + return local_t->size == targ_t->size && local_sgn == targ_sgn; + } + case BTF_KIND_PTR: + if (local_k != targ_k) + return 0; + + behind_ptr = true; + + local_id = local_t->type; + targ_id = targ_t->type; + goto recur; + case BTF_KIND_ARRAY: { + const struct btf_array *local_array = btf_array(local_t); + const struct btf_array *targ_array = btf_array(targ_t); + + if (local_k != targ_k) + return 0; + + if (local_array->nelems != targ_array->nelems) + return 0; + + local_id = local_array->type; + targ_id = targ_array->type; + goto recur; + } + case BTF_KIND_FUNC_PROTO: { + struct btf_param *local_p = btf_params(local_t); + struct btf_param *targ_p = btf_params(targ_t); + __u16 local_vlen = btf_vlen(local_t); + __u16 targ_vlen = btf_vlen(targ_t); + int i, err; + + if (local_k != targ_k) + return 0; + + if (local_vlen != targ_vlen) + return 0; + + for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { + err = __bpf_core_types_match(local_btf, local_p->type, targ_btf, + targ_p->type, behind_ptr, level - 1); + if (err <= 0) + return err; + } + + /* tail recurse for return type check */ + local_id = local_t->type; + targ_id = targ_t->type; + goto recur; + } + default: + pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", + btf_kind_str(local_t), local_id, targ_id); + return 0; + } } diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 17799819ad7c..1c0566daf8e8 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -44,14 +44,56 @@ struct bpf_core_spec { __u32 bit_offset; }; -int bpf_core_apply_relo_insn(const char *prog_name, - struct bpf_insn *insn, int insn_idx, - const struct bpf_core_relo *relo, int relo_idx, - const struct btf *local_btf, - struct bpf_core_cand_list *cands, - struct bpf_core_spec *specs_scratch); +struct bpf_core_relo_res { + /* expected value in the instruction, unless validate == false */ + __u64 orig_val; + /* new value that needs to be patched up to */ + __u64 new_val; + /* relocation unsuccessful, poison instruction, but don't fail load */ + bool poison; + /* some relocations can't be validated against orig_val */ + bool validate; + /* for field byte offset relocations or the forms: + * *(T *)(rX + <off>) = rY + * rX = *(T *)(rY + <off>), + * we remember original and resolved field size to adjust direct + * memory loads of pointers and integers; this is necessary for 32-bit + * host kernel architectures, but also allows to automatically + * relocate fields that were resized from, e.g., u32 to u64, etc. + */ + bool fail_memsz_adjust; + __u32 orig_sz; + __u32 orig_type_id; + __u32 new_sz; + __u32 new_type_id; +}; + +int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id, int level); int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id); +int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, + __u32 targ_id, bool behind_ptr, int level); +int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, + __u32 targ_id); size_t bpf_core_essential_name_len(const char *name); + +int bpf_core_calc_relo_insn(const char *prog_name, + const struct bpf_core_relo *relo, int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch, + struct bpf_core_relo_res *targ_res); + +int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, const struct bpf_core_relo *relo, + int relo_idx, const struct bpf_core_relo_res *res); + +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec); + +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec); + #endif diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 8bc117bcc7bc..00ec4837a06d 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -16,6 +16,7 @@ #include <asm/barrier.h> #include <sys/mman.h> #include <sys/epoll.h> +#include <time.h> #include "libbpf.h" #include "libbpf_internal.h" @@ -33,13 +34,30 @@ struct ring { struct ring_buffer { struct epoll_event *events; - struct ring *rings; + struct ring **rings; size_t page_size; int epoll_fd; int ring_cnt; }; -static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) +struct user_ring_buffer { + struct epoll_event event; + unsigned long *consumer_pos; + unsigned long *producer_pos; + void *data; + unsigned long mask; + size_t page_size; + int map_fd; + int epoll_fd; +}; + +/* 8-byte ring buffer header structure */ +struct ringbuf_hdr { + __u32 len; + __u32 pad; +}; + +static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r) { if (r->consumer_pos) { munmap(r->consumer_pos, rb->page_size); @@ -49,6 +67,8 @@ static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1)); r->producer_pos = NULL; } + + free(r); } /* Add extra RINGBUF maps to this ring buffer manager */ @@ -59,16 +79,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, __u32 len = sizeof(info); struct epoll_event *e; struct ring *r; + __u64 mmap_sz; void *tmp; int err; memset(&info, 0, sizeof(info)); - err = bpf_obj_get_info_by_fd(map_fd, &info, &len); + err = bpf_map_get_info_by_fd(map_fd, &info, &len); if (err) { err = -errno; - pr_warn("ringbuf: failed to get map info for fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to get map info for fd=%d: %s\n", + map_fd, errstr(err)); return libbpf_err(err); } @@ -88,8 +109,10 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, return libbpf_err(-ENOMEM); rb->events = tmp; - r = &rb->rings[rb->ring_cnt]; - memset(r, 0, sizeof(*r)); + r = calloc(1, sizeof(*r)); + if (!r) + return libbpf_err(-ENOMEM); + rb->rings[rb->ring_cnt] = r; r->map_fd = map_fd; r->sample_cb = sample_cb; @@ -97,28 +120,31 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, r->mask = info.max_entries - 1; /* Map writable consumer page */ - tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, - map_fd, 0); + tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", - map_fd, err); - return libbpf_err(err); + pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %s\n", + map_fd, errstr(err)); + goto err_out; } r->consumer_pos = tmp; /* Map read-only producer page and data pages. We map twice as big * data size to allow simple reading of samples that wrap around the * end of a ring buffer. See kernel implementation for details. - * */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, - MAP_SHARED, map_fd, rb->page_size); + */ + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + err = -E2BIG; + pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); + goto err_out; + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - ringbuf_unmap_ring(rb, r); - pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", - map_fd, err); - return libbpf_err(err); + pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %s\n", + map_fd, errstr(err)); + goto err_out; } r->producer_pos = tmp; r->data = tmp + rb->page_size; @@ -130,14 +156,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, e->data.fd = rb->ring_cnt; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { err = -errno; - ringbuf_unmap_ring(rb, r); - pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", - map_fd, err); - return libbpf_err(err); + pr_warn("ringbuf: failed to epoll add map fd=%d: %s\n", + map_fd, errstr(err)); + goto err_out; } rb->ring_cnt++; return 0; + +err_out: + ringbuf_free_ring(rb, r); + return libbpf_err(err); } void ring_buffer__free(struct ring_buffer *rb) @@ -148,7 +177,7 @@ void ring_buffer__free(struct ring_buffer *rb) return; for (i = 0; i < rb->ring_cnt; ++i) - ringbuf_unmap_ring(rb, &rb->rings[i]); + ringbuf_free_ring(rb, rb->rings[i]); if (rb->epoll_fd >= 0) close(rb->epoll_fd); @@ -176,7 +205,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (rb->epoll_fd < 0) { err = -errno; - pr_warn("ringbuf: failed to create epoll instance: %d\n", err); + pr_warn("ringbuf: failed to create epoll instance: %s\n", errstr(err)); goto err_out; } @@ -202,7 +231,7 @@ static inline int roundup_len(__u32 len) return (len + 7) / 8 * 8; } -static int64_t ringbuf_process_ring(struct ring* r) +static int64_t ringbuf_process_ring(struct ring *r, size_t n) { int *len_ptr, len, err; /* 64-bit to avoid overflow in case of extreme application behavior */ @@ -239,12 +268,42 @@ static int64_t ringbuf_process_ring(struct ring* r) } smp_store_release(r->consumer_pos, cons_pos); + + if (cnt >= n) + goto done; } } while (got_new_data); done: return cnt; } +/* Consume available ring buffer(s) data without event polling, up to n + * records. + * + * Returns number of records consumed across all registered ring buffers (or + * n, whichever is less), or negative number if any of the callbacks return + * error. + */ +int ring_buffer__consume_n(struct ring_buffer *rb, size_t n) +{ + int64_t err, res = 0; + int i; + + for (i = 0; i < rb->ring_cnt; i++) { + struct ring *ring = rb->rings[i]; + + err = ringbuf_process_ring(ring, n); + if (err < 0) + return libbpf_err(err); + res += err; + n -= err; + + if (n == 0) + break; + } + return res > INT_MAX ? INT_MAX : res; +} + /* Consume available ring buffer(s) data without event polling. * Returns number of records consumed across all registered ring buffers (or * INT_MAX, whichever is less), or negative number if any of the callbacks @@ -256,15 +315,17 @@ int ring_buffer__consume(struct ring_buffer *rb) int i; for (i = 0; i < rb->ring_cnt; i++) { - struct ring *ring = &rb->rings[i]; + struct ring *ring = rb->rings[i]; - err = ringbuf_process_ring(ring); + err = ringbuf_process_ring(ring, INT_MAX); if (err < 0) return libbpf_err(err); res += err; + if (res > INT_MAX) { + res = INT_MAX; + break; + } } - if (res > INT_MAX) - return INT_MAX; return res; } @@ -283,15 +344,15 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) for (i = 0; i < cnt; i++) { __u32 ring_id = rb->events[i].data.fd; - struct ring *ring = &rb->rings[ring_id]; + struct ring *ring = rb->rings[ring_id]; - err = ringbuf_process_ring(ring); + err = ringbuf_process_ring(ring, INT_MAX); if (err < 0) return libbpf_err(err); res += err; } if (res > INT_MAX) - return INT_MAX; + res = INT_MAX; return res; } @@ -300,3 +361,324 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb) { return rb->epoll_fd; } + +struct ring *ring_buffer__ring(struct ring_buffer *rb, unsigned int idx) +{ + if (idx >= rb->ring_cnt) + return errno = ERANGE, NULL; + + return rb->rings[idx]; +} + +unsigned long ring__consumer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in ringbuf_process_ring(). */ + return smp_load_acquire(r->consumer_pos); +} + +unsigned long ring__producer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in __bpf_ringbuf_reserve() in + * the kernel. + */ + return smp_load_acquire(r->producer_pos); +} + +size_t ring__avail_data_size(const struct ring *r) +{ + unsigned long cons_pos, prod_pos; + + cons_pos = ring__consumer_pos(r); + prod_pos = ring__producer_pos(r); + return prod_pos - cons_pos; +} + +size_t ring__size(const struct ring *r) +{ + return r->mask + 1; +} + +int ring__map_fd(const struct ring *r) +{ + return r->map_fd; +} + +int ring__consume_n(struct ring *r, size_t n) +{ + int64_t res; + + res = ringbuf_process_ring(r, n); + if (res < 0) + return libbpf_err(res); + + return res > INT_MAX ? INT_MAX : res; +} + +int ring__consume(struct ring *r) +{ + return ring__consume_n(r, INT_MAX); +} + +static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb) +{ + if (rb->consumer_pos) { + munmap(rb->consumer_pos, rb->page_size); + rb->consumer_pos = NULL; + } + if (rb->producer_pos) { + munmap(rb->producer_pos, rb->page_size + 2 * (rb->mask + 1)); + rb->producer_pos = NULL; + } +} + +void user_ring_buffer__free(struct user_ring_buffer *rb) +{ + if (!rb) + return; + + user_ringbuf_unmap_ring(rb); + + if (rb->epoll_fd >= 0) + close(rb->epoll_fd); + + free(rb); +} + +static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) +{ + struct bpf_map_info info; + __u32 len = sizeof(info); + __u64 mmap_sz; + void *tmp; + struct epoll_event *rb_epoll; + int err; + + memset(&info, 0, sizeof(info)); + + err = bpf_map_get_info_by_fd(map_fd, &info, &len); + if (err) { + err = -errno; + pr_warn("user ringbuf: failed to get map info for fd=%d: %s\n", + map_fd, errstr(err)); + return err; + } + + if (info.type != BPF_MAP_TYPE_USER_RINGBUF) { + pr_warn("user ringbuf: map fd=%d is not BPF_MAP_TYPE_USER_RINGBUF\n", map_fd); + return -EINVAL; + } + + rb->map_fd = map_fd; + rb->mask = info.max_entries - 1; + + /* Map read-only consumer page */ + tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0); + if (tmp == MAP_FAILED) { + err = -errno; + pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %s\n", + map_fd, errstr(err)); + return err; + } + rb->consumer_pos = tmp; + + /* Map read-write the producer page and data pages. We map the data + * region as twice the total size of the ring buffer to allow the + * simple reading and writing of samples that wrap around the end of + * the buffer. See the kernel implementation for details. + */ + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries); + return -E2BIG; + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + map_fd, rb->page_size); + if (tmp == MAP_FAILED) { + err = -errno; + pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %s\n", + map_fd, errstr(err)); + return err; + } + + rb->producer_pos = tmp; + rb->data = tmp + rb->page_size; + + rb_epoll = &rb->event; + rb_epoll->events = EPOLLOUT; + if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) { + err = -errno; + pr_warn("user ringbuf: failed to epoll add map fd=%d: %s\n", map_fd, errstr(err)); + return err; + } + + return 0; +} + +struct user_ring_buffer * +user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts) +{ + struct user_ring_buffer *rb; + int err; + + if (!OPTS_VALID(opts, user_ring_buffer_opts)) + return errno = EINVAL, NULL; + + rb = calloc(1, sizeof(*rb)); + if (!rb) + return errno = ENOMEM, NULL; + + rb->page_size = getpagesize(); + + rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (rb->epoll_fd < 0) { + err = -errno; + pr_warn("user ringbuf: failed to create epoll instance: %s\n", errstr(err)); + goto err_out; + } + + err = user_ringbuf_map(rb, map_fd); + if (err) + goto err_out; + + return rb; + +err_out: + user_ring_buffer__free(rb); + return errno = -err, NULL; +} + +static void user_ringbuf_commit(struct user_ring_buffer *rb, void *sample, bool discard) +{ + __u32 new_len; + struct ringbuf_hdr *hdr; + uintptr_t hdr_offset; + + hdr_offset = rb->mask + 1 + (sample - rb->data) - BPF_RINGBUF_HDR_SZ; + hdr = rb->data + (hdr_offset & rb->mask); + + new_len = hdr->len & ~BPF_RINGBUF_BUSY_BIT; + if (discard) + new_len |= BPF_RINGBUF_DISCARD_BIT; + + /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in + * the kernel. + */ + __atomic_exchange_n(&hdr->len, new_len, __ATOMIC_ACQ_REL); +} + +void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample) +{ + user_ringbuf_commit(rb, sample, true); +} + +void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample) +{ + user_ringbuf_commit(rb, sample, false); +} + +void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size) +{ + __u32 avail_size, total_size, max_size; + /* 64-bit to avoid overflow in case of extreme application behavior */ + __u64 cons_pos, prod_pos; + struct ringbuf_hdr *hdr; + + /* The top two bits are used as special flags */ + if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT)) + return errno = E2BIG, NULL; + + /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in + * the kernel. + */ + cons_pos = smp_load_acquire(rb->consumer_pos); + /* Synchronizes with smp_store_release() in user_ringbuf_commit() */ + prod_pos = smp_load_acquire(rb->producer_pos); + + max_size = rb->mask + 1; + avail_size = max_size - (prod_pos - cons_pos); + /* Round up total size to a multiple of 8. */ + total_size = (size + BPF_RINGBUF_HDR_SZ + 7) / 8 * 8; + + if (total_size > max_size) + return errno = E2BIG, NULL; + + if (avail_size < total_size) + return errno = ENOSPC, NULL; + + hdr = rb->data + (prod_pos & rb->mask); + hdr->len = size | BPF_RINGBUF_BUSY_BIT; + hdr->pad = 0; + + /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in + * the kernel. + */ + smp_store_release(rb->producer_pos, prod_pos + total_size); + + return (void *)rb->data + ((prod_pos + BPF_RINGBUF_HDR_SZ) & rb->mask); +} + +static __u64 ns_elapsed_timespec(const struct timespec *start, const struct timespec *end) +{ + __u64 start_ns, end_ns, ns_per_s = 1000000000; + + start_ns = (__u64)start->tv_sec * ns_per_s + start->tv_nsec; + end_ns = (__u64)end->tv_sec * ns_per_s + end->tv_nsec; + + return end_ns - start_ns; +} + +void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, __u32 size, int timeout_ms) +{ + void *sample; + int err, ms_remaining = timeout_ms; + struct timespec start; + + if (timeout_ms < 0 && timeout_ms != -1) + return errno = EINVAL, NULL; + + if (timeout_ms != -1) { + err = clock_gettime(CLOCK_MONOTONIC, &start); + if (err) + return NULL; + } + + do { + int cnt, ms_elapsed; + struct timespec curr; + __u64 ns_per_ms = 1000000; + + sample = user_ring_buffer__reserve(rb, size); + if (sample) + return sample; + else if (errno != ENOSPC) + return NULL; + + /* The kernel guarantees at least one event notification + * delivery whenever at least one sample is drained from the + * ring buffer in an invocation to bpf_ringbuf_drain(). Other + * additional events may be delivered at any time, but only one + * event is guaranteed per bpf_ringbuf_drain() invocation, + * provided that a sample is drained, and the BPF program did + * not pass BPF_RB_NO_WAKEUP to bpf_ringbuf_drain(). If + * BPF_RB_FORCE_WAKEUP is passed to bpf_ringbuf_drain(), a + * wakeup event will be delivered even if no samples are + * drained. + */ + cnt = epoll_wait(rb->epoll_fd, &rb->event, 1, ms_remaining); + if (cnt < 0) + return NULL; + + if (timeout_ms == -1) + continue; + + err = clock_gettime(CLOCK_MONOTONIC, &curr); + if (err) + return NULL; + + ms_elapsed = ns_elapsed_timespec(&start, &curr) / ns_per_ms; + ms_remaining = timeout_ms - ms_elapsed; + } while (ms_remaining > 0); + + /* Try one more time to reserve a sample after the specified timeout has elapsed. */ + return user_ring_buffer__reserve(rb, size); +} diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 0b84d8e6b72a..6a8f5c7a02eb 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -3,9 +3,24 @@ #ifndef __SKEL_INTERNAL_H #define __SKEL_INTERNAL_H +#ifdef __KERNEL__ +#include <linux/fdtable.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/slab.h> +#include <linux/bpf.h> +#else #include <unistd.h> #include <sys/syscall.h> #include <sys/mman.h> +#include <linux/keyctl.h> +#include <stdlib.h> +#include "bpf.h" +#endif + +#ifndef SHA256_DIGEST_LENGTH +#define SHA256_DIGEST_LENGTH 32 +#endif #ifndef __NR_bpf # if defined(__mips__) && defined(_ABIO32) @@ -25,24 +40,23 @@ * requested during loader program generation. */ struct bpf_map_desc { - union { - /* input for the loader prog */ - struct { - __aligned_u64 initial_value; - __u32 max_entries; - }; - /* output of the loader prog */ - struct { - int map_fd; - }; - }; + /* output of the loader prog */ + int map_fd; + /* input for the loader prog */ + __u32 max_entries; + __aligned_u64 initial_value; }; struct bpf_prog_desc { int prog_fd; }; +enum { + BPF_SKEL_KERNEL = (1ULL << 0), +}; + struct bpf_loader_ctx { - size_t sz; + __u32 sz; + __u32 flags; __u32 log_level; __u32 log_size; __u64 log_buf; @@ -55,14 +69,151 @@ struct bpf_load_and_run_opts { __u32 data_sz; __u32 insns_sz; const char *errstr; + void *signature; + __u32 signature_sz; + __s32 keyring_id; + void *excl_prog_hash; + __u32 excl_prog_hash_sz; }; +long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size); + static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size) { +#ifdef __KERNEL__ + return kern_sys_bpf(cmd, attr, size); +#else return syscall(__NR_bpf, cmd, attr, size); +#endif +} + +#ifdef __KERNEL__ +static inline int close(int fd) +{ + return close_fd(fd); +} + +static inline void *skel_alloc(size_t size) +{ + struct bpf_loader_ctx *ctx = kzalloc(size, GFP_KERNEL); + + if (!ctx) + return NULL; + ctx->flags |= BPF_SKEL_KERNEL; + return ctx; +} + +static inline void skel_free(const void *p) +{ + kfree(p); } +/* skel->bss/rodata maps are populated the following way: + * + * For kernel use: + * skel_prep_map_data() allocates kernel memory that kernel module can directly access. + * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value. + * The loader program will perform probe_read_kernel() from maps.rodata.initial_value. + * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and + * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree + * is not necessary. + * + * For user space: + * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly. + * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value. + * The loader program will perform copy_from_user() from maps.rodata.initial_value. + * skel_finalize_map_data() remaps bpf array map value from the kernel memory into + * skel->rodata address. + * + * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for + * both kernel and user space. The generated loader program does + * either bpf_probe_read_kernel() or bpf_copy_from_user() from initial_value + * depending on bpf_loader_ctx->flags. + */ +static inline void skel_free_map_data(void *p, __u64 addr, size_t sz) +{ + if (addr != ~0ULL) + kvfree(p); + /* When addr == ~0ULL the 'p' points to + * ((struct bpf_array *)map)->value. See skel_finalize_map_data. + */ +} + +static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz) +{ + void *addr; + + addr = kvmalloc(val_sz, GFP_KERNEL); + if (!addr) + return NULL; + memcpy(addr, val, val_sz); + return addr; +} + +static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd) +{ + struct bpf_map *map; + void *addr = NULL; + + kvfree((void *) (long) *init_val); + *init_val = ~0ULL; + + /* At this point bpf_load_and_run() finished without error and + * 'fd' is a valid bpf map FD. All sanity checks below should succeed. + */ + map = bpf_map_get(fd); + if (IS_ERR(map)) + return NULL; + if (map->map_type != BPF_MAP_TYPE_ARRAY) + goto out; + addr = ((struct bpf_array *)map)->value; + /* the addr stays valid, since FD is not closed */ +out: + bpf_map_put(map); + return addr; +} + +#else + +static inline void *skel_alloc(size_t size) +{ + return calloc(1, size); +} + +static inline void skel_free(void *p) +{ + free(p); +} + +static inline void skel_free_map_data(void *p, __u64 addr, size_t sz) +{ + munmap(p, sz); +} + +static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz) +{ + void *addr; + + addr = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (addr == (void *) -1) + return NULL; + memcpy(addr, val, val_sz); + return addr; +} + +static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd) +{ + void *addr; + + addr = mmap((void *) (long) *init_val, mmap_sz, flags, MAP_SHARED | MAP_FIXED, fd, 0); + if (addr == (void *) -1) + return NULL; + return addr; +} +#endif + static inline int skel_closenz(int fd) { if (fd > 0) @@ -70,55 +221,213 @@ static inline int skel_closenz(int fd) return -EINVAL; } +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) +#endif + +static inline int skel_map_create(enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, + __u32 value_size, + __u32 max_entries, + const void *excl_prog_hash, + __u32 excl_prog_hash_sz) +{ + const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + + attr.map_type = map_type; + attr.excl_prog_hash = (unsigned long) excl_prog_hash; + attr.excl_prog_hash_size = excl_prog_hash_sz; + + strncpy(attr.map_name, map_name, sizeof(attr.map_name)); + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + + return skel_sys_bpf(BPF_MAP_CREATE, &attr, attr_sz); +} + +static inline int skel_map_update_elem(int fd, const void *key, + const void *value, __u64 flags) +{ + const size_t attr_sz = offsetofend(union bpf_attr, flags); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.map_fd = fd; + attr.key = (long) key; + attr.value = (long) value; + attr.flags = flags; + + return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz); +} + +static inline int skel_map_delete_elem(int fd, const void *key) +{ + const size_t attr_sz = offsetofend(union bpf_attr, flags); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.map_fd = fd; + attr.key = (long)key; + + return skel_sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz); +} + +static inline int skel_map_get_fd_by_id(__u32 id) +{ + const size_t attr_sz = offsetofend(union bpf_attr, flags); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.map_id = id; + + return skel_sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz); +} + +static inline int skel_raw_tracepoint_open(const char *name, int prog_fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.raw_tracepoint.name = (long) name; + attr.raw_tracepoint.prog_fd = prog_fd; + + return skel_sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz); +} + +static inline int skel_link_create(int prog_fd, int target_fd, + enum bpf_attach_type attach_type) +{ + const size_t attr_sz = offsetofend(union bpf_attr, link_create.iter_info_len); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.link_create.prog_fd = prog_fd; + attr.link_create.target_fd = target_fd; + attr.link_create.attach_type = attach_type; + + return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz); +} + +static inline int skel_obj_get_info_by_fd(int fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, info); + __u8 sha[SHA256_DIGEST_LENGTH]; + struct bpf_map_info info; + __u32 info_len = sizeof(info); + union bpf_attr attr; + + memset(&info, 0, sizeof(info)); + info.hash = (long) &sha; + info.hash_size = SHA256_DIGEST_LENGTH; + + memset(&attr, 0, attr_sz); + attr.info.bpf_fd = fd; + attr.info.info = (long) &info; + attr.info.info_len = info_len; + return skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz); +} + +static inline int skel_map_freeze(int fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, map_fd); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.map_fd = fd; + + return skel_sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz); +} +#ifdef __KERNEL__ +#define set_err +#else +#define set_err err = -errno +#endif + static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) { + const size_t prog_load_attr_sz = offsetofend(union bpf_attr, keyring_id); + const size_t test_run_attr_sz = offsetofend(union bpf_attr, test); int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL); + err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, + opts->excl_prog_hash, opts->excl_prog_hash_sz); if (map_fd < 0) { opts->errstr = "failed to create loader map"; - err = -errno; + set_err; goto out; } - err = bpf_map_update_elem(map_fd, &key, opts->data, 0); + err = skel_map_update_elem(map_fd, &key, opts->data, 0); if (err < 0) { opts->errstr = "failed to update loader map"; - err = -errno; + set_err; + goto out; + } + +#ifndef __KERNEL__ + err = skel_map_freeze(map_fd); + if (err < 0) { + opts->errstr = "failed to freeze map"; + set_err; + goto out; + } + err = skel_obj_get_info_by_fd(map_fd); + if (err < 0) { + opts->errstr = "failed to fetch obj info"; + set_err; goto out; } +#endif - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, prog_load_attr_sz); attr.prog_type = BPF_PROG_TYPE_SYSCALL; attr.insns = (long) opts->insns; attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); attr.license = (long) "Dual BSD/GPL"; +#ifndef __KERNEL__ + attr.signature = (long) opts->signature; + attr.signature_size = opts->signature_sz; +#else + if (opts->signature || opts->signature_sz) + pr_warn("signatures are not supported from bpf_preload\n"); +#endif + attr.keyring_id = opts->keyring_id; memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog")); attr.fd_array = (long) &map_fd; attr.log_level = opts->ctx->log_level; attr.log_size = opts->ctx->log_size; attr.log_buf = opts->ctx->log_buf; attr.prog_flags = BPF_F_SLEEPABLE; - prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, prog_load_attr_sz); if (prog_fd < 0) { opts->errstr = "failed to load loader prog"; - err = -errno; + set_err; goto out; } - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, test_run_attr_sz); attr.test.prog_fd = prog_fd; attr.test.ctx_in = (long) opts->ctx; attr.test.ctx_size_in = opts->ctx->sz; - err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr)); + err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz); if (err < 0 || (int)attr.test.retval < 0) { - opts->errstr = "failed to execute loader prog"; if (err < 0) { - err = -errno; + opts->errstr = "failed to execute loader prog"; + set_err; } else { + opts->errstr = "error returned by loader prog"; err = (int)attr.test.retval; +#ifndef __KERNEL__ errno = -err; +#endif } goto out; } diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c deleted file mode 100644 index 146da01979c7..000000000000 --- a/tools/lib/bpf/str_error.c +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -#undef _GNU_SOURCE -#include <string.h> -#include <stdio.h> -#include "str_error.h" - -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - -/* - * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl - * libc, while checking strerror_r() return to avoid having to check this in - * all places calling it. - */ -char *libbpf_strerror_r(int err, char *dst, int len) -{ - int ret = strerror_r(err < 0 ? -err : err, dst, len); - if (ret) - snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); - return dst; -} diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h deleted file mode 100644 index a139334d57b6..000000000000 --- a/tools/lib/bpf/str_error.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __LIBBPF_STR_ERROR_H -#define __LIBBPF_STR_ERROR_H - -char *libbpf_strerror_r(int err, char *dst, int len); -#endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c index ea655318153f..2464bcbd04e0 100644 --- a/tools/lib/bpf/strset.c +++ b/tools/lib/bpf/strset.c @@ -19,19 +19,19 @@ struct strset { struct hashmap *strs_hash; }; -static size_t strset_hash_fn(const void *key, void *ctx) +static size_t strset_hash_fn(long key, void *ctx) { const struct strset *s = ctx; - const char *str = s->strs_data + (long)key; + const char *str = s->strs_data + key; return str_hash(str); } -static bool strset_equal_fn(const void *key1, const void *key2, void *ctx) +static bool strset_equal_fn(long key1, long key2, void *ctx) { const struct strset *s = ctx; - const char *str1 = s->strs_data + (long)key1; - const char *str2 = s->strs_data + (long)key2; + const char *str1 = s->strs_data + key1; + const char *str2 = s->strs_data + key2; return strcmp(str1, str2) == 0; } @@ -67,7 +67,7 @@ struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t ini /* hashmap__add() returns EEXIST if string with the same * content already is in the hash map */ - err = hashmap__add(hash, (void *)off, (void *)off); + err = hashmap__add(hash, off, off); if (err == -EEXIST) continue; /* duplicate */ if (err) @@ -127,7 +127,7 @@ int strset__find_str(struct strset *set, const char *s) new_off = set->strs_data_len; memcpy(p, s, len); - if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off)) + if (hashmap__find(set->strs_hash, new_off, &old_off)) return old_off; return -ENOENT; @@ -165,8 +165,8 @@ int strset__add_str(struct strset *set, const char *s) * contents doesn't exist already (HASHMAP_ADD strategy). If such * string exists, we'll get its offset in old_off (that's old_key). */ - err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off, - HASHMAP_ADD, (const void **)&old_off, NULL); + err = hashmap__insert(set->strs_hash, new_off, new_off, + HASHMAP_ADD, &old_off, NULL); if (err == -EEXIST) return old_off; /* duplicated string, return existing offset */ if (err) diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h new file mode 100644 index 000000000000..43deb05a5197 --- /dev/null +++ b/tools/lib/bpf/usdt.bpf.h @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#ifndef __USDT_BPF_H__ +#define __USDT_BPF_H__ + +#include <linux/errno.h> +#include "bpf_helpers.h" +#include "bpf_tracing.h" + +/* Below types and maps are internal implementation details of libbpf's USDT + * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should + * be considered an unstable API as well and might be adjusted based on user + * feedback from using libbpf's USDT support in production. + */ + +/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal + * map that keeps track of USDT argument specifications. This might be + * necessary if there are a lot of USDT attachments. + */ +#ifndef BPF_USDT_MAX_SPEC_CNT +#define BPF_USDT_MAX_SPEC_CNT 256 +#endif +/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal + * map that keeps track of IP (memory address) mapping to USDT argument + * specification. + * Note, if kernel supports BPF cookies, this map is not used and could be + * resized all the way to 1 to save a bit of memory. + */ +#ifndef BPF_USDT_MAX_IP_CNT +#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT) +#endif + +enum __bpf_usdt_arg_type { + BPF_USDT_ARG_CONST, + BPF_USDT_ARG_REG, + BPF_USDT_ARG_REG_DEREF, + BPF_USDT_ARG_SIB, +}; + +/* + * This struct layout is designed specifically to be backwards/forward + * compatible between libbpf versions for ARG_CONST, ARG_REG, and + * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+. + */ +struct __bpf_usdt_arg_spec { + /* u64 scalar interpreted depending on arg_type, see below */ + __u64 val_off; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* arg location case, see bpf_usdt_arg() for details */ + enum __bpf_usdt_arg_type arg_type: 8; + /* index register offset within struct pt_regs */ + __u16 idx_reg_off: 12; + /* scale factor for index register (1, 2, 4, or 8) */ + __u16 scale_bitshift: 4; + /* reserved for future use, keeps reg_off offset stable */ + __u8 __reserved: 8; +#else + __u8 __reserved: 8; + __u16 idx_reg_off: 12; + __u16 scale_bitshift: 4; + enum __bpf_usdt_arg_type arg_type: 8; +#endif + /* offset of referenced register within struct pt_regs */ + short reg_off; + /* whether arg should be interpreted as signed value */ + bool arg_signed; + /* number of bits that need to be cleared and, optionally, + * sign-extended to cast arguments that are 1, 2, or 4 bytes + * long into final 8-byte u64/s64 value returned to user + */ + char arg_bitshift; +}; + +/* should match USDT_MAX_ARG_CNT in usdt.c exactly */ +#define BPF_USDT_MAX_ARG_CNT 12 +struct __bpf_usdt_spec { + struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT]; + __u64 usdt_cookie; + short arg_cnt; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, BPF_USDT_MAX_SPEC_CNT); + __type(key, int); + __type(value, struct __bpf_usdt_spec); +} __bpf_usdt_specs SEC(".maps") __weak; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, BPF_USDT_MAX_IP_CNT); + __type(key, long); + __type(value, __u32); +} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak; + +extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig; + +static __always_inline +int __bpf_usdt_spec_id(struct pt_regs *ctx) +{ + if (!LINUX_HAS_BPF_COOKIE) { + long ip = PT_REGS_IP(ctx); + int *spec_id_ptr; + + spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip); + return spec_id_ptr ? *spec_id_ptr : -ESRCH; + } + + return bpf_get_attach_cookie(ctx); +} + +/* Return number of USDT arguments defined for currently traced USDT. */ +__weak __hidden +int bpf_usdt_arg_cnt(struct pt_regs *ctx) +{ + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + return spec->arg_cnt; +} + +/* Returns the size in bytes of the #*arg_num* (zero-indexed) USDT argument. + * Returns negative error if argument is not found or arg_num is invalid. + */ +static __always_inline +int bpf_usdt_arg_size(struct pt_regs *ctx, __u64 arg_num) +{ + struct __bpf_usdt_arg_spec *arg_spec; + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + if (arg_num >= BPF_USDT_MAX_ARG_CNT) + return -ENOENT; + barrier_var(arg_num); + if (arg_num >= spec->arg_cnt) + return -ENOENT; + + arg_spec = &spec->args[arg_num]; + + /* arg_spec->arg_bitshift = 64 - arg_sz * 8 + * so: arg_sz = (64 - arg_spec->arg_bitshift) / 8 + */ + return (unsigned int)(64 - arg_spec->arg_bitshift) / 8; +} + +/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res. + * Returns 0 on success; negative error, otherwise. + * On error *res is guaranteed to be set to zero. + */ +__weak __hidden +int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) +{ + struct __bpf_usdt_spec *spec; + struct __bpf_usdt_arg_spec *arg_spec; + unsigned long val, idx; + int err, spec_id; + + *res = 0; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + if (arg_num >= BPF_USDT_MAX_ARG_CNT) + return -ENOENT; + barrier_var(arg_num); + if (arg_num >= spec->arg_cnt) + return -ENOENT; + + arg_spec = &spec->args[arg_num]; + switch (arg_spec->arg_type) { + case BPF_USDT_ARG_CONST: + /* Arg is just a constant ("-4@$-9" in USDT arg spec). + * value is recorded in arg_spec->val_off directly. + */ + val = arg_spec->val_off; + break; + case BPF_USDT_ARG_REG: + /* Arg is in a register (e.g, "8@%rax" in USDT arg spec), + * so we read the contents of that register directly from + * struct pt_regs. To keep things simple user-space parts + * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + break; + case BPF_USDT_ARG_REG_DEREF: + /* Arg is in memory addressed by register, plus some offset + * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is + * identified like with BPF_USDT_ARG_REG case, and the offset + * is in arg_spec->val_off. We first fetch register contents + * from pt_regs, then do another user-space probe read to + * fetch argument value itself. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off); + if (err) + return err; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + val >>= arg_spec->arg_bitshift; +#endif + break; + case BPF_USDT_ARG_SIB: + /* Arg is in memory addressed by SIB (Scale-Index-Base) mode + * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first + * fetch the base register contents and the index register + * contents from pt_regs. Then we calculate the final address + * as base + (index * scale) + offset, and do a user-space + * probe read to fetch the argument value. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off); + if (err) + return err; + err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off)); + if (err) + return err; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + val >>= arg_spec->arg_bitshift; +#endif + break; + default: + return -EINVAL; + } + + /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing + * necessary upper arg_bitshift bits, with sign extension if argument + * is signed + */ + val <<= arg_spec->arg_bitshift; + if (arg_spec->arg_signed) + val = ((long)val) >> arg_spec->arg_bitshift; + else + val = val >> arg_spec->arg_bitshift; + *res = val; + return 0; +} + +/* Retrieve user-specified cookie value provided during attach as + * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie + * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself + * utilizing BPF cookies internally, so user can't use BPF cookie directly + * for USDT programs and has to use bpf_usdt_cookie() API instead. + */ +__weak __hidden +long bpf_usdt_cookie(struct pt_regs *ctx) +{ + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return 0; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return 0; + + return spec->usdt_cookie; +} + +/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */ +#define ___bpf_usdt_args0() ctx +#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); _x; }) +#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); _x; }) +#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); _x; }) +#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); _x; }) +#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); _x; }) +#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); _x; }) +#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); _x; }) +#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); _x; }) +#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); _x; }) +#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); _x; }) +#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); _x; }) +#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); _x; }) +#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args) + +/* + * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for + * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes. + * Original struct pt_regs * context is preserved as 'ctx' argument. + */ +#define BPF_USDT(name, args...) \ +name(struct pt_regs *ctx); \ +static __always_inline typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_usdt_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + +#endif /* __USDT_BPF_H__ */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c new file mode 100644 index 000000000000..d1524f6f54ae --- /dev/null +++ b/tools/lib/bpf/usdt.c @@ -0,0 +1,1649 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libelf.h> +#include <gelf.h> +#include <unistd.h> +#include <linux/ptrace.h> +#include <linux/kernel.h> + +/* s8 will be marked as poison while it's a reg of riscv */ +#if defined(__riscv) +#define rv_s8 s8 +#endif + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "hashmap.h" + +/* libbpf's USDT support consists of BPF-side state/code and user-space + * state/code working together in concert. BPF-side parts are defined in + * usdt.bpf.h header library. User-space state is encapsulated by struct + * usdt_manager and all the supporting code centered around usdt_manager. + * + * usdt.bpf.h defines two BPF maps that usdt_manager expects: USDT spec map + * and IP-to-spec-ID map, which is auxiliary map necessary for kernels that + * don't support BPF cookie (see below). These two maps are implicitly + * embedded into user's end BPF object file when user's code included + * usdt.bpf.h. This means that libbpf doesn't do anything special to create + * these USDT support maps. They are created by normal libbpf logic of + * instantiating BPF maps when opening and loading BPF object. + * + * As such, libbpf is basically unaware of the need to do anything + * USDT-related until the very first call to bpf_program__attach_usdt(), which + * can be called by user explicitly or happen automatically during skeleton + * attach (or, equivalently, through generic bpf_program__attach() call). At + * this point, libbpf will instantiate and initialize struct usdt_manager and + * store it in bpf_object. USDT manager is per-BPF object construct, as each + * independent BPF object might or might not have USDT programs, and thus all + * the expected USDT-related state. There is no coordination between two + * bpf_object in parts of USDT attachment, they are oblivious of each other's + * existence and libbpf is just oblivious, dealing with bpf_object-specific + * USDT state. + * + * Quick crash course on USDTs. + * + * From user-space application's point of view, USDT is essentially just + * a slightly special function call that normally has zero overhead, unless it + * is being traced by some external entity (e.g, BPF-based tool). Here's how + * a typical application can trigger USDT probe: + * + * #include <sys/sdt.h> // provided by systemtap-sdt-devel package + * // folly also provide similar functionality in folly/tracing/StaticTracepoint.h + * + * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y); + * + * USDT is identified by its <provider-name>:<probe-name> pair of names. Each + * individual USDT has a fixed number of arguments (3 in the above example) + * and specifies values of each argument as if it was a function call. + * + * USDT call is actually not a function call, but is instead replaced by + * a single NOP instruction (thus zero overhead, effectively). But in addition + * to that, those USDT macros generate special SHT_NOTE ELF records in + * .note.stapsdt ELF section. Here's an example USDT definition as emitted by + * `readelf -n <binary>`: + * + * stapsdt 0x00000089 NT_STAPSDT (SystemTap probe descriptors) + * Provider: test + * Name: usdt12 + * Location: 0x0000000000549df3, Base: 0x00000000008effa4, Semaphore: 0x0000000000a4606e + * Arguments: -4@-1204(%rbp) -4@%edi -8@-1216(%rbp) -8@%r8 -4@$5 -8@%r9 8@%rdx 8@%r10 -4@$-9 -2@%cx -2@%ax -1@%sil + * + * In this case we have USDT test:usdt12 with 12 arguments. + * + * Location and base are offsets used to calculate absolute IP address of that + * NOP instruction that kernel can replace with an interrupt instruction to + * trigger instrumentation code (BPF program for all that we care about). + * + * Semaphore above is an optional feature. It records an address of a 2-byte + * refcount variable (normally in '.probes' ELF section) used for signaling if + * there is anything that is attached to USDT. This is useful for user + * applications if, for example, they need to prepare some arguments that are + * passed only to USDTs and preparation is expensive. By checking if USDT is + * "activated", an application can avoid paying those costs unnecessarily. + * Recent enough kernel has built-in support for automatically managing this + * refcount, which libbpf expects and relies on. If USDT is defined without + * associated semaphore, this value will be zero. See selftests for semaphore + * examples. + * + * Arguments is the most interesting part. This USDT specification string is + * providing information about all the USDT arguments and their locations. The + * part before @ sign defined byte size of the argument (1, 2, 4, or 8) and + * whether the argument is signed or unsigned (negative size means signed). + * The part after @ sign is assembly-like definition of argument location + * (see [0] for more details). Technically, assembler can provide some pretty + * advanced definitions, but libbpf is currently supporting three most common + * cases: + * 1) immediate constant, see 5th and 9th args above (-4@$5 and -4@-9); + * 2) register value, e.g., 8@%rdx, which means "unsigned 8-byte integer + * whose value is in register %rdx"; + * 3) memory dereference addressed by register, e.g., -4@-1204(%rbp), which + * specifies signed 32-bit integer stored at offset -1204 bytes from + * memory address stored in %rbp. + * + * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + * + * During attachment, libbpf parses all the relevant USDT specifications and + * prepares `struct usdt_spec` (USDT spec), which is then provided to BPF-side + * code through spec map. This allows BPF applications to quickly fetch the + * actual value at runtime using a simple BPF-side code. + * + * With basics out of the way, let's go over less immediately obvious aspects + * of supporting USDTs. + * + * First, there is no special USDT BPF program type. It is actually just + * a uprobe BPF program (which for kernel, at least currently, is just a kprobe + * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference + * that uprobe is usually attached at the function entry, while USDT will + * normally be somewhere inside the function. But it should always be + * pointing to NOP instruction, which makes such uprobes the fastest uprobe + * kind. + * + * Second, it's important to realize that such STAP_PROBEn(provider, name, ...) + * macro invocations can end up being inlined many-many times, depending on + * specifics of each individual user application. So single conceptual USDT + * (identified by provider:name pair of identifiers) is, generally speaking, + * multiple uprobe locations (USDT call sites) in different places in user + * application. Further, again due to inlining, each USDT call site might end + * up having the same argument #N be located in a different place. In one call + * site it could be a constant, in another will end up in a register, and in + * yet another could be some other register or even somewhere on the stack. + * + * As such, "attaching to USDT" means (in general case) attaching the same + * uprobe BPF program to multiple target locations in user application, each + * potentially having a completely different USDT spec associated with it. + * To wire all this up together libbpf allocates a unique integer spec ID for + * each unique USDT spec. Spec IDs are allocated as sequential small integers + * so that they can be used as keys in array BPF map (for performance reasons). + * Spec ID allocation and accounting is big part of what usdt_manager is + * about. This state has to be maintained per-BPF object and coordinate + * between different USDT attachments within the same BPF object. + * + * Spec ID is the key in spec BPF map, value is the actual USDT spec layed out + * as struct usdt_spec. Each invocation of BPF program at runtime needs to + * know its associated spec ID. It gets it either through BPF cookie, which + * libbpf sets to spec ID during attach time, or, if kernel is too old to + * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such + * case. The latter means that some modes of operation can't be supported + * without BPF cookie. Such a mode is attaching to shared library "generically", + * without specifying target process. In such case, it's impossible to + * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode + * is not supported without BPF cookie support. + * + * Note that libbpf is using BPF cookie functionality for its own internal + * needs, so user itself can't rely on BPF cookie feature. To that end, libbpf + * provides conceptually equivalent USDT cookie support. It's still u64 + * user-provided value that can be associated with USDT attachment. Note that + * this will be the same value for all USDT call sites within the same single + * *logical* USDT attachment. This makes sense because to user attaching to + * USDT is a single BPF program triggered for singular USDT probe. The fact + * that this is done at multiple actual locations is a mostly hidden + * implementation details. This USDT cookie value can be fetched with + * bpf_usdt_cookie(ctx) API provided by usdt.bpf.h + * + * Lastly, while single USDT can have tons of USDT call sites, it doesn't + * necessarily have that many different USDT specs. It very well might be + * that 1000 USDT call sites only need 5 different USDT specs, because all the + * arguments are typically contained in a small set of registers or stack + * locations. As such, it's wasteful to allocate as many USDT spec IDs as + * there are USDT call sites. So libbpf tries to be frugal and performs + * on-the-fly deduplication during a single USDT attachment to only allocate + * the minimal required amount of unique USDT specs (and thus spec IDs). This + * is trivially achieved by using USDT spec string (Arguments string from USDT + * note) as a lookup key in a hashmap. USDT spec string uniquely defines + * everything about how to fetch USDT arguments, so two USDT call sites + * sharing USDT spec string can safely share the same USDT spec and spec ID. + * Note, this spec string deduplication is happening only during the same USDT + * attachment, so each USDT spec shares the same USDT cookie value. This is + * not generally true for other USDT attachments within the same BPF object, + * as even if USDT spec string is the same, USDT cookie value can be + * different. It was deemed excessive to try to deduplicate across independent + * USDT attachments by taking into account USDT spec string *and* USDT cookie + * value, which would complicate spec ID accounting significantly for little + * gain. + */ + +#define USDT_BASE_SEC ".stapsdt.base" +#define USDT_SEMA_SEC ".probes" +#define USDT_NOTE_SEC ".note.stapsdt" +#define USDT_NOTE_TYPE 3 +#define USDT_NOTE_NAME "stapsdt" + +/* should match exactly enum __bpf_usdt_arg_type from usdt.bpf.h */ +enum usdt_arg_type { + USDT_ARG_CONST, + USDT_ARG_REG, + USDT_ARG_REG_DEREF, + USDT_ARG_SIB, +}; + +/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */ +struct usdt_arg_spec { + __u64 val_off; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + enum usdt_arg_type arg_type: 8; + __u16 idx_reg_off: 12; + __u16 scale_bitshift: 4; + __u8 __reserved: 8; /* keep reg_off offset stable */ +#else + __u8 __reserved: 8; /* keep reg_off offset stable */ + __u16 idx_reg_off: 12; + __u16 scale_bitshift: 4; + enum usdt_arg_type arg_type: 8; +#endif + short reg_off; + bool arg_signed; + char arg_bitshift; +}; + +/* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */ +#define USDT_MAX_ARG_CNT 12 + +/* should match struct __bpf_usdt_spec from usdt.bpf.h */ +struct usdt_spec { + struct usdt_arg_spec args[USDT_MAX_ARG_CNT]; + __u64 usdt_cookie; + short arg_cnt; +}; + +struct usdt_note { + const char *provider; + const char *name; + /* USDT args specification string, e.g.: + * "-4@%esi -4@-24(%rbp) -4@%ecx 2@%ax 8@%rdx" + */ + const char *args; + long loc_addr; + long base_addr; + long sema_addr; +}; + +struct usdt_target { + long abs_ip; + long rel_ip; + long sema_off; + struct usdt_spec spec; + const char *spec_str; +}; + +struct usdt_manager { + struct bpf_map *specs_map; + struct bpf_map *ip_to_spec_id_map; + + int *free_spec_ids; + size_t free_spec_cnt; + size_t next_free_spec_id; + + bool has_bpf_cookie; + bool has_sema_refcnt; + bool has_uprobe_multi; +}; + +struct usdt_manager *usdt_manager_new(struct bpf_object *obj) +{ + static const char *ref_ctr_sysfs_path = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"; + struct usdt_manager *man; + struct bpf_map *specs_map, *ip_to_spec_id_map; + + specs_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_specs"); + ip_to_spec_id_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_ip_to_spec_id"); + if (!specs_map || !ip_to_spec_id_map) { + pr_warn("usdt: failed to find USDT support BPF maps, did you forget to include bpf/usdt.bpf.h?\n"); + return ERR_PTR(-ESRCH); + } + + man = calloc(1, sizeof(*man)); + if (!man) + return ERR_PTR(-ENOMEM); + + man->specs_map = specs_map; + man->ip_to_spec_id_map = ip_to_spec_id_map; + + /* Detect if BPF cookie is supported for kprobes. + * We don't need IP-to-ID mapping if we can use BPF cookies. + * Added in: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value") + */ + man->has_bpf_cookie = kernel_supports(obj, FEAT_BPF_COOKIE); + + /* Detect kernel support for automatic refcounting of USDT semaphore. + * If this is not supported, USDTs with semaphores will not be supported. + * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") + */ + man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0; + + /* + * Detect kernel support for uprobe multi link to be used for attaching + * usdt probes. + */ + man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK); + return man; +} + +void usdt_manager_free(struct usdt_manager *man) +{ + if (IS_ERR_OR_NULL(man)) + return; + + free(man->free_spec_ids); + free(man); +} + +static int sanity_check_usdt_elf(Elf *elf, const char *path) +{ + GElf_Ehdr ehdr; + int endianness; + + if (elf_kind(elf) != ELF_K_ELF) { + pr_warn("usdt: unrecognized ELF kind %d for '%s'\n", elf_kind(elf), path); + return -EBADF; + } + + switch (gelf_getclass(elf)) { + case ELFCLASS64: + if (sizeof(void *) != 8) { + pr_warn("usdt: attaching to 64-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + case ELFCLASS32: + if (sizeof(void *) != 4) { + pr_warn("usdt: attaching to 32-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + default: + pr_warn("usdt: unsupported ELF class for '%s'\n", path); + return -EBADF; + } + + if (!gelf_getehdr(elf, &ehdr)) + return -EINVAL; + + if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) { + pr_warn("usdt: unsupported type of ELF binary '%s' (%d), only ET_EXEC and ET_DYN are supported\n", + path, ehdr.e_type); + return -EBADF; + } + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + endianness = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + endianness = ELFDATA2MSB; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + if (endianness != ehdr.e_ident[EI_DATA]) { + pr_warn("usdt: ELF endianness mismatch for '%s'\n", path); + return -EBADF; + } + + return 0; +} + +static int find_elf_sec_by_name(Elf *elf, const char *sec_name, GElf_Shdr *shdr, Elf_Scn **scn) +{ + Elf_Scn *sec = NULL; + size_t shstrndx; + + if (elf_getshdrstrndx(elf, &shstrndx)) + return -EINVAL; + + /* check if ELF is corrupted and avoid calling elf_strptr if yes */ + if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) + return -EINVAL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *name; + + if (!gelf_getshdr(sec, shdr)) + return -EINVAL; + + name = elf_strptr(elf, shstrndx, shdr->sh_name); + if (name && strcmp(sec_name, name) == 0) { + *scn = sec; + return 0; + } + } + + return -ENOENT; +} + +struct elf_seg { + long start; + long end; + long offset; + bool is_exec; +}; + +static int cmp_elf_segs(const void *_a, const void *_b) +{ + const struct elf_seg *a = _a; + const struct elf_seg *b = _b; + + return a->start < b->start ? -1 : 1; +} + +static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, size_t *seg_cnt) +{ + GElf_Phdr phdr; + size_t n; + int i, err; + struct elf_seg *seg; + void *tmp; + + *seg_cnt = 0; + + if (elf_getphdrnum(elf, &n)) { + err = -errno; + return err; + } + + for (i = 0; i < n; i++) { + if (!gelf_getphdr(elf, i, &phdr)) { + err = -errno; + return err; + } + + pr_debug("usdt: discovered PHDR #%d in '%s': vaddr 0x%lx memsz 0x%lx offset 0x%lx type 0x%lx flags 0x%lx\n", + i, path, (long)phdr.p_vaddr, (long)phdr.p_memsz, (long)phdr.p_offset, + (long)phdr.p_type, (long)phdr.p_flags); + if (phdr.p_type != PT_LOAD) + continue; + + tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); + if (!tmp) + return -ENOMEM; + + *segs = tmp; + seg = *segs + *seg_cnt; + (*seg_cnt)++; + + seg->start = phdr.p_vaddr; + seg->end = phdr.p_vaddr + phdr.p_memsz; + seg->offset = phdr.p_offset; + seg->is_exec = phdr.p_flags & PF_X; + } + + if (*seg_cnt == 0) { + pr_warn("usdt: failed to find PT_LOAD program headers in '%s'\n", path); + return -ESRCH; + } + + qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); + return 0; +} + +static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) +{ + char path[PATH_MAX], line[PATH_MAX], mode[16]; + size_t seg_start, seg_end, seg_off; + struct elf_seg *seg; + int tmp_pid, i, err; + FILE *f; + + *seg_cnt = 0; + + /* Handle containerized binaries only accessible from + * /proc/<pid>/root/<path>. They will be reported as just /<path> in + * /proc/<pid>/maps. + */ + if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid) + goto proceed; + + if (!realpath(lib_path, path)) { + pr_warn("usdt: failed to get absolute path of '%s' (err %s), using path as is...\n", + lib_path, errstr(-errno)); + libbpf_strlcpy(path, lib_path, sizeof(path)); + } + +proceed: + sprintf(line, "/proc/%d/maps", pid); + f = fopen(line, "re"); + if (!f) { + err = -errno; + pr_warn("usdt: failed to open '%s' to get base addr of '%s': %s\n", + line, lib_path, errstr(err)); + return err; + } + + /* We need to handle lines with no path at the end: + * + * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so + * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0 + * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so + */ + while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n", + &seg_start, &seg_end, mode, &seg_off, line) == 5) { + void *tmp; + + /* to handle no path case (see above) we need to capture line + * without skipping any whitespaces. So we need to strip + * leading whitespaces manually here + */ + i = 0; + while (isblank(line[i])) + i++; + if (strcmp(line + i, path) != 0) + continue; + + pr_debug("usdt: discovered segment for lib '%s': addrs %zx-%zx mode %s offset %zx\n", + path, seg_start, seg_end, mode, seg_off); + + /* ignore non-executable sections for shared libs */ + if (mode[2] != 'x') + continue; + + tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); + if (!tmp) { + err = -ENOMEM; + goto err_out; + } + + *segs = tmp; + seg = *segs + *seg_cnt; + *seg_cnt += 1; + + seg->start = seg_start; + seg->end = seg_end; + seg->offset = seg_off; + seg->is_exec = true; + } + + if (*seg_cnt == 0) { + pr_warn("usdt: failed to find '%s' (resolved to '%s') within PID %d memory mappings\n", + lib_path, path, pid); + err = -ESRCH; + goto err_out; + } + + qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); + err = 0; +err_out: + fclose(f); + return err; +} + +static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long virtaddr) +{ + struct elf_seg *seg; + int i; + + /* for ELF binaries (both executables and shared libraries), we are + * given virtual address (absolute for executables, relative for + * libraries) which should match address range of [seg_start, seg_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->start <= virtaddr && virtaddr < seg->end) + return seg; + } + return NULL; +} + +static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset) +{ + struct elf_seg *seg; + int i; + + /* for VMA segments from /proc/<pid>/maps file, provided "address" is + * actually a file offset, so should be fall within logical + * offset-based range of [offset_start, offset_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start)) + return seg; + } + return NULL; +} + +static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, + size_t desc_off, struct usdt_note *usdt_note); + +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); + +static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, + const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, + struct usdt_target **out_targets, size_t *out_target_cnt) +{ + size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0; + struct elf_seg *segs = NULL, *vma_segs = NULL; + struct usdt_target *targets = NULL, *target; + long base_addr = 0; + Elf_Scn *notes_scn, *base_scn; + GElf_Shdr base_shdr, notes_shdr; + GElf_Ehdr ehdr; + GElf_Nhdr nhdr; + Elf_Data *data; + int err; + + *out_targets = NULL; + *out_target_cnt = 0; + + err = find_elf_sec_by_name(elf, USDT_NOTE_SEC, ¬es_shdr, ¬es_scn); + if (err) { + pr_warn("usdt: no USDT notes section (%s) found in '%s'\n", USDT_NOTE_SEC, path); + return err; + } + + if (notes_shdr.sh_type != SHT_NOTE || !gelf_getehdr(elf, &ehdr)) { + pr_warn("usdt: invalid USDT notes section (%s) in '%s'\n", USDT_NOTE_SEC, path); + return -EINVAL; + } + + err = parse_elf_segs(elf, path, &segs, &seg_cnt); + if (err) { + pr_warn("usdt: failed to process ELF program segments for '%s': %s\n", + path, errstr(err)); + goto err_out; + } + + /* .stapsdt.base ELF section is optional, but is used for prelink + * offset compensation (see a big comment further below) + */ + if (find_elf_sec_by_name(elf, USDT_BASE_SEC, &base_shdr, &base_scn) == 0) + base_addr = base_shdr.sh_addr; + + data = elf_getdata(notes_scn, 0); + off = 0; + while ((off = gelf_getnote(data, off, &nhdr, &name_off, &desc_off)) > 0) { + long usdt_abs_ip, usdt_rel_ip, usdt_sema_off = 0; + struct usdt_note note; + struct elf_seg *seg = NULL; + void *tmp; + + err = parse_usdt_note(&nhdr, data->d_buf, name_off, desc_off, ¬e); + if (err) + goto err_out; + + if (strcmp(note.provider, usdt_provider) != 0 || strcmp(note.name, usdt_name) != 0) + continue; + + /* We need to compensate "prelink effect". See [0] for details, + * relevant parts quoted here: + * + * Each SDT probe also expands into a non-allocated ELF note. You can + * find this by looking at SHT_NOTE sections and decoding the format; + * see below for details. Because the note is non-allocated, it means + * there is no runtime cost, and also preserved in both stripped files + * and .debug files. + * + * However, this means that prelink won't adjust the note's contents + * for address offsets. Instead, this is done via the .stapsdt.base + * section. This is a special section that is added to the text. We + * will only ever have one of these sections in a final link and it + * will only ever be one byte long. Nothing about this section itself + * matters, we just use it as a marker to detect prelink address + * adjustments. + * + * Each probe note records the link-time address of the .stapsdt.base + * section alongside the probe PC address. The decoder compares the + * base address stored in the note with the .stapsdt.base section's + * sh_addr. Initially these are the same, but the section header will + * be adjusted by prelink. So the decoder applies the difference to + * the probe PC address to get the correct prelinked PC address; the + * same adjustment is applied to the semaphore address, if any. + * + * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + */ + usdt_abs_ip = note.loc_addr; + if (base_addr && note.base_addr) + usdt_abs_ip += base_addr - note.base_addr; + + /* When attaching uprobes (which is what USDTs basically are) + * kernel expects file offset to be specified, not a relative + * virtual address, so we need to translate virtual address to + * file offset, for both ET_EXEC and ET_DYN binaries. + */ + seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_abs_ip); + goto err_out; + } + if (!seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + usdt_abs_ip); + goto err_out; + } + /* translate from virtual address to file offset */ + usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset; + + if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) { + /* If we don't have BPF cookie support but need to + * attach to a shared library, we'll need to know and + * record absolute addresses of attach points due to + * the need to lookup USDT spec by absolute IP of + * triggered uprobe. Doing this resolution is only + * possible when we have a specific PID of the process + * that's using specified shared library. BPF cookie + * removes the absolute address limitation as we don't + * need to do this lookup (we just use BPF cookie as + * an index of USDT spec), so for newer kernels with + * BPF cookie support libbpf supports USDT attachment + * to shared libraries with no PID filter. + */ + if (pid < 0) { + pr_warn("usdt: attaching to shared libraries without specific PID is not supported on current kernel\n"); + err = -ENOTSUP; + goto err_out; + } + + /* vma_segs are lazily initialized only if necessary */ + if (vma_seg_cnt == 0) { + err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt); + if (err) { + pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %s\n", + pid, path, errstr(err)); + goto err_out; + } + } + + seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_rel_ip); + goto err_out; + } + + usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip; + } + + pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path, + note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, + seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0); + + /* Adjust semaphore address to be a file offset */ + if (note.sema_addr) { + if (!man->has_sema_refcnt) { + pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", + usdt_provider, usdt_name, path); + err = -ENOTSUP; + goto err_out; + } + + seg = find_elf_seg(segs, seg_cnt, note.sema_addr); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", + usdt_provider, usdt_name, path, note.sema_addr); + goto err_out; + } + if (seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx] for semaphore of '%s:%s' at 0x%lx is executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + note.sema_addr); + goto err_out; + } + + usdt_sema_off = note.sema_addr - seg->start + seg->offset; + + pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", + path, note.sema_addr, note.base_addr, usdt_sema_off, + seg->start, seg->end, seg->offset); + } + + /* Record adjusted addresses and offsets and parse USDT spec */ + tmp = libbpf_reallocarray(targets, target_cnt + 1, sizeof(*targets)); + if (!tmp) { + err = -ENOMEM; + goto err_out; + } + targets = tmp; + + target = &targets[target_cnt]; + memset(target, 0, sizeof(*target)); + + target->abs_ip = usdt_abs_ip; + target->rel_ip = usdt_rel_ip; + target->sema_off = usdt_sema_off; + + /* notes.args references strings from ELF itself, so they can + * be referenced safely until elf_end() call + */ + target->spec_str = note.args; + + err = parse_usdt_spec(&target->spec, ¬e, usdt_cookie); + if (err) + goto err_out; + + target_cnt++; + } + + *out_targets = targets; + *out_target_cnt = target_cnt; + err = target_cnt; + +err_out: + free(segs); + free(vma_segs); + if (err < 0) + free(targets); + return err; +} + +struct bpf_link_usdt { + struct bpf_link link; + + struct usdt_manager *usdt_man; + + size_t spec_cnt; + int *spec_ids; + + size_t uprobe_cnt; + struct { + long abs_ip; + struct bpf_link *link; + } *uprobes; + + struct bpf_link *multi_link; +}; + +static int bpf_link_usdt_detach(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + struct usdt_manager *man = usdt_link->usdt_man; + int i; + + bpf_link__destroy(usdt_link->multi_link); + + /* When having multi_link, uprobe_cnt is 0 */ + for (i = 0; i < usdt_link->uprobe_cnt; i++) { + /* detach underlying uprobe link */ + bpf_link__destroy(usdt_link->uprobes[i].link); + /* there is no need to update specs map because it will be + * unconditionally overwritten on subsequent USDT attaches, + * but if BPF cookies are not used we need to remove entry + * from ip_to_spec_id map, otherwise we'll run into false + * conflicting IP errors + */ + if (!man->has_bpf_cookie) { + /* not much we can do about errors here */ + (void)bpf_map_delete_elem(bpf_map__fd(man->ip_to_spec_id_map), + &usdt_link->uprobes[i].abs_ip); + } + } + + /* try to return the list of previously used spec IDs to usdt_manager + * for future reuse for subsequent USDT attaches + */ + if (!man->free_spec_ids) { + /* if there were no free spec IDs yet, just transfer our IDs */ + man->free_spec_ids = usdt_link->spec_ids; + man->free_spec_cnt = usdt_link->spec_cnt; + usdt_link->spec_ids = NULL; + } else { + /* otherwise concat IDs */ + size_t new_cnt = man->free_spec_cnt + usdt_link->spec_cnt; + int *new_free_ids; + + new_free_ids = libbpf_reallocarray(man->free_spec_ids, new_cnt, + sizeof(*new_free_ids)); + /* If we couldn't resize free_spec_ids, we'll just leak + * a bunch of free IDs; this is very unlikely to happen and if + * system is so exhausted on memory, it's the least of user's + * concerns, probably. + * So just do our best here to return those IDs to usdt_manager. + * Another edge case when we can legitimately get NULL is when + * new_cnt is zero, which can happen in some edge cases, so we + * need to be careful about that. + */ + if (new_free_ids || new_cnt == 0) { + memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids, + usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids)); + man->free_spec_ids = new_free_ids; + man->free_spec_cnt = new_cnt; + } + } + + return 0; +} + +static void bpf_link_usdt_dealloc(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + + free(usdt_link->spec_ids); + free(usdt_link->uprobes); + free(usdt_link); +} + +static size_t specs_hash_fn(long key, void *ctx) +{ + return str_hash((char *)key); +} + +static bool specs_equal_fn(long key1, long key2, void *ctx) +{ + return strcmp((char *)key1, (char *)key2) == 0; +} + +static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash, + struct bpf_link_usdt *link, struct usdt_target *target, + int *spec_id, bool *is_new) +{ + long tmp; + void *new_ids; + int err; + + /* check if we already allocated spec ID for this spec string */ + if (hashmap__find(specs_hash, target->spec_str, &tmp)) { + *spec_id = tmp; + *is_new = false; + return 0; + } + + /* otherwise it's a new ID that needs to be set up in specs map and + * returned back to usdt_manager when USDT link is detached + */ + new_ids = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); + if (!new_ids) + return -ENOMEM; + link->spec_ids = new_ids; + + /* get next free spec ID, giving preference to free list, if not empty */ + if (man->free_spec_cnt) { + *spec_id = man->free_spec_ids[man->free_spec_cnt - 1]; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, *spec_id); + if (err) + return err; + + man->free_spec_cnt--; + } else { + /* don't allocate spec ID bigger than what fits in specs map */ + if (man->next_free_spec_id >= bpf_map__max_entries(man->specs_map)) + return -E2BIG; + + *spec_id = man->next_free_spec_id; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, *spec_id); + if (err) + return err; + + man->next_free_spec_id++; + } + + /* remember new spec ID in the link for later return back to free list on detach */ + link->spec_ids[link->spec_cnt] = *spec_id; + link->spec_cnt++; + *is_new = true; + return 0; +} + +struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, + pid_t pid, const char *path, + const char *usdt_provider, const char *usdt_name, + __u64 usdt_cookie) +{ + unsigned long *offsets = NULL, *ref_ctr_offsets = NULL; + int i, err, spec_map_fd, ip_map_fd; + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct hashmap *specs_hash = NULL; + struct bpf_link_usdt *link = NULL; + struct usdt_target *targets = NULL; + __u64 *cookies = NULL; + struct elf_fd elf_fd; + size_t target_cnt; + + spec_map_fd = bpf_map__fd(man->specs_map); + ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); + + err = elf_open(path, &elf_fd); + if (err) + return libbpf_err_ptr(err); + + err = sanity_check_usdt_elf(elf_fd.elf, path); + if (err) + goto err_out; + + /* normalize PID filter */ + if (pid < 0) + pid = -1; + else if (pid == 0) + pid = getpid(); + + /* discover USDT in given binary, optionally limiting + * activations to a given PID, if pid > 0 + */ + err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name, + usdt_cookie, &targets, &target_cnt); + if (err <= 0) { + err = (err == 0) ? -ENOENT : err; + goto err_out; + } + + specs_hash = hashmap__new(specs_hash_fn, specs_equal_fn, NULL); + if (IS_ERR(specs_hash)) { + err = PTR_ERR(specs_hash); + goto err_out; + } + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto err_out; + } + + link->usdt_man = man; + link->link.detach = &bpf_link_usdt_detach; + link->link.dealloc = &bpf_link_usdt_dealloc; + + if (man->has_uprobe_multi) { + offsets = calloc(target_cnt, sizeof(*offsets)); + cookies = calloc(target_cnt, sizeof(*cookies)); + ref_ctr_offsets = calloc(target_cnt, sizeof(*ref_ctr_offsets)); + + if (!offsets || !ref_ctr_offsets || !cookies) { + err = -ENOMEM; + goto err_out; + } + } else { + link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); + if (!link->uprobes) { + err = -ENOMEM; + goto err_out; + } + } + + for (i = 0; i < target_cnt; i++) { + struct usdt_target *target = &targets[i]; + struct bpf_link *uprobe_link; + bool is_new; + int spec_id; + + /* Spec ID can be either reused or newly allocated. If it is + * newly allocated, we'll need to fill out spec map, otherwise + * entire spec should be valid and can be just used by a new + * uprobe. We reuse spec when USDT arg spec is identical. We + * also never share specs between two different USDT + * attachments ("links"), so all the reused specs already + * share USDT cookie value implicitly. + */ + err = allocate_spec_id(man, specs_hash, link, target, &spec_id, &is_new); + if (err) + goto err_out; + + if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) { + err = -errno; + pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %s\n", + spec_id, usdt_provider, usdt_name, path, errstr(err)); + goto err_out; + } + if (!man->has_bpf_cookie && + bpf_map_update_elem(ip_map_fd, &target->abs_ip, &spec_id, BPF_NOEXIST)) { + err = -errno; + if (err == -EEXIST) { + pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n", + spec_id, usdt_provider, usdt_name, path); + } else { + pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %s\n", + target->abs_ip, spec_id, usdt_provider, usdt_name, + path, errstr(err)); + } + goto err_out; + } + + if (man->has_uprobe_multi) { + offsets[i] = target->rel_ip; + ref_ctr_offsets[i] = target->sema_off; + cookies[i] = spec_id; + } else { + opts.ref_ctr_offset = target->sema_off; + opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0; + uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, + target->rel_ip, &opts); + err = libbpf_get_error(uprobe_link); + if (err) { + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %s\n", + i, usdt_provider, usdt_name, path, errstr(err)); + goto err_out; + } + + link->uprobes[i].link = uprobe_link; + link->uprobes[i].abs_ip = target->abs_ip; + link->uprobe_cnt++; + } + } + + if (man->has_uprobe_multi) { + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts_multi, + .ref_ctr_offsets = ref_ctr_offsets, + .offsets = offsets, + .cookies = cookies, + .cnt = target_cnt, + ); + + link->multi_link = bpf_program__attach_uprobe_multi(prog, pid, path, + NULL, &opts_multi); + if (!link->multi_link) { + err = -errno; + pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %s\n", + usdt_provider, usdt_name, path, errstr(err)); + goto err_out; + } + + free(offsets); + free(ref_ctr_offsets); + free(cookies); + } + + free(targets); + hashmap__free(specs_hash); + elf_close(&elf_fd); + return &link->link; + +err_out: + free(offsets); + free(ref_ctr_offsets); + free(cookies); + + if (link) + bpf_link__destroy(&link->link); + free(targets); + hashmap__free(specs_hash); + elf_close(&elf_fd); + return libbpf_err_ptr(err); +} + +/* Parse out USDT ELF note from '.note.stapsdt' section. + * Logic inspired by perf's code. + */ +static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, + struct usdt_note *note) +{ + const char *provider, *name, *args; + long addrs[3]; + size_t len; + + /* sanity check USDT note name and type first */ + if (strncmp(data + name_off, USDT_NOTE_NAME, nhdr->n_namesz) != 0) + return -EINVAL; + if (nhdr->n_type != USDT_NOTE_TYPE) + return -EINVAL; + + /* sanity check USDT note contents ("description" in ELF terminology) */ + len = nhdr->n_descsz; + data = data + desc_off; + + /* +3 is the very minimum required to store three empty strings */ + if (len < sizeof(addrs) + 3) + return -EINVAL; + + /* get location, base, and semaphore addrs */ + memcpy(&addrs, data, sizeof(addrs)); + + /* parse string fields: provider, name, args */ + provider = data + sizeof(addrs); + + name = (const char *)memchr(provider, '\0', data + len - provider); + if (!name) /* non-zero-terminated provider */ + return -EINVAL; + name++; + if (name >= data + len || *name == '\0') /* missing or empty name */ + return -EINVAL; + + args = memchr(name, '\0', data + len - name); + if (!args) /* non-zero-terminated name */ + return -EINVAL; + ++args; + if (args >= data + len) /* missing arguments spec */ + return -EINVAL; + + note->provider = provider; + note->name = name; + if (*args == '\0' || *args == ':') + note->args = ""; + else + note->args = args; + note->loc_addr = addrs[0]; + note->base_addr = addrs[1]; + note->sema_addr = addrs[2]; + + return 0; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz); + +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie) +{ + struct usdt_arg_spec *arg; + const char *s; + int arg_sz, len; + + spec->usdt_cookie = usdt_cookie; + spec->arg_cnt = 0; + + s = note->args; + while (s[0]) { + if (spec->arg_cnt >= USDT_MAX_ARG_CNT) { + pr_warn("usdt: too many USDT arguments (> %d) for '%s:%s' with args spec '%s'\n", + USDT_MAX_ARG_CNT, note->provider, note->name, note->args); + return -E2BIG; + } + + arg = &spec->args[spec->arg_cnt]; + len = parse_usdt_arg(s, spec->arg_cnt, arg, &arg_sz); + if (len < 0) + return len; + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + spec->arg_cnt, s, arg_sz); + return -EINVAL; + } + + s += len; + spec->arg_cnt++; + } + + return 0; +} + +/* Architecture-specific logic for parsing USDT argument location specs */ + +#if defined(__x86_64__) || defined(__i386__) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *names[4]; + size_t pt_regs_off; + } reg_map[] = { +#ifdef __x86_64__ +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64) +#else +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32) +#endif + { {"rip", "eip", "", ""}, reg_off(rip, eip) }, + { {"rax", "eax", "ax", "al"}, reg_off(rax, eax) }, + { {"rbx", "ebx", "bx", "bl"}, reg_off(rbx, ebx) }, + { {"rcx", "ecx", "cx", "cl"}, reg_off(rcx, ecx) }, + { {"rdx", "edx", "dx", "dl"}, reg_off(rdx, edx) }, + { {"rsi", "esi", "si", "sil"}, reg_off(rsi, esi) }, + { {"rdi", "edi", "di", "dil"}, reg_off(rdi, edi) }, + { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) }, + { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) }, +#undef reg_off +#ifdef __x86_64__ + { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) }, + { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) }, + { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) }, + { {"r11", "r11d", "r11w", "r11b"}, offsetof(struct pt_regs, r11) }, + { {"r12", "r12d", "r12w", "r12b"}, offsetof(struct pt_regs, r12) }, + { {"r13", "r13d", "r13w", "r13b"}, offsetof(struct pt_regs, r13) }, + { {"r14", "r14d", "r14w", "r14b"}, offsetof(struct pt_regs, r14) }, + { {"r15", "r15d", "r15w", "r15b"}, offsetof(struct pt_regs, r15) }, +#endif + }; + int i, j; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + for (j = 0; j < ARRAY_SIZE(reg_map[i].names); j++) { + if (strcmp(reg_name, reg_map[i].names[j]) == 0) + return reg_map[i].pt_regs_off; + } + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) +{ + char reg_name[16] = {0}, idx_reg_name[16] = {0}; + int len, reg_off, idx_reg_off, scale = 1; + long off = 0; + + if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n", + arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 || + sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n", + arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 || + sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n", + arg_sz, &off, reg_name, idx_reg_name, &len) == 4 || + sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n", + arg_sz, reg_name, idx_reg_name, &len) == 3 + ) { + /* + * Scale Index Base case: + * 1@-96(%rbp,%rax,8) + * 1@(%rbp,%rax,8) + * 1@-96(%rbp,%rax) + * 1@(%rbp,%rax) + */ + arg->arg_type = USDT_ARG_SIB; + arg->val_off = off; + + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + + idx_reg_off = calc_pt_regs_off(idx_reg_name); + if (idx_reg_off < 0) + return idx_reg_off; + arg->idx_reg_off = idx_reg_off; + + /* validate scale factor and set fields directly */ + switch (scale) { + case 1: arg->scale_bitshift = 0; break; + case 2: arg->scale_bitshift = 1; break; + case 4: arg->scale_bitshift = 2; break; + case 8: arg->scale_bitshift = 3; break; + default: + pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale); + return -EINVAL; + } + } else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", + arg_sz, &off, reg_name, &len) == 3) { + /* Memory dereference case, e.g., -4@-20(%rbp) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", arg_sz, reg_name, &len) == 2) { + /* Memory dereference case without offset, e.g., 8@(%rsp) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) { + /* Register read case, e.g., -4@%eax */ + arg->arg_type = USDT_ARG_REG; + /* register read has no memory offset */ + arg->val_off = 0; + + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ $%ld %n", arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@$71 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + return len; +} + +#elif defined(__s390x__) + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) +{ + unsigned int reg; + int len; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", arg_sz, &off, ®, &len) == 3) { + /* Memory dereference case, e.g., -2@-28(%r15) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + if (reg > 15) { + pr_warn("usdt: unrecognized register '%%r%u'\n", reg); + return -EINVAL; + } + arg->reg_off = offsetof(user_pt_regs, gprs[reg]); + } else if (sscanf(arg_str, " %d @ %%r%u %n", arg_sz, ®, &len) == 2) { + /* Register read case, e.g., -8@%r0 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + if (reg > 15) { + pr_warn("usdt: unrecognized register '%%r%u'\n", reg); + return -EINVAL; + } + arg->reg_off = offsetof(user_pt_regs, gprs[reg]); + } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@71 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + return len; +} + +#elif defined(__aarch64__) + +static int calc_pt_regs_off(const char *reg_name) +{ + int reg_num; + + if (sscanf(reg_name, "x%d", ®_num) == 1) { + if (reg_num >= 0 && reg_num < 31) + return offsetof(struct user_pt_regs, regs[reg_num]); + } else if (strcmp(reg_name, "sp") == 0) { + return offsetof(struct user_pt_regs, sp); + } + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) +{ + char reg_name[16]; + int len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] , %ld ] %n", arg_sz, reg_name, &off, &len) == 3) { + /* Memory dereference case, e.g., -4@[sp, 96] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", arg_sz, reg_name, &len) == 2) { + /* Memory dereference case, e.g., -4@[sp] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) { + /* Register read case, e.g., -8@x4 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + return len; +} + +#elif defined(__riscv) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *name; + size_t pt_regs_off; + } reg_map[] = { + { "ra", offsetof(struct user_regs_struct, ra) }, + { "sp", offsetof(struct user_regs_struct, sp) }, + { "gp", offsetof(struct user_regs_struct, gp) }, + { "tp", offsetof(struct user_regs_struct, tp) }, + { "a0", offsetof(struct user_regs_struct, a0) }, + { "a1", offsetof(struct user_regs_struct, a1) }, + { "a2", offsetof(struct user_regs_struct, a2) }, + { "a3", offsetof(struct user_regs_struct, a3) }, + { "a4", offsetof(struct user_regs_struct, a4) }, + { "a5", offsetof(struct user_regs_struct, a5) }, + { "a6", offsetof(struct user_regs_struct, a6) }, + { "a7", offsetof(struct user_regs_struct, a7) }, + { "s0", offsetof(struct user_regs_struct, s0) }, + { "s1", offsetof(struct user_regs_struct, s1) }, + { "s2", offsetof(struct user_regs_struct, s2) }, + { "s3", offsetof(struct user_regs_struct, s3) }, + { "s4", offsetof(struct user_regs_struct, s4) }, + { "s5", offsetof(struct user_regs_struct, s5) }, + { "s6", offsetof(struct user_regs_struct, s6) }, + { "s7", offsetof(struct user_regs_struct, s7) }, + { "s8", offsetof(struct user_regs_struct, rv_s8) }, + { "s9", offsetof(struct user_regs_struct, s9) }, + { "s10", offsetof(struct user_regs_struct, s10) }, + { "s11", offsetof(struct user_regs_struct, s11) }, + { "t0", offsetof(struct user_regs_struct, t0) }, + { "t1", offsetof(struct user_regs_struct, t1) }, + { "t2", offsetof(struct user_regs_struct, t2) }, + { "t3", offsetof(struct user_regs_struct, t3) }, + { "t4", offsetof(struct user_regs_struct, t4) }, + { "t5", offsetof(struct user_regs_struct, t5) }, + { "t6", offsetof(struct user_regs_struct, t6) }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + if (strcmp(reg_name, reg_map[i].name) == 0) + return reg_map[i].pt_regs_off; + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) +{ + char reg_name[16]; + int len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", arg_sz, &off, reg_name, &len) == 3) { + /* Memory dereference case, e.g., -8@-88(s0) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) { + /* Register read case, e.g., -8@a1 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + return len; +} + +#elif defined(__arm__) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *name; + size_t pt_regs_off; + } reg_map[] = { + { "r0", offsetof(struct pt_regs, uregs[0]) }, + { "r1", offsetof(struct pt_regs, uregs[1]) }, + { "r2", offsetof(struct pt_regs, uregs[2]) }, + { "r3", offsetof(struct pt_regs, uregs[3]) }, + { "r4", offsetof(struct pt_regs, uregs[4]) }, + { "r5", offsetof(struct pt_regs, uregs[5]) }, + { "r6", offsetof(struct pt_regs, uregs[6]) }, + { "r7", offsetof(struct pt_regs, uregs[7]) }, + { "r8", offsetof(struct pt_regs, uregs[8]) }, + { "r9", offsetof(struct pt_regs, uregs[9]) }, + { "r10", offsetof(struct pt_regs, uregs[10]) }, + { "fp", offsetof(struct pt_regs, uregs[11]) }, + { "ip", offsetof(struct pt_regs, uregs[12]) }, + { "sp", offsetof(struct pt_regs, uregs[13]) }, + { "lr", offsetof(struct pt_regs, uregs[14]) }, + { "pc", offsetof(struct pt_regs, uregs[15]) }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + if (strcmp(reg_name, reg_map[i].name) == 0) + return reg_map[i].pt_regs_off; + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) +{ + char reg_name[16]; + int len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] , #%ld ] %n", + arg_sz, reg_name, &off, &len) == 3) { + /* Memory dereference case, e.g., -4@[fp, #96] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", arg_sz, reg_name, &len) == 2) { + /* Memory dereference case, e.g., -4@[sp] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ #%ld %n", arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@#5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) { + /* Register read case, e.g., -8@r4 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + return len; +} + +#else + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) +{ + pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n"); + return -ENOTSUP; +} + +#endif diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c deleted file mode 100644 index edafe56664f3..000000000000 --- a/tools/lib/bpf/xsk.c +++ /dev/null @@ -1,1249 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * AF_XDP user-space access library. - * - * Copyright(c) 2018 - 2019 Intel Corporation. - * - * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> - */ - -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <arpa/inet.h> -#include <asm/barrier.h> -#include <linux/compiler.h> -#include <linux/ethtool.h> -#include <linux/filter.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <linux/if_xdp.h> -#include <linux/kernel.h> -#include <linux/list.h> -#include <linux/sockios.h> -#include <net/if.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/types.h> -#include <linux/if_link.h> - -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_internal.h" -#include "xsk.h" - -/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal - * uses of deprecated APIs - */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - -#ifndef SOL_XDP - #define SOL_XDP 283 -#endif - -#ifndef AF_XDP - #define AF_XDP 44 -#endif - -#ifndef PF_XDP - #define PF_XDP AF_XDP -#endif - -enum xsk_prog { - XSK_PROG_FALLBACK, - XSK_PROG_REDIRECT_FLAGS, -}; - -struct xsk_umem { - struct xsk_ring_prod *fill_save; - struct xsk_ring_cons *comp_save; - char *umem_area; - struct xsk_umem_config config; - int fd; - int refcount; - struct list_head ctx_list; - bool rx_ring_setup_done; - bool tx_ring_setup_done; -}; - -struct xsk_ctx { - struct xsk_ring_prod *fill; - struct xsk_ring_cons *comp; - __u32 queue_id; - struct xsk_umem *umem; - int refcount; - int ifindex; - struct list_head list; - int prog_fd; - int link_fd; - int xsks_map_fd; - char ifname[IFNAMSIZ]; - bool has_bpf_link; -}; - -struct xsk_socket { - struct xsk_ring_cons *rx; - struct xsk_ring_prod *tx; - __u64 outstanding_tx; - struct xsk_ctx *ctx; - struct xsk_socket_config config; - int fd; -}; - -struct xsk_nl_info { - bool xdp_prog_attached; - int ifindex; - int fd; -}; - -/* Up until and including Linux 5.3 */ -struct xdp_ring_offset_v1 { - __u64 producer; - __u64 consumer; - __u64 desc; -}; - -/* Up until and including Linux 5.3 */ -struct xdp_mmap_offsets_v1 { - struct xdp_ring_offset_v1 rx; - struct xdp_ring_offset_v1 tx; - struct xdp_ring_offset_v1 fr; - struct xdp_ring_offset_v1 cr; -}; - -int xsk_umem__fd(const struct xsk_umem *umem) -{ - return umem ? umem->fd : -EINVAL; -} - -int xsk_socket__fd(const struct xsk_socket *xsk) -{ - return xsk ? xsk->fd : -EINVAL; -} - -static bool xsk_page_aligned(void *buffer) -{ - unsigned long addr = (unsigned long)buffer; - - return !(addr & (getpagesize() - 1)); -} - -static void xsk_set_umem_config(struct xsk_umem_config *cfg, - const struct xsk_umem_config *usr_cfg) -{ - if (!usr_cfg) { - cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; - cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; - cfg->flags = XSK_UMEM__DEFAULT_FLAGS; - return; - } - - cfg->fill_size = usr_cfg->fill_size; - cfg->comp_size = usr_cfg->comp_size; - cfg->frame_size = usr_cfg->frame_size; - cfg->frame_headroom = usr_cfg->frame_headroom; - cfg->flags = usr_cfg->flags; -} - -static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, - const struct xsk_socket_config *usr_cfg) -{ - if (!usr_cfg) { - cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; - cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg->libbpf_flags = 0; - cfg->xdp_flags = 0; - cfg->bind_flags = 0; - return 0; - } - - if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) - return -EINVAL; - - cfg->rx_size = usr_cfg->rx_size; - cfg->tx_size = usr_cfg->tx_size; - cfg->libbpf_flags = usr_cfg->libbpf_flags; - cfg->xdp_flags = usr_cfg->xdp_flags; - cfg->bind_flags = usr_cfg->bind_flags; - - return 0; -} - -static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) -{ - struct xdp_mmap_offsets_v1 off_v1; - - /* getsockopt on a kernel <= 5.3 has no flags fields. - * Copy over the offsets to the correct places in the >=5.4 format - * and put the flags where they would have been on that kernel. - */ - memcpy(&off_v1, off, sizeof(off_v1)); - - off->rx.producer = off_v1.rx.producer; - off->rx.consumer = off_v1.rx.consumer; - off->rx.desc = off_v1.rx.desc; - off->rx.flags = off_v1.rx.consumer + sizeof(__u32); - - off->tx.producer = off_v1.tx.producer; - off->tx.consumer = off_v1.tx.consumer; - off->tx.desc = off_v1.tx.desc; - off->tx.flags = off_v1.tx.consumer + sizeof(__u32); - - off->fr.producer = off_v1.fr.producer; - off->fr.consumer = off_v1.fr.consumer; - off->fr.desc = off_v1.fr.desc; - off->fr.flags = off_v1.fr.consumer + sizeof(__u32); - - off->cr.producer = off_v1.cr.producer; - off->cr.consumer = off_v1.cr.consumer; - off->cr.desc = off_v1.cr.desc; - off->cr.flags = off_v1.cr.consumer + sizeof(__u32); -} - -static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) -{ - socklen_t optlen; - int err; - - optlen = sizeof(*off); - err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); - if (err) - return err; - - if (optlen == sizeof(*off)) - return 0; - - if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { - xsk_mmap_offsets_v1(off); - return 0; - } - - return -EINVAL; -} - -static int xsk_create_umem_rings(struct xsk_umem *umem, int fd, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp) -{ - struct xdp_mmap_offsets off; - void *map; - int err; - - err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, - &umem->config.fill_size, - sizeof(umem->config.fill_size)); - if (err) - return -errno; - - err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, - &umem->config.comp_size, - sizeof(umem->config.comp_size)); - if (err) - return -errno; - - err = xsk_get_mmap_offsets(fd, &off); - if (err) - return -errno; - - map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, - XDP_UMEM_PGOFF_FILL_RING); - if (map == MAP_FAILED) - return -errno; - - fill->mask = umem->config.fill_size - 1; - fill->size = umem->config.fill_size; - fill->producer = map + off.fr.producer; - fill->consumer = map + off.fr.consumer; - fill->flags = map + off.fr.flags; - fill->ring = map + off.fr.desc; - fill->cached_cons = umem->config.fill_size; - - map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, - XDP_UMEM_PGOFF_COMPLETION_RING); - if (map == MAP_FAILED) { - err = -errno; - goto out_mmap; - } - - comp->mask = umem->config.comp_size - 1; - comp->size = umem->config.comp_size; - comp->producer = map + off.cr.producer; - comp->consumer = map + off.cr.consumer; - comp->flags = map + off.cr.flags; - comp->ring = map + off.cr.desc; - - return 0; - -out_mmap: - munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); - return err; -} - -DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) -int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, - __u64 size, struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *usr_config) -{ - struct xdp_umem_reg mr; - struct xsk_umem *umem; - int err; - - if (!umem_area || !umem_ptr || !fill || !comp) - return -EFAULT; - if (!size && !xsk_page_aligned(umem_area)) - return -EINVAL; - - umem = calloc(1, sizeof(*umem)); - if (!umem) - return -ENOMEM; - - umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); - if (umem->fd < 0) { - err = -errno; - goto out_umem_alloc; - } - - umem->umem_area = umem_area; - INIT_LIST_HEAD(&umem->ctx_list); - xsk_set_umem_config(&umem->config, usr_config); - - memset(&mr, 0, sizeof(mr)); - mr.addr = (uintptr_t)umem_area; - mr.len = size; - mr.chunk_size = umem->config.frame_size; - mr.headroom = umem->config.frame_headroom; - mr.flags = umem->config.flags; - - err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); - if (err) { - err = -errno; - goto out_socket; - } - - err = xsk_create_umem_rings(umem, umem->fd, fill, comp); - if (err) - goto out_socket; - - umem->fill_save = fill; - umem->comp_save = comp; - *umem_ptr = umem; - return 0; - -out_socket: - close(umem->fd); -out_umem_alloc: - free(umem); - return err; -} - -struct xsk_umem_config_v1 { - __u32 fill_size; - __u32 comp_size; - __u32 frame_size; - __u32 frame_headroom; -}; - -COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) -int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, - __u64 size, struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *usr_config) -{ - struct xsk_umem_config config; - - memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1)); - config.flags = 0; - - return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, - &config); -} - -static enum xsk_prog get_xsk_prog(void) -{ - enum xsk_prog detected = XSK_PROG_FALLBACK; - __u32 size_out, retval, duration; - char data_in = 0, data_out; - struct bpf_insn insns[] = { - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, XDP_PASS), - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - BPF_EXIT_INSN(), - }; - int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); - - map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); - if (map_fd < 0) - return detected; - - insns[0].imm = map_fd; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); - if (prog_fd < 0) { - close(map_fd); - return detected; - } - - ret = bpf_prog_test_run(prog_fd, 0, &data_in, 1, &data_out, &size_out, &retval, &duration); - if (!ret && retval == XDP_PASS) - detected = XSK_PROG_REDIRECT_FLAGS; - close(prog_fd); - close(map_fd); - return detected; -} - -static int xsk_load_xdp_prog(struct xsk_socket *xsk) -{ - static const int log_buf_size = 16 * 1024; - struct xsk_ctx *ctx = xsk->ctx; - char log_buf[log_buf_size]; - int prog_fd; - - /* This is the fallback C-program: - * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) - * { - * int ret, index = ctx->rx_queue_index; - * - * // A set entry here means that the correspnding queue_id - * // has an active AF_XDP socket bound to it. - * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); - * if (ret > 0) - * return ret; - * - * // Fallback for pre-5.3 kernels, not supporting default - * // action in the flags parameter. - * if (bpf_map_lookup_elem(&xsks_map, &index)) - * return bpf_redirect_map(&xsks_map, index, 0); - * return XDP_PASS; - * } - */ - struct bpf_insn prog[] = { - /* r2 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), - /* *(u32 *)(r10 - 4) = r2 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* r3 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_3, 2), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - /* if w0 != 0 goto pc+13 */ - BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), - /* r2 = r10 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - /* r2 += -4 */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* call bpf_map_lookup_elem */ - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - /* r1 = r0 */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - /* r0 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_0, 2), - /* if r1 == 0 goto pc+5 */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), - /* r2 = *(u32 *)(r10 - 4) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* r3 = 0 */ - BPF_MOV64_IMM(BPF_REG_3, 0), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - /* The jumps are to this instruction */ - BPF_EXIT_INSN(), - }; - - /* This is the post-5.3 kernel C-program: - * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) - * { - * return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS); - * } - */ - struct bpf_insn prog_redirect_flags[] = { - /* r2 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* r3 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_3, 2), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - BPF_EXIT_INSN(), - }; - size_t insns_cnt[] = {sizeof(prog) / sizeof(struct bpf_insn), - sizeof(prog_redirect_flags) / sizeof(struct bpf_insn), - }; - struct bpf_insn *progs[] = {prog, prog_redirect_flags}; - enum xsk_prog option = get_xsk_prog(); - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = log_buf, - .log_size = log_buf_size, - ); - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", - progs[option], insns_cnt[option], &opts); - if (prog_fd < 0) { - pr_warn("BPF log buffer:\n%s", log_buf); - return prog_fd; - } - - ctx->prog_fd = prog_fd; - return 0; -} - -static int xsk_create_bpf_link(struct xsk_socket *xsk) -{ - DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); - struct xsk_ctx *ctx = xsk->ctx; - __u32 prog_id = 0; - int link_fd; - int err; - - err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); - if (err) { - pr_warn("getting XDP prog id failed\n"); - return err; - } - - /* if there's a netlink-based XDP prog loaded on interface, bail out - * and ask user to do the removal by himself - */ - if (prog_id) { - pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n"); - return -EINVAL; - } - - opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE); - - link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts); - if (link_fd < 0) { - pr_warn("bpf_link_create failed: %s\n", strerror(errno)); - return link_fd; - } - - ctx->link_fd = link_fd; - return 0; -} - -static int xsk_get_max_queues(struct xsk_socket *xsk) -{ - struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; - struct xsk_ctx *ctx = xsk->ctx; - struct ifreq ifr = {}; - int fd, err, ret; - - fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); - if (fd < 0) - return -errno; - - ifr.ifr_data = (void *)&channels; - libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); - err = ioctl(fd, SIOCETHTOOL, &ifr); - if (err && errno != EOPNOTSUPP) { - ret = -errno; - goto out; - } - - if (err) { - /* If the device says it has no channels, then all traffic - * is sent to a single stream, so max queues = 1. - */ - ret = 1; - } else { - /* Take the max of rx, tx, combined. Drivers return - * the number of channels in different ways. - */ - ret = max(channels.max_rx, channels.max_tx); - ret = max(ret, (int)channels.max_combined); - } - -out: - close(fd); - return ret; -} - -static int xsk_create_bpf_maps(struct xsk_socket *xsk) -{ - struct xsk_ctx *ctx = xsk->ctx; - int max_queues; - int fd; - - max_queues = xsk_get_max_queues(xsk); - if (max_queues < 0) - return max_queues; - - fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", - sizeof(int), sizeof(int), max_queues, NULL); - if (fd < 0) - return fd; - - ctx->xsks_map_fd = fd; - - return 0; -} - -static void xsk_delete_bpf_maps(struct xsk_socket *xsk) -{ - struct xsk_ctx *ctx = xsk->ctx; - - bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id); - close(ctx->xsks_map_fd); -} - -static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) -{ - __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); - __u32 map_len = sizeof(struct bpf_map_info); - struct bpf_prog_info prog_info = {}; - struct xsk_ctx *ctx = xsk->ctx; - struct bpf_map_info map_info; - int fd, err; - - err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); - if (err) - return err; - - num_maps = prog_info.nr_map_ids; - - map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); - if (!map_ids) - return -ENOMEM; - - memset(&prog_info, 0, prog_len); - prog_info.nr_map_ids = num_maps; - prog_info.map_ids = (__u64)(unsigned long)map_ids; - - err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); - if (err) - goto out_map_ids; - - ctx->xsks_map_fd = -1; - - for (i = 0; i < prog_info.nr_map_ids; i++) { - fd = bpf_map_get_fd_by_id(map_ids[i]); - if (fd < 0) - continue; - - memset(&map_info, 0, map_len); - err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); - if (err) { - close(fd); - continue; - } - - if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) { - ctx->xsks_map_fd = fd; - break; - } - - close(fd); - } - - if (ctx->xsks_map_fd == -1) - err = -ENOENT; - -out_map_ids: - free(map_ids); - return err; -} - -static int xsk_set_bpf_maps(struct xsk_socket *xsk) -{ - struct xsk_ctx *ctx = xsk->ctx; - - return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id, - &xsk->fd, 0); -} - -static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) -{ - struct bpf_link_info link_info; - __u32 link_len; - __u32 id = 0; - int err; - int fd; - - while (true) { - err = bpf_link_get_next_id(id, &id); - if (err) { - if (errno == ENOENT) { - err = 0; - break; - } - pr_warn("can't get next link: %s\n", strerror(errno)); - break; - } - - fd = bpf_link_get_fd_by_id(id); - if (fd < 0) { - if (errno == ENOENT) - continue; - pr_warn("can't get link by id (%u): %s\n", id, strerror(errno)); - err = -errno; - break; - } - - link_len = sizeof(struct bpf_link_info); - memset(&link_info, 0, link_len); - err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len); - if (err) { - pr_warn("can't get link info: %s\n", strerror(errno)); - close(fd); - break; - } - if (link_info.type == BPF_LINK_TYPE_XDP) { - if (link_info.xdp.ifindex == ifindex) { - *link_fd = fd; - if (prog_id) - *prog_id = link_info.prog_id; - break; - } - } - close(fd); - } - - return err; -} - -static bool xsk_probe_bpf_link(void) -{ - LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE); - struct bpf_insn insns[2] = { - BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), - BPF_EXIT_INSN() - }; - int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns); - int ifindex_lo = 1; - bool ret = false; - int err; - - err = xsk_link_lookup(ifindex_lo, NULL, &link_fd); - if (err) - return ret; - - if (link_fd >= 0) - return true; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); - if (prog_fd < 0) - return ret; - - link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts); - close(prog_fd); - - if (link_fd >= 0) { - ret = true; - close(link_fd); - } - - return ret; -} - -static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) -{ - char ifname[IFNAMSIZ]; - struct xsk_ctx *ctx; - char *interface; - - ctx = calloc(1, sizeof(*ctx)); - if (!ctx) - return -ENOMEM; - - interface = if_indextoname(ifindex, &ifname[0]); - if (!interface) { - free(ctx); - return -errno; - } - - ctx->ifindex = ifindex; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); - - xsk->ctx = ctx; - xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); - - return 0; -} - -static int xsk_init_xdp_res(struct xsk_socket *xsk, - int *xsks_map_fd) -{ - struct xsk_ctx *ctx = xsk->ctx; - int err; - - err = xsk_create_bpf_maps(xsk); - if (err) - return err; - - err = xsk_load_xdp_prog(xsk); - if (err) - goto err_load_xdp_prog; - - if (ctx->has_bpf_link) - err = xsk_create_bpf_link(xsk); - else - err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd, - xsk->config.xdp_flags); - - if (err) - goto err_attach_xdp_prog; - - if (!xsk->rx) - return err; - - err = xsk_set_bpf_maps(xsk); - if (err) - goto err_set_bpf_maps; - - return err; - -err_set_bpf_maps: - if (ctx->has_bpf_link) - close(ctx->link_fd); - else - bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); -err_attach_xdp_prog: - close(ctx->prog_fd); -err_load_xdp_prog: - xsk_delete_bpf_maps(xsk); - return err; -} - -static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id) -{ - struct xsk_ctx *ctx = xsk->ctx; - int err; - - ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (ctx->prog_fd < 0) { - err = -errno; - goto err_prog_fd; - } - err = xsk_lookup_bpf_maps(xsk); - if (err) - goto err_lookup_maps; - - if (!xsk->rx) - return err; - - err = xsk_set_bpf_maps(xsk); - if (err) - goto err_set_maps; - - return err; - -err_set_maps: - close(ctx->xsks_map_fd); -err_lookup_maps: - close(ctx->prog_fd); -err_prog_fd: - if (ctx->has_bpf_link) - close(ctx->link_fd); - return err; -} - -static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) -{ - struct xsk_socket *xsk = _xdp; - struct xsk_ctx *ctx = xsk->ctx; - __u32 prog_id = 0; - int err; - - if (ctx->has_bpf_link) - err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd); - else - err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); - - if (err) - return err; - - err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) : - xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id); - - if (!err && xsks_map_fd) - *xsks_map_fd = ctx->xsks_map_fd; - - return err; -} - -static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, - __u32 queue_id) -{ - struct xsk_ctx *ctx; - - if (list_empty(&umem->ctx_list)) - return NULL; - - list_for_each_entry(ctx, &umem->ctx_list, list) { - if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) { - ctx->refcount++; - return ctx; - } - } - - return NULL; -} - -static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap) -{ - struct xsk_umem *umem = ctx->umem; - struct xdp_mmap_offsets off; - int err; - - if (--ctx->refcount) - return; - - if (!unmap) - goto out_free; - - err = xsk_get_mmap_offsets(umem->fd, &off); - if (err) - goto out_free; - - munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * - sizeof(__u64)); - munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size * - sizeof(__u64)); - -out_free: - list_del(&ctx->list); - free(ctx); -} - -static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, - struct xsk_umem *umem, int ifindex, - const char *ifname, __u32 queue_id, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp) -{ - struct xsk_ctx *ctx; - int err; - - ctx = calloc(1, sizeof(*ctx)); - if (!ctx) - return NULL; - - if (!umem->fill_save) { - err = xsk_create_umem_rings(umem, xsk->fd, fill, comp); - if (err) { - free(ctx); - return NULL; - } - } else if (umem->fill_save != fill || umem->comp_save != comp) { - /* Copy over rings to new structs. */ - memcpy(fill, umem->fill_save, sizeof(*fill)); - memcpy(comp, umem->comp_save, sizeof(*comp)); - } - - ctx->ifindex = ifindex; - ctx->refcount = 1; - ctx->umem = umem; - ctx->queue_id = queue_id; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); - - ctx->fill = fill; - ctx->comp = comp; - list_add(&ctx->list, &umem->ctx_list); - return ctx; -} - -static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) -{ - free(xsk->ctx); - free(xsk); -} - -int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) -{ - xsk->ctx->xsks_map_fd = fd; - return xsk_set_bpf_maps(xsk); -} - -int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) -{ - struct xsk_socket *xsk; - int res; - - xsk = calloc(1, sizeof(*xsk)); - if (!xsk) - return -ENOMEM; - - res = xsk_create_xsk_struct(ifindex, xsk); - if (res) { - free(xsk); - return -EINVAL; - } - - res = __xsk_setup_xdp_prog(xsk, xsks_map_fd); - - xsk_destroy_xsk_struct(xsk); - - return res; -} - -int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_socket_config *usr_config) -{ - bool unmap, rx_setup_done = false, tx_setup_done = false; - void *rx_map = NULL, *tx_map = NULL; - struct sockaddr_xdp sxdp = {}; - struct xdp_mmap_offsets off; - struct xsk_socket *xsk; - struct xsk_ctx *ctx; - int err, ifindex; - - if (!umem || !xsk_ptr || !(rx || tx)) - return -EFAULT; - - unmap = umem->fill_save != fill; - - xsk = calloc(1, sizeof(*xsk)); - if (!xsk) - return -ENOMEM; - - err = xsk_set_xdp_socket_config(&xsk->config, usr_config); - if (err) - goto out_xsk_alloc; - - xsk->outstanding_tx = 0; - ifindex = if_nametoindex(ifname); - if (!ifindex) { - err = -errno; - goto out_xsk_alloc; - } - - if (umem->refcount++ > 0) { - xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); - if (xsk->fd < 0) { - err = -errno; - goto out_xsk_alloc; - } - } else { - xsk->fd = umem->fd; - rx_setup_done = umem->rx_ring_setup_done; - tx_setup_done = umem->tx_ring_setup_done; - } - - ctx = xsk_get_ctx(umem, ifindex, queue_id); - if (!ctx) { - if (!fill || !comp) { - err = -EFAULT; - goto out_socket; - } - - ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, - fill, comp); - if (!ctx) { - err = -ENOMEM; - goto out_socket; - } - } - xsk->ctx = ctx; - xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); - - if (rx && !rx_setup_done) { - err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, - &xsk->config.rx_size, - sizeof(xsk->config.rx_size)); - if (err) { - err = -errno; - goto out_put_ctx; - } - if (xsk->fd == umem->fd) - umem->rx_ring_setup_done = true; - } - if (tx && !tx_setup_done) { - err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, - &xsk->config.tx_size, - sizeof(xsk->config.tx_size)); - if (err) { - err = -errno; - goto out_put_ctx; - } - if (xsk->fd == umem->fd) - umem->tx_ring_setup_done = true; - } - - err = xsk_get_mmap_offsets(xsk->fd, &off); - if (err) { - err = -errno; - goto out_put_ctx; - } - - if (rx) { - rx_map = mmap(NULL, off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_RX_RING); - if (rx_map == MAP_FAILED) { - err = -errno; - goto out_put_ctx; - } - - rx->mask = xsk->config.rx_size - 1; - rx->size = xsk->config.rx_size; - rx->producer = rx_map + off.rx.producer; - rx->consumer = rx_map + off.rx.consumer; - rx->flags = rx_map + off.rx.flags; - rx->ring = rx_map + off.rx.desc; - rx->cached_prod = *rx->producer; - rx->cached_cons = *rx->consumer; - } - xsk->rx = rx; - - if (tx) { - tx_map = mmap(NULL, off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_TX_RING); - if (tx_map == MAP_FAILED) { - err = -errno; - goto out_mmap_rx; - } - - tx->mask = xsk->config.tx_size - 1; - tx->size = xsk->config.tx_size; - tx->producer = tx_map + off.tx.producer; - tx->consumer = tx_map + off.tx.consumer; - tx->flags = tx_map + off.tx.flags; - tx->ring = tx_map + off.tx.desc; - tx->cached_prod = *tx->producer; - /* cached_cons is r->size bigger than the real consumer pointer - * See xsk_prod_nb_free - */ - tx->cached_cons = *tx->consumer + xsk->config.tx_size; - } - xsk->tx = tx; - - sxdp.sxdp_family = PF_XDP; - sxdp.sxdp_ifindex = ctx->ifindex; - sxdp.sxdp_queue_id = ctx->queue_id; - if (umem->refcount > 1) { - sxdp.sxdp_flags |= XDP_SHARED_UMEM; - sxdp.sxdp_shared_umem_fd = umem->fd; - } else { - sxdp.sxdp_flags = xsk->config.bind_flags; - } - - err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); - if (err) { - err = -errno; - goto out_mmap_tx; - } - - ctx->prog_fd = -1; - - if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { - err = __xsk_setup_xdp_prog(xsk, NULL); - if (err) - goto out_mmap_tx; - } - - *xsk_ptr = xsk; - umem->fill_save = NULL; - umem->comp_save = NULL; - return 0; - -out_mmap_tx: - if (tx) - munmap(tx_map, off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc)); -out_mmap_rx: - if (rx) - munmap(rx_map, off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc)); -out_put_ctx: - xsk_put_ctx(ctx, unmap); -out_socket: - if (--umem->refcount) - close(xsk->fd); -out_xsk_alloc: - free(xsk); - return err; -} - -int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, - const struct xsk_socket_config *usr_config) -{ - if (!umem) - return -EFAULT; - - return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, - rx, tx, umem->fill_save, - umem->comp_save, usr_config); -} - -int xsk_umem__delete(struct xsk_umem *umem) -{ - if (!umem) - return 0; - - if (umem->refcount) - return -EBUSY; - - close(umem->fd); - free(umem); - - return 0; -} - -void xsk_socket__delete(struct xsk_socket *xsk) -{ - size_t desc_sz = sizeof(struct xdp_desc); - struct xdp_mmap_offsets off; - struct xsk_umem *umem; - struct xsk_ctx *ctx; - int err; - - if (!xsk) - return; - - ctx = xsk->ctx; - umem = ctx->umem; - if (ctx->prog_fd != -1) { - xsk_delete_bpf_maps(xsk); - close(ctx->prog_fd); - if (ctx->has_bpf_link) - close(ctx->link_fd); - } - - err = xsk_get_mmap_offsets(xsk->fd, &off); - if (!err) { - if (xsk->rx) { - munmap(xsk->rx->ring - off.rx.desc, - off.rx.desc + xsk->config.rx_size * desc_sz); - } - if (xsk->tx) { - munmap(xsk->tx->ring - off.tx.desc, - off.tx.desc + xsk->config.tx_size * desc_sz); - } - } - - xsk_put_ctx(ctx, true); - - umem->refcount--; - /* Do not close an fd that also has an associated umem connected - * to it. - */ - if (xsk->fd != umem->fd) - close(xsk->fd); - free(xsk); -} diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h deleted file mode 100644 index 64e9c57fd792..000000000000 --- a/tools/lib/bpf/xsk.h +++ /dev/null @@ -1,336 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* - * AF_XDP user-space access library. - * - * Copyright (c) 2018 - 2019 Intel Corporation. - * Copyright (c) 2019 Facebook - * - * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> - */ - -#ifndef __LIBBPF_XSK_H -#define __LIBBPF_XSK_H - -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <linux/if_xdp.h> - -#include "libbpf.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* This whole API has been deprecated and moved to libxdp that can be found at - * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so - * it should just be linking with libxdp instead of libbpf for this set of - * functionality. If not, please submit a bug report on the aforementioned page. - */ - -/* Load-Acquire Store-Release barriers used by the XDP socket - * library. The following macros should *NOT* be considered part of - * the xsk.h API, and is subject to change anytime. - * - * LIBRARY INTERNAL - */ - -#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x) -#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v) - -#if defined(__i386__) || defined(__x86_64__) -# define libbpf_smp_store_release(p, v) \ - do { \ - asm volatile("" : : : "memory"); \ - __XSK_WRITE_ONCE(*p, v); \ - } while (0) -# define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ - asm volatile("" : : : "memory"); \ - ___p1; \ - }) -#elif defined(__aarch64__) -# define libbpf_smp_store_release(p, v) \ - asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory") -# define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1; \ - asm volatile ("ldar %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ - ___p1; \ - }) -#elif defined(__riscv) -# define libbpf_smp_store_release(p, v) \ - do { \ - asm volatile ("fence rw,w" : : : "memory"); \ - __XSK_WRITE_ONCE(*p, v); \ - } while (0) -# define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ - asm volatile ("fence r,rw" : : : "memory"); \ - ___p1; \ - }) -#endif - -#ifndef libbpf_smp_store_release -#define libbpf_smp_store_release(p, v) \ - do { \ - __sync_synchronize(); \ - __XSK_WRITE_ONCE(*p, v); \ - } while (0) -#endif - -#ifndef libbpf_smp_load_acquire -#define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ - __sync_synchronize(); \ - ___p1; \ - }) -#endif - -/* LIBRARY INTERNAL -- END */ - -/* Do not access these members directly. Use the functions below. */ -#define DEFINE_XSK_RING(name) \ -struct name { \ - __u32 cached_prod; \ - __u32 cached_cons; \ - __u32 mask; \ - __u32 size; \ - __u32 *producer; \ - __u32 *consumer; \ - void *ring; \ - __u32 *flags; \ -} - -DEFINE_XSK_RING(xsk_ring_prod); -DEFINE_XSK_RING(xsk_ring_cons); - -/* For a detailed explanation on the memory barriers associated with the - * ring, please take a look at net/xdp/xsk_queue.h. - */ - -struct xsk_umem; -struct xsk_socket; - -static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, - __u32 idx) -{ - __u64 *addrs = (__u64 *)fill->ring; - - return &addrs[idx & fill->mask]; -} - -static inline const __u64 * -xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx) -{ - const __u64 *addrs = (const __u64 *)comp->ring; - - return &addrs[idx & comp->mask]; -} - -static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, - __u32 idx) -{ - struct xdp_desc *descs = (struct xdp_desc *)tx->ring; - - return &descs[idx & tx->mask]; -} - -static inline const struct xdp_desc * -xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) -{ - const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring; - - return &descs[idx & rx->mask]; -} - -static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r) -{ - return *r->flags & XDP_RING_NEED_WAKEUP; -} - -static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) -{ - __u32 free_entries = r->cached_cons - r->cached_prod; - - if (free_entries >= nb) - return free_entries; - - /* Refresh the local tail pointer. - * cached_cons is r->size bigger than the real consumer pointer so - * that this addition can be avoided in the more frequently - * executed code that computs free_entries in the beginning of - * this function. Without this optimization it whould have been - * free_entries = r->cached_prod - r->cached_cons + r->size. - */ - r->cached_cons = libbpf_smp_load_acquire(r->consumer); - r->cached_cons += r->size; - - return r->cached_cons - r->cached_prod; -} - -static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) -{ - __u32 entries = r->cached_prod - r->cached_cons; - - if (entries == 0) { - r->cached_prod = libbpf_smp_load_acquire(r->producer); - entries = r->cached_prod - r->cached_cons; - } - - return (entries > nb) ? nb : entries; -} - -static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx) -{ - if (xsk_prod_nb_free(prod, nb) < nb) - return 0; - - *idx = prod->cached_prod; - prod->cached_prod += nb; - - return nb; -} - -static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) -{ - /* Make sure everything has been written to the ring before indicating - * this to the kernel by writing the producer pointer. - */ - libbpf_smp_store_release(prod->producer, *prod->producer + nb); -} - -static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) -{ - __u32 entries = xsk_cons_nb_avail(cons, nb); - - if (entries > 0) { - *idx = cons->cached_cons; - cons->cached_cons += entries; - } - - return entries; -} - -static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb) -{ - cons->cached_cons -= nb; -} - -static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb) -{ - /* Make sure data has been read before indicating we are done - * with the entries by updating the consumer pointer. - */ - libbpf_smp_store_release(cons->consumer, *cons->consumer + nb); - -} - -static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) -{ - return &((char *)umem_area)[addr]; -} - -static inline __u64 xsk_umem__extract_addr(__u64 addr) -{ - return addr & XSK_UNALIGNED_BUF_ADDR_MASK; -} - -static inline __u64 xsk_umem__extract_offset(__u64 addr) -{ - return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; -} - -static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) -{ - return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); -} - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__fd(const struct xsk_umem *umem); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__fd(const struct xsk_socket *xsk); - -#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 -#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 -#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ -#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) -#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 -#define XSK_UMEM__DEFAULT_FLAGS 0 - -struct xsk_umem_config { - __u32 fill_size; - __u32 comp_size; - __u32 frame_size; - __u32 frame_headroom; - __u32 flags; -}; - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); - -/* Flags for the libbpf_flags field. */ -#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) - -struct xsk_socket_config { - __u32 rx_size; - __u32 tx_size; - __u32 libbpf_flags; - __u32 xdp_flags; - __u16 bind_flags; -}; - -/* Set config to NULL to get the default configuration. */ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create_v0_0_2(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create_v0_0_4(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__create(struct xsk_socket **xsk, - const char *ifname, __u32 queue_id, - struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - const struct xsk_socket_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_socket_config *config); - -/* Returns 0 for success and -EBUSY if the umem is still in use. */ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__delete(struct xsk_umem *umem); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -void xsk_socket__delete(struct xsk_socket *xsk); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* __LIBBPF_XSK_H */ diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c new file mode 100644 index 000000000000..88c376a8348d --- /dev/null +++ b/tools/lib/bpf/zip.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* + * Routines for dealing with .zip archives. + * + * Copyright (c) Meta Platforms, Inc. and affiliates. + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> + +#include "libbpf_internal.h" +#include "zip.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" + +/* Specification of ZIP file format can be found here: + * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + * For a high level overview of the structure of a ZIP file see + * sections 4.3.1 - 4.3.6. + * + * Data structures appearing in ZIP files do not contain any + * padding and they might be misaligned. To allow us to safely + * operate on pointers to such structures and their members, we + * declare the types as packed. + */ + +#define END_OF_CD_RECORD_MAGIC 0x06054b50 + +/* See section 4.3.16 of the spec. */ +struct end_of_cd_record { + /* Magic value equal to END_OF_CD_RECORD_MAGIC */ + __u32 magic; + + /* Number of the file containing this structure or 0xFFFF if ZIP64 archive. + * Zip archive might span multiple files (disks). + */ + __u16 this_disk; + + /* Number of the file containing the beginning of the central directory or + * 0xFFFF if ZIP64 archive. + */ + __u16 cd_disk; + + /* Number of central directory records on this disk or 0xFFFF if ZIP64 + * archive. + */ + __u16 cd_records; + + /* Number of central directory records on all disks or 0xFFFF if ZIP64 + * archive. + */ + __u16 cd_records_total; + + /* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */ + __u32 cd_size; + + /* Offset of the central directory from the beginning of the archive or + * 0xFFFFFFFF if ZIP64 archive. + */ + __u32 cd_offset; + + /* Length of comment data following end of central directory record. */ + __u16 comment_length; + + /* Up to 64k of arbitrary bytes. */ + /* uint8_t comment[comment_length] */ +} __attribute__((packed)); + +#define CD_FILE_HEADER_MAGIC 0x02014b50 +#define FLAG_ENCRYPTED (1 << 0) +#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3) + +/* See section 4.3.12 of the spec. */ +struct cd_file_header { + /* Magic value equal to CD_FILE_HEADER_MAGIC. */ + __u32 magic; + __u16 version; + /* Minimum zip version needed to extract the file. */ + __u16 min_version; + __u16 flags; + __u16 compression; + __u16 last_modified_time; + __u16 last_modified_date; + __u32 crc; + __u32 compressed_size; + __u32 uncompressed_size; + __u16 file_name_length; + __u16 extra_field_length; + __u16 file_comment_length; + /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */ + __u16 disk; + __u16 internal_attributes; + __u32 external_attributes; + /* Offset from the start of the disk containing the local file header to the + * start of the local file header. + */ + __u32 offset; +} __attribute__((packed)); + +#define LOCAL_FILE_HEADER_MAGIC 0x04034b50 + +/* See section 4.3.7 of the spec. */ +struct local_file_header { + /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */ + __u32 magic; + /* Minimum zip version needed to extract the file. */ + __u16 min_version; + __u16 flags; + __u16 compression; + __u16 last_modified_time; + __u16 last_modified_date; + __u32 crc; + __u32 compressed_size; + __u32 uncompressed_size; + __u16 file_name_length; + __u16 extra_field_length; +} __attribute__((packed)); + +#pragma GCC diagnostic pop + +struct zip_archive { + void *data; + __u32 size; + __u32 cd_offset; + __u32 cd_records; +}; + +static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size) +{ + if (offset + size > archive->size || offset > offset + size) + return NULL; + + return archive->data + offset; +} + +/* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the + * archive uses features which are not supported. + */ +static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset) +{ + __u16 comment_length, cd_records; + struct end_of_cd_record *eocd; + __u32 cd_offset, cd_size; + + eocd = check_access(archive, offset, sizeof(*eocd)); + if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC) + return -EINVAL; + + comment_length = eocd->comment_length; + if (offset + sizeof(*eocd) + comment_length != archive->size) + return -EINVAL; + + cd_records = eocd->cd_records; + if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records) + /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */ + return -ENOTSUP; + + cd_offset = eocd->cd_offset; + cd_size = eocd->cd_size; + if (!check_access(archive, cd_offset, cd_size)) + return -EINVAL; + + archive->cd_offset = cd_offset; + archive->cd_records = cd_records; + return 0; +} + +static int find_cd(struct zip_archive *archive) +{ + int64_t limit, offset; + int rc = -EINVAL; + + if (archive->size <= sizeof(struct end_of_cd_record)) + return -EINVAL; + + /* Because the end of central directory ends with a variable length array of + * up to 0xFFFF bytes we can't know exactly where it starts and need to + * search for it at the end of the file, scanning the (limit, offset] range. + */ + offset = archive->size - sizeof(struct end_of_cd_record); + limit = (int64_t)offset - (1 << 16); + + for (; offset >= 0 && offset > limit && rc != 0; offset--) { + rc = try_parse_end_of_cd(archive, offset); + if (rc == -ENOTSUP) + break; + } + return rc; +} + +struct zip_archive *zip_archive_open(const char *path) +{ + struct zip_archive *archive; + int err, fd; + off_t size; + void *data; + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return ERR_PTR(-errno); + + size = lseek(fd, 0, SEEK_END); + if (size == (off_t)-1 || size > UINT32_MAX) { + close(fd); + return ERR_PTR(-EINVAL); + } + + data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + err = -errno; + close(fd); + + if (data == MAP_FAILED) + return ERR_PTR(err); + + archive = malloc(sizeof(*archive)); + if (!archive) { + munmap(data, size); + return ERR_PTR(-ENOMEM); + } + + archive->data = data; + archive->size = size; + + err = find_cd(archive); + if (err) { + munmap(data, size); + free(archive); + return ERR_PTR(err); + } + + return archive; +} + +void zip_archive_close(struct zip_archive *archive) +{ + munmap(archive->data, archive->size); + free(archive); +} + +static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive, + __u32 offset) +{ + struct local_file_header *lfh; + + lfh = check_access(archive, offset, sizeof(*lfh)); + if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC) + return NULL; + + return lfh; +} + +static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out) +{ + struct local_file_header *lfh; + __u32 compressed_size; + const char *name; + void *data; + + lfh = local_file_header_at_offset(archive, offset); + if (!lfh) + return -EINVAL; + + offset += sizeof(*lfh); + if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR)) + return -EINVAL; + + name = check_access(archive, offset, lfh->file_name_length); + if (!name) + return -EINVAL; + + offset += lfh->file_name_length; + if (!check_access(archive, offset, lfh->extra_field_length)) + return -EINVAL; + + offset += lfh->extra_field_length; + compressed_size = lfh->compressed_size; + data = check_access(archive, offset, compressed_size); + if (!data) + return -EINVAL; + + out->compression = lfh->compression; + out->name_length = lfh->file_name_length; + out->name = name; + out->data = data; + out->data_length = compressed_size; + out->data_offset = offset; + + return 0; +} + +int zip_archive_find_entry(struct zip_archive *archive, const char *file_name, + struct zip_entry *out) +{ + size_t file_name_length = strlen(file_name); + __u32 i, offset = archive->cd_offset; + + for (i = 0; i < archive->cd_records; ++i) { + __u16 cdfh_name_length, cdfh_flags; + struct cd_file_header *cdfh; + const char *cdfh_name; + + cdfh = check_access(archive, offset, sizeof(*cdfh)); + if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC) + return -EINVAL; + + offset += sizeof(*cdfh); + cdfh_name_length = cdfh->file_name_length; + cdfh_name = check_access(archive, offset, cdfh_name_length); + if (!cdfh_name) + return -EINVAL; + + cdfh_flags = cdfh->flags; + if ((cdfh_flags & FLAG_ENCRYPTED) == 0 && + (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 && + file_name_length == cdfh_name_length && + memcmp(file_name, archive->data + offset, file_name_length) == 0) { + return get_entry_at_offset(archive, cdfh->offset, out); + } + + offset += cdfh_name_length; + offset += cdfh->extra_field_length; + offset += cdfh->file_comment_length; + } + + return -ENOENT; +} diff --git a/tools/lib/bpf/zip.h b/tools/lib/bpf/zip.h new file mode 100644 index 000000000000..1c1bb21fba76 --- /dev/null +++ b/tools/lib/bpf/zip.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __LIBBPF_ZIP_H +#define __LIBBPF_ZIP_H + +#include <linux/types.h> + +/* Represents an open zip archive. + * Only basic ZIP files are supported, in particular the following are not + * supported: + * - encryption + * - streaming + * - multi-part ZIP files + * - ZIP64 + */ +struct zip_archive; + +/* Carries information on name, compression method, and data corresponding to a + * file in a zip archive. + */ +struct zip_entry { + /* Compression method as defined in pkzip spec. 0 means data is uncompressed. */ + __u16 compression; + + /* Non-null terminated name of the file. */ + const char *name; + /* Length of the file name. */ + __u16 name_length; + + /* Pointer to the file data. */ + const void *data; + /* Length of the file data. */ + __u32 data_length; + /* Offset of the file data within the archive. */ + __u32 data_offset; +}; + +/* Open a zip archive. Returns NULL in case of an error. */ +struct zip_archive *zip_archive_open(const char *path); + +/* Close a zip archive and release resources. */ +void zip_archive_close(struct zip_archive *archive); + +/* Look up an entry corresponding to a file in given zip archive. */ +int zip_archive_find_entry(struct zip_archive *archive, const char *name, struct zip_entry *out); + +#endif diff --git a/tools/lib/cmdline.c b/tools/lib/cmdline.c new file mode 100644 index 000000000000..c85f00f43c5e --- /dev/null +++ b/tools/lib/cmdline.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * From lib/cmdline.c + */ +#include <stdlib.h> + +#if __has_attribute(__fallthrough__) +# define fallthrough __attribute__((__fallthrough__)) +#else +# define fallthrough do {} while (0) /* fallthrough */ +#endif + +unsigned long long memparse(const char *ptr, char **retptr) +{ + char *endptr; /* local pointer to end of parsed string */ + + unsigned long long ret = strtoll(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + fallthrough; + case 'P': + case 'p': + ret <<= 10; + fallthrough; + case 'T': + case 't': + ret <<= 10; + fallthrough; + case 'G': + case 'g': + ret <<= 10; + fallthrough; + case 'M': + case 'm': + ret <<= 10; + fallthrough; + case 'K': + case 'k': + ret <<= 10; + endptr++; + fallthrough; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index ba4b8d94e004..6a3dc167d30e 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -18,66 +18,54 @@ #include <linux/bitmap.h> #include <linux/kernel.h> -#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ - !defined(find_next_and_bit) - /* - * This is a common helper function for find_next_bit, find_next_zero_bit, and - * find_next_and_bit. The differences are: - * - The "invert" argument, which is XORed with each fetched word before - * searching it for one bits. - * - The optional "addr2", which is anded with "addr1" if present. + * Common helper for find_bit() function family + * @FETCH: The expression that fetches and pre-processes each word of bitmap(s) + * @MUNGE: The expression that post-processes a word containing found bit (may be empty) + * @size: The bitmap size in bits */ -unsigned long _find_next_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long nbits, - unsigned long start, unsigned long invert, unsigned long le) -{ - unsigned long tmp, mask; - (void) le; - - if (unlikely(start >= nbits)) - return nbits; - - tmp = addr1[start / BITS_PER_LONG]; - if (addr2) - tmp &= addr2[start / BITS_PER_LONG]; - tmp ^= invert; - - /* Handle 1st word. */ - mask = BITMAP_FIRST_WORD_MASK(start); - - /* - * Due to the lack of swab() in tools, and the fact that it doesn't - * need little-endian support, just comment it out - */ -#if (0) - if (le) - mask = swab(mask); -#endif - - tmp &= mask; +#define FIND_FIRST_BIT(FETCH, MUNGE, size) \ +({ \ + unsigned long idx, val, sz = (size); \ + \ + for (idx = 0; idx * BITS_PER_LONG < sz; idx++) { \ + val = (FETCH); \ + if (val) { \ + sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(val)), sz); \ + break; \ + } \ + } \ + \ + sz; \ +}) - start = round_down(start, BITS_PER_LONG); - - while (!tmp) { - start += BITS_PER_LONG; - if (start >= nbits) - return nbits; - - tmp = addr1[start / BITS_PER_LONG]; - if (addr2) - tmp &= addr2[start / BITS_PER_LONG]; - tmp ^= invert; - } - -#if (0) - if (le) - tmp = swab(tmp); -#endif - - return min(start + __ffs(tmp), nbits); -} -#endif +/* + * Common helper for find_next_bit() function family + * @FETCH: The expression that fetches and pre-processes each word of bitmap(s) + * @MUNGE: The expression that post-processes a word containing found bit (may be empty) + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + */ +#define FIND_NEXT_BIT(FETCH, MUNGE, size, start) \ +({ \ + unsigned long mask, idx, tmp, sz = (size), __start = (start); \ + \ + if (unlikely(__start >= sz)) \ + goto out; \ + \ + mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start)); \ + idx = __start / BITS_PER_LONG; \ + \ + for (tmp = (FETCH) & mask; !tmp; tmp = (FETCH)) { \ + if ((idx + 1) * BITS_PER_LONG >= sz) \ + goto out; \ + idx++; \ + } \ + \ + sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz); \ +out: \ + sz; \ +}) #ifndef find_first_bit /* @@ -85,14 +73,7 @@ unsigned long _find_next_bit(const unsigned long *addr1, */ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) { - unsigned long idx; - - for (idx = 0; idx * BITS_PER_LONG < size; idx++) { - if (addr[idx]) - return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size); - } - - return size; + return FIND_FIRST_BIT(addr[idx], /* nop */, size); } #endif @@ -104,15 +85,7 @@ unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size) { - unsigned long idx, val; - - for (idx = 0; idx * BITS_PER_LONG < size; idx++) { - val = addr1[idx] & addr2[idx]; - if (val) - return min(idx * BITS_PER_LONG + __ffs(val), size); - } - - return size; + return FIND_FIRST_BIT(addr1[idx] & addr2[idx], /* nop */, size); } #endif @@ -122,13 +95,29 @@ unsigned long _find_first_and_bit(const unsigned long *addr1, */ unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size) { - unsigned long idx; + return FIND_FIRST_BIT(~addr[idx], /* nop */, size); +} +#endif - for (idx = 0; idx * BITS_PER_LONG < size; idx++) { - if (addr[idx] != ~0UL) - return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); - } +#ifndef find_next_bit +unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits, unsigned long start) +{ + return FIND_NEXT_BIT(addr[idx], /* nop */, nbits, start); +} +#endif - return size; +#ifndef find_next_and_bit +unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start) +{ + return FIND_NEXT_BIT(addr1[idx] & addr2[idx], /* nop */, nbits, start); +} +#endif + +#ifndef find_next_zero_bit +unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, + unsigned long start) +{ + return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start); } #endif diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c index 10c067e3a8d2..bb99e493dcd1 100644 --- a/tools/lib/list_sort.c +++ b/tools/lib/list_sort.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> #include <linux/compiler.h> #include <linux/export.h> -#include <linux/string.h> #include <linux/list_sort.h> #include <linux/list.h> @@ -52,7 +50,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, struct list_head *a, struct list_head *b) { struct list_head *tail = head; - u8 count = 0; for (;;) { /* if equal, take 'a' -- important for sort stability */ @@ -78,15 +75,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, /* Finish linking remainder of list b on to tail */ tail->next = b; do { - /* - * If the merge is highly unbalanced (e.g. the input is - * already sorted), this loop may run many iterations. - * Continue callbacks to the client even though no - * element comparison is needed, so the client's cmp() - * routine can invoke cond_resched() periodically. - */ - if (unlikely(!++count)) - cmp(priv, b, b); b->prev = tail; tail = b; b = b->next; diff --git a/tools/lib/perf/.gitignore b/tools/lib/perf/.gitignore new file mode 100644 index 000000000000..0f5b4af63f62 --- /dev/null +++ b/tools/lib/perf/.gitignore @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +libperf.pc +libperf.so.* +tests-shared +tests-static diff --git a/tools/lib/perf/Documentation/Makefile b/tools/lib/perf/Documentation/Makefile index 972754082a85..573ca5b27556 100644 --- a/tools/lib/perf/Documentation/Makefile +++ b/tools/lib/perf/Documentation/Makefile @@ -121,7 +121,7 @@ install-man: all $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ $(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir); -install-html: +install-html: $(MAN_HTML) $(call QUIET_INSTALL, html) \ $(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \ $(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \ diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c index 8e1a926a9cfe..bc142f0664b5 100644 --- a/tools/lib/perf/Documentation/examples/sampling.c +++ b/tools/lib/perf/Documentation/examples/sampling.c @@ -39,7 +39,7 @@ int main(int argc, char **argv) libperf_init(libperf_print); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { fprintf(stderr, "failed to create cpus\n"); return -1; diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt index d6ca24f6ef78..2378980fab8a 100644 --- a/tools/lib/perf/Documentation/libperf-sampling.txt +++ b/tools/lib/perf/Documentation/libperf-sampling.txt @@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs: [source,c] -- - 42 cpus = perf_cpu_map__new(NULL); + 42 cpus = perf_cpu_map__new_online_cpus(); 43 if (!cpus) { 44 fprintf(stderr, "failed to create cpus\n"); 45 return -1; diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 32c5051c24eb..4072bc9b7670 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -37,16 +37,15 @@ SYNOPSIS struct perf_cpu_map; - struct perf_cpu_map *perf_cpu_map__dummy_new(void); + struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); - struct perf_cpu_map *perf_cpu_map__read(FILE *file); struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); void perf_cpu_map__put(struct perf_cpu_map *map); int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); int perf_cpu_map__nr(const struct perf_cpu_map *cpus); - bool perf_cpu_map__empty(const struct perf_cpu_map *map); + bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); @@ -62,11 +61,12 @@ SYNOPSIS struct perf_thread_map; struct perf_thread_map *perf_thread_map__new_dummy(void); + struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array); - void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid); - char *perf_thread_map__comm(struct perf_thread_map *map, int thread); + void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid); + char *perf_thread_map__comm(struct perf_thread_map *map, int idx); int perf_thread_map__nr(struct perf_thread_map *threads); - pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread); + pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx); struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map); void perf_thread_map__put(struct perf_thread_map *map); @@ -210,6 +210,7 @@ SYNOPSIS struct perf_record_time_conv; struct perf_record_header_feature; struct perf_record_compressed; + struct perf_record_compressed2; -- DESCRIPTION diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 08fe6e3c4089..7fbb50b74c00 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -39,29 +39,10 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - TEST_ARGS := $(if $(V),-v) -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - INCLUDES = \ +-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -70,11 +51,12 @@ INCLUDES = \ -I$(srctree)/tools/include/uapi # Append required CFLAGS -override CFLAGS += $(EXTRA_WARNINGS) -override CFLAGS += -Werror -Wall +override CFLAGS += -g -Werror -Wall override CFLAGS += -fPIC override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += $(EXTRA_CFLAGS) all: @@ -118,7 +100,16 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBPERF_IN): FORCE +uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm +ifeq ($(SRCARCH),arm64) + syscall-y := $(uapi-asm)/unistd_64.h +endif +uapi-asm-generic: + $(if $(syscall-y),\ + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ + generic=include/uapi/asm-generic $(syscall-y),) + +$(LIBPERF_IN): uapi-asm-generic FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) @@ -139,7 +130,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) + $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) TESTS_IN = tests-in.o @@ -153,7 +144,7 @@ $(TESTS_STATIC): $(TESTS_IN) $(LIBPERF_A) $(LIBAPI) $(QUIET_LINK)$(CC) -o $@ $^ $(TESTS_SHARED): $(TESTS_IN) $(LIBAPI) - $(QUIET_LINK)$(CC) -o $@ -L$(if $(OUTPUT),$(OUTPUT),.) $^ -lperf + $(QUIET_LINK)$(CC) -o $@ -L$(or $(OUTPUT),.) $^ -lperf make-tests: libs $(TESTS_SHARED) $(TESTS_STATIC) @@ -176,10 +167,10 @@ define do_install_mkdir endef define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ - fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + if [ ! -d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' endef install_lib: libs @@ -187,19 +178,28 @@ install_lib: libs $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ) -install_headers: - $(call QUIET_INSTALL, headers) \ - $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); +HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h +INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h + +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) +INSTALL_INTERNAL_HDRS_PFX := $(DESTDIR)$(prefix)/include/internal +INSTALL_INTERNAL_HDRS := $(addprefix $(INSTALL_INTERNAL_HDRS_PFX)/, $(INTERNAL_HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: include/perf/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +$(INSTALL_INTERNAL_HDRS): $(INSTALL_INTERNAL_HDRS_PFX)/%.h: include/internal/%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_INTERNAL_HDRS_PFX)/,644) + +install_headers: $(INSTALL_HDRS) $(INSTALL_INTERNAL_HDRS) + $(call QUIET_INSTALL, libperf_headers) install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ - $(call do_install,$(LIBPERF_PC),$(libdir_SQ)/pkgconfig,644) + $(call do_install,$(LIBPERF_PC),$(DESTDIR_SQ)$(libdir_SQ)/pkgconfig,644) install_doc: $(Q)$(MAKE) -C Documentation install-man install-html install-examples diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index ee66760f1e63..4160e7d2e120 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> #include <perf/cpumap.h> #include <stdlib.h> #include <linux/refcount.h> @@ -9,25 +10,38 @@ #include <unistd.h> #include <ctype.h> #include <limits.h> +#include "internal.h" +#include <api/fs/fs.h> -static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) +#define MAX_NR_CPUS 4096 + +void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) { - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); + RC_CHK_ACCESS(map)->nr = nr_cpus; +} - if (cpus != NULL) { +struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) +{ + RC_STRUCT(perf_cpu_map) *cpus; + struct perf_cpu_map *result; + + if (nr_cpus == 0) + return NULL; + + cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); + if (ADD_RC_CHK(result, cpus)) { cpus->nr = nr_cpus; refcount_set(&cpus->refcnt, 1); - } - return cpus; + return result; } -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +struct perf_cpu_map *perf_cpu_map__new_any_cpu(void) { struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); if (cpus) - cpus->map[0].cpu = -1; + RC_CHK_ACCESS(cpus)->map[0].cpu = -1; return cpus; } @@ -35,48 +49,79 @@ struct perf_cpu_map *perf_cpu_map__dummy_new(void) static void cpu_map__delete(struct perf_cpu_map *map) { if (map) { - WARN_ONCE(refcount_read(&map->refcnt) != 0, + WARN_ONCE(refcount_read(perf_cpu_map__refcnt(map)) != 0, "cpu_map refcnt unbalanced\n"); - free(map); + RC_CHK_FREE(map); } } struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map) { - if (map) - refcount_inc(&map->refcnt); - return map; + struct perf_cpu_map *result; + + if (RC_CHK_GET(result, map)) + refcount_inc(perf_cpu_map__refcnt(map)); + + return result; } void perf_cpu_map__put(struct perf_cpu_map *map) { - if (map && refcount_dec_and_test(&map->refcnt)) - cpu_map__delete(map); + if (map) { + if (refcount_dec_and_test(perf_cpu_map__refcnt(map))) + cpu_map__delete(map); + else + RC_CHK_PUT(map); + } } -static struct perf_cpu_map *cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysconf(void) { struct perf_cpu_map *cpus; - int nr_cpus; + int nr_cpus, nr_cpus_conf; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (nr_cpus < 0) return NULL; + nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF); + if (nr_cpus != nr_cpus_conf) { + pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.", + nr_cpus, nr_cpus_conf, nr_cpus); + } + cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { int i; for (i = 0; i < nr_cpus; ++i) - cpus->map[i].cpu = i; + RC_CHK_ACCESS(cpus)->map[i].cpu = i; } return cpus; } -struct perf_cpu_map *perf_cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysfs_online(void) { - return cpu_map__default_new(); + struct perf_cpu_map *cpus = NULL; + char *buf = NULL; + size_t buf_len; + + if (sysfs__read_str("devices/system/cpu/online", &buf, &buf_len) >= 0) { + cpus = perf_cpu_map__new(buf); + free(buf); + } + return cpus; +} + +struct perf_cpu_map *perf_cpu_map__new_online_cpus(void) +{ + struct perf_cpu_map *cpus = cpu_map__new_sysfs_online(); + + if (cpus) + return cpus; + + return cpu_map__new_sysconf(); } @@ -87,6 +132,11 @@ static int cmp_cpu(const void *a, const void *b) return cpu_a->cpu - cpu_b->cpu; } +static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +{ + return RC_CHK_ACCESS(cpus)->map[idx]; +} + static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus) { size_t payload_size = nr_cpus * sizeof(struct perf_cpu); @@ -94,89 +144,21 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu int i, j; if (cpus != NULL) { - memcpy(cpus->map, tmp_cpus, payload_size); - qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); + memcpy(RC_CHK_ACCESS(cpus)->map, tmp_cpus, payload_size); + qsort(RC_CHK_ACCESS(cpus)->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); /* Remove dups */ j = 0; for (i = 0; i < nr_cpus; i++) { - if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu) - cpus->map[j++].cpu = cpus->map[i].cpu; - } - cpus->nr = j; - assert(j <= nr_cpus); - } - return cpus; -} - -struct perf_cpu_map *perf_cpu_map__read(FILE *file) -{ - struct perf_cpu_map *cpus = NULL; - int nr_cpus = 0; - struct perf_cpu *tmp_cpus = NULL, *tmp; - int max_entries = 0; - int n, cpu, prev; - char sep; - - sep = 0; - prev = -1; - for (;;) { - n = fscanf(file, "%u%c", &cpu, &sep); - if (n <= 0) - break; - if (prev >= 0) { - int new_max = nr_cpus + cpu - prev - 1; - - WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. " - "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS); - - if (new_max >= max_entries) { - max_entries = new_max + MAX_NR_CPUS / 2; - tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); - if (tmp == NULL) - goto out_free_tmp; - tmp_cpus = tmp; + if (i == 0 || + __perf_cpu_map__cpu(cpus, i).cpu != + __perf_cpu_map__cpu(cpus, i - 1).cpu) { + RC_CHK_ACCESS(cpus)->map[j++].cpu = + __perf_cpu_map__cpu(cpus, i).cpu; } - - while (++prev < cpu) - tmp_cpus[nr_cpus++].cpu = prev; } - if (nr_cpus == max_entries) { - max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); - if (tmp == NULL) - goto out_free_tmp; - tmp_cpus = tmp; - } - - tmp_cpus[nr_cpus++].cpu = cpu; - if (n == 2 && sep == '-') - prev = cpu; - else - prev = -1; - if (n == 1 || sep == '\n') - break; + perf_cpu_map__set_nr(cpus, j); + assert(j <= nr_cpus); } - - if (nr_cpus > 0) - cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else - cpus = cpu_map__default_new(); -out_free_tmp: - free(tmp_cpus); - return cpus; -} - -static struct perf_cpu_map *cpu_map__read_all_cpu_map(void) -{ - struct perf_cpu_map *cpus = NULL; - FILE *onlnf; - - onlnf = fopen("/sys/devices/system/cpu/online", "r"); - if (!onlnf) - return cpu_map__default_new(); - - cpus = perf_cpu_map__read(onlnf); - fclose(onlnf); return cpus; } @@ -190,7 +172,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) int max_entries = 0; if (!cpu_list) - return cpu_map__read_all_cpu_map(); + return perf_cpu_map__new_online_cpus(); /* * must handle the case of empty cpumap to cover @@ -203,8 +185,8 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) while (isdigit(*cpu_list)) { p = NULL; start_cpu = strtoul(cpu_list, &p, 0); - if (start_cpu >= INT_MAX - || (*p != '\0' && *p != ',' && *p != '-')) + if (start_cpu >= INT16_MAX + || (*p != '\0' && *p != ',' && *p != '-' && *p != '\n')) goto invalid; if (*p == '-') { @@ -212,7 +194,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) p = NULL; end_cpu = strtoul(cpu_list, &p, 0); - if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',')) + if (end_cpu >= INT16_MAX || (*p != '\0' && *p != ',' && *p != '\n')) goto invalid; if (end_cpu < start_cpu) @@ -227,17 +209,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) for (; start_cpu <= end_cpu; start_cpu++) { /* check for duplicates */ for (i = 0; i < nr_cpus; i++) - if (tmp_cpus[i].cpu == (int)start_cpu) + if (tmp_cpus[i].cpu == (int16_t)start_cpu) goto invalid; if (nr_cpus == max_entries) { - max_entries += MAX_NR_CPUS; + max_entries += max(end_cpu - start_cpu + 1, 16UL); tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto invalid; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++].cpu = (int)start_cpu; + tmp_cpus[nr_cpus++].cpu = (int16_t)start_cpu; } if (*p) ++p; @@ -245,38 +227,69 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) cpu_list = p; } - if (nr_cpus > 0) + if (nr_cpus > 0) { cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else if (*cpu_list != '\0') - cpus = cpu_map__default_new(); - else - cpus = perf_cpu_map__dummy_new(); + } else if (*cpu_list != '\0') { + pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.", + cpu_list); + cpus = perf_cpu_map__new_online_cpus(); + } else { + cpus = perf_cpu_map__new_any_cpu(); + } invalid: free(tmp_cpus); out: return cpus; } +struct perf_cpu_map *perf_cpu_map__new_int(int cpu) +{ + struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); + + if (cpus) + RC_CHK_ACCESS(cpus)->map[0].cpu = cpu; + + return cpus; +} + +static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus) +{ + return RC_CHK_ACCESS(cpus)->nr; +} + struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { struct perf_cpu result = { .cpu = -1 }; - if (cpus && idx < cpus->nr) - return cpus->map[idx]; + if (cpus && idx < __perf_cpu_map__nr(cpus)) + return __perf_cpu_map__cpu(cpus, idx); return result; } int perf_cpu_map__nr(const struct perf_cpu_map *cpus) { - return cpus ? cpus->nr : 1; + return cpus ? __perf_cpu_map__nr(cpus) : 1; +} + +bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map) +{ + return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true; +} + +bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map) +{ + if (!map) + return true; + + return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1; } -bool perf_cpu_map__empty(const struct perf_cpu_map *map) +bool perf_cpu_map__is_empty(const struct perf_cpu_map *map) { - return map ? map->map[0].cpu == -1 : true; + return map == NULL; } int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) @@ -287,10 +300,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) return -1; low = 0; - high = cpus->nr; + high = __perf_cpu_map__nr(cpus); while (low < high) { int idx = (low + high) / 2; - struct perf_cpu cpu_at_idx = cpus->map[idx]; + struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx); if (cpu_at_idx.cpu == cpu.cpu) return idx; @@ -309,69 +322,176 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) return perf_cpu_map__idx(cpus, cpu) != -1; } -struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map) +bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs) +{ + int nr; + + if (lhs == rhs) + return true; + + if (!lhs || !rhs) + return false; + + nr = __perf_cpu_map__nr(lhs); + if (nr != __perf_cpu_map__nr(rhs)) + return false; + + for (int idx = 0; idx < nr; idx++) { + if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu) + return false; + } + return true; +} + +bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map) +{ + return map && __perf_cpu_map__cpu(map, 0).cpu == -1; +} + +struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map) +{ + struct perf_cpu cpu, result = { + .cpu = -1 + }; + int idx; + + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) { + result = cpu; + break; + } + return result; +} + +struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map) { struct perf_cpu result = { .cpu = -1 }; - // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return map->nr > 0 ? map->map[map->nr - 1] : result; + if (!map) + return result; + + // The CPUs are always sorted and nr is always > 0 as 0 length map is + // encoded as NULL. + return __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1); +} + +/** Is 'b' a subset of 'a'. */ +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b) +{ + if (a == b || !b) + return true; + if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a)) + return false; + + for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) { + if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu) + return false; + if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) { + j++; + if (j == __perf_cpu_map__nr(b)) + return true; + } + } + return false; } /* - * Merge two cpumaps + * Merge two cpumaps. + * + * If 'other' is subset of '*orig', '*orig' keeps itself with no reference count + * change (similar to "realloc"). + * + * If '*orig' is subset of 'other', '*orig' reuses 'other' with its reference + * count increased. * - * orig either gets freed and replaced with a new map, or reused - * with no reference count change (similar to "realloc") - * other has its reference count increased. + * Otherwise, '*orig' gets freed and replaced with a new map. */ - -struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, - struct perf_cpu_map *other) +int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other) { struct perf_cpu *tmp_cpus; int tmp_len; int i, j, k; struct perf_cpu_map *merged; - if (!orig && !other) - return NULL; - if (!orig) { - perf_cpu_map__get(other); - return other; + if (perf_cpu_map__is_subset(*orig, other)) + return 0; + if (perf_cpu_map__is_subset(other, *orig)) { + perf_cpu_map__put(*orig); + *orig = perf_cpu_map__get(other); + return 0; } - if (!other) - return orig; - if (orig->nr == other->nr && - !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu))) - return orig; - tmp_len = orig->nr + other->nr; + tmp_len = __perf_cpu_map__nr(*orig) + __perf_cpu_map__nr(other); tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); if (!tmp_cpus) - return NULL; + return -ENOMEM; /* Standard merge algorithm from wikipedia */ i = j = k = 0; - while (i < orig->nr && j < other->nr) { - if (orig->map[i].cpu <= other->map[j].cpu) { - if (orig->map[i].cpu == other->map[j].cpu) + while (i < __perf_cpu_map__nr(*orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(*orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) { + if (__perf_cpu_map__cpu(*orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu) j++; - tmp_cpus[k++] = orig->map[i++]; + tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++); } else - tmp_cpus[k++] = other->map[j++]; + tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++); } - while (i < orig->nr) - tmp_cpus[k++] = orig->map[i++]; + while (i < __perf_cpu_map__nr(*orig)) + tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++); - while (j < other->nr) - tmp_cpus[k++] = other->map[j++]; + while (j < __perf_cpu_map__nr(other)) + tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++); assert(k <= tmp_len); merged = cpu_map__trim_new(k, tmp_cpus); free(tmp_cpus); - perf_cpu_map__put(orig); + perf_cpu_map__put(*orig); + *orig = merged; + return 0; +} + +struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other) +{ + int i, j, k; + struct perf_cpu_map *merged; + + if (perf_cpu_map__is_subset(other, orig)) + return perf_cpu_map__get(orig); + if (perf_cpu_map__is_subset(orig, other)) + return perf_cpu_map__get(other); + + i = j = k = 0; + while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) + i++; + else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu) + j++; + else { /* CPUs match. */ + i++; + j++; + k++; + } + } + if (k == 0) /* Maps are completely disjoint. */ + return NULL; + + merged = perf_cpu_map__alloc(k); + if (!merged) + return NULL; + /* Entries are added to merged in sorted order, so no need to sort again. */ + i = j = k = 0; + while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) + i++; + else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu) + j++; + else { + j++; + RC_CHK_ACCESS(merged)->map[k++] = __perf_cpu_map__cpu(orig, i++); + } + } return merged; } diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 9a770bfdc804..3ed023f4b190 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -23,6 +23,7 @@ #include <perf/cpumap.h> #include <perf/threadmap.h> #include <api/fd/array.h> +#include "internal.h" void perf_evlist__init(struct perf_evlist *evlist) { @@ -35,31 +36,112 @@ void perf_evlist__init(struct perf_evlist *evlist) static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, struct perf_evsel *evsel) { + if (perf_cpu_map__is_empty(evsel->cpus)) { + if (perf_cpu_map__is_empty(evsel->pmu_cpus)) { + /* + * Assume the unset PMU cpus were for a system-wide + * event, like a software or tracepoint. + */ + evsel->pmu_cpus = perf_cpu_map__new_online_cpus(); + } + if (evlist->has_user_cpus && !evsel->system_wide) { + /* + * Use the user CPUs unless the evsel is set to be + * system wide, such as the dummy event. + */ + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); + } else { + /* + * System wide and other modes, assume the cpu map + * should be set to all PMU CPUs. + */ + evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus); + } + } /* - * We already have cpus for evsel (via PMU sysfs) so - * keep it, if there's no target cpu list defined. + * Avoid "any CPU"(-1) for uncore and PMUs that require a CPU, even if + * requested. */ - if (!evsel->own_cpus || evlist->has_user_cpus) { + if (evsel->requires_cpu && perf_cpu_map__has_any_cpu(evsel->cpus)) { perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->cpus); - } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) { + evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus); + } + + /* + * Globally requested CPUs replace user requested unless the evsel is + * set to be system wide. + */ + if (evlist->has_user_cpus && !evsel->system_wide) { + assert(!perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)); + if (!perf_cpu_map__equal(evsel->cpus, evlist->user_requested_cpus)) { + perf_cpu_map__put(evsel->cpus); + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); + } + } + + /* Ensure cpus only references valid PMU CPUs. */ + if (!perf_cpu_map__has_any_cpu(evsel->cpus) && + !perf_cpu_map__is_subset(evsel->pmu_cpus, evsel->cpus)) { + struct perf_cpu_map *tmp = perf_cpu_map__intersect(evsel->pmu_cpus, evsel->cpus); + perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->cpus); - } else if (evsel->cpus != evsel->own_cpus) { + evsel->cpus = tmp; + } + + /* + * Was event requested on all the PMU's CPUs but the user requested is + * any CPU (-1)? If so switch to using any CPU (-1) to reduce the number + * of events. + */ + if (!evsel->system_wide && + !evsel->requires_cpu && + perf_cpu_map__equal(evsel->cpus, evsel->pmu_cpus) && + perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)) { perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evsel->own_cpus); + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } - perf_thread_map__put(evsel->threads); - evsel->threads = perf_thread_map__get(evlist->threads); - evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); + /* Sanity check assert before the evsel is potentially removed. */ + assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus)); + + /* + * Empty cpu lists would eventually get opened as "any" so remove + * genuinely empty ones before they're opened in the wrong place. + */ + if (perf_cpu_map__is_empty(evsel->cpus)) { + struct perf_evsel *next = perf_evlist__next(evlist, evsel); + + perf_evlist__remove(evlist, evsel); + /* Keep idx contiguous */ + if (next) + list_for_each_entry_from(next, &evlist->entries, node) + next->idx--; + + return; + } + + if (evsel->system_wide) { + perf_thread_map__put(evsel->threads); + evsel->threads = perf_thread_map__new_dummy(); + } else { + perf_thread_map__put(evsel->threads); + evsel->threads = perf_thread_map__get(evlist->threads); + } + + perf_cpu_map__merge(&evlist->all_cpus, evsel->cpus); } static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel, *n; + + evlist->needs_map_propagation = true; + + /* Clear the all_cpus set which will be merged into during propagation. */ + perf_cpu_map__put(evlist->all_cpus); + evlist->all_cpus = NULL; - perf_evlist__for_each_evsel(evlist, evsel) + list_for_each_entry_safe(evsel, n, &evlist->entries, node) __perf_evlist__propagate_maps(evlist, evsel); } @@ -69,7 +151,9 @@ void perf_evlist__add(struct perf_evlist *evlist, evsel->idx = evlist->nr_entries; list_add_tail(&evsel->node, &evlist->entries); evlist->nr_entries += 1; - __perf_evlist__propagate_maps(evlist, evsel); + + if (evlist->needs_map_propagation) + __perf_evlist__propagate_maps(evlist, evsel); } void perf_evlist__remove(struct perf_evlist *evlist, @@ -123,10 +207,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { - perf_cpu_map__put(evlist->cpus); + perf_cpu_map__put(evlist->user_requested_cpus); perf_cpu_map__put(evlist->all_cpus); perf_thread_map__put(evlist->threads); - evlist->cpus = NULL; + evlist->user_requested_cpus = NULL; evlist->all_cpus = NULL; evlist->threads = NULL; fdarray__exit(&evlist->pollfd); @@ -155,9 +239,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, * original reference count of 1. If that is not the case it is up to * the caller to increase the reference count. */ - if (cpus != evlist->cpus) { - perf_cpu_map__put(evlist->cpus); - evlist->cpus = perf_cpu_map__get(cpus); + if (cpus != evlist->user_requested_cpus) { + perf_cpu_map__put(evlist->user_requested_cpus); + evlist->user_requested_cpus = perf_cpu_map__get(cpus); } if (threads != evlist->threads) { @@ -165,9 +249,6 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, evlist->threads = perf_thread_map__get(threads); } - if (!evlist->all_cpus && cpus) - evlist->all_cpus = perf_cpu_map__get(cpus); - perf_evlist__propagate_maps(evlist); } @@ -224,10 +305,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist) static void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { int hash; - struct perf_sample_id *sid = SID(evsel, cpu, thread); + struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread); sid->id = id; sid->evsel = evsel; @@ -245,21 +326,27 @@ void perf_evlist__reset_id_hash(struct perf_evlist *evlist) void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { - perf_evlist__id_hash(evlist, evsel, cpu, thread, id); + if (!SID(evsel, cpu_map_idx, thread)) + return; + + perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id); evsel->id[evsel->ids++] = id; } int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd) + int cpu_map_idx, int thread, int fd) { u64 read_data[4] = { 0, }; int id_idx = 1; /* The first entry is the counter value */ u64 id; int ret; + if (!SID(evsel, cpu_map_idx, thread)) + return -1; + ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); if (!ret) goto add; @@ -288,13 +375,13 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, id = read_data[id_idx]; add: - perf_evlist__id_add(evlist, evsel, cpu, thread, id); + perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id); return 0; } int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); int nr_threads = perf_thread_map__nr(evlist->threads); int nfds = 0; struct perf_evsel *evsel; @@ -424,14 +511,15 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_ static int mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, - int thread, int *_output, int *_output_overwrite) + int thread, int *_output, int *_output_overwrite, int *nr_mmaps) { - struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx); struct perf_evsel *evsel; int revent; perf_evlist__for_each_entry(evlist, evsel) { bool overwrite = evsel->attr.write_backward; + enum fdarray_flags flgs; struct perf_mmap *map; int *output, fd, cpu; @@ -474,12 +562,21 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, */ refcount_set(&map->refcnt, 2); + if (ops->idx) + ops->idx(evlist, evsel, mp, idx); + + /* Debug message used by test scripts */ + pr_debug("idx %d: mmapping fd %d\n", idx, *output); if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; + *nr_mmaps += 1; + if (!idx) perf_evlist__set_mmap_first(evlist, map, overwrite); } else { + /* Debug message used by test scripts */ + pr_debug("idx %d: set output fd %d -> %d\n", idx, fd, *output); if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) return -1; @@ -488,8 +585,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, revent = !overwrite ? POLLIN : 0; - if (!evsel->system_wide && - perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) { + flgs = evsel->system_wide ? fdarray_flag__nonfilterable : fdarray_flag__default; + if (perf_evlist__add_pollfd(evlist, fd, map, revent, flgs) < 0) { perf_mmap__put(map); return -1; } @@ -509,21 +606,37 @@ static int mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { - int thread; int nr_threads = perf_thread_map__nr(evlist->threads); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); + int cpu, thread, idx = 0; + int nr_mmaps = 0; - for (thread = 0; thread < nr_threads; thread++) { + pr_debug("%s: nr cpu values (may include -1) %d nr threads %d\n", + __func__, nr_cpus, nr_threads); + + /* per-thread mmaps */ + for (thread = 0; thread < nr_threads; thread++, idx++) { int output = -1; int output_overwrite = -1; - if (ops->idx) - ops->idx(evlist, mp, thread, false); + if (mmap_per_evsel(evlist, ops, idx, mp, 0, thread, &output, + &output_overwrite, &nr_mmaps)) + goto out_unmap; + } - if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, - &output, &output_overwrite)) + /* system-wide mmaps i.e. per-cpu */ + for (cpu = 1; cpu < nr_cpus; cpu++, idx++) { + int output = -1; + int output_overwrite = -1; + + if (mmap_per_evsel(evlist, ops, idx, mp, cpu, 0, &output, + &output_overwrite, &nr_mmaps)) goto out_unmap; } + if (nr_mmaps != evlist->nr_mmaps) + pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); + return 0; out_unmap: @@ -536,23 +649,26 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { int nr_threads = perf_thread_map__nr(evlist->threads); - int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); + int nr_mmaps = 0; int cpu, thread; + pr_debug("%s: nr cpu values %d nr threads %d\n", __func__, nr_cpus, nr_threads); + for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; int output_overwrite = -1; - if (ops->idx) - ops->idx(evlist, mp, cpu, true); - for (thread = 0; thread < nr_threads; thread++) { if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, - thread, &output, &output_overwrite)) + thread, &output, &output_overwrite, &nr_mmaps)) goto out_unmap; } } + if (nr_mmaps != evlist->nr_mmaps) + pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); + return 0; out_unmap: @@ -564,9 +680,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) { int nr_mmaps; - nr_mmaps = perf_cpu_map__nr(evlist->cpus); - if (perf_cpu_map__empty(evlist->cpus)) - nr_mmaps = perf_thread_map__nr(evlist->threads); + /* One for each CPU */ + nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); + if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) { + /* Plus one for each thread */ + nr_mmaps += perf_thread_map__nr(evlist->threads); + /* Minus the per-thread CPU (-1) */ + nr_mmaps -= 1; + } return nr_mmaps; } @@ -575,9 +696,8 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { + const struct perf_cpu_map *cpus = evlist->all_cpus; struct perf_evsel *evsel; - const struct perf_cpu_map *cpus = evlist->cpus; - const struct perf_thread_map *threads = evlist->threads; if (!ops || !ops->get || !ops->mmap) return -EINVAL; @@ -589,14 +709,14 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, perf_evlist__for_each_entry(evlist, evsel) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0) return -ENOMEM; } if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) return mmap_per_thread(evlist, ops, mp); return mmap_per_cpu(evlist, ops, mp); @@ -645,15 +765,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader) { - struct perf_evsel *first, *last, *evsel; - - first = list_first_entry(list, struct perf_evsel, node); - last = list_last_entry(list, struct perf_evsel, node); - - leader->nr_members = last->idx - first->idx + 1; + struct perf_evsel *evsel; + int n = 0; - __perf_evlist__for_each_entry(list, evsel) + __perf_evlist__for_each_entry(list, evsel) { evsel->leader = leader; + n++; + } + leader->nr_members = n; } void perf_evlist__set_leader(struct perf_evlist *evlist) @@ -662,7 +781,32 @@ void perf_evlist__set_leader(struct perf_evlist *evlist) struct perf_evsel *first = list_entry(evlist->entries.next, struct perf_evsel, node); - evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; __perf_evlist__set_leader(&evlist->entries, first); } } + +int perf_evlist__nr_groups(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + int nr_groups = 0; + + perf_evlist__for_each_evsel(evlist, evsel) { + /* + * evsels by default have a nr_members of 1, and they are their + * own leader. If the nr_members is >1 then this is an + * indication of a group. + */ + if (evsel->leader == evsel && evsel->nr_members > 1) + nr_groups++; + } + return nr_groups; +} + +void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel) +{ + if (!evsel->system_wide) { + evsel->system_wide = true; + if (evlist->needs_map_propagation) + __perf_evlist__propagate_maps(evlist, evsel); + } +} diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 210ea7c06ce8..13a307fc75ae 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -5,6 +5,7 @@ #include <perf/evsel.h> #include <perf/cpumap.h> #include <perf/threadmap.h> +#include <linux/hash.h> #include <linux/list.h> #include <internal/evsel.h> #include <linux/zalloc.h> @@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->per_stream_periods); evsel->attr = *attr; evsel->idx = idx; evsel->leader = evsel; @@ -38,8 +40,19 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr) return evsel; } +void perf_evsel__exit(struct perf_evsel *evsel) +{ + assert(evsel->fd == NULL); /* If not fds were not closed. */ + assert(evsel->mmap == NULL); /* If not munmap wasn't called. */ + assert(evsel->sample_id == NULL); /* If not free_id wasn't called. */ + perf_cpu_map__put(evsel->cpus); + perf_cpu_map__put(evsel->pmu_cpus); + perf_thread_map__put(evsel->threads); +} + void perf_evsel__delete(struct perf_evsel *evsel) { + perf_evsel__exit(evsel); free(evsel); } @@ -120,7 +133,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, static struct perf_cpu_map *empty_cpu_map; if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } @@ -149,23 +162,30 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, int fd, group_fd, *evsel_fd; evsel_fd = FD(evsel, idx, thread); - if (evsel_fd == NULL) - return -EINVAL; + if (evsel_fd == NULL) { + err = -EINVAL; + goto out; + } err = get_group_fd(evsel, idx, thread, &group_fd); if (err < 0) - return err; + goto out; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, cpu, group_fd, 0); - if (fd < 0) - return -errno; + if (fd < 0) { + err = -errno; + goto out; + } *evsel_fd = fd; } } +out: + if (err) + perf_evsel__close(evsel); return err; } @@ -298,6 +318,9 @@ int perf_evsel__read_size(struct perf_evsel *evsel) if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); + if (read_format & PERF_FORMAT_LOST) + entry += sizeof(u64); + if (read_format & PERF_FORMAT_GROUP) { nr = evsel->nr_members; size += sizeof(u64); @@ -307,27 +330,112 @@ int perf_evsel__read_size(struct perf_evsel *evsel) return size; } +/* This only reads values for the leader */ +static int perf_evsel__read_group(struct perf_evsel *evsel, int cpu_map_idx, + int thread, struct perf_counts_values *count) +{ + size_t size = perf_evsel__read_size(evsel); + int *fd = FD(evsel, cpu_map_idx, thread); + u64 read_format = evsel->attr.read_format; + u64 *data; + int idx = 1; + + if (fd == NULL || *fd < 0) + return -EINVAL; + + data = calloc(1, size); + if (data == NULL) + return -ENOMEM; + + if (readn(*fd, data, size) <= 0) { + free(data); + return -errno; + } + + /* + * This reads only the leader event intentionally since we don't have + * perf counts values for sibling events. + */ + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + count->ena = data[idx++]; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + count->run = data[idx++]; + + /* value is always available */ + count->val = data[idx++]; + if (read_format & PERF_FORMAT_ID) + count->id = data[idx++]; + if (read_format & PERF_FORMAT_LOST) + count->lost = data[idx++]; + + free(data); + return 0; +} + +/* + * The perf read format is very flexible. It needs to set the proper + * values according to the read format. + */ +static void perf_evsel__adjust_values(struct perf_evsel *evsel, u64 *buf, + struct perf_counts_values *count) +{ + u64 read_format = evsel->attr.read_format; + int n = 0; + + count->val = buf[n++]; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + count->ena = buf[n++]; + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + count->run = buf[n++]; + + if (read_format & PERF_FORMAT_ID) + count->id = buf[n++]; + + if (read_format & PERF_FORMAT_LOST) + count->lost = buf[n++]; +} + int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); int *fd = FD(evsel, cpu_map_idx, thread); + u64 read_format = evsel->attr.read_format; + struct perf_counts_values buf; memset(count, 0, sizeof(*count)); if (fd == NULL || *fd < 0) return -EINVAL; + if (read_format & PERF_FORMAT_GROUP) + return perf_evsel__read_group(evsel, cpu_map_idx, thread, count); + if (MMAP(evsel, cpu_map_idx, thread) && + !(read_format & (PERF_FORMAT_ID | PERF_FORMAT_LOST)) && !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count)) return 0; - if (readn(*fd, count->values, size) <= 0) + if (readn(*fd, buf.values, size) <= 0) return -errno; + perf_evsel__adjust_values(evsel, buf.values, count); return 0; } +static int perf_evsel__ioctl(struct perf_evsel *evsel, int ioc, void *arg, + int cpu_map_idx, int thread) +{ + int *fd = FD(evsel, cpu_map_idx, thread); + + if (fd == NULL || *fd < 0) + return -1; + + return ioctl(*fd, ioc, arg); +} + static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ioc, void *arg, int cpu_map_idx) @@ -335,13 +443,7 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int err; - int *fd = FD(evsel, cpu_map_idx, thread); - - if (fd == NULL || *fd < 0) - return -1; - - err = ioctl(*fd, ioc, arg); + int err = perf_evsel__ioctl(evsel, ioc, arg, cpu_map_idx, thread); if (err) return err; @@ -355,6 +457,21 @@ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx); } +int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread) +{ + struct perf_cpu cpu __maybe_unused; + int idx; + int err; + + perf_cpu_map__for_each_cpu(cpu, idx, evsel->cpus) { + err = perf_evsel__ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, idx, thread); + if (err) + return err; + } + + return 0; +} + int perf_evsel__enable(struct perf_evsel *evsel) { int i; @@ -411,9 +528,6 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) if (ncpus == 0 || nthreads == 0) return 0; - if (evsel->system_wide) - nthreads = 1; - evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); if (evsel->sample_id == NULL) return -ENOMEM; @@ -430,10 +544,56 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) void perf_evsel__free_id(struct perf_evsel *evsel) { + struct perf_sample_id_period *pos, *n; + xyarray__delete(evsel->sample_id); evsel->sample_id = NULL; zfree(&evsel->id); evsel->ids = 0; + + perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) { + list_del_init(&pos->node); + free(pos); + } +} + +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel) +{ + return (evsel->attr.sample_type & PERF_SAMPLE_READ) && + (evsel->attr.sample_type & PERF_SAMPLE_TID) && + evsel->attr.inherit; +} + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread) +{ + struct hlist_head *head; + struct perf_sample_id_period *res; + int hash; + + if (!per_thread) + return &sid->period; + + hash = hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS); + head = &sid->periods[hash]; + + hlist_for_each_entry(res, head, hnode) + if (res->tid == tid) + return &res->period; + + if (sid->evsel == NULL) + return NULL; + + res = zalloc(sizeof(struct perf_sample_id_period)); + if (res == NULL) + return NULL; + + INIT_LIST_HEAD(&res->node); + res->tid = tid; + + list_add_tail(&res->node, &sid->evsel->per_stream_periods); + hlist_add_head(&res->hnode, &sid->periods[hash]); + + return &res->period; } void perf_counts_values__scale(struct perf_counts_values *count, diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 581f9ffb4237..e2be2d17c32b 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -3,11 +3,8 @@ #define __LIBPERF_INTERNAL_CPUMAP_H #include <linux/refcount.h> - -/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ -struct perf_cpu { - int cpu; -}; +#include <perf/cpumap.h> +#include <internal/rc_check.h> /** * A sized, reference counted, sorted array of integers representing CPU @@ -16,7 +13,7 @@ struct perf_cpu { * gaps if CPU numbers were used. For events associated with a pid, rather than * a CPU, a single dummy map with an entry of -1 is used. */ -struct perf_cpu_map { +DECLARE_RC_STRUCT(perf_cpu_map) { refcount_t refcnt; /** Length of the map array. */ int nr; @@ -24,10 +21,14 @@ struct perf_cpu_map { struct perf_cpu map[]; }; -#ifndef MAX_NR_CPUS -#define MAX_NR_CPUS 2048 -#endif - +struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus); int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b); + +void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus); +static inline refcount_t *perf_cpu_map__refcnt(struct perf_cpu_map *map) +{ + return &RC_CHK_ACCESS(map)->refcnt; +} #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 4cefade540bd..f43bdb9b6227 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -17,9 +17,14 @@ struct perf_mmap_param; struct perf_evlist { struct list_head entries; int nr_entries; - int nr_groups; bool has_user_cpus; - struct perf_cpu_map *cpus; + bool needs_map_propagation; + /** + * The cpus passed from the command line or all online CPUs by + * default. + */ + struct perf_cpu_map *user_requested_cpus; + /** The union of all evsel cpu maps. */ struct perf_cpu_map *all_cpus; struct perf_thread_map *threads; int nr_mmaps; @@ -33,7 +38,8 @@ struct perf_evlist { }; typedef void -(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); +(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_evsel*, + struct perf_mmap_param*, int); typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); typedef int @@ -120,13 +126,15 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist); void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id); + int cpu_map_idx, int thread, u64 id); int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd); + int cpu_map_idx, int thread, int fd); void perf_evlist__reset_id_hash(struct perf_evlist *evlist); void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader); + +void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index cfc9ebd7968e..fefe64ba5e26 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -11,6 +11,32 @@ struct perf_thread_map; struct xyarray; +/** + * The per-thread accumulated period storage node. + */ +struct perf_sample_id_period { + struct list_head node; + struct hlist_node hnode; + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ + u64 period; + /* The TID that the values belongs to */ + u32 tid; +}; + +/** + * perf_evsel_for_each_per_thread_period_safe - safely iterate thru all the + * per_stream_periods + * @evlist:perf_evsel instance to iterate + * @item: struct perf_sample_id_period iterator + * @tmp: struct perf_sample_id_period temp iterator + */ +#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \ + list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node) + + +#define PERF_SAMPLE_ID__HLIST_BITS 4 +#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS) + /* * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are * more than one entry in the evlist. @@ -30,15 +56,50 @@ struct perf_sample_id { struct perf_cpu cpu; pid_t tid; - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; + /* Guest machine pid and VCPU, valid only if machine_pid is non-zero */ + pid_t machine_pid; + struct perf_cpu vcpu; + + /* + * Per-thread, and global event counts are mutually exclusive: + * Whilst it is possible to combine events into a group with differing + * values of PERF_SAMPLE_READ, it is not valid to have inconsistent + * values for `inherit`. Therefore it is not possible to have a + * situation where a per-thread event is sampled as a global event; + * all !inherit groups are global, and all groups where the sampling + * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event + * that is part of such a group that is inherit but not PERF_SAMPLE_READ + * will be read as per-thread. If such an event can also trigger a + * sample (such as with sample_period > 0) then it will not cause + * `read_format` to be included in its PERF_RECORD_SAMPLE, and + * therefore will not expose the per-thread group members as global. + */ + union { + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is not per-thread). + */ + u64 period; + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is per-thread). + */ + struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE]; + }; }; struct perf_evsel { struct list_head node; struct perf_event_attr attr; + /** The commonly used cpu map of CPUs the event should be opened upon, etc. */ struct perf_cpu_map *cpus; - struct perf_cpu_map *own_cpus; + /** + * The cpu map read from the PMU. For core PMUs this is the list of all + * CPUs the event can be opened upon. For other PMUs this is the default + * cpu map for opening the event on, for example, the first CPU on a + * socket for an uncore event. + */ + struct perf_cpu_map *pmu_cpus; struct perf_thread_map *threads; struct xyarray *fd; struct xyarray *mmap; @@ -47,14 +108,32 @@ struct perf_evsel { u32 ids; struct perf_evsel *leader; + /* For events where the read_format value is per-thread rather than + * global, stores the per-thread cumulative period */ + struct list_head per_stream_periods; + /* parse modifier helper */ int nr_members; + /* + * system_wide is for events that need to be on every CPU, irrespective + * of user requested CPUs or threads. Tha main example of this is the + * dummy event. Map propagation will set cpus for this event to all CPUs + * as software PMU events like dummy, have a CPU map that is empty. + */ bool system_wide; + /* + * Some events, for example uncore events, require a CPU. + * i.e. it cannot be the 'any CPU' value of -1. + */ + bool requires_cpu; + /** Is the PMU for the event a core one? Effects the handling of own_cpus. */ + bool is_pmu_core; int idx; }; void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx); +void perf_evsel__exit(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__close_fd(struct perf_evsel *evsel); void perf_evsel__free_fd(struct perf_evsel *evsel); @@ -64,4 +143,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_id(struct perf_evsel *evsel); +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel); + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, + bool per_thread); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/lib/perf/include/internal/lib.h b/tools/lib/perf/include/internal/lib.h index 5175d491b2d4..85471a4b900f 100644 --- a/tools/lib/perf/include/internal/lib.h +++ b/tools/lib/perf/include/internal/lib.h @@ -9,4 +9,6 @@ extern unsigned int page_size; ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); +ssize_t preadn(int fd, void *buf, size_t n, off_t offs); + #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5a062af8e9d8..5f08cab61ece 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -33,7 +33,8 @@ struct perf_mmap { bool overwrite; u64 flush; libperf_unmap_cb_t unmap_cb; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + void *event_copy; + size_t event_copy_sz; struct perf_mmap *next; }; diff --git a/tools/lib/perf/include/internal/rc_check.h b/tools/lib/perf/include/internal/rc_check.h new file mode 100644 index 000000000000..f80ddfc80129 --- /dev/null +++ b/tools/lib/perf/include/internal/rc_check.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __LIBPERF_INTERNAL_RC_CHECK_H +#define __LIBPERF_INTERNAL_RC_CHECK_H + +#include <stdlib.h> +#include <linux/zalloc.h> + +/* + * Enable reference count checking implicitly with leak checking, which is + * integrated into address sanitizer. + */ +#if defined(__SANITIZE_ADDRESS__) || defined(LEAK_SANITIZER) || defined(ADDRESS_SANITIZER) +#define REFCNT_CHECKING 1 +#elif defined(__has_feature) +#if __has_feature(address_sanitizer) || __has_feature(leak_sanitizer) +#define REFCNT_CHECKING 1 +#endif +#endif + +/* + * Shared reference count checking macros. + * + * Reference count checking is an approach to sanitizing the use of reference + * counted structs. It leverages address and leak sanitizers to make sure gets + * are paired with a put. Reference count checking adds a malloc-ed layer of + * indirection on a get, and frees it on a put. A missed put will be reported as + * a memory leak. A double put will be reported as a double free. Accessing + * after a put will cause a use-after-free and/or a segfault. + */ + +#ifndef REFCNT_CHECKING +/* Replaces "struct foo" so that the pointer may be interposed. */ +#define DECLARE_RC_STRUCT(struct_name) \ + struct struct_name + +/* Declare a reference counted struct variable. */ +#define RC_STRUCT(struct_name) struct struct_name + +/* + * Interpose the indirection. Result will hold the indirection and object is the + * reference counted struct. + */ +#define ADD_RC_CHK(result, object) (result = object, object) + +/* Strip the indirection layer. */ +#define RC_CHK_ACCESS(object) object + +/* Frees the object and the indirection layer. */ +#define RC_CHK_FREE(object) free(object) + +/* A get operation adding the indirection layer. */ +#define RC_CHK_GET(result, object) ADD_RC_CHK(result, object) + +/* A put operation removing the indirection layer. */ +#define RC_CHK_PUT(object) {} + +/* Pointer equality when the indirection may or may not be there. */ +#define RC_CHK_EQUAL(object1, object2) (object1 == object2) + +#else + +/* Replaces "struct foo" so that the pointer may be interposed. */ +#define DECLARE_RC_STRUCT(struct_name) \ + struct original_##struct_name; \ + struct struct_name { \ + struct original_##struct_name *orig; \ + }; \ + struct original_##struct_name + +/* Declare a reference counted struct variable. */ +#define RC_STRUCT(struct_name) struct original_##struct_name + +/* + * Interpose the indirection. Result will hold the indirection and object is the + * reference counted struct. + */ +#define ADD_RC_CHK(result, object) \ + ( \ + object ? (result = malloc(sizeof(*result)), \ + result ? (result->orig = object, result) \ + : (result = NULL, NULL)) \ + : (result = NULL, NULL) \ + ) + +/* Strip the indirection layer. */ +#define RC_CHK_ACCESS(object) object->orig + +/* Frees the object and the indirection layer. */ +#define RC_CHK_FREE(object) \ + do { \ + zfree(&object->orig); \ + free(object); \ + } while(0) + +/* A get operation adding the indirection layer. */ +#define RC_CHK_GET(result, object) ADD_RC_CHK(result, (object ? object->orig : NULL)) + +/* A put operation removing the indirection layer. */ +#define RC_CHK_PUT(object) \ + do { \ + if (object) { \ + object->orig = NULL; \ + free(object); \ + } \ + } while(0) + +/* Pointer equality when the indirection may or may not be there. */ +#define RC_CHK_EQUAL(object1, object2) (object1 == object2 || \ + (object1 && object2 && object1->orig == object2->orig)) + +#endif + +#endif /* __LIBPERF_INTERNAL_RC_CHECK_H */ diff --git a/tools/lib/perf/include/perf/core.h b/tools/lib/perf/include/perf/core.h index a3f6d68edad7..06cc132d88cf 100644 --- a/tools/lib/perf/include/perf/core.h +++ b/tools/lib/perf/include/perf/core.h @@ -5,7 +5,7 @@ #include <stdarg.h> #ifndef LIBPERF_API -#define LIBPERF_API __attribute__((visibility("default"))) +#define LIBPERF_API extern __attribute__((visibility("default"))) #endif enum libperf_print_level { diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 15b8faafd615..58cc5c5fa47c 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,27 +3,102 @@ #define __LIBPERF_CPUMAP_H #include <perf/core.h> -#include <perf/cpumap.h> -#include <stdio.h> #include <stdbool.h> +#include <stdint.h> -LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); +/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ +struct perf_cpu { + int16_t cpu; +}; + +struct perf_cache { + int cache_lvl; + int cache; +}; + +struct perf_cpu_map; + +/** + * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value. + */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); +/** + * perf_cpu_map__new_online_cpus - a map read from + * /sys/devices/system/cpu/online if + * available. If reading wasn't possible a map + * is created using the online processors + * assuming the first 'n' processors are all + * online. + */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void); +/** + * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If no + * cpu_list argument is provided then + * perf_cpu_map__new_online_cpus is returned. + */ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); +/** perf_cpu_map__new_int - create a map with the one given cpu. */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_int(int cpu); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, - struct perf_cpu_map *other); +LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig, + struct perf_cpu_map *other); +LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); +/** + * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index + * is invalid. + */ LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +/** + * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a + * cpu of -1 for an invalid index, this makes an empty map + * look like it contains the "any CPU"/dummy value. Otherwise + * the result is the number CPUs in the map plus one if the + * "any CPU"/dummy value is present. + */ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); -LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); -LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map); +/** + * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value. + */ +LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value. + */ +LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__is_empty - does the map contain no values and it doesn't + * contain the special "any CPU"/dummy value. + */ +LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value. + */ +LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map); +/** + * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value. + */ +LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); +LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, + const struct perf_cpu_map *rhs); +/** + * perf_cpu_map__any_cpu - Does the map contain the "any CPU"/dummy -1 value? + */ +LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ (idx) < perf_cpu_map__nr(cpus); \ (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) +#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \ + for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \ + (idx) < perf_cpu_map__nr(cpus); \ + (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \ + if ((_cpu).cpu != -1) + +#define perf_cpu_map__for_each_idx(idx, cpus) \ + for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++) + #endif /* __LIBPERF_CPUMAP_H */ diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 75ee385fb078..43a8cb04994f 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -70,13 +70,21 @@ struct perf_record_lost { __u64 lost; }; +#define PERF_RECORD_MISC_LOST_SAMPLES_BPF (1 << 15) + struct perf_record_lost_samples { struct perf_event_header header; __u64 lost; }; +#define MAX_ID_HDR_ENTRIES 6 +struct perf_record_lost_samples_and_ids { + struct perf_record_lost_samples lost; + __u64 sample_ids[MAX_ID_HDR_ENTRIES]; +}; + /* - * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID + * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST */ struct perf_record_read { struct perf_event_header header; @@ -85,6 +93,7 @@ struct perf_record_read { __u64 time_enabled; __u64 time_running; __u64 id; + __u64 lost; }; struct perf_record_throttle { @@ -95,7 +104,7 @@ struct perf_record_throttle { }; #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif struct perf_record_ksymbol { @@ -142,32 +151,106 @@ struct perf_record_switch { __u32 next_prev_tid; }; +struct perf_record_callchain_deferred { + struct perf_event_header header; + /* + * This is to match kernel and (deferred) user stacks together. + * The kernel part will be in the sample callchain array after + * the PERF_CONTEXT_USER_DEFERRED entry. + */ + __u64 cookie; + __u64 nr; + __u64 ips[]; +}; + struct perf_record_header_attr { struct perf_event_header header; struct perf_event_attr attr; - __u64 id[]; + /* + * Array of u64 id follows here but we cannot use a flexible array + * because size of attr in the data can be different then current + * version. Please use perf_record_header_attr_id() below. + * + * __u64 id[]; // do not use this + */ }; +/* Returns the pointer to id array based on the actual attr size. */ +#define perf_record_header_attr_id(evt) \ + ((void *)&(evt)->attr.attr + (evt)->attr.attr.size) + enum { PERF_CPU_MAP__CPUS = 0, PERF_CPU_MAP__MASK = 1, + PERF_CPU_MAP__RANGE_CPUS = 2, }; +/* + * Array encoding of a perf_cpu_map where nr is the number of entries in cpu[] + * and each entry is a value for a CPU in the map. + */ struct cpu_map_entries { __u16 nr; __u16 cpu[]; }; -struct perf_record_record_cpu_map { +/* Bitmap encoding of a perf_cpu_map where bitmap entries are 32-bit. */ +struct perf_record_mask_cpu_map32 { + /* Number of mask values. */ __u16 nr; + /* Constant 4. */ __u16 long_size; - unsigned long mask[]; + /* Bitmap data. */ + __u32 mask[]; +}; + +/* Bitmap encoding of a perf_cpu_map where bitmap entries are 64-bit. */ +struct perf_record_mask_cpu_map64 { + /* Number of mask values. */ + __u16 nr; + /* Constant 8. */ + __u16 long_size; + /* Legacy padding. */ + char __pad[4]; + /* Bitmap data. */ + __u64 mask[]; +}; + +/* + * 'struct perf_record_cpu_map_data' is packed as unfortunately an earlier + * version had unaligned data and we wish to retain file format compatibility. + * -irogers + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" + +/* + * An encoding of a CPU map for a range starting at start_cpu through to + * end_cpu. If any_cpu is 1, an any CPU (-1) value (aka dummy value) is present. + */ +struct perf_record_range_cpu_map { + __u8 any_cpu; + __u8 __pad; + __u16 start_cpu; + __u16 end_cpu; }; struct perf_record_cpu_map_data { __u16 type; - char data[]; -}; + union { + /* Used when type == PERF_CPU_MAP__CPUS. */ + struct cpu_map_entries cpus_data; + /* Used when type == PERF_CPU_MAP__MASK and long_size == 4. */ + struct perf_record_mask_cpu_map32 mask32_data; + /* Used when type == PERF_CPU_MAP__MASK and long_size == 8. */ + struct perf_record_mask_cpu_map64 mask64_data; + /* Used when type == PERF_CPU_MAP__RANGE_CPUS. */ + struct perf_record_range_cpu_map range_cpu_data; + }; +} __attribute__((packed)); + +#pragma GCC diagnostic pop struct perf_record_cpu_map { struct perf_event_header header; @@ -193,7 +276,16 @@ struct perf_record_event_update { struct perf_event_header header; __u64 type; __u64 id; - char data[]; + union { + /* Used when type == PERF_EVENT_UPDATE__SCALE. */ + struct perf_record_event_update_scale scale; + /* Used when type == PERF_EVENT_UPDATE__UNIT. */ + char unit[0]; + /* Used when type == PERF_EVENT_UPDATE__NAME. */ + char name[0]; + /* Used when type == PERF_EVENT_UPDATE__CPUS. */ + struct perf_record_event_update_cpus cpus; + }; }; #define MAX_EVENT_NAME 64 @@ -211,6 +303,7 @@ struct perf_record_header_event_type { struct perf_record_header_tracing_data { struct perf_event_header header; __u32 size; + __u32 pad; }; #define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15) @@ -237,10 +330,15 @@ struct id_index_entry { __u64 tid; }; +struct id_index_entry_2 { + __u64 machine_pid; + __u64 vcpu; +}; + struct perf_record_id_index { struct perf_event_header header; __u64 nr; - struct id_index_entry entries[0]; + struct id_index_entry entries[]; }; struct perf_record_auxtrace_info { @@ -274,6 +372,8 @@ struct perf_record_auxtrace_error { __u64 ip; __u64 time; char msg[MAX_AUXTRACE_ERROR_MSG]; + __u32 machine_pid; + __u32 vcpu; }; struct perf_record_aux { @@ -309,7 +409,8 @@ enum { PERF_STAT_CONFIG_TERM__AGGR_MODE = 0, PERF_STAT_CONFIG_TERM__INTERVAL = 1, PERF_STAT_CONFIG_TERM__SCALE = 2, - PERF_STAT_CONFIG_TERM__MAX = 3, + PERF_STAT_CONFIG_TERM__AGGR_LEVEL = 3, + PERF_STAT_CONFIG_TERM__MAX = 4, }; struct perf_record_stat_config_entry { @@ -369,6 +470,32 @@ struct perf_record_compressed { char data[]; }; +/* + * `header.size` includes the padding we are going to add while writing the record. + * `data_size` only includes the size of `data[]` itself. + */ +struct perf_record_compressed2 { + struct perf_event_header header; + __u64 data_size; + char data[]; +}; + +#define BPF_METADATA_KEY_LEN 64 +#define BPF_METADATA_VALUE_LEN 256 +#define BPF_PROG_NAME_LEN KSYM_NAME_LEN + +struct perf_record_bpf_metadata_entry { + char key[BPF_METADATA_KEY_LEN]; + char value[BPF_METADATA_VALUE_LEN]; +}; + +struct perf_record_bpf_metadata { + struct perf_event_header header; + char prog_name[BPF_PROG_NAME_LEN]; + __u64 nr_entries; + struct perf_record_bpf_metadata_entry entries[]; +}; + enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_USER_TYPE_START = 64, PERF_RECORD_HEADER_ATTR = 64, @@ -389,6 +516,9 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_TIME_CONV = 79, PERF_RECORD_HEADER_FEATURE = 80, PERF_RECORD_COMPRESSED = 81, + PERF_RECORD_FINISHED_INIT = 82, + PERF_RECORD_COMPRESSED2 = 83, + PERF_RECORD_BPF_METADATA = 84, PERF_RECORD_HEADER_MAX }; @@ -405,6 +535,7 @@ union perf_event { struct perf_record_read read; struct perf_record_throttle throttle; struct perf_record_sample sample; + struct perf_record_callchain_deferred callchain_deferred; struct perf_record_bpf_event bpf; struct perf_record_ksymbol ksymbol; struct perf_record_text_poke_event text_poke; @@ -429,6 +560,8 @@ union perf_event { struct perf_record_time_conv time_conv; struct perf_record_header_feature feat; struct perf_record_compressed pack; + struct perf_record_compressed2 pack2; + struct perf_record_bpf_metadata bpf_metadata; }; #endif /* __LIBPERF_EVENT_H */ diff --git a/tools/lib/perf/include/perf/evlist.h b/tools/lib/perf/include/perf/evlist.h index 9ca399d49bb4..e894b770779e 100644 --- a/tools/lib/perf/include/perf/evlist.h +++ b/tools/lib/perf/include/perf/evlist.h @@ -47,4 +47,5 @@ LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist, (pos) = perf_evlist__next_mmap((evlist), (pos), overwrite)) LIBPERF_API void perf_evlist__set_leader(struct perf_evlist *evlist); +LIBPERF_API int perf_evlist__nr_groups(struct perf_evlist *evlist); #endif /* __LIBPERF_EVLIST_H */ diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 2a9516b42d15..6f92204075c2 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -18,8 +18,10 @@ struct perf_counts_values { uint64_t val; uint64_t ena; uint64_t run; + uint64_t id; + uint64_t lost; }; - uint64_t values[3]; + uint64_t values[5]; }; }; @@ -36,6 +38,7 @@ LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); +LIBPERF_API int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread); LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h index a7c50de8d010..44deb815b817 100644 --- a/tools/lib/perf/include/perf/threadmap.h +++ b/tools/lib/perf/include/perf/threadmap.h @@ -8,11 +8,13 @@ struct perf_thread_map; LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void); +LIBPERF_API struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array); -LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid); -LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread); +LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid); +LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx); LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads); -LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread); +LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx); +LIBPERF_API int perf_thread_map__idx(struct perf_thread_map *map, pid_t pid); LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map); LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map); diff --git a/tools/lib/perf/lib.c b/tools/lib/perf/lib.c index 18658931fc71..696fb0ea67c6 100644 --- a/tools/lib/perf/lib.c +++ b/tools/lib/perf/lib.c @@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n) return ion(true, fd, buf, n); } +ssize_t preadn(int fd, void *buf, size_t n, off_t offs) +{ + size_t left = n; + + while (left) { + ssize_t ret = pread(fd, buf, left, offs); + + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) + return ret; + + left -= ret; + buf += ret; + offs += ret; + } + + return n; +} + /* * Write exactly 'n' bytes or return an error. */ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 93696affda2e..fdd8304fe9d0 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -1,16 +1,21 @@ LIBPERF_0.0.1 { global: libperf_init; - perf_cpu_map__dummy_new; + perf_cpu_map__new_any_cpu; + perf_cpu_map__new_online_cpus; perf_cpu_map__get; perf_cpu_map__put; perf_cpu_map__new; - perf_cpu_map__read; perf_cpu_map__nr; perf_cpu_map__cpu; - perf_cpu_map__empty; + perf_cpu_map__has_any_cpu_or_is_empty; + perf_cpu_map__is_any_cpu_or_is_empty; + perf_cpu_map__is_empty; + perf_cpu_map__has_any_cpu; + perf_cpu_map__min; perf_cpu_map__max; perf_cpu_map__has; + perf_thread_map__new_array; perf_thread_map__new_dummy; perf_thread_map__set_pid; perf_thread_map__comm; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index f7ee07cb5818..ec124eb0ec0a 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -13,11 +13,13 @@ #include <internal/lib.h> #include <linux/kernel.h> #include <linux/math64.h> +#include <linux/stringify.h> #include "internal.h" void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb) { + /* Assume fields were zero initialized. */ map->fd = -1; map->overwrite = overwrite; map->unmap_cb = unmap_cb; @@ -50,13 +52,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, void perf_mmap__munmap(struct perf_mmap *map) { - if (map && map->base != NULL) { + if (!map) + return; + + zfree(&map->event_copy); + map->event_copy_sz = 0; + if (map->base) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; map->fd = -1; refcount_set(&map->refcnt, 0); } - if (map && map->unmap_cb) + if (map->unmap_cb) map->unmap_cb(map); } @@ -222,9 +229,17 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, */ if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { unsigned int offset = *startp; - unsigned int len = min(sizeof(*event), size), cpy; + unsigned int len = size, cpy; void *dst = map->event_copy; + if (size > map->event_copy_sz) { + dst = realloc(map->event_copy, size); + if (!dst) + return NULL; + map->event_copy = dst; + map->event_copy_sz = size; + } + do { cpy = min(map->mask + 1 - (offset & map->mask), len); memcpy(dst, &data[offset & map->mask], cpy); @@ -264,7 +279,7 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map) if (!refcount_read(&map->refcnt)) return NULL; - /* non-overwirte doesn't pause the ringbuffer */ + /* non-overwrite doesn't pause the ringbuffer */ if (!map->overwrite) map->end = perf_mmap__read_head(map); @@ -294,6 +309,169 @@ static u64 read_timestamp(void) return low | ((u64)high) << 32; } +#elif defined(__aarch64__) +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +static u64 read_pmccntr(void) +{ + return read_sysreg(pmccntr_el0); +} + +#define PMEVCNTR_READ(idx) \ + static u64 read_pmevcntr_##idx(void) { \ + return read_sysreg(pmevcntr##idx##_el0); \ + } + +PMEVCNTR_READ(0); +PMEVCNTR_READ(1); +PMEVCNTR_READ(2); +PMEVCNTR_READ(3); +PMEVCNTR_READ(4); +PMEVCNTR_READ(5); +PMEVCNTR_READ(6); +PMEVCNTR_READ(7); +PMEVCNTR_READ(8); +PMEVCNTR_READ(9); +PMEVCNTR_READ(10); +PMEVCNTR_READ(11); +PMEVCNTR_READ(12); +PMEVCNTR_READ(13); +PMEVCNTR_READ(14); +PMEVCNTR_READ(15); +PMEVCNTR_READ(16); +PMEVCNTR_READ(17); +PMEVCNTR_READ(18); +PMEVCNTR_READ(19); +PMEVCNTR_READ(20); +PMEVCNTR_READ(21); +PMEVCNTR_READ(22); +PMEVCNTR_READ(23); +PMEVCNTR_READ(24); +PMEVCNTR_READ(25); +PMEVCNTR_READ(26); +PMEVCNTR_READ(27); +PMEVCNTR_READ(28); +PMEVCNTR_READ(29); +PMEVCNTR_READ(30); + +/* + * Read a value direct from PMEVCNTR<idx> + */ +static u64 read_perf_counter(unsigned int counter) +{ + static u64 (* const read_f[])(void) = { + read_pmevcntr_0, + read_pmevcntr_1, + read_pmevcntr_2, + read_pmevcntr_3, + read_pmevcntr_4, + read_pmevcntr_5, + read_pmevcntr_6, + read_pmevcntr_7, + read_pmevcntr_8, + read_pmevcntr_9, + read_pmevcntr_10, + read_pmevcntr_11, + read_pmevcntr_13, + read_pmevcntr_12, + read_pmevcntr_14, + read_pmevcntr_15, + read_pmevcntr_16, + read_pmevcntr_17, + read_pmevcntr_18, + read_pmevcntr_19, + read_pmevcntr_20, + read_pmevcntr_21, + read_pmevcntr_22, + read_pmevcntr_23, + read_pmevcntr_24, + read_pmevcntr_25, + read_pmevcntr_26, + read_pmevcntr_27, + read_pmevcntr_28, + read_pmevcntr_29, + read_pmevcntr_30, + read_pmccntr + }; + + if (counter < ARRAY_SIZE(read_f)) + return (read_f[counter])(); + + return 0; +} + +static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } + +/* __riscv_xlen contains the witdh of the native base integer, here 64-bit */ +#elif defined(__riscv) && __riscv_xlen == 64 + +/* TODO: implement rv32 support */ + +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 + +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, %1" \ + : "=r" (__v) \ + : "i" (csr) : ); \ + __v; \ +}) + +static unsigned long csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ + case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + default: + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +static u64 read_perf_counter(unsigned int counter) +{ + return csr_read_num(CSR_CYCLE + counter); +} + +static u64 read_timestamp(void) +{ + return csr_read_num(CSR_TIME); +} + #else static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; } @@ -330,7 +508,7 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count idx = READ_ONCE(pc->index); cnt = READ_ONCE(pc->offset); if (pc->cap_user_rdpmc && idx) { - s64 evcnt = read_perf_counter(idx - 1); + u64 evcnt = read_perf_counter(idx - 1); u16 width = READ_ONCE(pc->pmc_width); evcnt <<= 64 - width; diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index d39378eaf897..c998b1dae863 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -14,12 +14,14 @@ static int libperf_print(enum libperf_print_level level, int test_cpumap(int argc, char **argv) { struct perf_cpu_map *cpus; + struct perf_cpu cpu; + int idx; __T_START; libperf_init(libperf_print); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); if (!cpus) return -1; @@ -27,6 +29,15 @@ int test_cpumap(int argc, char **argv) perf_cpu_map__put(cpus); perf_cpu_map__put(cpus); + cpus = perf_cpu_map__new_online_cpus(); + if (!cpus) + return -1; + + perf_cpu_map__for_each_cpu(cpu, idx, cpus) + __T("wrong cpu number", cpu.cpu != -1); + + perf_cpu_map__put(cpus); + __T_END; return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index b3479dfa9a1c..10f70cb41ff1 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET) +#include <inttypes.h> #include <sched.h> #include <stdio.h> #include <stdarg.h> @@ -45,7 +46,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); @@ -68,7 +69,7 @@ static int test_stat_cpu(void) perf_evlist__set_maps(evlist, cpus, NULL); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { cpus = perf_evsel__cpus(evsel); @@ -129,7 +130,7 @@ static int test_stat_thread(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -186,7 +187,7 @@ static int test_stat_thread_enable(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -260,7 +261,7 @@ static int test_mmap_thread(void) threads = perf_thread_map__new_dummy(); __T("failed to create threads", threads); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); __T("failed to create cpus", cpus); perf_thread_map__set_pid(threads, 0, pid); @@ -349,7 +350,7 @@ static int test_mmap_cpus(void) attr.config = id; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); @@ -506,7 +507,7 @@ static int test_stat_multiplexing(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__enable(evlist); @@ -526,12 +527,12 @@ static int test_stat_multiplexing(void) min = counts[0].val; for (i = 0; i < EVENT_NUM; i++) { - __T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n", + __T_VERBOSE("Event %2d -- Raw count = %" PRIu64 ", run = %" PRIu64 ", enable = %" PRIu64 "\n", i, counts[i].val, counts[i].run, counts[i].ena); perf_counts_values__scale(&counts[i], true, &scaled); if (scaled == 1) { - __T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n", + __T_VERBOSE("\t Scaled count = %" PRIu64 " (%.2lf%%, %" PRIu64 "/%" PRIu64 ")\n", counts[i].val, (double)counts[i].run / (double)counts[i].ena * 100.0, counts[i].run, counts[i].ena); diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 33ae9334861a..545ec3150546 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdarg.h> #include <stdio.h> +#include <string.h> #include <linux/perf_event.h> +#include <linux/kernel.h> #include <perf/cpumap.h> #include <perf/threadmap.h> #include <perf/evsel.h> +#include <internal/evsel.h> #include <internal/tests.h> #include "tests.h" @@ -24,7 +27,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evsel = perf_evsel__new(&attr); @@ -130,6 +133,9 @@ static int test_stat_user_read(int event) struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = event, +#ifdef __aarch64__ + .config1 = 0x2, /* Request user access */ +#endif }; int err, i; @@ -150,7 +156,7 @@ static int test_stat_user_read(int event) pc = perf_evsel__mmap_base(evsel, 0, 0); __T("failed to get mmapped address", pc); -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) __T("userspace counter access not supported", pc->cap_user_rdpmc); __T("userspace counter access not enabled", pc->index); __T("userspace counter width not set", pc->pmc_width >= 32); @@ -186,6 +192,163 @@ static int test_stat_user_read(int event) return 0; } +static int test_stat_read_format_single(struct perf_event_attr *attr, struct perf_thread_map *threads) +{ + struct perf_evsel *evsel; + struct perf_counts_values counts; + volatile int count = 0x100000; + int err; + + evsel = perf_evsel__new(attr); + __T("failed to create evsel", evsel); + + /* skip old kernels that don't support the format */ + err = perf_evsel__open(evsel, NULL, threads); + if (err < 0) + return 0; + + while (count--) ; + + memset(&counts, -1, sizeof(counts)); + perf_evsel__read(evsel, 0, 0, &counts); + + __T("failed to read value", counts.val); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + __T("failed to read TOTAL_TIME_ENABLED", counts.ena); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + __T("failed to read TOTAL_TIME_RUNNING", counts.run); + if (attr->read_format & PERF_FORMAT_ID) + __T("failed to read ID", counts.id); + if (attr->read_format & PERF_FORMAT_LOST) + __T("failed to read LOST", counts.lost == 0); + + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + return 0; +} + +static int test_stat_read_format_group(struct perf_event_attr *attr, struct perf_thread_map *threads) +{ + struct perf_evsel *leader, *member; + struct perf_counts_values counts; + volatile int count = 0x100000; + int err; + + attr->read_format |= PERF_FORMAT_GROUP; + leader = perf_evsel__new(attr); + __T("failed to create leader", leader); + + attr->read_format &= ~PERF_FORMAT_GROUP; + member = perf_evsel__new(attr); + __T("failed to create member", member); + + member->leader = leader; + leader->nr_members = 2; + + /* skip old kernels that don't support the format */ + err = perf_evsel__open(leader, NULL, threads); + if (err < 0) + return 0; + err = perf_evsel__open(member, NULL, threads); + if (err < 0) + return 0; + + while (count--) ; + + memset(&counts, -1, sizeof(counts)); + perf_evsel__read(leader, 0, 0, &counts); + + __T("failed to read leader value", counts.val); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + __T("failed to read leader TOTAL_TIME_ENABLED", counts.ena); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + __T("failed to read leader TOTAL_TIME_RUNNING", counts.run); + if (attr->read_format & PERF_FORMAT_ID) + __T("failed to read leader ID", counts.id); + if (attr->read_format & PERF_FORMAT_LOST) + __T("failed to read leader LOST", counts.lost == 0); + + memset(&counts, -1, sizeof(counts)); + perf_evsel__read(member, 0, 0, &counts); + + __T("failed to read member value", counts.val); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + __T("failed to read member TOTAL_TIME_ENABLED", counts.ena); + if (attr->read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + __T("failed to read member TOTAL_TIME_RUNNING", counts.run); + if (attr->read_format & PERF_FORMAT_ID) + __T("failed to read member ID", counts.id); + if (attr->read_format & PERF_FORMAT_LOST) + __T("failed to read member LOST", counts.lost == 0); + + perf_evsel__close(member); + perf_evsel__close(leader); + perf_evsel__delete(member); + perf_evsel__delete(leader); + return 0; +} + +static int test_stat_read_format(void) +{ + struct perf_thread_map *threads; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + }; + int err, i; + +#define FMT(_fmt) PERF_FORMAT_ ## _fmt +#define FMT_TIME (FMT(TOTAL_TIME_ENABLED) | FMT(TOTAL_TIME_RUNNING)) + + uint64_t test_formats [] = { + 0, + FMT_TIME, + FMT(ID), + FMT(LOST), + FMT_TIME | FMT(ID), + FMT_TIME | FMT(LOST), + FMT_TIME | FMT(ID) | FMT(LOST), + FMT(ID) | FMT(LOST), + }; + +#undef FMT +#undef FMT_TIME + + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) { + attr.read_format = test_formats[i]; + __T_VERBOSE("testing single read with read_format: %lx\n", + (unsigned long)test_formats[i]); + + err = test_stat_read_format_single(&attr, threads); + __T("failed to read single format", err == 0); + } + + perf_thread_map__put(threads); + + threads = perf_thread_map__new_array(2, NULL); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + perf_thread_map__set_pid(threads, 1, 0); + + for (i = 0; i < (int)ARRAY_SIZE(test_formats); i++) { + attr.read_format = test_formats[i]; + __T_VERBOSE("testing group read with read_format: %lx\n", + (unsigned long)test_formats[i]); + + err = test_stat_read_format_group(&attr, threads); + __T("failed to read group format", err == 0); + } + + perf_thread_map__put(threads); + return 0; +} + int test_evsel(int argc, char **argv) { __T_START; @@ -197,6 +360,7 @@ int test_evsel(int argc, char **argv) test_stat_thread_enable(); test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS); test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES); + test_stat_read_format(); __T_END; return tests_failed == 0 ? 0 : -1; diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 5e2a0291e94c..f728ad7002bb 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -11,9 +11,43 @@ static int libperf_print(enum libperf_print_level level, return vfprintf(stderr, fmt, ap); } +static int test_threadmap_array(int nr, pid_t *array) +{ + struct perf_thread_map *threads; + int i; + + threads = perf_thread_map__new_array(nr, array); + __T("Failed to allocate new thread map", threads); + + __T("Unexpected number of threads", perf_thread_map__nr(threads) == nr); + + for (i = 0; i < nr; i++) { + __T("Unexpected initial value of thread", + perf_thread_map__pid(threads, i) == (array ? array[i] : -1)); + } + + for (i = 1; i < nr; i++) + perf_thread_map__set_pid(threads, i, i * 100); + + __T("Unexpected value of thread 0", + perf_thread_map__pid(threads, 0) == (array ? array[0] : -1)); + + for (i = 1; i < nr; i++) { + __T("Unexpected thread value", + perf_thread_map__pid(threads, i) == i * 100); + } + + perf_thread_map__put(threads); + + return 0; +} + +#define THREADS_NR 10 int test_threadmap(int argc, char **argv) { struct perf_thread_map *threads; + pid_t thr_array[THREADS_NR]; + int i; __T_START; @@ -27,6 +61,13 @@ int test_threadmap(int argc, char **argv) perf_thread_map__put(threads); perf_thread_map__put(threads); + test_threadmap_array(THREADS_NR, NULL); + + for (i = 0; i < THREADS_NR; i++) + thr_array[i] = i + 100; + + test_threadmap_array(THREADS_NR, thr_array); + __T_END; return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c index e92c368b0a6c..db431b036f57 100644 --- a/tools/lib/perf/threadmap.c +++ b/tools/lib/perf/threadmap.c @@ -32,28 +32,38 @@ struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, in #define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr) -void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid) +void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid) { - map->map[thread].pid = pid; + map->map[idx].pid = pid; } -char *perf_thread_map__comm(struct perf_thread_map *map, int thread) +char *perf_thread_map__comm(struct perf_thread_map *map, int idx) { - return map->map[thread].comm; + return map->map[idx].comm; } -struct perf_thread_map *perf_thread_map__new_dummy(void) +struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array) { - struct perf_thread_map *threads = thread_map__alloc(1); + struct perf_thread_map *threads = thread_map__alloc(nr_threads); + int i; + + if (!threads) + return NULL; + + for (i = 0; i < nr_threads; i++) + perf_thread_map__set_pid(threads, i, array ? array[i] : -1); + + threads->nr = nr_threads; + refcount_set(&threads->refcnt, 1); - if (threads != NULL) { - perf_thread_map__set_pid(threads, 0, -1); - threads->nr = 1; - refcount_set(&threads->refcnt, 1); - } return threads; } +struct perf_thread_map *perf_thread_map__new_dummy(void) +{ + return perf_thread_map__new_array(1, NULL); +} + static void perf_thread_map__delete(struct perf_thread_map *threads) { if (threads) { @@ -85,7 +95,24 @@ int perf_thread_map__nr(struct perf_thread_map *threads) return threads ? threads->nr : 1; } -pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread) +pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx) +{ + if (!map) { + assert(idx == 0); + return -1; + } + + return map->map[idx].pid; +} + +int perf_thread_map__idx(struct perf_thread_map *threads, pid_t pid) { - return map->map[thread].pid; + if (!threads) + return pid == -1 ? 0 : -1; + + for (int i = 0; i < threads->nr; ++i) { + if (threads->map[i].pid == pid) + return i; + } + return -1; } diff --git a/tools/lib/python/__init__.py b/tools/lib/python/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/lib/python/__init__.py diff --git a/tools/lib/python/abi/__init__.py b/tools/lib/python/abi/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/lib/python/abi/__init__.py diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py new file mode 100644 index 000000000000..9b8db70067ef --- /dev/null +++ b/tools/lib/python/abi/abi_parser.py @@ -0,0 +1,628 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. +""" + +from argparse import Namespace +import logging +import os +import re + +from pprint import pformat +from random import randrange, seed + +# Import Python modules + +from abi.helpers import AbiDebug, ABI_DIR + + +class AbiParser: + """Main class to parse ABI files""" + + TAGS = r"(what|where|date|kernelversion|contact|description|users)" + XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)" + + def __init__(self, directory, logger=None, + enable_lineno=False, show_warnings=True, debug=0): + """Stores arguments for the class and initialize class vars""" + + self.directory = directory + self.enable_lineno = enable_lineno + self.show_warnings = show_warnings + self.debug = debug + + if not logger: + self.log = logging.getLogger("get_abi") + else: + self.log = logger + + self.data = {} + self.what_symbols = {} + self.file_refs = {} + self.what_refs = {} + + # Ignore files that contain such suffixes + self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~") + + # Regular expressions used on parser + self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR) + self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I) + self.re_valid = re.compile(self.TAGS) + self.re_start_spc = re.compile(r"(\s*)(\S.*)") + self.re_whitespace = re.compile(r"^\s+") + + # Regular used on print + self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})") + self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])") + self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)") + self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n") + self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst") + self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)") + self.re_xref_node = re.compile(self.XREF) + + def warn(self, fdata, msg, extra=None): + """Displays a parse error if warning is enabled""" + + if not self.show_warnings: + return + + msg = f"{fdata.fname}:{fdata.ln}: {msg}" + if extra: + msg += "\n\t\t" + extra + + self.log.warning(msg) + + def add_symbol(self, what, fname, ln=None, xref=None): + """Create a reference table describing where each 'what' is located""" + + if what not in self.what_symbols: + self.what_symbols[what] = {"file": {}} + + if fname not in self.what_symbols[what]["file"]: + self.what_symbols[what]["file"][fname] = [] + + if ln and ln not in self.what_symbols[what]["file"][fname]: + self.what_symbols[what]["file"][fname].append(ln) + + if xref: + self.what_symbols[what]["xref"] = xref + + def _parse_line(self, fdata, line): + """Parse a single line of an ABI file""" + + new_what = False + new_tag = False + content = None + + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + sep = match.group(2) + content = match.group(3) + + match = self.re_valid.search(new) + if match: + new_tag = match.group(1) + else: + if fdata.tag == "description": + # New "tag" is actually part of description. + # Don't consider it a tag + new_tag = False + elif fdata.tag != "": + self.warn(fdata, f"tag '{fdata.tag}' is invalid", line) + + if new_tag: + # "where" is Invalid, but was a common mistake. Warn if found + if new_tag == "where": + self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead") + new_tag = "what" + + if new_tag == "what": + fdata.space = None + + if content not in self.what_symbols: + self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln) + + if fdata.tag == "what": + fdata.what.append(content.strip("\n")) + else: + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fdata.fname, + ln=fdata.what_ln, xref=fdata.key) + + fdata.label = content + new_what = True + + key = "abi_" + content.lower() + fdata.key = self.re_unprintable.sub("_", key).strip("_") + + # Avoid duplicated keys but using a defined seed, to make + # the namespace identical if there aren't changes at the + # ABI symbols + seed(42) + + while fdata.key in self.data: + char = randrange(0, 51) + ord("A") + if char > ord("Z"): + char += ord("a") - ord("Z") - 1 + + fdata.key += chr(char) + + if fdata.key and fdata.key not in self.data: + self.data[fdata.key] = { + "what": [content], + "file": [fdata.file_ref], + "path": fdata.ftype, + "line_no": fdata.ln, + } + + fdata.what = self.data[fdata.key]["what"] + + self.what_refs[content] = fdata.key + fdata.tag = new_tag + fdata.what_ln = fdata.ln + + if fdata.nametag["what"]: + t = (content, fdata.key) + if t not in fdata.nametag["symbols"]: + fdata.nametag["symbols"].append(t) + + return + + if fdata.tag and new_tag: + fdata.tag = new_tag + + if new_what: + fdata.label = "" + + if "description" in self.data[fdata.key]: + self.data[fdata.key]["description"] += "\n\n" + + if fdata.file_ref not in self.data[fdata.key]["file"]: + self.data[fdata.key]["file"].append(fdata.file_ref) + + if self.debug == AbiDebug.WHAT_PARSING: + self.log.debug("what: %s", fdata.what) + + if not fdata.what: + self.warn(fdata, "'What:' should come first:", line) + return + + if new_tag == "description": + fdata.space = None + + if content: + sep = sep.replace(":", " ") + + c = " " * len(new_tag) + sep + content + c = c.expandtabs() + + match = self.re_start_spc.match(c) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + content = match.group(2) + "\n" + + self.data[fdata.key][fdata.tag] = content + + return + + # Store any contents before tags at the database + if not fdata.tag and "what" in fdata.nametag: + fdata.nametag["description"] += line + return + + if fdata.tag == "description": + content = line.expandtabs() + + if self.re_whitespace.sub("", content) == "": + self.data[fdata.key][fdata.tag] += "\n" + return + + if fdata.space is None: + match = self.re_start_spc.match(content) + if match: + # Preserve initial spaces for the first line + fdata.space = match.group(1) + + content = match.group(2) + "\n" + else: + if content.startswith(fdata.space): + content = content[len(fdata.space):] + + else: + fdata.space = "" + + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += content + return + + content = line.strip() + if fdata.tag: + if fdata.tag == "what": + w = content.strip("\n") + if w: + self.data[fdata.key][fdata.tag].append(w) + else: + self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n") + return + + # Everything else is error + if content: + self.warn(fdata, "Unexpected content", line) + + def parse_readme(self, nametag, fname): + """Parse ABI README file""" + + nametag["what"] = ["Introduction"] + nametag["path"] = "README" + with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + match = self.re_tag.match(line) + if match: + new = match.group(1).lower() + + match = self.re_valid.search(new) + if match: + nametag["description"] += "\n:" + line + continue + + nametag["description"] += line + + def parse_file(self, fname, path, basename): + """Parse a single file""" + + ref = f"abi_file_{path}_{basename}" + ref = self.re_unprintable.sub("_", ref).strip("_") + + # Store per-file state into a namespace variable. This will be used + # by the per-line parser state machine and by the warning function. + fdata = Namespace + + fdata.fname = fname + fdata.name = basename + + pos = fname.find(ABI_DIR) + if pos > 0: + f = fname[pos:] + else: + f = fname + + fdata.file_ref = (f, ref) + self.file_refs[f] = ref + + fdata.ln = 0 + fdata.what_ln = 0 + fdata.tag = "" + fdata.label = "" + fdata.what = [] + fdata.key = None + fdata.xrefs = None + fdata.space = None + fdata.ftype = path.split("/")[0] + + fdata.nametag = {} + fdata.nametag["what"] = [f"ABI file {path}/{basename}"] + fdata.nametag["type"] = "File" + fdata.nametag["path"] = fdata.ftype + fdata.nametag["file"] = [fdata.file_ref] + fdata.nametag["line_no"] = 1 + fdata.nametag["description"] = "" + fdata.nametag["symbols"] = [] + + self.data[ref] = fdata.nametag + + if self.debug & AbiDebug.WHAT_OPEN: + self.log.debug("Opening file %s", fname) + + if basename == "README": + self.parse_readme(fdata.nametag, fname) + return + + with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: + for line in fp: + fdata.ln += 1 + + self._parse_line(fdata, line) + + if "description" in fdata.nametag: + fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n") + + if fdata.key: + if "description" not in self.data.get(fdata.key, {}): + self.warn(fdata, f"{fdata.key} doesn't have a description") + + for w in fdata.what: + self.add_symbol(what=w, fname=fname, xref=fdata.key) + + def _parse_abi(self, root=None): + """Internal function to parse documentation ABI recursively""" + + if not root: + root = self.directory + + with os.scandir(root) as obj: + for entry in obj: + name = os.path.join(root, entry.name) + + if entry.is_dir(): + self._parse_abi(name) + continue + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if basename.startswith("."): + continue + + if basename.endswith(self.ignore_suffixes): + continue + + path = self.re_abi_dir.sub("", os.path.dirname(name)) + + self.parse_file(name, path, basename) + + def parse_abi(self, root=None): + """Parse documentation ABI""" + + self._parse_abi(root) + + if self.debug & AbiDebug.DUMP_ABI_STRUCTS: + self.log.debug(pformat(self.data)) + + def desc_txt(self, desc): + """Print description as found inside ABI files""" + + desc = desc.strip(" \t\n") + + return desc + "\n\n" + + def xref(self, fname): + """ + Converts a Documentation/ABI + basename into a ReST cross-reference + """ + + xref = self.file_refs.get(fname) + if not xref: + return None + else: + return xref + + def desc_rst(self, desc): + """Enrich ReST output by creating cross-references""" + + # Remove title markups from the description + # Having titles inside ABI files will only work if extra + # care would be taken in order to strictly follow the same + # level order for each markup. + desc = self.re_title_mark.sub("\n\n", "\n" + desc) + desc = desc.rstrip(" \t\n").lstrip("\n") + + # Python's regex performance for non-compiled expressions is a lot + # than Perl, as Perl automatically caches them at their + # first usage. Here, we'll need to do the same, as otherwise the + # performance penalty is be high + + new_desc = "" + for d in desc.split("\n"): + if d == "": + new_desc += "\n" + continue + + # Use cross-references for doc files where needed + d = self.re_doc.sub(r":doc:`/\1`", d) + + # Use cross-references for ABI generated docs where needed + matches = self.re_abi.findall(d) + for m in matches: + abi = m[0] + m[1] + + xref = self.file_refs.get(abi) + if not xref: + # This may happen if ABI is on a separate directory, + # like parsing ABI testing and symbol is at stable. + # The proper solution is to move this part of the code + # for it to be inside sphinx/kernel_abi.py + self.log.info("Didn't find ABI reference for '%s'", abi) + else: + new = self.re_escape.sub(r"\\\1", m[1]) + d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d) + + # Seek for cross reference symbols like /sys/... + # Need to be careful to avoid doing it on a code block + if d[0] not in [" ", "\t"]: + matches = self.re_xref_node.findall(d) + for m in matches: + # Finding ABI here is more complex due to wildcards + xref = self.what_refs.get(m) + if xref: + new = self.re_escape.sub(r"\\\1", m) + d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d) + + new_desc += d + "\n" + + return new_desc + "\n\n" + + def doc(self, output_in_txt=False, show_symbols=True, show_file=True, + filter_path=None): + """Print ABI at stdout""" + + part = None + for key, v in sorted(self.data.items(), + key=lambda x: (x[1].get("type", ""), + x[1].get("what"))): + + wtype = v.get("type", "Symbol") + file_ref = v.get("file") + names = v.get("what", [""]) + + if wtype == "File": + if not show_file: + continue + else: + if not show_symbols: + continue + + if filter_path: + if v.get("path") != filter_path: + continue + + msg = "" + + if wtype != "File": + cur_part = names[0] + if cur_part.find("/") >= 0: + match = self.re_what.match(cur_part) + if match: + symbol = match.group(1).rstrip("/") + cur_part = "Symbols under " + symbol + + if cur_part and cur_part != part: + part = cur_part + msg += part + "\n"+ "-" * len(part) +"\n\n" + + msg += f".. _{key}:\n\n" + + max_len = 0 + for i in range(0, len(names)): # pylint: disable=C0200 + names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**" + + max_len = max(max_len, len(names[i])) + + msg += "+-" + "-" * max_len + "-+\n" + for name in names: + msg += f"| {name}" + " " * (max_len - len(name)) + " |\n" + msg += "+-" + "-" * max_len + "-+\n" + msg += "\n" + + for ref in file_ref: + if wtype == "File": + msg += f".. _{ref[1]}:\n\n" + else: + base = os.path.basename(ref[0]) + msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n" + + if wtype == "File": + msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n" + + desc = v.get("description") + if not desc and wtype != "File": + msg += f"DESCRIPTION MISSING for {names[0]}\n\n" + + if desc: + if output_in_txt: + msg += self.desc_txt(desc) + else: + msg += self.desc_rst(desc) + + symbols = v.get("symbols") + if symbols: + msg += "Has the following ABI:\n\n" + + for w, label in symbols: + # Escape special chars from content + content = self.re_escape.sub(r"\\\1", w) + + msg += f"- :ref:`{content} <{label}>`\n\n" + + users = v.get("users") + if users and users.strip(" \t\n"): + users = users.strip("\n").replace('\n', '\n\t') + msg += f"Users:\n\t{users}\n\n" + + ln = v.get("line_no", 1) + + yield (msg, file_ref[0][0], ln) + + def check_issues(self): + """Warn about duplicated ABI entries""" + + for what, v in self.what_symbols.items(): + files = v.get("file") + if not files: + # Should never happen if the parser works properly + self.log.warning("%s doesn't have a file associated", what) + continue + + if len(files) == 1: + continue + + f = [] + for fname, lines in sorted(files.items()): + if not lines: + f.append(f"{fname}") + elif len(lines) == 1: + f.append(f"{fname}:{lines[0]}") + else: + m = fname + "lines " + m += ", ".join(str(x) for x in lines) + f.append(m) + + self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f)) + + def search_symbols(self, expr): + """ Searches for ABI symbols """ + + regex = re.compile(expr, re.I) + + found_keys = 0 + for t in sorted(self.data.items(), key=lambda x: [0]): + v = t[1] + + wtype = v.get("type", "") + if wtype == "File": + continue + + for what in v.get("what", [""]): + if regex.search(what): + found_keys += 1 + + kernelversion = v.get("kernelversion", "").strip(" \t\n") + date = v.get("date", "").strip(" \t\n") + contact = v.get("contact", "").strip(" \t\n") + users = v.get("users", "").strip(" \t\n") + desc = v.get("description", "").strip(" \t\n") + + files = [] + for f in v.get("file", ()): + files.append(f[0]) + + what = str(found_keys) + ". " + what + title_tag = "-" * len(what) + + print(f"\n{what}\n{title_tag}\n") + + if kernelversion: + print(f"Kernel version:\t\t{kernelversion}") + + if date: + print(f"Date:\t\t\t{date}") + + if contact: + print(f"Contact:\t\t{contact}") + + if users: + print(f"Users:\t\t\t{users}") + + print("Defined on file(s):\t" + ", ".join(files)) + + if desc: + desc = desc.strip("\n") + print(f"\n{desc}\n") + + if not found_keys: + print(f"Regular expression /{expr}/ not found.") diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py new file mode 100644 index 000000000000..d5553206de3c --- /dev/null +++ b/tools/lib/python/abi/abi_regex.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python3 +# xxpylint: disable=R0903 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# SPDX-License-Identifier: GPL-2.0 + +""" +Convert ABI what into regular expressions +""" + +import re +import sys + +from pprint import pformat + +from abi.abi_parser import AbiParser +from abi.helpers import AbiDebug + +class AbiRegex(AbiParser): + """Extends AbiParser to search ABI nodes with regular expressions""" + + # Escape only ASCII visible characters + escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])" + leave_others = "others" + + # Tuples with regular expressions to be compiled and replacement data + re_whats = [ + # Drop escape characters that might exist + (re.compile("\\\\"), ""), + + # Temporarily escape dot characters + (re.compile(r"\."), "\xf6"), + + # Temporarily change [0-9]+ type of patterns + (re.compile(r"\[0\-9\]\+"), "\xff"), + + # Temporarily change [\d+-\d+] type of patterns + (re.compile(r"\[0\-\d+\]"), "\xff"), + (re.compile(r"\[0:\d+\]"), "\xff"), + (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"), + + # Temporarily change [0-9] type of patterns + (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"), + + # Handle multiple option patterns + (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"), + + # Handle wildcards + (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"), + (re.compile(r"/\*/"), "/.*/"), + (re.compile(r"/\xf6\xf6\xf6"), "/.*"), + (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"), + (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"), + (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"), + + (re.compile(r"XX+"), "\\\\w\xf7"), + (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"), + (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"), + (re.compile(r"_[AB]_"), "_\\\\w\xf7_"), + + # Recover [0-9] type of patterns + (re.compile(r"\xf4"), "["), + (re.compile(r"\xf5"), "]"), + + # Remove duplicated spaces + (re.compile(r"\s+"), r" "), + + # Special case: drop comparison as in: + # What: foo = <something> + # (this happens on a few IIO definitions) + (re.compile(r"\s*\=.*$"), ""), + + # Escape all other symbols + (re.compile(escape_symbols), r"\\\1"), + (re.compile(r"\\\\"), r"\\"), + (re.compile(r"\\([\[\]\(\)\|])"), r"\1"), + (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"), + + (re.compile(r"\xff"), r"\\d+"), + + # Special case: IIO ABI which a parenthesis. + (re.compile(r"sqrt(.*)"), r"sqrt(.*)"), + + # Simplify regexes with multiple .* + (re.compile(r"(?:\.\*){2,}"), ""), + + # Recover dot characters + (re.compile(r"\xf6"), "\\."), + # Recover plus characters + (re.compile(r"\xf7"), "+"), + ] + re_has_num = re.compile(r"\\d") + + # Symbol name after escape_chars that are considered a devnode basename + re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$") + + # List of popular group names to be skipped to minimize regex group size + # Use AbiDebug.SUBGROUP_SIZE to detect those + skip_names = set(["devices", "hwmon"]) + + def regex_append(self, what, new): + """ + Get a search group for a subset of regular expressions. + + As ABI may have thousands of symbols, using a for to search all + regular expressions is at least O(n^2). When there are wildcards, + the complexity increases substantially, eventually becoming exponential. + + To avoid spending too much time on them, use a logic to split + them into groups. The smaller the group, the better, as it would + mean that searches will be confined to a small number of regular + expressions. + + The conversion to a regex subset is tricky, as we need something + that can be easily obtained from the sysfs symbol and from the + regular expression. So, we need to discard nodes that have + wildcards. + + If it can't obtain a subgroup, place the regular expression inside + a special group (self.leave_others). + """ + + search_group = None + + for search_group in reversed(new.split("/")): + if not search_group or search_group in self.skip_names: + continue + if self.re_symbol_name.match(search_group): + break + + if not search_group: + search_group = self.leave_others + + if self.debug & AbiDebug.SUBGROUP_MAP: + self.log.debug("%s: mapped as %s", what, search_group) + + try: + if search_group not in self.regex_group: + self.regex_group[search_group] = [] + + self.regex_group[search_group].append(re.compile(new)) + if self.search_string: + if what.find(self.search_string) >= 0: + print(f"What: {what}") + except re.PatternError: + self.log.warning("Ignoring '%s' as it produced an invalid regex:\n" + " '%s'", what, new) + + def get_regexes(self, what): + """ + Given an ABI devnode, return a list of all regular expressions that + may match it, based on the sub-groups created by regex_append() + """ + + re_list = [] + + patches = what.split("/") + patches.reverse() + patches.append(self.leave_others) + + for search_group in patches: + if search_group in self.regex_group: + re_list += self.regex_group[search_group] + + return re_list + + def __init__(self, *args, **kwargs): + """ + Override init method to get verbose argument + """ + + self.regex_group = None + self.search_string = None + self.re_string = None + + if "search_string" in kwargs: + self.search_string = kwargs.get("search_string") + del kwargs["search_string"] + + if self.search_string: + + try: + self.re_string = re.compile(self.search_string) + except re.PatternError as e: + msg = f"{self.search_string} is not a valid regular expression" + raise ValueError(msg) from e + + super().__init__(*args, **kwargs) + + def parse_abi(self, *args, **kwargs): + + super().parse_abi(*args, **kwargs) + + self.regex_group = {} + + print("Converting ABI What fields into regexes...", file=sys.stderr) + + for t in sorted(self.data.items(), key=lambda x: x[0]): + v = t[1] + if v.get("type") == "File": + continue + + v["regex"] = [] + + for what in v.get("what", []): + if not what.startswith("/sys"): + continue + + new = what + for r, s in self.re_whats: + try: + new = r.sub(s, new) + except re.PatternError as e: + # Help debugging troubles with new regexes + raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e + + v["regex"].append(new) + + if self.debug & AbiDebug.REGEX: + self.log.debug("%-90s <== %s", new, what) + + # Store regex into a subgroup to speedup searches + self.regex_append(what, new) + + if self.debug & AbiDebug.SUBGROUP_DICT: + self.log.debug("%s", pformat(self.regex_group)) + + if self.debug & AbiDebug.SUBGROUP_SIZE: + biggestd_keys = sorted(self.regex_group.keys(), + key= lambda k: len(self.regex_group[k]), + reverse=True) + + print("Top regex subgroups:", file=sys.stderr) + for k in biggestd_keys[:10]: + print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr) diff --git a/tools/lib/python/abi/helpers.py b/tools/lib/python/abi/helpers.py new file mode 100644 index 000000000000..639b23e4ca33 --- /dev/null +++ b/tools/lib/python/abi/helpers.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# pylint: disable=R0903 +# SPDX-License-Identifier: GPL-2.0 + +""" +Helper classes for ABI parser +""" + +ABI_DIR = "Documentation/ABI/" + + +class AbiDebug: + """Debug levels""" + + WHAT_PARSING = 1 + WHAT_OPEN = 2 + DUMP_ABI_STRUCTS = 4 + UNDEFINED = 8 + REGEX = 16 + SUBGROUP_MAP = 32 + SUBGROUP_DICT = 64 + SUBGROUP_SIZE = 128 + GRAPH = 256 + + +DEBUG_HELP = """ +1 - enable debug parsing logic +2 - enable debug messages on file open +4 - enable debug for ABI parse data +8 - enable extra debug information to identify troubles + with ABI symbols found at the local machine that + weren't found on ABI documentation (used only for + undefined subcommand) +16 - enable debug for what to regex conversion +32 - enable debug for symbol regex subgroups +64 - enable debug for sysfs graph tree variable +""" diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py new file mode 100644 index 000000000000..4a2554da217b --- /dev/null +++ b/tools/lib/python/abi/system_symbols.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0912,R0914,R0915,R1702 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# SPDX-License-Identifier: GPL-2.0 + +""" +Parse ABI documentation and produce results from it. +""" + +import os +import re +import sys + +from concurrent import futures +from datetime import datetime +from random import shuffle + +from abi.helpers import AbiDebug + +class SystemSymbols: + """Stores arguments for the class and initialize class vars""" + + def graph_add_file(self, path, link=None): + """ + add a file path to the sysfs graph stored at self.root + """ + + if path in self.files: + return + + name = "" + ref = self.root + for edge in path.split("/"): + name += edge + "/" + if edge not in ref: + ref[edge] = {"__name": [name.rstrip("/")]} + + ref = ref[edge] + + if link and link not in ref["__name"]: + ref["__name"].append(link.rstrip("/")) + + self.files.add(path) + + def print_graph(self, root_prefix="", root=None, level=0): + """Prints a reference tree graph using UTF-8 characters""" + + if not root: + root = self.root + level = 0 + + # Prevent endless traverse + if level > 5: + return + + if level > 0: + prefix = "├──" + last_prefix = "└──" + else: + prefix = "" + last_prefix = "" + + items = list(root.items()) + + names = root.get("__name", []) + for k, edge in items: + if k == "__name": + continue + + if not k: + k = "/" + + if len(names) > 1: + k += " links: " + ",".join(names[1:]) + + if edge == items[-1][1]: + print(root_prefix + last_prefix + k) + p = root_prefix + if level > 0: + p += " " + self.print_graph(p, edge, level + 1) + else: + print(root_prefix + prefix + k) + p = root_prefix + "│ " + self.print_graph(p, edge, level + 1) + + def _walk(self, root): + """ + Walk through sysfs to get all devnodes that aren't ignored. + + By default, uses /sys as sysfs mounting point. If another + directory is used, it replaces them to /sys at the patches. + """ + + with os.scandir(root) as obj: + for entry in obj: + path = os.path.join(root, entry.name) + if self.sysfs: + p = path.replace(self.sysfs, "/sys", count=1) + else: + p = path + + if self.re_ignore.search(p): + return + + # Handle link first to avoid directory recursion + if entry.is_symlink(): + real = os.path.realpath(path) + if not self.sysfs: + self.aliases[path] = real + else: + real = real.replace(self.sysfs, "/sys", count=1) + + # Add absfile location to graph if it doesn't exist + if not self.re_ignore.search(real): + # Add link to the graph + self.graph_add_file(real, p) + + elif entry.is_file(): + self.graph_add_file(p) + + elif entry.is_dir(): + self._walk(path) + + def __init__(self, abi, sysfs="/sys", hints=False): + """ + Initialize internal variables and get a list of all files inside + sysfs that can currently be parsed. + + Please notice that there are several entries on sysfs that aren't + documented as ABI. Ignore those. + + The real paths will be stored under self.files. Aliases will be + stored in separate, as self.aliases. + """ + + self.abi = abi + self.log = abi.log + + if sysfs != "/sys": + self.sysfs = sysfs.rstrip("/") + else: + self.sysfs = None + + self.hints = hints + + self.root = {} + self.aliases = {} + self.files = set() + + dont_walk = [ + # Those require root access and aren't documented at ABI + f"^{sysfs}/kernel/debug", + f"^{sysfs}/kernel/tracing", + f"^{sysfs}/fs/pstore", + f"^{sysfs}/fs/bpf", + f"^{sysfs}/fs/fuse", + + # This is not documented at ABI + f"^{sysfs}/module", + + f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI + f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings + "sections|notes", # aren't actually part of ABI + + # kernel-parameters.txt - not easy to parse + "parameters", + ] + + self.re_ignore = re.compile("|".join(dont_walk)) + + print(f"Reading {sysfs} directory contents...", file=sys.stderr) + self._walk(sysfs) + + def check_file(self, refs, found): + """Check missing ABI symbols for a given sysfs file""" + + res_list = [] + + try: + for names in refs: + fname = names[0] + + res = { + "found": False, + "fname": fname, + "msg": "", + } + res_list.append(res) + + re_what = self.abi.get_regexes(fname) + if not re_what: + self.abi.log.warning(f"missing rules for {fname}") + continue + + for name in names: + for r in re_what: + if self.abi.debug & AbiDebug.UNDEFINED: + self.log.debug("check if %s matches '%s'", name, r.pattern) + if r.match(name): + res["found"] = True + if found: + res["msg"] += f" {fname}: regex:\n\t" + continue + + if self.hints and not res["found"]: + res["msg"] += f" {fname} not found. Tested regexes:\n" + for r in re_what: + res["msg"] += " " + r.pattern + "\n" + + except KeyboardInterrupt: + pass + + return res_list + + def _ref_interactor(self, root): + """Recursive function to interact over the sysfs tree""" + + for k, v in root.items(): + if isinstance(v, dict): + yield from self._ref_interactor(v) + + if root == self.root or k == "__name": + continue + + if self.abi.re_string: + fname = v["__name"][0] + if self.abi.re_string.search(fname): + yield v + else: + yield v + + + def get_fileref(self, all_refs, chunk_size): + """Interactor to group refs into chunks""" + + n = 0 + refs = [] + + for ref in all_refs: + refs.append(ref) + + n += 1 + if n >= chunk_size: + yield refs + n = 0 + refs = [] + + yield refs + + def check_undefined_symbols(self, max_workers=None, chunk_size=50, + found=None, dry_run=None): + """Seach ABI for sysfs symbols missing documentation""" + + self.abi.parse_abi() + + if self.abi.debug & AbiDebug.GRAPH: + self.print_graph() + + all_refs = [] + for ref in self._ref_interactor(self.root): + all_refs.append(ref["__name"]) + + if dry_run: + print("Would check", file=sys.stderr) + for ref in all_refs: + print(", ".join(ref)) + + return + + print("Starting to search symbols (it may take several minutes):", + file=sys.stderr) + start = datetime.now() + old_elapsed = None + + # Python doesn't support multithreading due to limitations on its + # global lock (GIL). While Python 3.13 finally made GIL optional, + # there are still issues related to it. Also, we want to have + # backward compatibility with older versions of Python. + # + # So, use instead multiprocess. However, Python is very slow passing + # data from/to multiple processes. Also, it may consume lots of memory + # if the data to be shared is not small. So, we need to group workload + # in chunks that are big enough to generate performance gains while + # not being so big that would cause out-of-memory. + + num_refs = len(all_refs) + print(f"Number of references to parse: {num_refs}", file=sys.stderr) + + if not max_workers: + max_workers = os.cpu_count() + elif max_workers > os.cpu_count(): + max_workers = os.cpu_count() + + max_workers = max(max_workers, 1) + + max_chunk_size = int((num_refs + max_workers - 1) / max_workers) + chunk_size = min(chunk_size, max_chunk_size) + chunk_size = max(1, chunk_size) + + if max_workers > 1: + executor = futures.ProcessPoolExecutor + + # Place references in a random order. This may help improving + # performance, by mixing complex/simple expressions when creating + # chunks + shuffle(all_refs) + else: + # Python has a high overhead with processes. When there's just + # one worker, it is faster to not create a new process. + # Yet, User still deserves to have a progress print. So, use + # python's "thread", which is actually a single process, using + # an internal schedule to switch between tasks. No performance + # gains for non-IO tasks, but still it can be quickly interrupted + # from time to time to display progress. + executor = futures.ThreadPoolExecutor + + not_found = [] + f_list = [] + with executor(max_workers=max_workers) as exe: + for refs in self.get_fileref(all_refs, chunk_size): + if refs: + try: + f_list.append(exe.submit(self.check_file, refs, found)) + + except KeyboardInterrupt: + return + + total = len(f_list) + + if not total: + if self.abi.re_string: + print(f"No ABI symbol matches {self.abi.search_string}") + else: + self.abi.log.warning("No ABI symbols found") + return + + print(f"{len(f_list):6d} jobs queued on {max_workers} workers", + file=sys.stderr) + + while f_list: + try: + t = futures.wait(f_list, timeout=1, + return_when=futures.FIRST_COMPLETED) + + done = t[0] + + for fut in done: + res_list = fut.result() + + for res in res_list: + if not res["found"]: + not_found.append(res["fname"]) + if res["msg"]: + print(res["msg"]) + + f_list.remove(fut) + except KeyboardInterrupt: + return + + except RuntimeError as e: + self.abi.log.warning(f"Future: {e}") + break + + if sys.stderr.isatty(): + elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] + if len(f_list) < total: + elapsed += f" ({total - len(f_list)}/{total} jobs completed). " + if elapsed != old_elapsed: + print(elapsed + "\r", end="", flush=True, + file=sys.stderr) + old_elapsed = elapsed + + elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] + print(elapsed, file=sys.stderr) + + for f in sorted(not_found): + print(f"{f} not found.") diff --git a/tools/lib/python/feat/parse_features.py b/tools/lib/python/feat/parse_features.py new file mode 100755 index 000000000000..b88c04d3e2fe --- /dev/null +++ b/tools/lib/python/feat/parse_features.py @@ -0,0 +1,494 @@ +#!/usr/bin/env python3 +# pylint: disable=R0902,R0911,R0912,R0914,R0915 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# SPDX-License-Identifier: GPL-2.0 + + +""" +Library to parse the Linux Feature files and produce a ReST book. +""" + +import os +import re +import sys + +from glob import iglob + + +class ParseFeature: + """ + Parses Documentation/features, allowing to generate ReST documentation + from it. + """ + + h_name = "Feature" + h_kconfig = "Kconfig" + h_description = "Description" + h_subsys = "Subsystem" + h_status = "Status" + h_arch = "Architecture" + + # Sort order for status. Others will be mapped at the end. + status_map = { + "ok": 0, + "TODO": 1, + "N/A": 2, + # The only missing status is "..", which was mapped as "---", + # as this is an special ReST cell value. Let it get the + # default order (99). + } + + def __init__(self, prefix, debug=0, enable_fname=False): + """ + Sets internal variables + """ + + self.prefix = prefix + self.debug = debug + self.enable_fname = enable_fname + + self.data = {} + + # Initial maximum values use just the headers + self.max_size_name = len(self.h_name) + self.max_size_kconfig = len(self.h_kconfig) + self.max_size_description = len(self.h_description) + self.max_size_desc_word = 0 + self.max_size_subsys = len(self.h_subsys) + self.max_size_status = len(self.h_status) + self.max_size_arch = len(self.h_arch) + self.max_size_arch_with_header = self.max_size_arch + self.max_size_arch + self.description_size = 1 + + self.msg = "" + + def emit(self, msg="", end="\n"): + self.msg += msg + end + + def parse_error(self, fname, ln, msg, data=None): + """ + Displays an error message, printing file name and line + """ + + if ln: + fname += f"#{ln}" + + print(f"Warning: file {fname}: {msg}", file=sys.stderr, end="") + + if data: + data = data.rstrip() + print(f":\n\t{data}", file=sys.stderr) + else: + print("", file=sys.stderr) + + def parse_feat_file(self, fname): + """Parses a single arch-support.txt feature file""" + + if os.path.isdir(fname): + return + + base = os.path.basename(fname) + + if base != "arch-support.txt": + if self.debug: + print(f"ignoring {fname}", file=sys.stderr) + return + + subsys = os.path.dirname(fname).split("/")[-2] + self.max_size_subsys = max(self.max_size_subsys, len(subsys)) + + feature_name = "" + kconfig = "" + description = "" + comments = "" + arch_table = {} + + if self.debug > 1: + print(f"Opening {fname}", file=sys.stderr) + + if self.enable_fname: + full_fname = os.path.abspath(fname) + self.emit(f".. FILE {full_fname}") + + with open(fname, encoding="utf-8") as f: + for ln, line in enumerate(f, start=1): + line = line.strip() + + match = re.match(r"^\#\s+Feature\s+name:\s*(.*\S)", line) + if match: + feature_name = match.group(1) + + self.max_size_name = max(self.max_size_name, + len(feature_name)) + continue + + match = re.match(r"^\#\s+Kconfig:\s*(.*\S)", line) + if match: + kconfig = match.group(1) + + self.max_size_kconfig = max(self.max_size_kconfig, + len(kconfig)) + continue + + match = re.match(r"^\#\s+description:\s*(.*\S)", line) + if match: + description = match.group(1) + + self.max_size_description = max(self.max_size_description, + len(description)) + + words = re.split(r"\s+", line)[1:] + for word in words: + self.max_size_desc_word = max(self.max_size_desc_word, + len(word)) + + continue + + if re.search(r"^\\s*$", line): + continue + + if re.match(r"^\s*\-+\s*$", line): + continue + + if re.search(r"^\s*\|\s*arch\s*\|\s*status\s*\|\s*$", line): + continue + + match = re.match(r"^\#\s*(.*)$", line) + if match: + comments += match.group(1) + continue + + match = re.match(r"^\s*\|\s*(\S+):\s*\|\s*(\S+)\s*\|\s*$", line) + if match: + arch = match.group(1) + status = match.group(2) + + self.max_size_status = max(self.max_size_status, + len(status)) + self.max_size_arch = max(self.max_size_arch, len(arch)) + + if status == "..": + status = "---" + + arch_table[arch] = status + + continue + + self.parse_error(fname, ln, "Line is invalid", line) + + if not feature_name: + self.parse_error(fname, 0, "Feature name not found") + return + if not subsys: + self.parse_error(fname, 0, "Subsystem not found") + return + if not kconfig: + self.parse_error(fname, 0, "Kconfig not found") + return + if not description: + self.parse_error(fname, 0, "Description not found") + return + if not arch_table: + self.parse_error(fname, 0, "Architecture table not found") + return + + self.data[feature_name] = { + "where": fname, + "subsys": subsys, + "kconfig": kconfig, + "description": description, + "comments": comments, + "table": arch_table, + } + + self.max_size_arch_with_header = self.max_size_arch + len(self.h_arch) + + def parse(self): + """Parses all arch-support.txt feature files inside self.prefix""" + + path = os.path.expanduser(self.prefix) + + if self.debug > 2: + print(f"Running parser for {path}") + + example_path = os.path.join(path, "arch-support.txt") + + for fname in iglob(os.path.join(path, "**"), recursive=True): + if fname != example_path: + self.parse_feat_file(fname) + + return self.data + + def output_arch_table(self, arch, feat=None): + """ + Output feature(s) for a given architecture. + """ + + title = f"Feature status on {arch} architecture" + + self.emit("=" * len(title)) + self.emit(title) + self.emit("=" * len(title)) + self.emit() + + self.emit("=" * self.max_size_subsys + " ", end="") + self.emit("=" * self.max_size_name + " ", end="") + self.emit("=" * self.max_size_kconfig + " ", end="") + self.emit("=" * self.max_size_status + " ", end="") + self.emit("=" * self.max_size_description) + + self.emit(f"{self.h_subsys:<{self.max_size_subsys}} ", end="") + self.emit(f"{self.h_name:<{self.max_size_name}} ", end="") + self.emit(f"{self.h_kconfig:<{self.max_size_kconfig}} ", end="") + self.emit(f"{self.h_status:<{self.max_size_status}} ", end="") + self.emit(f"{self.h_description:<{self.max_size_description}}") + + self.emit("=" * self.max_size_subsys + " ", end="") + self.emit("=" * self.max_size_name + " ", end="") + self.emit("=" * self.max_size_kconfig + " ", end="") + self.emit("=" * self.max_size_status + " ", end="") + self.emit("=" * self.max_size_description) + + sorted_features = sorted(self.data.keys(), + key=lambda x: (self.data[x]["subsys"], + x.lower())) + + for name in sorted_features: + if feat and name != feat: + continue + + arch_table = self.data[name]["table"] + + if not arch in arch_table: + continue + + self.emit(f"{self.data[name]['subsys']:<{self.max_size_subsys}} ", + end="") + self.emit(f"{name:<{self.max_size_name}} ", end="") + self.emit(f"{self.data[name]['kconfig']:<{self.max_size_kconfig}} ", + end="") + self.emit(f"{arch_table[arch]:<{self.max_size_status}} ", + end="") + self.emit(f"{self.data[name]['description']}") + + self.emit("=" * self.max_size_subsys + " ", end="") + self.emit("=" * self.max_size_name + " ", end="") + self.emit("=" * self.max_size_kconfig + " ", end="") + self.emit("=" * self.max_size_status + " ", end="") + self.emit("=" * self.max_size_description) + + return self.msg + + def output_feature(self, feat): + """ + Output a feature on all architectures + """ + + title = f"Feature {feat}" + + self.emit("=" * len(title)) + self.emit(title) + self.emit("=" * len(title)) + self.emit() + + if not feat in self.data: + return + + if self.data[feat]["subsys"]: + self.emit(f":Subsystem: {self.data[feat]['subsys']}") + if self.data[feat]["kconfig"]: + self.emit(f":Kconfig: {self.data[feat]['kconfig']}") + + desc = self.data[feat]["description"] + desc = desc[0].upper() + desc[1:] + desc = desc.rstrip(". \t") + self.emit(f"\n{desc}.\n") + + com = self.data[feat]["comments"].strip() + if com: + self.emit("Comments") + self.emit("--------") + self.emit(f"\n{com}\n") + + self.emit("=" * self.max_size_arch + " ", end="") + self.emit("=" * self.max_size_status) + + self.emit(f"{self.h_arch:<{self.max_size_arch}} ", end="") + self.emit(f"{self.h_status:<{self.max_size_status}}") + + self.emit("=" * self.max_size_arch + " ", end="") + self.emit("=" * self.max_size_status) + + arch_table = self.data[feat]["table"] + for arch in sorted(arch_table.keys()): + self.emit(f"{arch:<{self.max_size_arch}} ", end="") + self.emit(f"{arch_table[arch]:<{self.max_size_status}}") + + self.emit("=" * self.max_size_arch + " ", end="") + self.emit("=" * self.max_size_status) + + return self.msg + + def matrix_lines(self, desc_size, max_size_status, header): + """ + Helper function to split element tables at the output matrix + """ + + if header: + ln_marker = "=" + else: + ln_marker = "-" + + self.emit("+" + ln_marker * self.max_size_name + "+", end="") + self.emit(ln_marker * desc_size, end="") + self.emit("+" + ln_marker * max_size_status + "+") + + def output_matrix(self): + """ + Generates a set of tables, groped by subsystem, containing + what's the feature state on each architecture. + """ + + title = "Feature status on all architectures" + + self.emit("=" * len(title)) + self.emit(title) + self.emit("=" * len(title)) + self.emit() + + desc_title = f"{self.h_kconfig} / {self.h_description}" + + desc_size = self.max_size_kconfig + 4 + if not self.description_size: + desc_size = max(self.max_size_description, desc_size) + else: + desc_size = max(self.description_size, desc_size) + + desc_size = max(self.max_size_desc_word, desc_size, len(desc_title)) + + notcompat = "Not compatible" + self.max_size_status = max(self.max_size_status, len(notcompat)) + + min_status_size = self.max_size_status + self.max_size_arch + 4 + max_size_status = max(min_status_size, self.max_size_status) + + h_status_per_arch = "Status per architecture" + max_size_status = max(max_size_status, len(h_status_per_arch)) + + cur_subsys = None + for name in sorted(self.data.keys(), + key=lambda x: (self.data[x]["subsys"], x.lower())): + if not cur_subsys or cur_subsys != self.data[name]["subsys"]: + if cur_subsys: + self.emit() + + cur_subsys = self.data[name]["subsys"] + + title = f"Subsystem: {cur_subsys}" + self.emit(title) + self.emit("=" * len(title)) + self.emit() + + self.matrix_lines(desc_size, max_size_status, 0) + + self.emit(f"|{self.h_name:<{self.max_size_name}}", end="") + self.emit(f"|{desc_title:<{desc_size}}", end="") + self.emit(f"|{h_status_per_arch:<{max_size_status}}|") + + self.matrix_lines(desc_size, max_size_status, 1) + + lines = [] + descs = [] + cur_status = "" + line = "" + + arch_table = sorted(self.data[name]["table"].items(), + key=lambda x: (self.status_map.get(x[1], 99), + x[0].lower())) + + for arch, status in arch_table: + if status == "---": + status = notcompat + + if status != cur_status: + if line != "": + lines.append(line) + line = "" + line = f"- **{status}**: {arch}" + elif len(line) + len(arch) + 2 < max_size_status: + line += f", {arch}" + else: + lines.append(line) + line = f" {arch}" + cur_status = status + + if line != "": + lines.append(line) + + description = self.data[name]["description"] + while len(description) > desc_size: + desc_line = description[:desc_size] + + last_space = desc_line.rfind(" ") + if last_space != -1: + desc_line = desc_line[:last_space] + descs.append(desc_line) + description = description[last_space + 1:] + else: + desc_line = desc_line[:-1] + descs.append(desc_line + "\\") + description = description[len(desc_line):] + + if description: + descs.append(description) + + while len(lines) < 2 + len(descs): + lines.append("") + + for ln, line in enumerate(lines): + col = ["", ""] + + if not ln: + col[0] = name + col[1] = f"``{self.data[name]['kconfig']}``" + else: + if ln >= 2 and descs: + col[1] = descs.pop(0) + + self.emit(f"|{col[0]:<{self.max_size_name}}", end="") + self.emit(f"|{col[1]:<{desc_size}}", end="") + self.emit(f"|{line:<{max_size_status}}|") + + self.matrix_lines(desc_size, max_size_status, 0) + + return self.msg + + def list_arch_features(self, arch, feat): + """ + Print a matrix of kernel feature support for the chosen architecture. + """ + self.emit("#") + self.emit(f"# Kernel feature support matrix of the '{arch}' architecture:") + self.emit("#") + + # Sort by subsystem, then by feature name (case‑insensitive) + for name in sorted(self.data.keys(), + key=lambda n: (self.data[n]["subsys"].lower(), + n.lower())): + if feat and name != feat: + continue + + feature = self.data[name] + arch_table = feature["table"] + status = arch_table.get(arch, "") + status = " " * ((4 - len(status)) // 2) + status + + self.emit(f"{feature['subsys']:>{self.max_size_subsys + 1}}/ ", + end="") + self.emit(f"{name:<{self.max_size_name}}: ", end="") + self.emit(f"{status:<5}| ", end="") + self.emit(f"{feature['kconfig']:>{self.max_size_kconfig}} ", + end="") + self.emit(f"# {feature['description']}") + + return self.msg diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py new file mode 100755 index 000000000000..a24f30ef4fa8 --- /dev/null +++ b/tools/lib/python/jobserver.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0+ +# +# pylint: disable=C0103,C0209 +# +# + +""" +Interacts with the POSIX jobserver during the Kernel build time. + +A "normal" jobserver task, like the one initiated by a make subrocess would do: + + - open read/write file descriptors to communicate with the job server; + - ask for one slot by calling: + claim = os.read(reader, 1) + - when the job finshes, call: + os.write(writer, b"+") # os.write(writer, claim) + +Here, the goal is different: This script aims to get the remaining number +of slots available, using all of them to run a command which handle tasks in +parallel. To to that, it has a loop that ends only after there are no +slots left. It then increments the number by one, in order to allow a +call equivalent to make -j$((claim+1)), e.g. having a parent make creating +$claim child to do the actual work. + +The end goal here is to keep the total number of build tasks under the +limit established by the initial make -j$n_proc call. + +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" + +import errno +import os +import subprocess +import sys + +class JobserverExec: + """ + Claim all slots from make using POSIX Jobserver. + + The main methods here are: + - open(): reserves all slots; + - close(): method returns all used slots back to make; + - run(): executes a command setting PARALLELISM=<available slots jobs + 1> + """ + + def __init__(self): + """Initialize internal vars""" + self.claim = 0 + self.jobs = b"" + self.reader = None + self.writer = None + self.is_open = False + + def open(self): + """Reserve all available slots to be claimed later on""" + + if self.is_open: + return + + try: + # Fetch the make environment options. + flags = os.environ["MAKEFLAGS"] + # Look for "--jobserver=R,W" + # Note that GNU Make has used --jobserver-fds and --jobserver-auth + # so this handles all of them. + opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] + + # Parse out R,W file descriptor numbers and set them nonblocking. + # If the MAKEFLAGS variable contains multiple instances of the + # --jobserver-auth= option, the last one is relevant. + fds = opts[-1].split("=", 1)[1] + + # Starting with GNU Make 4.4, named pipes are used for reader + # and writer. + # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 + _, _, path = fds.partition("fifo:") + + if path: + self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) + self.writer = os.open(path, os.O_WRONLY) + else: + self.reader, self.writer = [int(x) for x in fds.split(",", 1)] + # Open a private copy of reader to avoid setting nonblocking + # on an unexpecting process with the same reader fd. + self.reader = os.open("/proc/self/fd/%d" % (self.reader), + os.O_RDONLY | os.O_NONBLOCK) + + # Read out as many jobserver slots as possible + while True: + try: + slot = os.read(self.reader, 8) + self.jobs += slot + except (OSError, IOError) as e: + if e.errno == errno.EWOULDBLOCK: + # Stop at the end of the jobserver queue. + break + # If something went wrong, give back the jobs. + if self.jobs: + os.write(self.writer, self.jobs) + raise e + + # Add a bump for our caller's reserveration, since we're just going + # to sit here blocked on our child. + self.claim = len(self.jobs) + 1 + + except (KeyError, IndexError, ValueError, OSError, IOError): + # Any missing environment strings or bad fds should result in just + # not being parallel. + self.claim = None + + self.is_open = True + + def close(self): + """Return all reserved slots to Jobserver""" + + if not self.is_open: + return + + # Return all the reserved slots. + if len(self.jobs): + os.write(self.writer, self.jobs) + + self.is_open = False + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.close() + + def run(self, cmd, *args, **pwargs): + """ + Run a command setting PARALLELISM env variable to the number of + available job slots (claim) + 1, e.g. it will reserve claim slots + to do the actual build work, plus one to monitor its children. + """ + self.open() # Ensure that self.claim is set + + # We can only claim parallelism if there was a jobserver (i.e. a + # top-level "-jN" argument) and there were no other failures. Otherwise + # leave out the environment variable and let the child figure out what + # is best. + if self.claim: + os.environ["PARALLELISM"] = str(self.claim) + + return subprocess.call(cmd, *args, **pwargs) diff --git a/tools/lib/python/kdoc/__init__.py b/tools/lib/python/kdoc/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/lib/python/kdoc/__init__.py diff --git a/tools/lib/python/kdoc/enrich_formatter.py b/tools/lib/python/kdoc/enrich_formatter.py new file mode 100644 index 000000000000..bb171567a4ca --- /dev/null +++ b/tools/lib/python/kdoc/enrich_formatter.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>. + +""" +Ancillary argparse HelpFormatter class that works on a similar way as +argparse.RawDescriptionHelpFormatter, e.g. description maintains line +breaks, but it also implement transformations to the help text. The +actual transformations ar given by enrich_text(), if the output is tty. + +Currently, the follow transformations are done: + + - Positional arguments are shown in upper cases; + - if output is TTY, ``var`` and positional arguments are shown prepended + by an ANSI SGR code. This is usually translated to bold. On some + terminals, like, konsole, this is translated into a colored bold text. +""" + +import argparse +import re +import sys + +class EnrichFormatter(argparse.HelpFormatter): + """ + Better format the output, making easier to identify the positional args + and how they're used at the __doc__ description. + """ + def __init__(self, *args, **kwargs): + """Initialize class and check if is TTY""" + super().__init__(*args, **kwargs) + self._tty = sys.stdout.isatty() + + def enrich_text(self, text): + """Handle ReST markups (currently, only ``foo``)""" + if self._tty and text: + # Replace ``text`` with ANSI SGR (bold) + return re.sub(r'\`\`(.+?)\`\`', + lambda m: f'\033[1m{m.group(1)}\033[0m', text) + return text + + def _fill_text(self, text, width, indent): + """Enrich descriptions with markups on it""" + enriched = self.enrich_text(text) + return "\n".join(indent + line for line in enriched.splitlines()) + + def _format_usage(self, usage, actions, groups, prefix): + """Enrich positional arguments at usage: line""" + + prog = self._prog + parts = [] + + for action in actions: + if action.option_strings: + opt = action.option_strings[0] + if action.nargs != 0: + opt += f" {action.dest.upper()}" + parts.append(f"[{opt}]") + else: + # Positional argument + parts.append(self.enrich_text(f"``{action.dest.upper()}``")) + + usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n" + return usage_text + + def _format_action_invocation(self, action): + """Enrich argument names""" + if not action.option_strings: + return self.enrich_text(f"``{action.dest.upper()}``") + + return ", ".join(action.option_strings) diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py new file mode 100644 index 000000000000..bfe02baf1606 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=R0903,R0913,R0914,R0917 + +""" +Parse lernel-doc tags on multiple kernel source files. +""" + +import argparse +import logging +import os +import re + +from kdoc.kdoc_parser import KernelDoc +from kdoc.kdoc_output import OutputFormat + + +class GlobSourceFiles: + """ + Parse C source code file names and directories via an Interactor. + """ + + def __init__(self, srctree=None, valid_extensions=None): + """ + Initialize valid extensions with a tuple. + + If not defined, assume default C extensions (.c and .h) + + It would be possible to use python's glob function, but it is + very slow, and it is not interactive. So, it would wait to read all + directories before actually do something. + + So, let's use our own implementation. + """ + + if not valid_extensions: + self.extensions = (".c", ".h") + else: + self.extensions = valid_extensions + + self.srctree = srctree + + def _parse_dir(self, dirname): + """Internal function to parse files recursively""" + + with os.scandir(dirname) as obj: + for entry in obj: + name = os.path.join(dirname, entry.name) + + if entry.is_dir(follow_symlinks=False): + yield from self._parse_dir(name) + + if not entry.is_file(): + continue + + basename = os.path.basename(name) + + if not basename.endswith(self.extensions): + continue + + yield name + + def parse_files(self, file_list, file_not_found_cb): + """ + Define an iterator to parse all source files from file_list, + handling directories if any + """ + + if not file_list: + return + + for fname in file_list: + if self.srctree: + f = os.path.join(self.srctree, fname) + else: + f = fname + + if os.path.isdir(f): + yield from self._parse_dir(f) + elif os.path.isfile(f): + yield f + elif file_not_found_cb: + file_not_found_cb(fname) + + +class KernelFiles(): + """ + Parse kernel-doc tags on multiple kernel source files. + + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and + EXPORT_SYMBOL* macros; + - self.process_export_file(): parses only EXPORT_SYMBOL* macros. + """ + + def warning(self, msg): + """Ancillary routine to output a warning and increment error count""" + + self.config.log.warning(msg) + self.errors += 1 + + def error(self, msg): + """Ancillary routine to output an error and increment error count""" + + self.config.log.error(msg) + self.errors += 1 + + def parse_file(self, fname): + """ + Parse a single Kernel source. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.files: + return + + doc = KernelDoc(self.config, fname) + export_table, entries = doc.parse_kdoc() + + self.export_table[fname] = export_table + + self.files.add(fname) + self.export_files.add(fname) # parse_kdoc() already check exports + + self.results[fname] = entries + + def process_export_file(self, fname): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + # Prevent parsing the same file twice if results are cached + if fname in self.export_files: + return + + doc = KernelDoc(self.config, fname) + export_table = doc.parse_export() + + if not export_table: + self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") + export_table = set() + + self.export_table[fname] = export_table + self.export_files.add(fname) + + def file_not_found_cb(self, fname): + """ + Callback to warn if a file was not found. + """ + + self.error(f"Cannot find file {fname}") + + def __init__(self, verbose=False, out_style=None, + werror=False, wreturn=False, wshort_desc=False, + wcontents_before_sections=False, + logger=None): + """ + Initialize startup variables and parse all files + """ + + if not verbose: + verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) + + if out_style is None: + out_style = OutputFormat() + + if not werror: + kcflags = os.environ.get("KCFLAGS", None) + if kcflags: + match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) + if match: + werror = True + + # reading this variable is for backwards compat just in case + # someone was calling it with the variable from outside the + # kernel's build system + kdoc_werror = os.environ.get("KDOC_WERROR", None) + if kdoc_werror: + werror = kdoc_werror + + # Some variables are global to the parser logic as a whole as they are + # used to send control configuration to KernelDoc class. As such, + # those variables are read-only inside the KernelDoc. + self.config = argparse.Namespace + + self.config.verbose = verbose + self.config.werror = werror + self.config.wreturn = wreturn + self.config.wshort_desc = wshort_desc + self.config.wcontents_before_sections = wcontents_before_sections + + if not logger: + self.config.log = logging.getLogger("kernel-doc") + else: + self.config.log = logger + + self.config.warning = self.warning + + self.config.src_tree = os.environ.get("SRCTREE", None) + + # Initialize variables that are internal to KernelFiles + + self.out_style = out_style + + self.errors = 0 + self.results = {} + + self.files = set() + self.export_files = set() + self.export_table = {} + + def parse(self, file_list, export_file=None): + """ + Parse all files + """ + + glob = GlobSourceFiles(srctree=self.config.src_tree) + + for fname in glob.parse_files(file_list, self.file_not_found_cb): + self.parse_file(fname) + + for fname in glob.parse_files(export_file, self.file_not_found_cb): + self.process_export_file(fname) + + def out_msg(self, fname, name, arg): + """ + Return output messages from a file name using the output style + filtering. + + If output type was not handled by the styler, return None. + """ + + # NOTE: we can add rules here to filter out unwanted parts, + # although OutputFormat.msg already does that. + + return self.out_style.msg(fname, name, arg) + + def msg(self, enable_lineno=False, export=False, internal=False, + symbol=None, nosymbol=None, no_doc_sections=False, + filenames=None, export_file=None): + """ + Interacts over the kernel-doc results and output messages, + returning kernel-doc markups on each interaction + """ + + self.out_style.set_config(self.config) + + if not filenames: + filenames = sorted(self.results.keys()) + + glob = GlobSourceFiles(srctree=self.config.src_tree) + + for fname in filenames: + function_table = set() + + if internal or export: + if not export_file: + export_file = [fname] + + for f in glob.parse_files(export_file, self.file_not_found_cb): + function_table |= self.export_table[f] + + if symbol: + for s in symbol: + function_table.add(s) + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + + msg = "" + if fname not in self.results: + self.config.log.warning("No kernel-doc for file %s", fname) + continue + + symbols = self.results[fname] + self.out_style.set_symbols(symbols) + + for arg in symbols: + m = self.out_msg(fname, arg.name, arg) + + if m is None: + ln = arg.get("ln", 0) + dtype = arg.get('type', "") + + self.config.log.warning("%s:%d Can't handle %s", + fname, ln, dtype) + else: + msg += m + + if msg: + yield fname, msg diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py new file mode 100644 index 000000000000..19805301cb2c --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# A class that will, eventually, encapsulate all of the parsed data that we +# then pass into the output modules. +# + +class KdocItem: + def __init__(self, name, fname, type, start_line, **other_stuff): + self.name = name + self.fname = fname + self.type = type + self.declaration_start_line = start_line + self.sections = {} + self.sections_start_lines = {} + self.parameterlist = [] + self.parameterdesc_start_lines = [] + self.parameterdescs = {} + self.parametertypes = {} + # + # Just save everything else into our own dict so that the output + # side can grab it directly as before. As we move things into more + # structured data, this will, hopefully, fade away. + # + self.other_stuff = other_stuff + + def get(self, key, default = None): + return self.other_stuff.get(key, default) + + def __getitem__(self, key): + return self.get(key) + + # + # Tracking of section and parameter information. + # + def set_sections(self, sections, start_lines): + self.sections = sections + self.section_start_lines = start_lines + + def set_params(self, names, descs, types, starts): + self.parameterlist = names + self.parameterdescs = descs + self.parametertypes = types + self.parameterdesc_start_lines = starts diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py new file mode 100644 index 000000000000..b1aaa7fc3604 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -0,0 +1,824 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 + +""" +Implement output filters to print kernel-doc documentation. + +The implementation uses a virtual base class (OutputFormat) which +contains dispatches to virtual methods, and some code to filter +out output messages. + +The actual implementation is done on one separate class per each type +of output. Currently, there are output classes for ReST and man/troff. +""" + +import os +import re +from datetime import datetime + +from kdoc.kdoc_parser import KernelDoc, type_param +from kdoc.kdoc_re import KernRe + + +function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) + +# match expressions used to find embedded type information +type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) +type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) +type_func = KernRe(r"(\w+)\(\)", cache=False) +type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# Special RST handling for func ptr params +type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) + +# Special RST handling for structs with func ptr params +type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) + +type_env = KernRe(r"(\$\w+)", cache=False) +type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) +type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) +type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) +type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False) +type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) +type_fallback = KernRe(r"\&([_\w]+)", cache=False) +type_member_func = type_member + KernRe(r"\(\)", cache=False) + + +class OutputFormat: + """ + Base class for OutputFormat. If used as-is, it means that only + warnings will be displayed. + """ + + # output mode. + OUTPUT_ALL = 0 # output all symbols and doc sections + OUTPUT_INCLUDE = 1 # output only specified symbols + OUTPUT_EXPORTED = 2 # output exported symbols + OUTPUT_INTERNAL = 3 # output non-exported symbols + + # Virtual member to be overridden at the inherited classes + highlights = [] + + def __init__(self): + """Declare internal vars and set mode to OUTPUT_ALL""" + + self.out_mode = self.OUTPUT_ALL + self.enable_lineno = None + self.nosymbol = {} + self.symbol = None + self.function_table = None + self.config = None + self.no_doc_sections = False + + self.data = "" + + def set_config(self, config): + """ + Setup global config variables used by both parser and output. + """ + + self.config = config + + def set_filter(self, export, internal, symbol, nosymbol, function_table, + enable_lineno, no_doc_sections): + """ + Initialize filter variables according to the requested mode. + + Only one choice is valid between export, internal and symbol. + + The nosymbol filter can be used on all modes. + """ + + self.enable_lineno = enable_lineno + self.no_doc_sections = no_doc_sections + self.function_table = function_table + + if symbol: + self.out_mode = self.OUTPUT_INCLUDE + elif export: + self.out_mode = self.OUTPUT_EXPORTED + elif internal: + self.out_mode = self.OUTPUT_INTERNAL + else: + self.out_mode = self.OUTPUT_ALL + + if nosymbol: + self.nosymbol = set(nosymbol) + + + def highlight_block(self, block): + """ + Apply the RST highlights to a sub-block of text. + """ + + for r, sub in self.highlights: + block = r.sub(sub, block) + + return block + + def out_warnings(self, args): + """ + Output warnings for identifiers that will be displayed. + """ + + for log_msg in args.warnings: + self.config.warning(log_msg) + + def check_doc(self, name, args): + """Check if DOC should be output""" + + if self.no_doc_sections: + return False + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode == self.OUTPUT_INCLUDE: + if name in self.function_table: + self.out_warnings(args) + return True + + return False + + def check_declaration(self, dtype, name, args): + """ + Checks if a declaration should be output or not based on the + filtering criteria. + """ + + if name in self.nosymbol: + return False + + if self.out_mode == self.OUTPUT_ALL: + self.out_warnings(args) + return True + + if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: + if name in self.function_table: + return True + + if self.out_mode == self.OUTPUT_INTERNAL: + if dtype != "function": + self.out_warnings(args) + return True + + if name not in self.function_table: + self.out_warnings(args) + return True + + return False + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser + """ + + self.data = "" + + dtype = args.type + + if dtype == "doc": + self.out_doc(fname, name, args) + return self.data + + if not self.check_declaration(dtype, name, args): + return self.data + + if dtype == "function": + self.out_function(fname, name, args) + return self.data + + if dtype == "enum": + self.out_enum(fname, name, args) + return self.data + + if dtype == "typedef": + self.out_typedef(fname, name, args) + return self.data + + if dtype in ["struct", "union"]: + self.out_struct(fname, name, args) + return self.data + + # Warn if some type requires an output logic + self.config.log.warning("doesn't know how to output '%s' block", + dtype) + + return None + + # Virtual methods to be overridden by inherited classes + # At the base class, those do nothing. + def set_symbols(self, symbols): + """Get a list of all symbols from kernel_doc""" + + def out_doc(self, fname, name, args): + """Outputs a DOC block""" + + def out_function(self, fname, name, args): + """Outputs a function""" + + def out_enum(self, fname, name, args): + """Outputs an enum""" + + def out_typedef(self, fname, name, args): + """Outputs a typedef""" + + def out_struct(self, fname, name, args): + """Outputs a struct""" + + +class RestFormat(OutputFormat): + """Consts and functions used by ReST output""" + + highlights = [ + (type_constant, r"``\1``"), + (type_constant2, r"``\1``"), + + # Note: need to escape () to avoid func matching later + (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), + (type_member, r":c:type:`\1\2\3 <\1>`"), + (type_fp_param, r"**\1\\(\\)**"), + (type_fp_param2, r"**\1\\(\\)**"), + (type_func, r"\1()"), + (type_enum, r":c:type:`\1 <\2>`"), + (type_struct, r":c:type:`\1 <\2>`"), + (type_typedef, r":c:type:`\1 <\2>`"), + (type_union, r":c:type:`\1 <\2>`"), + + # in rst this can refer to any type + (type_fallback, r":c:type:`\1`"), + (type_param_ref, r"**\1\2**") + ] + blankline = "\n" + + sphinx_literal = KernRe(r'^[^.].*::$', cache=False) + sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) + + def __init__(self): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.lineprefix = "" + + def print_lineno(self, ln): + """Outputs a line number""" + + if self.enable_lineno and ln is not None: + ln += 1 + self.data += f".. LINENO {ln}\n" + + def output_highlight(self, args): + """ + Outputs a C symbol that may require being converted to ReST using + the self.highlights variable + """ + + input_text = args + output = "" + in_literal = False + litprefix = "" + block = "" + + for line in input_text.strip("\n").split("\n"): + + # If we're in a literal block, see if we should drop out of it. + # Otherwise, pass the line straight through unmunged. + if in_literal: + if line.strip(): # If the line is not blank + # If this is the first non-blank line in a literal block, + # figure out the proper indent. + if not litprefix: + r = KernRe(r'^(\s*)') + if r.match(line): + litprefix = '^' + r.group(1) + else: + litprefix = "" + + output += line + "\n" + elif not KernRe(litprefix).match(line): + in_literal = False + else: + output += line + "\n" + else: + output += line + "\n" + + # Not in a literal block (or just dropped out) + if not in_literal: + block += line + "\n" + if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line): + in_literal = True + litprefix = "" + output += self.highlight_block(block) + block = "" + + # Handle any remaining block + if block: + output += self.highlight_block(block) + + # Print the output with the line prefix + for line in output.strip("\n").split("\n"): + self.data += self.lineprefix + line + "\n" + + def out_section(self, args, out_docblock=False): + """ + Outputs a block section. + + This could use some work; it's used to output the DOC: sections, and + starts by putting out the name of the doc section itself, but that + tends to duplicate a header already in the template file. + """ + for section, text in args.sections.items(): + # Skip sections that are in the nosymbol_table + if section in self.nosymbol: + continue + + if out_docblock: + if not self.out_mode == self.OUTPUT_INCLUDE: + self.data += f".. _{section}:\n\n" + self.data += f'{self.lineprefix}**{section}**\n\n' + else: + self.data += f'{self.lineprefix}**{section}**\n\n' + + self.print_lineno(args.section_start_lines.get(section, 0)) + self.output_highlight(text) + self.data += "\n" + self.data += "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + self.out_section(args, out_docblock=True) + + def out_function(self, fname, name, args): + + oldprefix = self.lineprefix + signature = "" + + func_macro = args.get('func_macro', False) + if func_macro: + signature = name + else: + if args.get('functiontype'): + signature = args['functiontype'] + " " + signature += name + " (" + + ln = args.declaration_start_line + count = 0 + for parameter in args.parameterlist: + if count != 0: + signature += ", " + count += 1 + dtype = args.parametertypes.get(parameter, "") + + if function_pointer.search(dtype): + signature += function_pointer.group(1) + parameter + function_pointer.group(3) + else: + signature += dtype + + if not func_macro: + signature += ")" + + self.print_lineno(ln) + if args.get('typedef') or not args.get('functiontype'): + self.data += f".. c:macro:: {name}\n\n" + + if args.get('typedef'): + self.data += " **Typedef**: " + self.lineprefix = "" + self.output_highlight(args.get('purpose', "")) + self.data += "\n\n**Syntax**\n\n" + self.data += f" ``{signature}``\n\n" + else: + self.data += f"``{signature}``\n\n" + else: + self.data += f".. c:function:: {signature}\n\n" + + if not args.get('typedef'): + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', "")) + self.data += "\n" + + # Put descriptive text into a container (HTML <div>) to help set + # function prototypes apart + self.lineprefix = " " + + if args.parameterlist: + self.data += ".. container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Parameters**\n\n" + + for parameter in args.parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + dtype = args.parametertypes.get(parameter, "") + + if dtype: + self.data += f"{self.lineprefix}``{dtype}``\n" + else: + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) + + self.lineprefix = " " + if parameter_name in args.parameterdescs and \ + args.parameterdescs[parameter_name] != KernelDoc.undescribed: + + self.output_highlight(args.parameterdescs[parameter_name]) + self.data += "\n" + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.lineprefix = " " + + self.out_section(args) + self.lineprefix = oldprefix + + def out_enum(self, fname, name, args): + + oldprefix = self.lineprefix + ln = args.declaration_start_line + + self.data += f"\n\n.. c:enum:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + self.output_highlight(args.get('purpose', '')) + self.data += "\n" + + self.data += ".. container:: kernelindent\n\n" + outer = self.lineprefix + " " + self.lineprefix = outer + " " + self.data += f"{outer}**Constants**\n\n" + + for parameter in args.parameterlist: + self.data += f"{outer}``{parameter}``\n" + + if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed: + self.output_highlight(args.parameterdescs[parameter]) + else: + self.data += f"{self.lineprefix}*undescribed*\n\n" + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_typedef(self, fname, name, args): + + oldprefix = self.lineprefix + ln = args.declaration_start_line + + self.data += f"\n\n.. c:type:: {name}\n\n" + + self.print_lineno(ln) + self.lineprefix = " " + + self.output_highlight(args.get('purpose', '')) + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + def out_struct(self, fname, name, args): + + purpose = args.get('purpose', "") + declaration = args.get('definition', "") + dtype = args.type + ln = args.declaration_start_line + + self.data += f"\n\n.. c:{dtype}:: {name}\n\n" + + self.print_lineno(ln) + + oldprefix = self.lineprefix + self.lineprefix += " " + + self.output_highlight(purpose) + self.data += "\n" + + self.data += ".. container:: kernelindent\n\n" + self.data += f"{self.lineprefix}**Definition**::\n\n" + + self.lineprefix = self.lineprefix + " " + + declaration = declaration.replace("\t", self.lineprefix) + + self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n" + self.data += f"{declaration}{self.lineprefix}" + "};\n\n" + + self.lineprefix = " " + self.data += f"{self.lineprefix}**Members**\n\n" + for parameter in args.parameterlist: + if not parameter or parameter.startswith("#"): + continue + + parameter_name = parameter.split("[", maxsplit=1)[0] + + if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) + + self.data += f"{self.lineprefix}``{parameter}``\n" + + self.lineprefix = " " + self.output_highlight(args.parameterdescs[parameter_name]) + self.lineprefix = " " + + self.data += "\n" + + self.data += "\n" + + self.lineprefix = oldprefix + self.out_section(args) + + +class ManFormat(OutputFormat): + """Consts and functions used by man pages output""" + + highlights = ( + (type_constant, r"\1"), + (type_constant2, r"\1"), + (type_func, r"\\fB\1\\fP"), + (type_enum, r"\\fI\1\\fP"), + (type_struct, r"\\fI\1\\fP"), + (type_typedef, r"\\fI\1\\fP"), + (type_union, r"\\fI\1\\fP"), + (type_param, r"\\fI\1\\fP"), + (type_param_ref, r"\\fI\1\2\\fP"), + (type_member, r"\\fI\1\2\3\\fP"), + (type_fallback, r"\\fI\1\\fP") + ) + blankline = "" + + date_formats = [ + "%a %b %d %H:%M:%S %Z %Y", + "%a %b %d %H:%M:%S %Y", + "%Y-%m-%d", + "%b %d %Y", + "%B %d %Y", + "%m %d %Y", + ] + + def __init__(self, modulename): + """ + Creates class variables. + + Not really mandatory, but it is a good coding style and makes + pylint happy. + """ + + super().__init__() + self.modulename = modulename + self.symbols = [] + + dt = None + tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") + if tstamp: + for fmt in self.date_formats: + try: + dt = datetime.strptime(tstamp, fmt) + break + except ValueError: + pass + + if not dt: + dt = datetime.now() + + self.man_date = dt.strftime("%B %Y") + + def arg_name(self, args, name): + """ + Return the name that will be used for the man page. + + As we may have the same name on different namespaces, + prepend the data type for all types except functions and typedefs. + + The doc section is special: it uses the modulename. + """ + + dtype = args.type + + if dtype == "doc": + return self.modulename + + if dtype in ["function", "typedef"]: + return name + + return f"{dtype} {name}" + + def set_symbols(self, symbols): + """ + Get a list of all symbols from kernel_doc. + + Man pages will uses it to add a SEE ALSO section with other + symbols at the same file. + """ + self.symbols = symbols + + def out_tail(self, fname, name, args): + """Adds a tail for all man pages""" + + # SEE ALSO section + self.data += f'.SH "SEE ALSO"' + "\n.PP\n" + self.data += (f"Kernel file \\fB{args.fname}\\fR\n") + if len(self.symbols) >= 2: + cur_name = self.arg_name(args, name) + + related = [] + for arg in self.symbols: + out_name = self.arg_name(arg, arg.name) + + if cur_name == out_name: + continue + + related.append(f"\\fB{out_name}\\fR(9)") + + self.data += ",\n".join(related) + "\n" + + # TODO: does it make sense to add other sections? Maybe + # REPORTING ISSUES? LICENSE? + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser. + + Add a tail at the end of man pages output. + """ + super().msg(fname, name, args) + self.out_tail(fname, name, args) + + return self.data + + def output_highlight(self, block): + """ + Outputs a C symbol that may require being highlighted with + self.highlights variable using troff syntax + """ + + contents = self.highlight_block(block) + + if isinstance(contents, list): + contents = "\n".join(contents) + + for line in contents.strip("\n").split("\n"): + line = KernRe(r"^\s*").sub("", line) + if not line: + continue + + if line[0] == ".": + self.data += "\\&" + line + "\n" + else: + self.data += line + "\n" + + def out_doc(self, fname, name, args): + if not self.check_doc(name, args): + return + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_function(self, fname, name, args): + """output function in man""" + + out_name = self.arg_name(args, name) + + self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + if args.get('functiontype', ''): + self.data += f'.B "{args["functiontype"]}" {name}' + "\n" + else: + self.data += f'.B "{name}' + "\n" + + count = 0 + parenth = "(" + post = "," + + for parameter in args.parameterlist: + if count == len(args.parameterlist) - 1: + post = ");" + + dtype = args.parametertypes.get(parameter, "") + if function_pointer.match(dtype): + # Pointer-to-function + self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" + else: + dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) + + self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n" + count += 1 + parenth = "" + + if args.parameterlist: + self.data += ".SH ARGUMENTS\n" + + for parameter in args.parameterlist: + parameter_name = re.sub(r'\[.*', '', parameter) + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section.upper()}"' + "\n" + self.output_highlight(text) + + def out_enum(self, fname, name, args): + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"enum {name} \\- {args['purpose']}\n" + + self.data += ".SH SYNOPSIS\n" + self.data += f"enum {name}" + " {\n" + + count = 0 + for parameter in args.parameterlist: + self.data += f'.br\n.BI " {parameter}"' + "\n" + if count == len(args.parameterlist) - 1: + self.data += "\n};\n" + else: + self.data += ", \n.br\n" + + count += 1 + + self.data += ".SH Constants\n" + + for parameter in args.parameterlist: + parameter_name = KernRe(r'\[.*').sub('', parameter) + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name, "")) + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_typedef(self, fname, name, args): + module = self.modulename + purpose = args.get('purpose') + out_name = self.arg_name(args, name) + + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"typedef {name} \\- {purpose}\n" + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) + + def out_struct(self, fname, name, args): + module = self.modulename + purpose = args.get('purpose') + definition = args.get('definition') + out_name = self.arg_name(args, name) + + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + + self.data += ".SH NAME\n" + self.data += f"{args.type} {name} \\- {purpose}\n" + + # Replace tabs with two spaces and handle newlines + declaration = definition.replace("\t", " ") + declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) + + self.data += ".SH SYNOPSIS\n" + self.data += f"{args.type} {name} " + "{" + "\n.br\n" + self.data += f'.BI "{declaration}\n' + "};\n.br\n\n" + + self.data += ".SH Members\n" + for parameter in args.parameterlist: + if parameter.startswith("#"): + continue + + parameter_name = re.sub(r"\[.*", "", parameter) + + if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: + continue + + self.data += f'.IP "{parameter}" 12' + "\n" + self.output_highlight(args.parameterdescs.get(parameter_name)) + + for section, text in args.sections.items(): + self.data += f'.SH "{section}"' + "\n" + self.output_highlight(text) diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py new file mode 100644 index 000000000000..500aafc50032 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -0,0 +1,1670 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. +# +# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 + +""" +kdoc_parser +=========== + +Read a C language source or header FILE and extract embedded +documentation comments +""" + +import sys +import re +from pprint import pformat + +from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_item import KdocItem + +# +# Regular expressions used to parse kernel-doc markups at KernelDoc class. +# +# Let's declare them in lowercase outside any class to make it easier to +# convert from the Perl script. +# +# As those are evaluated at the beginning, no need to cache them +# + +# Allow whitespace at end of comment start. +doc_start = KernRe(r'^/\*\*\s*$', cache=False) + +doc_end = KernRe(r'\*/', cache=False) +doc_com = KernRe(r'\s*\*\s*', cache=False) +doc_com_body = KernRe(r'\s*\* ?', cache=False) +doc_decl = doc_com + KernRe(r'(\w+)', cache=False) + +# @params and a strictly limited set of supported section names +# Specifically: +# Match @word: +# @...: +# @{section-name}: +# while trying to not match literal block starts like "example::" +# +known_section_names = 'description|context|returns?|notes?|examples?' +known_sections = KernRe(known_section_names, flags = re.I) +doc_sect = doc_com + \ + KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', + flags=re.I, cache=False) + +doc_content = doc_com_body + KernRe(r'(.*)', cache=False) +doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) + +export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) + +type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + +# +# Tests for the beginning of a kerneldoc block in its various forms. +# +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) +doc_begin_func = KernRe(str(doc_com) + # initial " * ' + r"(?:\w+\s*\*\s*)?" + # type (not captured) + r'(?:define\s+)?' + # possible "define" (not captured) + r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" + r'(?:[-:].*)?$', # description (not captured) + cache = False) + +# +# Here begins a long set of transformations to turn structure member prefixes +# and macro invocations into something we can parse and generate kdoc for. +# +struct_args_pattern = r'([^,)]+)' + +struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*__private', re.S), ' '), + (KernRe(r'\s*__rcu', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parentheses will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end delimiter matching +# the start delimiter. Yet, right now, only one replace group +# is allowed. +# +struct_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + +# +# Transforms for function prototypes +# +function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__always_unused *"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), +] + +# +# Apply a set of transforms to a block of text. +# +def apply_transforms(xforms, text): + for search, subst in xforms: + text = search.sub(subst, text) + return text + +# +# A little helper to get rid of excess white space +# +multi_space = KernRe(r'\s\s+') +def trim_whitespace(s): + return multi_space.sub(' ', s.strip()) + +# +# Remove struct/enum members that have been marked "private". +# +def trim_private_members(text): + # + # First look for a "public:" block that ends a private region, then + # handle the "private until the end" case. + # + text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) + text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) + # + # We needed the comments to do the above, but now we can take them out. + # + return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + +class state: + """ + State machine enums + """ + + # Parser states + NORMAL = 0 # normal code + NAME = 1 # looking for function name + DECLARATION = 2 # We have seen a declaration which might not be done + BODY = 3 # the body of the comment + SPECIAL_SECTION = 4 # doc section ending with a blank line + PROTO = 5 # scanning prototype + DOCBLOCK = 6 # documentation block + INLINE_NAME = 7 # gathering doc outside main block + INLINE_TEXT = 8 # reading the body of inline docs + + name = [ + "NORMAL", + "NAME", + "DECLARATION", + "BODY", + "SPECIAL_SECTION", + "PROTO", + "DOCBLOCK", + "INLINE_NAME", + "INLINE_TEXT", + ] + + +SECTION_DEFAULT = "Description" # default section + +class KernelEntry: + + def __init__(self, config, fname, ln): + self.config = config + self.fname = fname + + self._contents = [] + self.prototype = "" + + self.warnings = [] + + self.parameterlist = [] + self.parameterdescs = {} + self.parametertypes = {} + self.parameterdesc_start_lines = {} + + self.section_start_lines = {} + self.sections = {} + + self.anon_struct_union = False + + self.leading_space = None + + self.fname = fname + + # State flags + self.brcount = 0 + self.declaration_start_line = ln + 1 + + # + # Management of section contents + # + def add_text(self, text): + self._contents.append(text) + + def contents(self): + return '\n'.join(self._contents) + '\n' + + # TODO: rename to emit_message after removal of kernel-doc.pl + def emit_msg(self, ln, msg, *, warning=True): + """Emit a message""" + + log_msg = f"{self.fname}:{ln} {msg}" + + if not warning: + self.config.log.info(log_msg) + return + + # Delegate warning output to output logic, as this way it + # will report warnings/info only for symbols that are output + + self.warnings.append(log_msg) + return + + # + # Begin a new section. + # + def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): + if dump: + self.dump_section(start_new = True) + self.section = title + self.new_start_line = line_no + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + # + # If we have accumulated no contents in the default ("description") + # section, don't bother. + # + if self.section == SECTION_DEFAULT and not self._contents: + return + name = self.section + contents = self.contents() + + if type_param.match(name): + name = type_param.group(1) + + self.parameterdescs[name] = contents + self.parameterdesc_start_lines[name] = self.new_start_line + + self.new_start_line = 0 + + else: + if name in self.sections and self.sections[name] != "": + # Only warn on user-specified duplicate section names + if name != SECTION_DEFAULT: + self.emit_msg(self.new_start_line, + f"duplicate section name '{name}'") + # Treat as a new paragraph - add a blank line + self.sections[name] += '\n' + contents + else: + self.sections[name] = contents + self.section_start_lines[name] = self.new_start_line + self.new_start_line = 0 + +# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) + + if start_new: + self.section = SECTION_DEFAULT + self._contents = [] + +python_warning = False + +class KernelDoc: + """ + Read a C language source or header FILE and extract embedded + documentation comments. + """ + + # Section names + + section_context = "Context" + section_return = "Return" + + undescribed = "-- undescribed --" + + def __init__(self, config, fname): + """Initialize internal variables""" + + self.fname = fname + self.config = config + + # Initial state for the state machines + self.state = state.NORMAL + + # Store entry currently being processed + self.entry = None + + # Place all potential outputs into an array + self.entries = [] + + # + # We need Python 3.7 for its "dicts remember the insertion + # order" guarantee + # + global python_warning + if (not python_warning and + sys.version_info.major == 3 and sys.version_info.minor < 7): + + self.emit_msg(0, + 'Python 3.7 or later is required for correct results') + python_warning = True + + def emit_msg(self, ln, msg, *, warning=True): + """Emit a message""" + + if self.entry: + self.entry.emit_msg(ln, msg, warning=warning) + return + + log_msg = f"{self.fname}:{ln} {msg}" + + if warning: + self.config.log.warning(log_msg) + else: + self.config.log.info(log_msg) + + def dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + + if self.entry: + self.entry.dump_section(start_new) + + # TODO: rename it to store_declaration after removal of kernel-doc.pl + def output_declaration(self, dtype, name, **args): + """ + Stores the entry into an entry array. + + The actual output and output filters will be handled elsewhere + """ + + item = KdocItem(name, self.fname, dtype, + self.entry.declaration_start_line, **args) + item.warnings = self.entry.warnings + + # Drop empty sections + # TODO: improve empty sections logic to emit warnings + sections = self.entry.sections + for section in ["Description", "Return"]: + if section in sections and not sections[section].rstrip(): + del sections[section] + item.set_sections(sections, self.entry.section_start_lines) + item.set_params(self.entry.parameterlist, self.entry.parameterdescs, + self.entry.parametertypes, + self.entry.parameterdesc_start_lines) + self.entries.append(item) + + self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) + + def reset_state(self, ln): + """ + Ancillary routine to create a new entry. It initializes all + variables used by the state machine. + """ + + # + # Flush the warnings out before we proceed further + # + if self.entry and self.entry not in self.entries: + for log_msg in self.entry.warnings: + self.config.log.warning(log_msg) + + self.entry = KernelEntry(self.config, self.fname, ln) + + # State flags + self.state = state.NORMAL + + def push_parameter(self, ln, decl_type, param, dtype, + org_arg, declaration_name): + """ + Store parameters and their descriptions at self.entry. + """ + + if self.entry.anon_struct_union and dtype == "" and param == "}": + return # Ignore the ending }; from anonymous struct/union + + self.entry.anon_struct_union = False + + param = KernRe(r'[\[\)].*').sub('', param, count=1) + + # + # Look at various "anonymous type" cases. + # + if dtype == '': + if param.endswith("..."): + if len(param) > 3: # there is a name provided, use that + param = param[:-3] + if not self.entry.parameterdescs.get(param): + self.entry.parameterdescs[param] = "variable arguments" + + elif (not param) or param == "void": + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True + + # Warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements) + if param not in self.entry.parameterdescs and not param.startswith("#"): + self.entry.parameterdescs[param] = self.undescribed + + if "." not in param: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_msg(ln, + f"{dname} '{param}' not described in '{declaration_name}'") + + # Strip spaces from param so that it is one continuous string on + # parameterlist. This fixes a problem where check_sections() + # cannot find a parameter like "addr[6 + 2]" because it actually + # appears as "addr[6", "+", "2]" on the parameter list. + # However, it's better to maintain the param string unchanged for + # output, so just weaken the string compare in check_sections() + # to ignore "[blah" in a parameter string. + + self.entry.parameterlist.append(param) + org_arg = KernRe(r'\s\s+').sub(' ', org_arg) + self.entry.parametertypes[param] = org_arg + + + def create_parameter_list(self, ln, decl_type, args, + splitter, declaration_name): + """ + Creates a list of parameters, storing them at self.entry. + """ + + # temporarily replace all commas inside function pointer definition + arg_expr = KernRe(r'(\([^\),]+),') + while arg_expr.search(args): + args = arg_expr.sub(r"\1#", args) + + for arg in args.split(splitter): + # Ignore argument attributes + arg = KernRe(r'\sPOS0?\s').sub(' ', arg) + + # Strip leading/trailing spaces + arg = arg.strip() + arg = KernRe(r'\s+').sub(' ', arg, count=1) + + if arg.startswith('#'): + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + + # Treat preprocessor directive as a typeless variable + self.push_parameter(ln, decl_type, arg, "", + "", declaration_name) + # + # The pointer-to-function case. + # + elif KernRe(r'\(.+\)\s*\(').search(arg): + arg = arg.replace('#', ',') + r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" + r'([\w\[\].]*)' # Capture the name and possible [array] + r'\s*\)') # Make sure the trailing ")" is there + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + # + # The array-of-pointers case. Dig the parameter name out from the middle + # of the declaration. + # + elif KernRe(r'\(.+\)\s*\[').search(arg): + r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" + r'([\w.]*?)' # The actual pointer name + r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] + if r.match(arg): + param = r.group(1) + else: + self.emit_msg(ln, f"Invalid param: {arg}") + param = arg + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + elif arg: + # + # Clean up extraneous spaces and split the string at commas; the first + # element of the resulting list will also include the type information. + # + arg = KernRe(r'\s*:\s*').sub(":", arg) + arg = KernRe(r'\s*\[').sub('[', arg) + args = KernRe(r'\s*,\s*').split(arg) + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + # + # args[0] has a string of "type a". If "a" includes an [array] + # declaration, we want to not be fooled by any white space inside + # the brackets, so detect and handle that case specially. + # + r = KernRe(r'^([^[\]]*\s+)(.*)$') + if r.match(args[0]): + args[0] = r.group(2) + dtype = r.group(1) + else: + # No space in args[0]; this seems wrong but preserves previous behavior + dtype = '' + + bitfield_re = KernRe(r'(.*?):(\w+)') + for param in args: + # + # For pointers, shift the star(s) from the variable name to the + # type declaration. + # + r = KernRe(r'^(\*+)\s*(.*)') + if r.match(param): + self.push_parameter(ln, decl_type, r.group(2), + f"{dtype} {r.group(1)}", + arg, declaration_name) + # + # Perform a similar shift for bitfields. + # + elif bitfield_re.search(param): + if dtype != "": # Skip unnamed bit-fields + self.push_parameter(ln, decl_type, bitfield_re.group(1), + f"{dtype}:{bitfield_re.group(2)}", + arg, declaration_name) + else: + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + def check_sections(self, ln, decl_name, decl_type): + """ + Check for errors inside sections, emitting warnings if not found + parameters are described. + """ + for section in self.entry.sections: + if section not in self.entry.parameterlist and \ + not known_sections.search(section): + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + self.emit_msg(ln, + f"Excess {dname} '{section}' description in '{decl_name}'") + + def check_return_section(self, ln, declaration_name, return_type): + """ + If the function doesn't return void, warns about the lack of a + return description. + """ + + if not self.config.wreturn: + return + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type (but not "void *") + if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): + return + + if not self.entry.sections.get("Return", None): + self.emit_msg(ln, + f"No description found for return value of '{declaration_name}'") + + # + # Split apart a structure prototype; returns (struct|union, name, members) or None + # + def split_struct_proto(self, proto): + type_pattern = r'(struct|union)' + qualifiers = [ + "__attribute__", + "__packed", + "__aligned", + "____cacheline_aligned_in_smp", + "____cacheline_aligned", + ] + definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" + + r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) + if r.search(proto): + return (r.group(1), r.group(2), r.group(3)) + else: + r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + if r.search(proto): + return (r.group(1), r.group(3), r.group(2)) + return None + # + # Rewrite the members of a structure or union for easier formatting later on. + # Among other things, this function will turn a member like: + # + # struct { inner_members; } foo; + # + # into: + # + # struct foo; inner_members; + # + def rewrite_struct_members(self, members): + # + # Process struct/union members from the most deeply nested outward. The + # trick is in the ^{ below - it prevents a match of an outer struct/union + # until the inner one has been munged (removing the "{" in the process). + # + struct_members = KernRe(r'(struct|union)' # 0: declaration type + r'([^\{\};]+)' # 1: possible name + r'(\{)' + r'([^\{\}]*)' # 3: Contents of declaration + r'(\})' + r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration + tuples = struct_members.findall(members) + while tuples: + for t in tuples: + newmember = "" + oldmember = "".join(t) # Reconstruct the original formatting + dtype, name, lbr, content, rbr, rest, semi = t + # + # Pass through each field name, normalizing the form and formatting. + # + for s_id in rest.split(','): + s_id = s_id.strip() + newmember += f"{dtype} {s_id}; " + # + # Remove bitfield/array/pointer info, getting the bare name. + # + s_id = KernRe(r'[:\[].*').sub('', s_id) + s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + # + # Pass through the members of this inner structure/union. + # + for arg in content.split(';'): + arg = arg.strip() + # + # Look for (type)(*name)(args) - pointer to function + # + r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') + if r.match(arg): + dtype, name, extra = r.group(1), r.group(2), r.group(3) + # Pointer-to-function + if not s_id: + # Anonymous struct/union + newmember += f"{dtype}{name}{extra}; " + else: + newmember += f"{dtype}{s_id}.{name}{extra}; " + # + # Otherwise a non-function member. + # + else: + # + # Remove bitmap and array portions and spaces around commas + # + arg = KernRe(r':\s*\d+\s*').sub('', arg) + arg = KernRe(r'\[.*\]').sub('', arg) + arg = KernRe(r'\s*,\s*').sub(',', arg) + # + # Look for a normal decl - "type name[,name...]" + # + r = KernRe(r'(.*)\s+([\S+,]+)') + if r.search(arg): + for name in r.group(2).split(','): + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) + if not s_id: + # Anonymous struct/union + newmember += f"{r.group(1)} {name}; " + else: + newmember += f"{r.group(1)} {s_id}.{name}; " + else: + newmember += f"{arg}; " + # + # At the end of the s_id loop, replace the original declaration with + # the munged version. + # + members = members.replace(oldmember, newmember) + # + # End of the tuple loop - search again and see if there are outer members + # that now turn up. + # + tuples = struct_members.findall(members) + return members + + # + # Format the struct declaration into a standard form for inclusion in the + # resulting docs. + # + def format_struct_decl(self, declaration): + # + # Insert newlines, get rid of extra spaces. + # + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) + # + # Format inline enums with each member on its own line. + # + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') + while r.search(declaration): + declaration = r.sub(r'\1,\n\2', declaration) + # + # Now go through and supply the right number of tabs + # for each line. + # + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) + if clause: + if '}' in clause and level > 1: + level -= 1 + if not clause.startswith('#'): + declaration += "\t" * level + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + return declaration + + + def dump_struct(self, ln, proto): + """ + Store an entry for a struct or union + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: + self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") + return + decl_type, declaration_name, members = struct_parts + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") + return + # + # Go through the list of members applying all of our transformations. + # + members = trim_private_members(members) + members = apply_transforms(struct_xforms, members) + + nested = NestedMatch() + for search, sub in struct_nested_prefixes: + members = nested.sub(search, sub, members) + # + # Deal with embedded struct and union members, and drop enums entirely. + # + declaration = members + members = self.rewrite_struct_members(members) + members = re.sub(r'(\{[^\{\}]*\})', '', members) + # + # Output the result and we are done. + # + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type) + self.output_declaration(decl_type, declaration_name, + definition=self.format_struct_decl(declaration), + purpose=self.entry.declaration_purpose) + + def dump_enum(self, ln, proto): + """ + Stores an enum inside self.entries array. + """ + # + # Strip preprocessor directives. Note that this depends on the + # trailing semicolon we added in process_proto_type(). + # + proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + # + # Parse out the name and members of the enum. Typedef form first. + # + r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + if r.search(proto): + declaration_name = r.group(2) + members = trim_private_members(r.group(1)) + # + # Failing that, look for a straight enum + # + else: + r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') + if r.match(proto): + declaration_name = r.group(1) + members = trim_private_members(r.group(2)) + # + # OK, this isn't going to work. + # + else: + self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") + return + # + # Make sure we found what we were expecting. + # + if self.entry.identifier != declaration_name: + if self.entry.identifier == "": + self.emit_msg(ln, + f"{proto}: wrong kernel-doc identifier on prototype") + else: + self.emit_msg(ln, + f"expecting prototype for enum {self.entry.identifier}. " + f"Prototype was for enum {declaration_name} instead") + return + + if not declaration_name: + declaration_name = "(anonymous)" + # + # Parse out the name of each enum member, and verify that we + # have a description for it. + # + member_set = set() + members = KernRe(r'\([^;)]*\)').sub('', members) + for arg in members.split(','): + if not arg: + continue + arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + self.entry.parameterlist.append(arg) + if arg not in self.entry.parameterdescs: + self.entry.parameterdescs[arg] = self.undescribed + self.emit_msg(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") + member_set.add(arg) + # + # Ensure that every described member actually exists in the enum. + # + for k in self.entry.parameterdescs: + if k not in member_set: + self.emit_msg(ln, + f"Excess enum value '@{k}' description in '{declaration_name}'") + + self.output_declaration('enum', declaration_name, + purpose=self.entry.declaration_purpose) + + def dump_declaration(self, ln, prototype): + """ + Stores a data declaration inside self.entries array. + """ + + if self.entry.decl_type == "enum": + self.dump_enum(ln, prototype) + elif self.entry.decl_type == "typedef": + self.dump_typedef(ln, prototype) + elif self.entry.decl_type in ["union", "struct"]: + self.dump_struct(ln, prototype) + else: + # This would be a bug + self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') + + def dump_function(self, ln, prototype): + """ + Stores a function or function macro inside self.entries array. + """ + + found = func_macro = False + return_type = '' + decl_type = 'function' + # + # Apply the initial transformations. + # + prototype = apply_transforms(function_xforms, prototype) + # + # If we have a macro, remove the "#define" at the front. + # + new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) + if new_proto != prototype: + prototype = new_proto + # + # Dispense with the simple "#define A B" case here; the key + # is the space after the name of the symbol being defined. + # NOTE that the seemingly misnamed "func_macro" indicates a + # macro *without* arguments. + # + r = KernRe(r'^(\w+)\s+') + if r.search(prototype): + return_type = '' + declaration_name = r.group(1) + func_macro = True + found = True + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. + # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + name = r'\w+' + type1 = r'(?:[\w\s]+)?' + type2 = r'(?:[\w\s]+\*+)+' + # + # Attempt to match first on (args) with no internal parentheses; this + # lets us easily filter out __acquires() and other post-args stuff. If + # that fails, just grab the rest of the line to the last closing + # parenthesis. + # + proto_args = r'\(([^\(]*|.*)\)' + # + # (Except for the simple macro case) attempt to split up the prototype + # in the various ways we understand. + # + if not found: + patterns = [ + rf'^()({name})\s*{proto_args}', + rf'^({type1})\s+({name})\s*{proto_args}', + rf'^({type2})\s*({name})\s*{proto_args}', + ] + + for p in patterns: + r = KernRe(p) + if r.match(prototype): + return_type = r.group(1) + declaration_name = r.group(2) + args = r.group(3) + self.create_parameter_list(ln, decl_type, args, ',', + declaration_name) + found = True + break + # + # Parsing done; make sure that things are as we expect. + # + if not found: + self.emit_msg(ln, + f"cannot understand function prototype: '{prototype}'") + return + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " + f"Prototype was for {declaration_name}() instead") + return + self.check_sections(ln, declaration_name, "function") + self.check_return_section(ln, declaration_name, return_type) + # + # Store the result. + # + self.output_declaration(decl_type, declaration_name, + typedef=('typedef' in return_type), + functiontype=return_type, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + + + def dump_typedef(self, ln, proto): + """ + Stores a typedef inside self.entries array. + """ + # + # We start by looking for function typedefs. + # + typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + typedef_ident = r'\*?\s*(\w\S+)\s*' + typedef_args = r'\s*\((.*)\);' + + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) + + # Parse function typedef prototypes + for r in [typedef1, typedef2]: + if not r.match(proto): + continue + + return_type = r.group(1).strip() + declaration_name = r.group(2) + args = r.group(3) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.create_parameter_list(ln, 'function', args, ',', declaration_name) + + self.output_declaration('function', declaration_name, + typedef=True, + functiontype=return_type, + purpose=self.entry.declaration_purpose) + return + # + # Not a function, try to parse a simple typedef. + # + r = KernRe(r'typedef.*\s+(\w+)\s*;') + if r.match(proto): + declaration_name = r.group(1) + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, + f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.output_declaration('typedef', declaration_name, + purpose=self.entry.declaration_purpose) + return + + self.emit_msg(ln, "error: Cannot parse typedef!") + + @staticmethod + def process_export(function_set, line): + """ + process EXPORT_SYMBOL* tags + + This method doesn't use any variable from the class, so declare it + with a staticmethod decorator. + """ + + # We support documenting some exported symbols with different + # names. A horrible hack. + suffixes = [ '_noprof' ] + + # Note: it accepts only one EXPORT_SYMBOL* per line, as having + # multiple export lines would violate Kernel coding style. + + if export_symbol.search(line): + symbol = export_symbol.group(2) + elif export_symbol_ns.search(line): + symbol = export_symbol_ns.group(2) + else: + return False + # + # Found an export, trim out any special suffixes + # + for suffix in suffixes: + # Be backward compatible with Python < 3.9 + if symbol.endswith(suffix): + symbol = symbol[:-len(suffix)] + function_set.add(symbol) + return True + + def process_normal(self, ln, line): + """ + STATE_NORMAL: looking for the /** to begin everything. + """ + + if not doc_start.match(line): + return + + # start a new entry + self.reset_state(ln) + + # next line is always the function name + self.state = state.NAME + + def process_name(self, ln, line): + """ + STATE_NAME: Looking for the "name - description" line + """ + # + # Check for a DOC: block and handle them specially. + # + if doc_block.search(line): + + if not doc_block.group(1): + self.entry.begin_section(ln, "Introduction") + else: + self.entry.begin_section(ln, doc_block.group(1)) + + self.entry.identifier = self.entry.section + self.state = state.DOCBLOCK + # + # Otherwise we're looking for a normal kerneldoc declaration line. + # + elif doc_decl.search(line): + self.entry.identifier = doc_decl.group(1) + + # Test for data declaration + if doc_begin_data.search(line): + self.entry.decl_type = doc_begin_data.group(1) + self.entry.identifier = doc_begin_data.group(2) + # + # Look for a function description + # + elif doc_begin_func.search(line): + self.entry.identifier = doc_begin_func.group(1) + self.entry.decl_type = "function" + # + # We struck out. + # + else: + self.emit_msg(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = state.NORMAL + return + # + # OK, set up for a new kerneldoc entry. + # + self.state = state.BODY + self.entry.identifier = self.entry.identifier.strip(" ") + # if there's no @param blocks need to set up default section here + self.entry.begin_section(ln + 1) + # + # Find the description portion, which *should* be there but + # isn't always. + # (We should be able to capture this from the previous parsing - someday) + # + r = KernRe("[-:](.*)") + if r.search(line): + self.entry.declaration_purpose = trim_whitespace(r.group(1)) + self.state = state.DECLARATION + else: + self.entry.declaration_purpose = "" + + if not self.entry.declaration_purpose and self.config.wshort_desc: + self.emit_msg(ln, + f"missing initial short description on line:\n{line}") + + if not self.entry.identifier and self.entry.decl_type != "enum": + self.emit_msg(ln, + f"wrong kernel-doc identifier on line:\n{line}") + self.state = state.NORMAL + + if self.config.verbose: + self.emit_msg(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + warning=False) + # + # Failed to find an identifier. Emit a warning + # + else: + self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") + + # + # Helper function to determine if a new section is being started. + # + def is_new_section(self, ln, line): + if doc_sect.search(line): + self.state = state.BODY + # + # Pick out the name of our new section, tweaking it if need be. + # + newsection = doc_sect.group(1) + if newsection.lower() == 'description': + newsection = 'Description' + elif newsection.lower() == 'context': + newsection = 'Context' + self.state = state.SPECIAL_SECTION + elif newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + self.state = state.SPECIAL_SECTION + elif newsection[0] == '@': + self.state = state.SPECIAL_SECTION + # + # Initialize the contents, and get the new section going. + # + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + self.dump_section() + self.entry.begin_section(ln, newsection) + self.entry.leading_space = None + + self.entry.add_text(newcontents.lstrip()) + return True + return False + + # + # Helper function to detect (and effect) the end of a kerneldoc comment. + # + def is_comment_end(self, ln, line): + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + <text> + doc_end: + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') + if r.match(line): + self.emit_msg(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = state.PROTO + return True + return False + + + def process_decl(self, ln, line): + """ + STATE_DECLARATION: We've seen the beginning of a declaration + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # Look for anything with the " * " line beginning. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # A blank line means that we have moved out of the declaration + # part of the comment (without any "special section" parameter + # descriptions). + # + if cont == "": + self.state = state.BODY + # + # Otherwise we have more of the declaration section to soak up. + # + else: + self.entry.declaration_purpose = \ + trim_whitespace(self.entry.declaration_purpose + ' ' + cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + + def process_special(self, ln, line): + """ + STATE_SPECIAL_SECTION: a section ending with a blank line + """ + # + # If we have hit a blank line (only the " * " marker), then this + # section is done. + # + if KernRe(r"\s*\*\s*$").match(line): + self.entry.begin_section(ln, dump = True) + self.state = state.BODY + return + # + # Not a blank line, look for the other ways to end the section. + # + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + # + # OK, we should have a continuation of the text for this section. + # + if doc_content.search(line): + cont = doc_content.group(1) + # + # If the lines of text after the first in a special section have + # leading white space, we need to trim it out or Sphinx will get + # confused. For the second line (the None case), see what we + # find there and remember it. + # + if self.entry.leading_space is None: + r = KernRe(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + # + # Otherwise, before trimming any leading chars, be *sure* + # that they are white space. We should maybe warn if this + # isn't the case. + # + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + self.entry.leading_space = i + break + # + # Add the trimmed result to the section and we're done. + # + self.entry.add_text(cont[self.entry.leading_space:]) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_body(self, ln, line): + """ + STATE_BODY: the bulk of a kerneldoc comment. + """ + if self.is_new_section(ln, line) or self.is_comment_end(ln, line): + return + + if doc_content.search(line): + cont = doc_content.group(1) + self.entry.add_text(cont) + else: + # Unknown line, ignore + self.emit_msg(ln, f"bad line: {line}") + + def process_inline_name(self, ln, line): + """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" + + if doc_inline_sect.search(line): + self.entry.begin_section(ln, doc_inline_sect.group(1)) + self.entry.add_text(doc_inline_sect.group(2).lstrip()) + self.state = state.INLINE_TEXT + elif doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") + self.state = state.PROTO + # else ... ?? + + def process_inline_text(self, ln, line): + """STATE_INLINE_TEXT: docbook comments within a prototype.""" + + if doc_inline_end.search(line): + self.dump_section() + self.state = state.PROTO + elif doc_content.search(line): + self.entry.add_text(doc_content.group(1)) + # else ... ?? + + def syscall_munge(self, ln, proto): # pylint: disable=W0613 + """ + Handle syscall definitions + """ + + is_void = False + + # Strip newlines/CR's + proto = re.sub(r'[\r\n]+', ' ', proto) + + # Check if it's a SYSCALL_DEFINE0 + if 'SYSCALL_DEFINE0' in proto: + is_void = True + + # Replace SYSCALL_DEFINE with correct return type & function name + proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + + r = KernRe(r'long\s+(sys_.*?),') + if r.search(proto): + proto = KernRe(',').sub('(', proto, count=1) + elif is_void: + proto = KernRe(r'\)').sub('(void)', proto, count=1) + + # Now delete all of the odd-numbered commas in the proto + # so that argument types & names don't have a comma between them + count = 0 + length = len(proto) + + if is_void: + length = 0 # skip the loop if is_void + + for ix in range(length): + if proto[ix] == ',': + count += 1 + if count % 2 == 1: + proto = proto[:ix] + ' ' + proto[ix + 1:] + + return proto + + def tracepoint_munge(self, ln, proto): + """ + Handle tracepoint definitions + """ + + tracepointname = None + tracepointargs = None + + # Match tracepoint name based on different patterns + r = KernRe(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname = r.group(2) + + if tracepointname: + tracepointname = tracepointname.lstrip() + + r = KernRe(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs = r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_msg(ln, + f"Unrecognized tracepoint format:\n{proto}\n") + else: + proto = f"static inline void trace_{tracepointname}({tracepointargs})" + self.entry.identifier = f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype""" + + # strip C99-style comments to end of line + line = KernRe(r"//.*$", re.S).sub('', line) + # + # Soak up the line's worth of prototype text, stopping at { or ; if present. + # + if KernRe(r'\s*#\s*define').match(line): + self.entry.prototype = line + elif not line.startswith('#'): # skip other preprocessor stuff + r = KernRe(r'([^\{]*)') + if r.match(line): + self.entry.prototype += r.group(1) + " " + # + # If we now have the whole prototype, clean it up and declare victory. + # + if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): + # strip comments and surrounding spaces + self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() + # + # Handle self.entry.prototypes for function pointers like: + # int (*pcs_config)(struct foo) + # by turning it into + # int pcs_config(struct foo) + # + r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') + self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) + # + # Handle special declaration syntaxes + # + if 'SYSCALL_DEFINE' in self.entry.prototype: + self.entry.prototype = self.syscall_munge(ln, + self.entry.prototype) + else: + r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype = self.tracepoint_munge(ln, + self.entry.prototype) + # + # ... and we're done + # + self.dump_function(ln, self.entry.prototype) + self.reset_state(ln) + + def process_proto_type(self, ln, line): + """Ancillary routine to process a type""" + + # Strip C99-style comments and surrounding whitespace + line = KernRe(r"//.*$", re.S).sub('', line).strip() + if not line: + return # nothing to see here + + # To distinguish preprocessor directive from regular declaration later. + if line.startswith('#'): + line += ";" + # + # Split the declaration on any of { } or ;, and accumulate pieces + # until we hit a semicolon while not inside {brackets} + # + r = KernRe(r'(.*?)([{};])') + for chunk in r.split(line): + if chunk: # Ignore empty matches + self.entry.prototype += chunk + # + # This cries out for a match statement ... someday after we can + # drop Python 3.9 ... + # + if chunk == '{': + self.entry.brcount += 1 + elif chunk == '}': + self.entry.brcount -= 1 + elif chunk == ';' and self.entry.brcount <= 0: + self.dump_declaration(ln, self.entry.prototype) + self.reset_state(ln) + return + # + # We hit the end of the line while still in the declaration; put + # in a space to represent the newline. + # + self.entry.prototype += ' ' + + def process_proto(self, ln, line): + """STATE_PROTO: reading a function/whatever prototype.""" + + if doc_inline_oneline.search(line): + self.entry.begin_section(ln, doc_inline_oneline.group(1)) + self.entry.add_text(doc_inline_oneline.group(2)) + self.dump_section() + + elif doc_inline_start.search(line): + self.state = state.INLINE_NAME + + elif self.entry.decl_type == 'function': + self.process_proto_function(ln, line) + + else: + self.process_proto_type(ln, line) + + def process_docblock(self, ln, line): + """STATE_DOCBLOCK: within a DOC: block.""" + + if doc_end.search(line): + self.dump_section() + self.output_declaration("doc", self.entry.identifier) + self.reset_state(ln) + + elif doc_content.search(line): + self.entry.add_text(doc_content.group(1)) + + def parse_export(self): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + + for line in fp: + self.process_export(export_table, line) + + except IOError: + return None + + return export_table + + # + # The state/action table telling us which function to invoke in + # each state. + # + state_actions = { + state.NORMAL: process_normal, + state.NAME: process_name, + state.BODY: process_body, + state.DECLARATION: process_decl, + state.SPECIAL_SECTION: process_special, + state.INLINE_NAME: process_inline_name, + state.INLINE_TEXT: process_inline_text, + state.PROTO: process_proto, + state.DOCBLOCK: process_docblock, + } + + def parse_kdoc(self): + """ + Open and process each line of a C source file. + The parsing is controlled via a state machine, and the line is passed + to a different process function depending on the state. The process + function may update the state as needed. + + Besides parsing kernel-doc tags, it also parses export symbols. + """ + + prev = "" + prev_ln = None + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + for ln, line in enumerate(fp): + + line = line.expandtabs().strip("\n") + + # Group continuation lines on prototypes + if self.state == state.PROTO: + if line.endswith("\\"): + prev += line.rstrip("\\") + if not prev_ln: + prev_ln = ln + continue + + if prev: + ln = prev_ln + line = prev + line + prev = "" + prev_ln = None + + self.config.log.debug("%d %s: %s", + ln, state.name[self.state], + line) + + # This is an optimization over the original script. + # There, when export_file was used for the same file, + # it was read twice. Here, we use the already-existing + # loop to parse exported symbols as well. + # + if (self.state != state.NORMAL) or \ + not self.process_export(export_table, line): + # Hand this line to the appropriate state handler + self.state_actions[self.state](self, ln, line) + + except OSError: + self.config.log.error(f"Error: Cannot open file {self.fname}") + + return export_table, self.entries diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py new file mode 100644 index 000000000000..2dfa1bf83d64 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. + +""" +Regular expression ancillary classes. + +Those help caching regular expressions and do matching for kernel-doc. +""" + +import re + +# Local cache for regular expressions +re_cache = {} + + +class KernRe: + """ + Helper class to simplify regex declaration and usage. + + It calls re.compile for a given pattern. It also allows adding + regular expressions and define sub at class init time. + + Regular expressions can be cached via an argument, helping to speedup + searches. + """ + + def _add_regex(self, string, flags): + """ + Adds a new regex or reuses it from the cache. + """ + self.regex = re_cache.get(string, None) + if not self.regex: + self.regex = re.compile(string, flags=flags) + if self.cache: + re_cache[string] = self.regex + + def __init__(self, string, cache=True, flags=0): + """ + Compile a regular expression and initialize internal vars. + """ + + self.cache = cache + self.last_match = None + + self._add_regex(string, flags) + + def __str__(self): + """ + Return the regular expression pattern. + """ + return self.regex.pattern + + def __add__(self, other): + """ + Allows adding two regular expressions into one. + """ + + return KernRe(str(self) + str(other), cache=self.cache or other.cache, + flags=self.regex.flags | other.regex.flags) + + def match(self, string): + """ + Handles a re.match storing its results + """ + + self.last_match = self.regex.match(string) + return self.last_match + + def search(self, string): + """ + Handles a re.search storing its results + """ + + self.last_match = self.regex.search(string) + return self.last_match + + def findall(self, string): + """ + Alias to re.findall + """ + + return self.regex.findall(string) + + def split(self, string): + """ + Alias to re.split + """ + + return self.regex.split(string) + + def sub(self, sub, string, count=0): + """ + Alias to re.sub + """ + + return self.regex.sub(sub, string, count=count) + + def group(self, num): + """ + Returns the group results of the last match + """ + + return self.last_match.group(num) + + +class NestedMatch: + """ + Finding nested delimiters is hard with regular expressions. It is + even harder on Python with its normal re module, as there are several + advanced regular expressions that are missing. + + This is the case of this pattern: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parentheses of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + # TODO: make NestedMatch handle multiple match groups + # + # Right now, regular expressions to match it are defined only up to + # the start delimiter, e.g.: + # + # \bSTRUCT_GROUP\( + # + # is similar to: STRUCT_GROUP\((.*)\) + # except that the content inside the match group is delimiter-aligned. + # + # The content inside parentheses is converted into a single replace + # group (e.g. r`\1'). + # + # It would be nice to change such definition to support multiple + # match groups, allowing a regex equivalent to: + # + # FOO\((.*), (.*), (.*)\) + # + # it is probably easier to define it not as a regular expression, but + # with some lexical definition like: + # + # FOO(arg1, arg2, arg3) + + DELIMITER_PAIRS = { + '{': '}', + '(': ')', + '[': ']', + } + + RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') + + def _search(self, regex, line): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and places them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. + + The algorithm should work fine for properly paired lines, but will + silently ignore end delimiters that precede a start delimiter. + This should be OK for kernel-doc parser, as unaligned delimiters + would cause compilation errors. So, we don't need to raise exceptions + to cover such issues. + """ + + stack = [] + + for match_re in regex.finditer(line): + start = match_re.start() + offset = match_re.end() + + d = line[offset - 1] + if d not in self.DELIMITER_PAIRS: + continue + + end = self.DELIMITER_PAIRS[d] + stack.append(end) + + for match in self.RE_DELIM.finditer(line[offset:]): + pos = match.start() + offset + + d = line[pos] + + if d in self.DELIMITER_PAIRS: + end = self.DELIMITER_PAIRS[d] + + stack.append(end) + continue + + # Does the end delimiter match what is expected? + if stack and d == stack[-1]: + stack.pop() + + if not stack: + yield start, offset, pos + 1 + break + + def search(self, regex, line): + """ + This is similar to re.search: + + It matches a regex that it is followed by a delimiter, + returning occurrences only if all delimiters are paired. + """ + + for t in self._search(regex, line): + + yield line[t[0]:t[2]] + + def sub(self, regex, sub, line, count=0): + """ + This is similar to re.sub: + + It matches a regex that it is followed by a delimiter, + replacing occurrences only if all delimiters are paired. + + if r'\1' is used, it works just like re: it places there the + matched paired data with the delimiter stripped. + + If count is different than zero, it will replace at most count + items. + """ + out = "" + + cur_pos = 0 + n = 0 + + for start, end, pos in self._search(regex, line): + out += line[cur_pos:start] + + # Value, ignoring start/end delimiters + value = line[end:pos - 1] + + # replaces \1 at the sub string, if \1 is used there + new_sub = sub + new_sub = new_sub.replace(r'\1', value) + + out += new_sub + + # Drop end ';' if any + if line[pos] == ';': + pos += 1 + + cur_pos = pos + n += 1 + + if count and count >= n: + break + + # Append the remaining string + l = len(line) + out += line[cur_pos:l] + + return out diff --git a/tools/lib/python/kdoc/latex_fonts.py b/tools/lib/python/kdoc/latex_fonts.py new file mode 100755 index 000000000000..29317f8006ea --- /dev/null +++ b/tools/lib/python/kdoc/latex_fonts.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) Akira Yokosawa, 2024 +# +# Ported to Python by (c) Mauro Carvalho Chehab, 2025 + +""" +Detect problematic Noto CJK variable fonts. + +For "make pdfdocs", reports of build errors of translations.pdf started +arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE +tumbleweed have started deploying variable-font [3] format of "Noto CJK" +fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK +(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which +does not (and likely never will) understand variable fonts for historical +reasons. + +The build error happens even when both of variable- and non-variable-format +fonts are found on the build system. To make matters worse, Fedora enlists +variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, +-zh_TW, etc. Hence developers who have interest in CJK pages are more +likely to encounter the build errors. + +This script is invoked from the error path of "make pdfdocs" and emits +suggestions if variable-font files of "Noto CJK" fonts are in the list of +fonts accessible from XeTeX. + +References: +[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ +[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ +[3]: https://en.wikipedia.org/wiki/Variable_font +[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts +[5]: https://build.opensuse.org/request/show/1157217 + +#=========================================================================== +Workarounds for building translations.pdf +#=========================================================================== + +* Denylist "variable font" Noto CJK fonts. + - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with + tweaks if necessary. Remove leading "". + - Path of fontconfig/fonts.conf can be overridden by setting an env + variable FONTS_CONF_DENY_VF. + + * Template: +----------------------------------------------------------------- +<?xml version="1.0"?> +<!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd"> +<fontconfig> +<!-- + Ignore variable-font glob (not to break xetex) +--> + <selectfont> + <rejectfont> + <!-- + for Fedora + --> + <glob>/usr/share/fonts/google-noto-*-cjk-vf-fonts</glob> + <!-- + for openSUSE tumbleweed + --> + <glob>/usr/share/fonts/truetype/Noto*CJK*-VF.otf</glob> + </rejectfont> + </selectfont> +</fontconfig> +----------------------------------------------------------------- + + The denylisting is activated for "make pdfdocs". + +* For skipping CJK pages in PDF + - Uninstall texlive-xecjk. + Denylisting is not needed in this case. + +* For printing CJK pages in PDF + - Need non-variable "Noto CJK" fonts. + * Fedora + - google-noto-sans-cjk-fonts + - google-noto-serif-cjk-fonts + * openSUSE tumbleweed + - Non-variable "Noto CJK" fonts are not available as distro packages + as of April, 2024. Fetch a set of font files from upstream Noto + CJK Font released at: + https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc + and at: + https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc + , then uncompress and deploy them. + - Remember to update fontconfig cache by running fc-cache. + +!!! Caution !!! + Uninstalling "variable font" packages can be dangerous. + They might be depended upon by other packages important for your work. + Denylisting should be less invasive, as it is effective only while + XeLaTeX runs in "make pdfdocs". +""" + +import os +import re +import subprocess +import textwrap +import sys + +class LatexFontChecker: + """ + Detect problems with CJK variable fonts that affect PDF builds for + translations. + """ + + def __init__(self, deny_vf=None): + if not deny_vf: + deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") + + self.environ = os.environ.copy() + self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) + + self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") + + def description(self): + return __doc__ + + def get_noto_cjk_vf_fonts(self): + """Get Noto CJK fonts""" + + cjk_fonts = set() + cmd = ["fc-list", ":", "file", "family", "variable"] + try: + result = subprocess.run(cmd,stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + env=self.environ, + check=True) + + except subprocess.CalledProcessError as exc: + sys.exit(f"Error running fc-list: {repr(exc)}") + + for line in result.stdout.splitlines(): + if 'variable=True' not in line: + continue + + match = self.re_cjk.search(line) + if match: + cjk_fonts.add(match.group(1)) + + return sorted(cjk_fonts) + + def check(self): + """Check for problems with CJK fonts""" + + fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") + if not fonts: + return None + + rel_file = os.path.relpath(__file__, os.getcwd()) + + msg = "=" * 77 + "\n" + msg += 'XeTeX is confused by "variable font" files listed below:\n' + msg += fonts + "\n" + msg += textwrap.dedent(f""" + For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. + Or, CJK pages can be skipped by uninstalling texlive-xecjk. + + For more info on denylisting, other options, and variable font, run: + + tools/docs/check-variable-fonts.py -h + """) + msg += "=" * 77 + + return msg diff --git a/tools/lib/python/kdoc/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py new file mode 100755 index 000000000000..25361996cd20 --- /dev/null +++ b/tools/lib/python/kdoc/parse_data_structs.py @@ -0,0 +1,482 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>. +# pylint: disable=R0912,R0915 + +""" +Parse a source file or header, creating ReStructured Text cross references. + +It accepts an optional file to change the default symbol reference or to +suppress symbols from the output. + +It is capable of identifying defines, functions, structs, typedefs, +enums and enum symbols and create cross-references for all of them. +It is also capable of distinguish #define used for specifying a Linux +ioctl. + +The optional rules file contains a set of rules like: + + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` +""" + +import os +import re +import sys + + +class ParseDataStructs: + """ + Creates an enriched version of a Kernel header file with cross-links + to each C data structure type. + + It is meant to allow having a more comprehensive documentation, where + uAPI headers will create cross-reference links to the code. + + It is capable of identifying defines, functions, structs, typedefs, + enums and enum symbols and create cross-references for all of them. + It is also capable of distinguish #define used for specifying a Linux + ioctl. + + By default, it create rules for all symbols and defines, but it also + allows parsing an exception file. Such file contains a set of rules + using the syntax below: + + 1. Ignore rules: + + ignore <type> <symbol>` + + Removes the symbol from reference generation. + + 2. Replace rules: + + replace <type> <old_symbol> <new_reference> + + Replaces how old_symbol with a new reference. The new_reference can be: + + - A simple symbol name; + - A full Sphinx reference. + + 3. Namespace rules + + namespace <namespace> + + Sets C namespace to be used during cross-reference generation. Can + be overridden by replace rules. + + On ignore and replace rules, <type> can be: + - ioctl: for defines that end with _IO*, e.g. ioctl definitions + - define: for other defines + - symbol: for symbols defined within enums; + - typedef: for typedefs; + - enum: for the name of a non-anonymous enum; + - struct: for structs. + + Examples: + + ignore define __LINUX_MEDIA_H + ignore ioctl VIDIOC_ENUM_FMT + replace ioctl VIDIOC_DQBUF vidioc_qbuf + replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det` + + namespace MC + """ + + # Parser regexes with multiple ways to capture enums and structs + RE_ENUMS = [ + re.compile(r"^\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*enum\s+([\w_]+)\s*$"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"), + re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"), + ] + RE_STRUCTS = [ + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"), + re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"), + ] + + # FIXME: the original code was written a long time before Sphinx C + # domain to have multiple namespaces. To avoid to much turn at the + # existing hyperlinks, the code kept using "c:type" instead of the + # right types. To change that, we need to change the types not only + # here, but also at the uAPI media documentation. + DEF_SYMBOL_TYPES = { + "ioctl": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + "description": "IOCTL Commands", + }, + "define": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + "description": "Macros and Definitions", + }, + # We're calling each definition inside an enum as "symbol" + "symbol": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":ref", + "description": "Enumeration values", + }, + "typedef": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + "description": "Type Definitions", + }, + # This is the description of the enum itself + "enum": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + "description": "Enumerations", + }, + "struct": { + "prefix": "\\ ", + "suffix": "\\ ", + "ref_type": ":c:type", + "description": "Structures", + }, + } + + def __init__(self, debug: bool = False): + """Initialize internal vars""" + self.debug = debug + self.data = "" + + self.symbols = {} + + self.namespace = None + self.ignore = [] + self.replace = [] + + for symbol_type in self.DEF_SYMBOL_TYPES: + self.symbols[symbol_type] = {} + + def read_exceptions(self, fname: str): + if not fname: + return + + name = os.path.basename(fname) + + with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f: + for ln, line in enumerate(f): + ln += 1 + line = line.strip() + if not line or line.startswith("#"): + continue + + # ignore rules + match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line) + + if match: + self.ignore.append((ln, match.group(1), match.group(2))) + continue + + # replace rules + match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line) + if match: + self.replace.append((ln, match.group(1), match.group(2), + match.group(3))) + continue + + match = re.match(r"^namespace\s+(\S+)", line) + if match: + self.namespace = match.group(1) + continue + + sys.exit(f"{name}:{ln}: invalid line: {line}") + + def apply_exceptions(self): + """ + Process exceptions file with rules to ignore or replace references. + """ + + # Handle ignore rules + for ln, c_type, symbol in self.ignore: + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + d = self.symbols[c_type] + if symbol in d: + del d[symbol] + + # Handle replace rules + for ln, c_type, old, new in self.replace: + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + reftype = None + + # Parse reference type when the type is specified + + match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new) + if match: + reftype = f":c:{match.group(1)}" + new = match.group(2) + else: + match = re.search(r"(\:ref)\:\`(.+)\`", new) + if match: + reftype = match.group(1) + new = match.group(2) + + # If the replacement rule doesn't have a type, get default + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type") + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type") + + new_ref = f"{reftype}:`{old} <{new}>`" + + # Change self.symbols to use the replacement rule + if old in self.symbols[c_type]: + (_, ln) = self.symbols[c_type][old] + self.symbols[c_type][old] = (new_ref, ln) + else: + print(f"{name}:{ln}: Warning: can't find {old} {c_type}") + + def store_type(self, ln, symbol_type: str, symbol: str, + ref_name: str = None, replace_underscores: bool = True): + """ + Stores a new symbol at self.symbols under symbol_type. + + By default, underscores are replaced by "-" + """ + defs = self.DEF_SYMBOL_TYPES[symbol_type] + + prefix = defs.get("prefix", "") + suffix = defs.get("suffix", "") + ref_type = defs.get("ref_type") + + # Determine ref_link based on symbol type + if ref_type or self.namespace: + if not ref_name: + ref_name = symbol.lower() + + # c-type references don't support hash + if ref_type == ":ref" and replace_underscores: + ref_name = ref_name.replace("_", "-") + + # C domain references may have namespaces + if ref_type.startswith(":c:"): + if self.namespace: + ref_name = f"{self.namespace}.{ref_name}" + + if ref_type: + ref_link = f"{ref_type}:`{symbol} <{ref_name}>`" + else: + ref_link = f"`{symbol} <{ref_name}>`" + else: + ref_link = symbol + + self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln) + + def store_line(self, line): + """Stores a line at self.data, properly indented""" + line = " " + line.expandtabs() + self.data += line.rstrip(" ") + + def parse_file(self, file_in: str, exceptions: str = None): + """Reads a C source file and get identifiers""" + self.data = "" + is_enum = False + is_comment = False + multiline = "" + + self.read_exceptions(exceptions) + + with open(file_in, "r", + encoding="utf-8", errors="backslashreplace") as f: + for line_no, line in enumerate(f): + self.store_line(line) + line = line.strip("\n") + + # Handle continuation lines + if line.endswith(r"\\"): + multiline += line[-1] + continue + + if multiline: + line = multiline + line + multiline = "" + + # Handle comments. They can be multilined + if not is_comment: + if re.search(r"/\*.*", line): + is_comment = True + else: + # Strip C99-style comments + line = re.sub(r"(//.*)", "", line) + + if is_comment: + if re.search(r".*\*/", line): + is_comment = False + else: + multiline = line + continue + + # At this point, line variable may be a multilined statement, + # if lines end with \ or if they have multi-line comments + # With that, it can safely remove the entire comments, + # and there's no need to use re.DOTALL for the logic below + + line = re.sub(r"(/\*.*\*/)", "", line) + if not line.strip(): + continue + + # It can be useful for debug purposes to print the file after + # having comments stripped and multi-lines grouped. + if self.debug > 1: + print(f"line {line_no + 1}: {line}") + + # Now the fun begins: parse each type and store it. + + # We opted for a two parsing logic here due to: + # 1. it makes easier to debug issues not-parsed symbols; + # 2. we want symbol replacement at the entire content, not + # just when the symbol is detected. + + if is_enum: + match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line) + if match: + self.store_type(line_no, "symbol", match.group(1)) + if "}" in line: + is_enum = False + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line) + if match: + self.store_type(line_no, "ioctl", match.group(1), + replace_underscores=False) + continue + + match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line) + if match: + self.store_type(line_no, "define", match.group(1)) + continue + + match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);", + line) + if match: + name = match.group(2).strip() + symbol = match.group(3) + self.store_type(line_no, "typedef", symbol, ref_name=name) + continue + + for re_enum in self.RE_ENUMS: + match = re_enum.match(line) + if match: + self.store_type(line_no, "enum", match.group(1)) + is_enum = True + break + + for re_struct in self.RE_STRUCTS: + match = re_struct.match(line) + if match: + self.store_type(line_no, "struct", match.group(1)) + break + + self.apply_exceptions() + + def debug_print(self): + """ + Print debug information containing the replacement rules per symbol. + To make easier to check, group them per type. + """ + if not self.debug: + return + + for c_type, refs in self.symbols.items(): + if not refs: # Skip empty dictionaries + continue + + print(f"{c_type}:") + + for symbol, (ref, ln) in sorted(refs.items()): + print(f" #{ln:<5d} {symbol} -> {ref}") + + print() + + def gen_output(self): + """Write the formatted output to a file.""" + + # Avoid extra blank lines + text = re.sub(r"\s+$", "", self.data) + "\n" + text = re.sub(r"\n\s+\n", "\n\n", text) + + # Escape Sphinx special characters + text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text) + + # Source uAPI files may have special notes. Use bold font for them + text = re.sub(r"DEPRECATED", "**DEPRECATED**", text) + + # Delimiters to catch the entire symbol after escaped + start_delim = r"([ \n\t\(=\*\@])" + end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)" + + # Process all reference types + for ref_dict in self.symbols.values(): + for symbol, (replacement, _) in ref_dict.items(): + symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol)) + text = re.sub(fr'{start_delim}{symbol}{end_delim}', + fr'\1{replacement}\2', text) + + # Remove "\ " where not needed: before spaces and at the end of lines + text = re.sub(r"\\ ([\n ])", r"\1", text) + text = re.sub(r" \\ ", " ", text) + + return text + + def gen_toc(self): + """ + Create a list of symbols to be part of a TOC contents table + """ + text = [] + + # Sort symbol types per description + symbol_descriptions = [] + for k, v in self.DEF_SYMBOL_TYPES.items(): + symbol_descriptions.append((v['description'], k)) + + symbol_descriptions.sort() + + # Process each category + for description, c_type in symbol_descriptions: + + refs = self.symbols[c_type] + if not refs: # Skip empty categories + continue + + text.append(f"{description}") + text.append("-" * len(description)) + text.append("") + + # Sort symbols alphabetically + for symbol, (ref, ln) in sorted(refs.items()): + text.append(f"- LINENO_{ln}: {ref}") + + text.append("") # Add empty line between categories + + return "\n".join(text) + + def write_output(self, file_in: str, file_out: str, toc: bool): + title = os.path.basename(file_in) + + if toc: + text = self.gen_toc() + else: + text = self.gen_output() + + with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f: + f.write(".. -*- coding: utf-8; mode: rst -*-\n\n") + f.write(f"{title}\n") + f.write("=" * len(title) + "\n\n") + + if not toc: + f.write(".. parsed-literal::\n\n") + + f.write(text) diff --git a/tools/lib/python/kdoc/python_version.py b/tools/lib/python/kdoc/python_version.py new file mode 100644 index 000000000000..e83088013db2 --- /dev/null +++ b/tools/lib/python/kdoc/python_version.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> + +""" +Handle Python version check logic. + +Not all Python versions are supported by scripts. Yet, on some cases, +like during documentation build, a newer version of python could be +available. + +This class allows checking if the minimal requirements are followed. + +Better than that, PythonVersion.check_python() not only checks the minimal +requirements, but it automatically switches to a the newest available +Python version if present. + +""" + +import os +import re +import subprocess +import shlex +import sys + +from glob import glob +from textwrap import indent + +class PythonVersion: + """ + Ancillary methods that checks for missing dependencies for different + types of types, like binaries, python modules, rpm deps, etc. + """ + + def __init__(self, version): + """Ïnitialize self.version tuple from a version string""" + self.version = self.parse_version(version) + + @staticmethod + def parse_version(version): + """Convert a major.minor.patch version into a tuple""" + return tuple(int(x) for x in version.split(".")) + + @staticmethod + def ver_str(version): + """Returns a version tuple as major.minor.patch""" + return ".".join([str(x) for x in version]) + + @staticmethod + def cmd_print(cmd, max_len=80): + cmd_line = [] + + for w in cmd: + w = shlex.quote(w) + + if cmd_line: + if not max_len or len(cmd_line[-1]) + len(w) < max_len: + cmd_line[-1] += " " + w + continue + else: + cmd_line[-1] += " \\" + cmd_line.append(w) + else: + cmd_line.append(w) + + return "\n ".join(cmd_line) + + def __str__(self): + """Returns a version tuple as major.minor.patch from self.version""" + return self.ver_str(self.version) + + @staticmethod + def get_python_version(cmd): + """ + Get python version from a Python binary. As we need to detect if + are out there newer python binaries, we can't rely on sys.release here. + """ + + kwargs = {} + if sys.version_info < (3, 7): + kwargs['universal_newlines'] = True + else: + kwargs['text'] = True + + result = subprocess.run([cmd, "--version"], + stdout = subprocess.PIPE, + stderr = subprocess.PIPE, + **kwargs, check=False) + + version = result.stdout.strip() + + match = re.search(r"(\d+\.\d+\.\d+)", version) + if match: + return PythonVersion.parse_version(match.group(1)) + + print(f"Can't parse version {version}") + return (0, 0, 0) + + @staticmethod + def find_python(min_version): + """ + Detect if are out there any python 3.xy version newer than the + current one. + + Note: this routine is limited to up to 2 digits for python3. We + may need to update it one day, hopefully on a distant future. + """ + patterns = [ + "python3.[0-9][0-9]", + "python3.[0-9]", + ] + + python_cmd = [] + + # Seek for a python binary newer than min_version + for path in os.getenv("PATH", "").split(":"): + for pattern in patterns: + for cmd in glob(os.path.join(path, pattern)): + if os.path.isfile(cmd) and os.access(cmd, os.X_OK): + version = PythonVersion.get_python_version(cmd) + if version >= min_version: + python_cmd.append((version, cmd)) + + return sorted(python_cmd, reverse=True) + + @staticmethod + def check_python(min_version, show_alternatives=False, bail_out=False, + success_on_error=False): + """ + Check if the current python binary satisfies our minimal requirement + for Sphinx build. If not, re-run with a newer version if found. + """ + cur_ver = sys.version_info[:3] + if cur_ver >= min_version: + ver = PythonVersion.ver_str(cur_ver) + return + + python_ver = PythonVersion.ver_str(cur_ver) + + available_versions = PythonVersion.find_python(min_version) + if not available_versions: + print(f"ERROR: Python version {python_ver} is not supported anymore\n") + print(" Can't find a new version. This script may fail") + return + + script_path = os.path.abspath(sys.argv[0]) + + # Check possible alternatives + if available_versions: + new_python_cmd = available_versions[0][1] + else: + new_python_cmd = None + + if show_alternatives and available_versions: + print("You could run, instead:") + for _, cmd in available_versions: + args = [cmd, script_path] + sys.argv[1:] + + cmd_str = indent(PythonVersion.cmd_print(args), " ") + print(f"{cmd_str}\n") + + if bail_out: + msg = f"Python {python_ver} not supported. Bailing out" + if success_on_error: + print(msg, file=sys.stderr) + sys.exit(0) + else: + sys.exit(msg) + + print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") + + # Restart script using the newer version + args = [new_python_cmd, script_path] + sys.argv[1:] + + try: + os.execv(new_python_cmd, args) + except OSError as e: + sys.exit(f"Failed to restart with {new_python_cmd}: {e}") diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 727396de6be5..9e7307186b7f 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -58,7 +58,7 @@ static inline void rb_set_black(struct rb_node *rb) { - rb->__rb_parent_color |= RB_BLACK; + rb->__rb_parent_color += RB_BLACK; } static inline struct rb_node *rb_red_parent(struct rb_node *red) diff --git a/tools/lib/slab.c b/tools/lib/slab.c new file mode 100644 index 000000000000..981a21404f32 --- /dev/null +++ b/tools/lib/slab.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> + +#include <urcu/uatomic.h> +#include <linux/slab.h> +#include <malloc.h> +#include <linux/gfp.h> + +int kmalloc_nr_allocated; +int kmalloc_verbose; + +void *kmalloc(size_t size, gfp_t gfp) +{ + void *ret; + + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return NULL; + + ret = malloc(size); + uatomic_inc(&kmalloc_nr_allocated); + if (kmalloc_verbose) + printf("Allocating %p from malloc\n", ret); + if (gfp & __GFP_ZERO) + memset(ret, 0, size); + return ret; +} + +void kfree(void *p) +{ + if (!p) + return; + uatomic_dec(&kmalloc_nr_allocated); + if (kmalloc_verbose) + printf("Freeing %p to malloc\n", p); + free(p); +} + +void *kmalloc_array(size_t n, size_t size, gfp_t gfp) +{ + void *ret; + + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return NULL; + + ret = calloc(n, size); + uatomic_inc(&kmalloc_nr_allocated); + if (kmalloc_verbose) + printf("Allocating %p from calloc\n", ret); + if (gfp & __GFP_ZERO) + memset(ret, 0, n * size); + return ret; +} diff --git a/tools/lib/string.c b/tools/lib/string.c index 8b6892f959ab..3126d2cff716 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -153,6 +153,19 @@ char *strim(char *s) return skip_spaces(s); } +/* + * remove_spaces - Removes whitespaces from @s + */ +void remove_spaces(char *s) +{ + char *d = s; + + do { + while (*d == ' ') + ++d; + } while ((*s++ = *d++)); +} + /** * strreplace - Replace all occurrences of character in string. * @s: The string to operate on. diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 1c777a72bb39..8703ab487b68 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -17,6 +17,15 @@ RM = rm -f MAKEFLAGS += --no-print-directory +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := -ggdb3 -Wall -Wextra -std=gnu99 -fPIC @@ -29,10 +38,8 @@ endif ifeq ($(DEBUG),1) CFLAGS += -O0 -else ifeq ($(CC_NO_CLANG), 0) - CFLAGS += -O3 else - CFLAGS += -O6 + CFLAGS += -O3 endif # Treat warnings as errors unless directed not to @@ -48,6 +55,18 @@ CFLAGS += $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) SUBCMD_IN := $(OUTPUT)libsubcmd-in.o +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + all: export srctree OUTPUT CC LD CFLAGS V @@ -55,15 +74,46 @@ include $(srctree)/tools/build/Makefile.include all: fixdep $(LIBFILE) -$(SUBCMD_IN): FORCE +$(SUBCMD_IN): fixdep FORCE @$(MAKE) $(build)=libsubcmd $(LIBFILE): $(SUBCMD_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN) +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +HDRS := exec-cmd.h help.h pager.h parse-options.h run-command.h +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/subcmd +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +install_headers: $(INSTALL_HDRS) + $(call QUIET_INSTALL, libsubcmd_headers) + +install: install_lib install_headers + clean: $(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \ - find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) FORCE: diff --git a/tools/lib/subcmd/exec-cmd.c b/tools/lib/subcmd/exec-cmd.c index 33e94fb83986..7739b5217cf6 100644 --- a/tools/lib/subcmd/exec-cmd.c +++ b/tools/lib/subcmd/exec-cmd.c @@ -24,6 +24,9 @@ void exec_cmd_init(const char *exec_name, const char *prefix, subcmd_config.prefix = prefix; subcmd_config.exec_path = exec_path; subcmd_config.exec_path_env = exec_path_env; + + /* Setup environment variable for invoked shell script. */ + setenv("PREFIX", prefix, 1); } #define is_dir_sep(c) ((c) == '/') @@ -33,38 +36,40 @@ static int is_absolute_path(const char *path) return path[0] == '/'; } -static const char *get_pwd_cwd(void) +static const char *get_pwd_cwd(char *buf, size_t sz) { - static char cwd[PATH_MAX + 1]; char *pwd; struct stat cwd_stat, pwd_stat; - if (getcwd(cwd, PATH_MAX) == NULL) + if (getcwd(buf, sz) == NULL) return NULL; pwd = getenv("PWD"); - if (pwd && strcmp(pwd, cwd)) { - stat(cwd, &cwd_stat); + if (pwd && strcmp(pwd, buf)) { + stat(buf, &cwd_stat); if (!stat(pwd, &pwd_stat) && pwd_stat.st_dev == cwd_stat.st_dev && pwd_stat.st_ino == cwd_stat.st_ino) { - strlcpy(cwd, pwd, PATH_MAX); + strlcpy(buf, pwd, sz); } } - return cwd; + return buf; } -static const char *make_nonrelative_path(const char *path) +static const char *make_nonrelative_path(char *buf, size_t sz, const char *path) { - static char buf[PATH_MAX + 1]; - if (is_absolute_path(path)) { - if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) + if (strlcpy(buf, path, sz) >= sz) die("Too long path: %.*s", 60, path); } else { - const char *cwd = get_pwd_cwd(); + const char *cwd = get_pwd_cwd(buf, sz); + if (!cwd) die("Cannot determine the current working directory"); - if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) + + if (strlen(cwd) + strlen(path) + 2 >= sz) die("Too long path: %.*s", 60, path); + + strcat(buf, "/"); + strcat(buf, path); } return buf; } @@ -130,8 +135,11 @@ static void add_path(char **out, const char *path) if (path && *path) { if (is_absolute_path(path)) astrcat(out, path); - else - astrcat(out, make_nonrelative_path(path)); + else { + char buf[PATH_MAX]; + + astrcat(out, make_nonrelative_path(buf, sizeof(buf), path)); + } astrcat(out, ":"); } diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index bf02d62a3b2b..ddaeb4eb3e24 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -9,6 +9,7 @@ #include <sys/stat.h> #include <unistd.h> #include <dirent.h> +#include <assert.h> #include "subcmd-util.h" #include "help.h" #include "exec-cmd.h" @@ -16,6 +17,8 @@ void add_cmdname(struct cmdnames *cmds, const char *name, size_t len) { struct cmdname *ent = malloc(sizeof(*ent) + len + 1); + if (!ent) + return; ent->len = len; memcpy(ent->name, name, len); @@ -50,11 +53,21 @@ void uniq(struct cmdnames *cmds) if (!cmds->cnt) return; - for (i = j = 1; i < cmds->cnt; i++) - if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) - cmds->names[j++] = cmds->names[i]; - + for (i = 1; i < cmds->cnt; i++) { + if (!strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) + zfree(&cmds->names[i - 1]); + } + for (i = 0, j = 0; i < cmds->cnt; i++) { + if (cmds->names[i]) { + if (i == j) + j++; + else + cmds->names[j++] = cmds->names[i]; + } + } cmds->cnt = j; + while (j < i) + cmds->names[j++] = NULL; } void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) @@ -62,22 +75,36 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) size_t ci, cj, ei; int cmp; + if (!excludes->cnt) + return; + ci = cj = ei = 0; while (ci < cmds->cnt && ei < excludes->cnt) { cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); if (cmp < 0) { - cmds->names[cj++] = cmds->names[ci++]; + if (ci == cj) { + ci++; + cj++; + } else { + cmds->names[cj++] = cmds->names[ci]; + cmds->names[ci++] = NULL; + } } else if (cmp == 0) { + zfree(&cmds->names[ci]); ci++; ei++; } else if (cmp > 0) { ei++; } } - - while (ci < cmds->cnt) - cmds->names[cj++] = cmds->names[ci++]; - + if (ci != cj) { + while (ci < cmds->cnt) { + cmds->names[cj++] = cmds->names[ci]; + cmds->names[ci++] = NULL; + } + } + for (ci = cj; ci < cmds->cnt; ci++) + assert(cmds->names[ci] == NULL); cmds->cnt = cj; } diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 39ebf6192016..555d617c1f50 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -806,20 +806,43 @@ static int option__cmp(const void *va, const void *vb) static struct option *options__order(const struct option *opts) { - int nr_opts = 0, len; - const struct option *o = opts; - struct option *ordered; - - for (o = opts; o->type != OPTION_END; o++) - ++nr_opts; - - len = sizeof(*o) * (nr_opts + 1); - ordered = malloc(len); - if (!ordered) - goto out; - memcpy(ordered, opts, len); + int nr_opts = 0, nr_group = 0, nr_parent = 0, len; + const struct option *o = NULL, *p = opts; + struct option *opt, *ordered = NULL, *group; + + /* flatten the options that have parents */ + for (p = opts; p != NULL; p = o->parent) { + for (o = p; o->type != OPTION_END; o++) + ++nr_opts; + + /* + * the length is given by the number of options plus a null + * terminator for the last loop iteration. + */ + len = sizeof(*o) * (nr_opts + !o->parent); + group = realloc(ordered, len); + if (!group) + goto out; + ordered = group; + memcpy(&ordered[nr_parent], p, sizeof(*o) * (nr_opts - nr_parent)); + + nr_parent = nr_opts; + } + /* copy the last OPTION_END */ + memcpy(&ordered[nr_opts], o, sizeof(*o)); + + /* sort each option group individually */ + for (opt = group = ordered; opt->type != OPTION_END; opt++) { + if (opt->type == OPTION_GROUP) { + qsort(group, nr_group, sizeof(*opt), option__cmp); + group = opt + 1; + nr_group = 0; + continue; + } + nr_group++; + } + qsort(group, nr_group, sizeof(*opt), option__cmp); - qsort(ordered, nr_opts, sizeof(*o), option__cmp); out: return ordered; } diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index 41b9b942504d..8e9147358a28 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -6,10 +6,6 @@ #include <stdbool.h> #include <stdint.h> -#ifndef NORETURN -#define NORETURN __attribute__((__noreturn__)) -#endif - enum parse_opt_type { /* special types */ OPTION_END, @@ -183,9 +179,9 @@ extern int parse_options_subcommand(int argc, const char **argv, const char *const subcommands[], const char *usagestr[], int flags); -extern NORETURN void usage_with_options(const char * const *usagestr, +extern __noreturn void usage_with_options(const char * const *usagestr, const struct option *options); -extern NORETURN __attribute__((format(printf,3,4))) +extern __noreturn __attribute__((format(printf,3,4))) void usage_with_options_msg(const char * const *usagestr, const struct option *options, const char *fmt, ...); diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c index 5cdac2162532..b7510f83209a 100644 --- a/tools/lib/subcmd/run-command.c +++ b/tools/lib/subcmd/run-command.c @@ -2,8 +2,10 @@ #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> +#include <ctype.h> #include <fcntl.h> #include <string.h> +#include <linux/compiler.h> #include <linux/string.h> #include <errno.h> #include <sys/wait.h> @@ -122,6 +124,8 @@ int start_command(struct child_process *cmd) } if (cmd->preexec_cb) cmd->preexec_cb(); + if (cmd->no_exec_cmd) + exit(cmd->no_exec_cmd(cmd)); if (cmd->exec_cmd) { execv_cmd(cmd->argv); } else { @@ -163,43 +167,107 @@ int start_command(struct child_process *cmd) return 0; } -static int wait_or_whine(pid_t pid) +static int wait_or_whine(struct child_process *cmd, bool block) { - char sbuf[STRERR_BUFSIZE]; + bool finished = cmd->finished; + int result = cmd->finish_result; - for (;;) { + while (!finished) { int status, code; - pid_t waiting = waitpid(pid, &status, 0); + pid_t waiting = waitpid(cmd->pid, &status, block ? 0 : WNOHANG); + + if (!block && waiting == 0) + break; + + if (waiting < 0 && errno == EINTR) + continue; + finished = true; if (waiting < 0) { - if (errno == EINTR) - continue; + char sbuf[STRERR_BUFSIZE]; + fprintf(stderr, " Error: waitpid failed (%s)", str_error_r(errno, sbuf, sizeof(sbuf))); - return -ERR_RUN_COMMAND_WAITPID; - } - if (waiting != pid) - return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; - if (WIFSIGNALED(status)) - return -ERR_RUN_COMMAND_WAITPID_SIGNAL; - - if (!WIFEXITED(status)) - return -ERR_RUN_COMMAND_WAITPID_NOEXIT; - code = WEXITSTATUS(status); - switch (code) { - case 127: - return -ERR_RUN_COMMAND_EXEC; - case 0: - return 0; - default: - return -code; + result = -ERR_RUN_COMMAND_WAITPID; + } else if (waiting != cmd->pid) { + result = -ERR_RUN_COMMAND_WAITPID_WRONG_PID; + } else if (WIFSIGNALED(status)) { + result = -ERR_RUN_COMMAND_WAITPID_SIGNAL; + } else if (!WIFEXITED(status)) { + result = -ERR_RUN_COMMAND_WAITPID_NOEXIT; + } else { + code = WEXITSTATUS(status); + switch (code) { + case 127: + result = -ERR_RUN_COMMAND_EXEC; + break; + case 0: + result = 0; + break; + default: + result = -code; + break; + } } } + if (finished) { + cmd->finished = 1; + cmd->finish_result = result; + } + return result; +} + +/* + * Conservative estimate of number of characaters needed to hold an a decoded + * integer, assume each 3 bits needs a character byte and plus a possible sign + * character. + */ +#ifndef is_signed_type +#define is_signed_type(type) (((type)(-1)) < (type)1) +#endif +#define MAX_STRLEN_TYPE(type) (sizeof(type) * 8 / 3 + (is_signed_type(type) ? 1 : 0)) + +int check_if_command_finished(struct child_process *cmd) +{ +#ifdef __linux__ + char filename[6 + MAX_STRLEN_TYPE(typeof(cmd->pid)) + 7 + 1]; + char status_line[256]; + FILE *status_file; + + /* + * Check by reading /proc/<pid>/status as calling waitpid causes + * stdout/stderr to be closed and data lost. + */ + sprintf(filename, "/proc/%u/status", cmd->pid); + status_file = fopen(filename, "r"); + if (status_file == NULL) { + /* Open failed assume finish_command was called. */ + return true; + } + while (fgets(status_line, sizeof(status_line), status_file) != NULL) { + char *p; + + if (strncmp(status_line, "State:", 6)) + continue; + + fclose(status_file); + p = status_line + 6; + while (isspace(*p)) + p++; + return *p == 'Z' ? 1 : 0; + } + /* Read failed assume finish_command was called. */ + fclose(status_file); + return 1; +#else + wait_or_whine(cmd, /*block=*/false); + return cmd->finished; +#endif } int finish_command(struct child_process *cmd) { - return wait_or_whine(cmd->pid); + return wait_or_whine(cmd, /*block=*/true); } int run_command(struct child_process *cmd) diff --git a/tools/lib/subcmd/run-command.h b/tools/lib/subcmd/run-command.h index 17d969c6add3..b2d39de6e690 100644 --- a/tools/lib/subcmd/run-command.h +++ b/tools/lib/subcmd/run-command.h @@ -41,15 +41,20 @@ struct child_process { int err; const char *dir; const char *const *env; + int finish_result; unsigned no_stdin:1; unsigned no_stdout:1; unsigned no_stderr:1; unsigned exec_cmd:1; /* if this is to be external sub-command */ unsigned stdout_to_stderr:1; + unsigned finished:1; void (*preexec_cb)(void); + /* If set, call function in child rather than doing an exec. */ + int (*no_exec_cmd)(struct child_process *process); }; int start_command(struct child_process *); +int check_if_command_finished(struct child_process *); int finish_command(struct child_process *); int run_command(struct child_process *); diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index 794a375dad36..c742b08815dc 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -5,8 +5,7 @@ #include <stdarg.h> #include <stdlib.h> #include <stdio.h> - -#define NORETURN __attribute__((__noreturn__)) +#include <linux/compiler.h> static inline void report(const char *prefix, const char *err, va_list params) { @@ -15,14 +14,14 @@ static inline void report(const char *prefix, const char *err, va_list params) fprintf(stderr, " %s%s\n", prefix, msg); } -static NORETURN inline void die(const char *err, ...) +static __noreturn inline void die(const char *err, ...) { va_list params; va_start(params, err); report(" Fatal: ", err, params); - exit(128); va_end(params); + exit(128); } #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) @@ -50,15 +49,8 @@ static NORETURN inline void die(const char *err, ...) static inline void *xrealloc(void *ptr, size_t size) { void *ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) { - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } + if (!ret) + die("Out of memory, realloc failed"); return ret; } diff --git a/tools/lib/symbol/Build b/tools/lib/symbol/Build new file mode 100644 index 000000000000..9b9a9c78d3c9 --- /dev/null +++ b/tools/lib/symbol/Build @@ -0,0 +1 @@ +libsymbol-y += kallsyms.o diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile new file mode 100644 index 000000000000..426b845edfac --- /dev/null +++ b/tools/lib/symbol/Makefile @@ -0,0 +1,118 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.include +include ../../scripts/utilities.mak # QUIET_CLEAN + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +CC ?= $(CROSS_COMPILE)gcc +AR ?= $(CROSS_COMPILE)ar +LD ?= $(CROSS_COMPILE)ld + +MAKEFLAGS += --no-print-directory + +INSTALL = install + + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +LIBFILE = $(OUTPUT)libsymbol.a + +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu11 -U_FORTIFY_SOURCE -fPIC + +ifeq ($(DEBUG),0) + CFLAGS += -O3 +endif + +ifeq ($(DEBUG),0) + CFLAGS += -D_FORTIFY_SOURCE +endif + +# Treat warnings as errors unless directed not to +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 + +CFLAGS += -I$(srctree)/tools/lib +CFLAGS += -I$(srctree)/tools/include + +RM = rm -f + +SYMBOL_IN := $(OUTPUT)libsymbol-in.o + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + +all: + +export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include +include $(srctree)/tools/scripts/Makefile.include + +all: fixdep $(LIBFILE) + +$(SYMBOL_IN): FORCE + @$(MAKE) $(build)=libsymbol + +$(LIBFILE): $(SYMBOL_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN) + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$2' ]; then \ + $(INSTALL) -d -m 755 '$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +HDRS := kallsyms.h +INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/symbol +INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) + +$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(INSTALL_HDRS_PFX)/,644) + +install_headers: $(INSTALL_HDRS) + $(call QUIET_INSTALL, libsymbol_headers) + +install: install_lib install_headers + +clean: + $(call QUIET_CLEAN, libsymbol) $(RM) $(LIBFILE); \ + find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + +FORCE: + +.PHONY: clean FORCE diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h index 72ab9870454b..542f9b059c3b 100644 --- a/tools/lib/symbol/kallsyms.h +++ b/tools/lib/symbol/kallsyms.h @@ -7,7 +7,7 @@ #include <linux/types.h> #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif static inline u8 kallsyms2elf_binding(char type) diff --git a/tools/lib/thermal/.gitignore b/tools/lib/thermal/.gitignore new file mode 100644 index 000000000000..5d2aeda80fea --- /dev/null +++ b/tools/lib/thermal/.gitignore @@ -0,0 +1,2 @@ +libthermal.so* +libthermal.pc diff --git a/tools/lib/thermal/Build b/tools/lib/thermal/Build new file mode 100644 index 000000000000..4a892d9e24f9 --- /dev/null +++ b/tools/lib/thermal/Build @@ -0,0 +1,5 @@ +libthermal-y += commands.o +libthermal-y += events.o +libthermal-y += thermal_nl.o +libthermal-y += sampling.o +libthermal-y += thermal.o diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile new file mode 100644 index 000000000000..41aa7a324ff4 --- /dev/null +++ b/tools/lib/thermal/Makefile @@ -0,0 +1,159 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +# Most of this file is copied from tools/lib/perf/Makefile + +LIBTHERMAL_VERSION = 0 +LIBTHERMAL_PATCHLEVEL = 0 +LIBTHERMAL_EXTRAVERSION = 1 + +MAKEFLAGS += --no-print-directory + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +include $(srctree)/tools/scripts/Makefile.include +include $(srctree)/tools/scripts/Makefile.arch + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +NL3_CFLAGS = $(shell pkg-config --cflags libnl-3.0 2>/dev/null) +ifeq ($(NL3_CFLAGS),) +NL3_CFLAGS = -I/usr/include/libnl3 +endif + +INCLUDES = \ +-I$(srctree)/tools/lib/thermal/include \ +-I$(srctree)/tools/lib/ \ +-I$(srctree)/tools/include \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/ \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ +-I$(srctree)/tools/include/uapi + +# Append required CFLAGS +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += -Werror -Wall +override CFLAGS += -fPIC +override CFLAGS += $(NL3_CFLAGS) +override CFLAGS += $(INCLUDES) +override CFLAGS += -fvisibility=hidden +override CFGLAS += -Wl,-L. +override CFGLAS += -Wl,-lthermal + +all: + +export srctree OUTPUT CC LD CFLAGS V +export DESTDIR DESTDIR_SQ + +include $(srctree)/tools/build/Makefile.include + +VERSION_SCRIPT := libthermal.map + +PATCHLEVEL = $(LIBTHERMAL_PATCHLEVEL) +EXTRAVERSION = $(LIBTHERMAL_EXTRAVERSION) +VERSION = $(LIBTHERMAL_VERSION).$(LIBTHERMAL_PATCHLEVEL).$(LIBTHERMAL_EXTRAVERSION) + +LIBTHERMAL_SO := $(OUTPUT)libthermal.so.$(VERSION) +LIBTHERMAL_A := $(OUTPUT)libthermal.a +LIBTHERMAL_IN := $(OUTPUT)libthermal-in.o +LIBTHERMAL_PC := $(OUTPUT)libthermal.pc +LIBTHERMAL_ALL := $(LIBTHERMAL_A) $(OUTPUT)libthermal.so* + +THERMAL_UAPI := include/uapi/linux/thermal.h + +$(THERMAL_UAPI): FORCE + ln -sf $(srctree)/$@ $(srctree)/tools/$@ + +$(LIBTHERMAL_IN): FORCE + $(Q)$(MAKE) $(build)=libthermal + +$(LIBTHERMAL_A): $(LIBTHERMAL_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_IN) + +$(LIBTHERMAL_SO): $(LIBTHERMAL_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal.so \ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ + @ln -sf $(@F) $(OUTPUT)libthermal.so + @ln -sf $(@F) $(OUTPUT)libthermal.so.$(LIBTHERMAL_VERSION) + + +libs: $(THERMAL_UAPI) $(LIBTHERMAL_A) $(LIBTHERMAL_SO) $(LIBTHERMAL_PC) + +all: fixdep + $(Q)$(MAKE) libs + +clean: + $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \ + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) \ + .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) \ + $(srctree)/tools/$(THERMAL_UAPI) + +$(LIBTHERMAL_PC): + $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ + -e "s|@LIBDIR@|$(libdir_SQ)|" \ + -e "s|@VERSION@|$(VERSION)|" \ + < libthermal.pc.template > $@ + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: libs + $(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fR --preserve=mode,timestamp $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,include/thermal.h,$(prefix)/include/thermal,644); \ + +install_pkgconfig: $(LIBTHERMAL_PC) + $(call QUIET_INSTALL, $(LIBTHERMAL_PC)) \ + $(call do_install,$(LIBTHERMAL_PC),$(libdir_SQ)/pkgconfig,644) + +install_doc: + $(Q)$(MAKE) -C Documentation install-man install-html install-examples + +install: install_lib install_headers install_pkgconfig + +FORCE: + +.PHONY: all install clean FORCE diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c new file mode 100644 index 000000000000..4998cec793ed --- /dev/null +++ b/tools/lib/thermal/commands.c @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <limits.h> + +#include <thermal.h> +#include "thermal_nl.h" + +static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = { + /* Thermal zone */ + [THERMAL_GENL_ATTR_TZ] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_TRIP_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_TYPE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_HYST] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_MODE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_NAME] = { .type = NLA_STRING }, + + /* Governor(s) */ + [THERMAL_GENL_ATTR_TZ_GOV] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_GOV_NAME] = { .type = NLA_STRING }, + + /* Cooling devices */ + [THERMAL_GENL_ATTR_CDEV] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_CDEV_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING }, + + /* Thresholds */ + [THERMAL_GENL_ATTR_THRESHOLD] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_THRESHOLD_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_THRESHOLD_DIRECTION] = { .type = NLA_U32 }, +}; + +static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz) +{ + struct nlattr *attr; + struct thermal_zone *__tz = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_ID) { + + size++; + + __tz = realloc(__tz, sizeof(*__tz) * (size + 2)); + if (!__tz) + return THERMAL_ERROR; + + __tz[size - 1].id = nla_get_u32(attr); + } + + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_NAME) + nla_strlcpy(__tz[size - 1].name, attr, + THERMAL_NAME_LENGTH); + } + + if (__tz) + __tz[size].id = -1; + + *tz = __tz; + + return THERMAL_SUCCESS; +} + +static int parse_cdev_get(struct genl_info *info, struct thermal_cdev **cdev) +{ + struct nlattr *attr; + struct thermal_cdev *__cdev = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_CDEV], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_ID) { + + size++; + + __cdev = realloc(__cdev, sizeof(*__cdev) * (size + 2)); + if (!__cdev) + return THERMAL_ERROR; + + __cdev[size - 1].id = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_NAME) { + nla_strlcpy(__cdev[size - 1].name, attr, + THERMAL_NAME_LENGTH); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_CUR_STATE) + __cdev[size - 1].cur_state = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_MAX_STATE) + __cdev[size - 1].max_state = nla_get_u32(attr); + } + + if (__cdev) + __cdev[size].id = -1; + + *cdev = __cdev; + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_trip(struct genl_info *info, struct thermal_zone *tz) +{ + struct nlattr *attr; + struct thermal_trip *__tt = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ_TRIP], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_ID) { + + size++; + + __tt = realloc(__tt, sizeof(*__tt) * (size + 2)); + if (!__tt) + return THERMAL_ERROR; + + __tt[size - 1].id = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TYPE) + __tt[size - 1].type = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TEMP) + __tt[size - 1].temp = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_HYST) + __tt[size - 1].hyst = nla_get_u32(attr); + } + + if (__tt) + __tt[size].id = -1; + + tz->trip = __tt; + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_temp(struct genl_info *info, struct thermal_zone *tz) +{ + int id = -1; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_ID]) + id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + if (tz->id != id) + return THERMAL_ERROR; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]) + tz->temp = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]); + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz) +{ + int id = -1; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_ID]) + id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + if (tz->id != id) + return THERMAL_ERROR; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME]) { + nla_strlcpy(tz->governor, + info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME], + THERMAL_NAME_LENGTH); + } + + return THERMAL_SUCCESS; +} + +static int parse_threshold_get(struct genl_info *info, struct thermal_zone *tz) +{ + struct nlattr *attr; + struct thermal_threshold *__tt = NULL; + size_t size = 0; + int rem; + + /* + * The size contains the size of the array and we want to + * access the last element, size - 1. + * + * The variable size is initialized to zero but it will be + * then incremented by the first if() statement. The message + * attributes are ordered, so the first if() statement will be + * always called before the second one. If it happens that is + * not the case, then it is a kernel bug. + */ + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_THRESHOLD], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_THRESHOLD_TEMP) { + + size++; + + __tt = realloc(__tt, sizeof(*__tt) * (size + 2)); + if (!__tt) + return THERMAL_ERROR; + + __tt[size - 1].temperature = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_THRESHOLD_DIRECTION) + __tt[size - 1].direction = nla_get_u32(attr); + } + + if (__tt) + __tt[size].temperature = INT_MAX; + + tz->thresholds = __tt; + + return THERMAL_SUCCESS; +} + +static int handle_netlink(struct nl_cache_ops *unused, + struct genl_cmd *cmd, + struct genl_info *info, void *arg) +{ + int ret; + + switch (cmd->c_id) { + + case THERMAL_GENL_CMD_TZ_GET_ID: + ret = parse_tz_get(info, arg); + break; + + case THERMAL_GENL_CMD_CDEV_GET: + ret = parse_cdev_get(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_TEMP: + ret = parse_tz_get_temp(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_TRIP: + ret = parse_tz_get_trip(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_GOV: + ret = parse_tz_get_gov(info, arg); + break; + + case THERMAL_GENL_CMD_THRESHOLD_GET: + ret = parse_threshold_get(info, arg); + break; + + default: + return THERMAL_ERROR; + } + + return ret; +} + +static struct genl_cmd thermal_cmds[] = { + { + .c_id = THERMAL_GENL_CMD_TZ_GET_ID, + .c_name = (char *)"List thermal zones", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_GOV, + .c_name = (char *)"Get governor", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_TEMP, + .c_name = (char *)"Get thermal zone temperature", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_TRIP, + .c_name = (char *)"Get thermal zone trip points", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_CDEV_GET, + .c_name = (char *)"Get cooling devices", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_GET, + .c_name = (char *)"Get thresholds list", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_ADD, + .c_name = (char *)"Add a threshold", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_DELETE, + .c_name = (char *)"Delete a threshold", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_FLUSH, + .c_name = (char *)"Flush the thresholds", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, +}; + +static struct genl_ops thermal_cmd_ops = { + .o_name = (char *)"thermal", + .o_cmds = thermal_cmds, + .o_ncmds = ARRAY_SIZE(thermal_cmds), +}; + +struct cmd_param { + int tz_id; + int temp; + int direction; +}; + +typedef int (*cmd_cb_t)(struct nl_msg *, struct cmd_param *); + +static int thermal_genl_tz_id_encode(struct nl_msg *msg, struct cmd_param *p) +{ + if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id)) + return -1; + + return 0; +} + +static int thermal_genl_threshold_encode(struct nl_msg *msg, struct cmd_param *p) +{ + if (thermal_genl_tz_id_encode(msg, p)) + return -1; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_TEMP, p->temp)) + return -1; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, p->direction)) + return -1; + + return 0; +} + +static thermal_error_t thermal_genl_auto(struct thermal_handler *th, cmd_cb_t cmd_cb, + struct cmd_param *param, + int cmd, int flags, void *arg) +{ + thermal_error_t ret = THERMAL_ERROR; + struct nl_msg *msg; + void *hdr; + + msg = nlmsg_alloc(); + if (!msg) + return THERMAL_ERROR; + + hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id, + 0, flags, cmd, THERMAL_GENL_VERSION); + if (!hdr) + goto out; + + if (cmd_cb && cmd_cb(msg, param)) + goto out; + + if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg)) + goto out; + + ret = THERMAL_SUCCESS; +out: + nlmsg_free(msg); + + return ret; +} + +thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz) +{ + return thermal_genl_auto(th, NULL, NULL, THERMAL_GENL_CMD_TZ_GET_ID, + NLM_F_DUMP | NLM_F_ACK, tz); +} + +thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc) +{ + return thermal_genl_auto(th, NULL, NULL, THERMAL_GENL_CMD_CDEV_GET, + NLM_F_DUMP | NLM_F_ACK, tc); +} + +thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_TRIP, 0, tz); +} + +thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); +} + +thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_get(struct thermal_handler *th, + struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_GET, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_add(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction) +{ + struct cmd_param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_auto(th, thermal_genl_threshold_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_ADD, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_delete(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction) +{ + struct cmd_param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_auto(th, thermal_genl_threshold_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_DELETE, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_flush(struct thermal_handler *th, + struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_FLUSH, 0, tz); +} + +thermal_error_t thermal_cmd_exit(struct thermal_handler *th) +{ + if (genl_unregister_family(&thermal_cmd_ops)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_cmd, th->cb_cmd); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_cmd_init(struct thermal_handler *th) +{ + int ret; + int family; + + if (nl_thermal_connect(&th->sk_cmd, &th->cb_cmd)) + return THERMAL_ERROR; + + ret = genl_register_family(&thermal_cmd_ops); + if (ret) + return THERMAL_ERROR; + + ret = genl_ops_resolve(th->sk_cmd, &thermal_cmd_ops); + if (ret) + return THERMAL_ERROR; + + family = genl_ctrl_resolve(th->sk_cmd, "nlctrl"); + if (family != GENL_ID_CTRL) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c new file mode 100644 index 000000000000..bd851c869029 --- /dev/null +++ b/tools/lib/thermal/events.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <linux/netlink.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + + +#include <thermal.h> +#include "thermal_nl.h" + +/* + * Optimization: fill this array to tell which event we do want to pay + * attention to. That happens at init time with the ops + * structure. Each ops will enable the event and the general handler + * will be able to discard the event if there is not ops associated + * with it. + */ +static int enabled_ops[__THERMAL_GENL_EVENT_MAX]; + +static int handle_thermal_event(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + struct thermal_handler_param *thp = arg; + struct thermal_events_ops *ops = &thp->th->ops->events; + + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + arg = thp->arg; + + /* + * This is an event we don't care of, bail out. + */ + if (!enabled_ops[genlhdr->cmd]) + return THERMAL_SUCCESS; + + switch (genlhdr->cmd) { + + case THERMAL_GENL_EVENT_TZ_CREATE: + return ops->tz_create(nla_get_string(attrs[THERMAL_GENL_ATTR_TZ_NAME]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_DELETE: + return ops->tz_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_ENABLE: + return ops->tz_enable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_DISABLE: + return ops->tz_disable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_CHANGE: + return ops->trip_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_ADD: + return ops->trip_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_DELETE: + return ops->trip_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_UP: + return ops->trip_high(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_DOWN: + return ops->trip_low(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + + case THERMAL_GENL_EVENT_CDEV_ADD: + return ops->cdev_add(nla_get_string(attrs[THERMAL_GENL_ATTR_CDEV_NAME]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_MAX_STATE]), arg); + + case THERMAL_GENL_EVENT_CDEV_DELETE: + return ops->cdev_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), arg); + + case THERMAL_GENL_EVENT_CDEV_STATE_UPDATE: + return ops->cdev_update(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_CUR_STATE]), arg); + + case THERMAL_GENL_EVENT_TZ_GOV_CHANGE: + return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_ADD: + return ops->threshold_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_DELETE: + return ops->threshold_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_FLUSH: + return ops->threshold_flush(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_UP: + return ops->threshold_up(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_PREV_TEMP]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_DOWN: + return ops->threshold_down(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_PREV_TEMP]), arg); + + default: + return -1; + } +} + +static void thermal_events_ops_init(struct thermal_events_ops *ops) +{ + enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; + enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; + enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; + enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; + enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; + enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_ADD] = !!ops->threshold_add; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_DELETE] = !!ops->threshold_delete; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_FLUSH] = !!ops->threshold_flush; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_UP] = !!ops->threshold_up; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_DOWN] = !!ops->threshold_down; +} + +thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg) +{ + struct thermal_handler_param thp = { .th = th, .arg = arg }; + + if (!th) + return THERMAL_ERROR; + + if (nl_cb_set(th->cb_event, NL_CB_VALID, NL_CB_CUSTOM, + handle_thermal_event, &thp)) + return THERMAL_ERROR; + + return nl_recvmsgs(th->sk_event, th->cb_event); +} + +int thermal_events_fd(struct thermal_handler *th) +{ + if (!th) + return -1; + + return nl_socket_get_fd(th->sk_event); +} + +thermal_error_t thermal_events_exit(struct thermal_handler *th) +{ + if (nl_unsubscribe_thermal(th->sk_event, th->cb_event, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_event, th->cb_event); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_events_init(struct thermal_handler *th) +{ + thermal_events_ops_init(&th->ops->events); + + if (nl_thermal_connect(&th->sk_event, &th->cb_event)) + return THERMAL_ERROR; + + if (nl_subscribe_thermal(th->sk_event, th->cb_event, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h new file mode 100644 index 000000000000..818ecdfb46e5 --- /dev/null +++ b/tools/lib/thermal/include/thermal.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __LIBTHERMAL_H +#define __LIBTHERMAL_H + +#include <linux/thermal.h> +#include <sys/types.h> + +#ifndef LIBTHERMAL_API +#define LIBTHERMAL_API __attribute__((visibility("default"))) +#endif + +#ifndef THERMAL_THRESHOLD_WAY_UP +#define THERMAL_THRESHOLD_WAY_UP 0x1 +#endif + +#ifndef THERMAL_THRESHOLD_WAY_DOWN +#define THERMAL_THRESHOLD_WAY_DOWN 0x2 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct thermal_sampling_ops { + int (*tz_temp)(int tz_id, int temp, void *arg); +}; + +struct thermal_events_ops { + int (*tz_create)(const char *name, int tz_id, void *arg); + int (*tz_delete)(int tz_id, void *arg); + int (*tz_enable)(int tz_id, void *arg); + int (*tz_disable)(int tz_id, void *arg); + int (*trip_high)(int tz_id, int trip_id, int temp, void *arg); + int (*trip_low)(int tz_id, int trip_id, int temp, void *arg); + int (*trip_add)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg); + int (*trip_change)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg); + int (*trip_delete)(int tz_id, int trip_id, void *arg); + int (*cdev_add)(const char *name, int cdev_id, int max_state, void *arg); + int (*cdev_delete)(int cdev_id, void *arg); + int (*cdev_update)(int cdev_id, int cur_state, void *arg); + int (*gov_change)(int tz_id, const char *gov_name, void *arg); + int (*threshold_add)(int tz_id, int temperature, int direction, void *arg); + int (*threshold_delete)(int tz_id, int temperature, int direction, void *arg); + int (*threshold_flush)(int tz_id, void *arg); + int (*threshold_up)(int tz_id, int temp, int prev_temp, void *arg); + int (*threshold_down)(int tz_id, int temp, int prev_temp, void *arg); +}; + +struct thermal_ops { + struct thermal_sampling_ops sampling; + struct thermal_events_ops events; +}; + +struct thermal_trip { + int id; + int type; + int temp; + int hyst; +}; + +struct thermal_threshold { + int temperature; + int direction; +}; + +struct thermal_zone { + int id; + int temp; + char name[THERMAL_NAME_LENGTH]; + char governor[THERMAL_NAME_LENGTH]; + struct thermal_trip *trip; + struct thermal_threshold *thresholds; +}; + +struct thermal_cdev { + int id; + char name[THERMAL_NAME_LENGTH]; + int max_state; + int min_state; + int cur_state; +}; + +typedef enum { + THERMAL_ERROR = -1, + THERMAL_SUCCESS = 0, +} thermal_error_t; + +struct thermal_handler; + +typedef int (*cb_tz_t)(struct thermal_zone *, void *); + +typedef int (*cb_tt_t)(struct thermal_trip *, void *); + +typedef int (*cb_tc_t)(struct thermal_cdev *, void *); + +typedef int (*cb_th_t)(struct thermal_threshold *, void *); + +LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg); + +LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg); + +LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg); + +LIBTHERMAL_API int for_each_thermal_threshold(struct thermal_threshold *th, cb_th_t cb, void *arg); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, + const char *name); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_discover(struct thermal_handler *th); + +LIBTHERMAL_API struct thermal_handler *thermal_init(struct thermal_ops *ops); + +LIBTHERMAL_API void thermal_exit(struct thermal_handler *th); + +/* + * Netlink thermal events + */ +LIBTHERMAL_API thermal_error_t thermal_events_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_events_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg); + +LIBTHERMAL_API int thermal_events_fd(struct thermal_handler *th); + +/* + * Netlink thermal commands + */ +LIBTHERMAL_API thermal_error_t thermal_cmd_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_cmd_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, + struct thermal_zone **tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, + struct thermal_cdev **tc); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_get(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_add(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_delete(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_flush(struct thermal_handler *th, + struct thermal_zone *tz); + +/* + * Netlink thermal samples + */ +LIBTHERMAL_API thermal_error_t thermal_sampling_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_sampling_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg); + +LIBTHERMAL_API int thermal_sampling_fd(struct thermal_handler *th); + +#endif /* __LIBTHERMAL_H */ + +#ifdef __cplusplus +} +#endif diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map new file mode 100644 index 000000000000..1d3d0c04e4b6 --- /dev/null +++ b/tools/lib/thermal/libthermal.map @@ -0,0 +1,33 @@ +LIBTHERMAL_0.0.1 { + global: + for_each_thermal_zone; + for_each_thermal_trip; + for_each_thermal_cdev; + for_each_thermal_threshold; + thermal_zone_find_by_name; + thermal_zone_find_by_id; + thermal_zone_discover; + thermal_init; + thermal_exit; + thermal_events_exit; + thermal_events_init; + thermal_events_handle; + thermal_events_fd; + thermal_cmd_exit; + thermal_cmd_init; + thermal_cmd_get_tz; + thermal_cmd_get_cdev; + thermal_cmd_get_trip; + thermal_cmd_get_governor; + thermal_cmd_get_temp; + thermal_cmd_threshold_get; + thermal_cmd_threshold_add; + thermal_cmd_threshold_delete; + thermal_cmd_threshold_flush; + thermal_sampling_exit; + thermal_sampling_init; + thermal_sampling_handle; + thermal_sampling_fd; +local: + *; +}; diff --git a/tools/lib/thermal/libthermal.pc.template b/tools/lib/thermal/libthermal.pc.template new file mode 100644 index 000000000000..ac24d0ab17f5 --- /dev/null +++ b/tools/lib/thermal/libthermal.pc.template @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=${prefix}/include + +Name: libthermal +Description: thermal library +Requires: libnl-3.0 libnl-genl-3.0 +Version: @VERSION@ +Libs: -L${libdir} -lnl-genl-3 -lnl-3 +Cflags: -I${includedir} -I${include}/libnl3 diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c new file mode 100644 index 000000000000..f67c1f9ea1d7 --- /dev/null +++ b/tools/lib/thermal/sampling.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +static int handle_thermal_sample(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + struct thermal_handler_param *thp = arg; + struct thermal_handler *th = thp->th; + + arg = thp->arg; + + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + switch (genlhdr->cmd) { + + case THERMAL_GENL_SAMPLING_TEMP: + return th->ops->sampling.tz_temp( + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + default: + return THERMAL_ERROR; + } +} + +thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg) +{ + struct thermal_handler_param thp = { .th = th, .arg = arg }; + + if (!th) + return THERMAL_ERROR; + + if (nl_cb_set(th->cb_sampling, NL_CB_VALID, NL_CB_CUSTOM, + handle_thermal_sample, &thp)) + return THERMAL_ERROR; + + return nl_recvmsgs(th->sk_sampling, th->cb_sampling); +} + +int thermal_sampling_fd(struct thermal_handler *th) +{ + if (!th) + return -1; + + return nl_socket_get_fd(th->sk_sampling); +} + +thermal_error_t thermal_sampling_exit(struct thermal_handler *th) +{ + if (nl_unsubscribe_thermal(th->sk_sampling, th->cb_sampling, + THERMAL_GENL_SAMPLING_GROUP_NAME)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_sampling, th->cb_sampling); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_sampling_init(struct thermal_handler *th) +{ + if (nl_thermal_connect(&th->sk_sampling, &th->cb_sampling)) + return THERMAL_ERROR; + + if (nl_subscribe_thermal(th->sk_sampling, th->cb_sampling, + THERMAL_GENL_SAMPLING_GROUP_NAME)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c new file mode 100644 index 000000000000..6f02e3539159 --- /dev/null +++ b/tools/lib/thermal/thermal.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdio.h> +#include <limits.h> +#include <thermal.h> + +#include "thermal_nl.h" + +int for_each_thermal_threshold(struct thermal_threshold *th, cb_th_t cb, void *arg) +{ + int i, ret = 0; + + if (!th) + return 0; + + for (i = 0; th[i].temperature != INT_MAX; i++) + ret |= cb(&th[i], arg); + + return ret; +} + +int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg) +{ + int i, ret = 0; + + if (!cdev) + return 0; + + for (i = 0; cdev[i].id != -1; i++) + ret |= cb(&cdev[i], arg); + + return ret; +} + +int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg) +{ + int i, ret = 0; + + if (!tt) + return 0; + + for (i = 0; tt[i].id != -1; i++) + ret |= cb(&tt[i], arg); + + return ret; +} + +int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg) +{ + int i, ret = 0; + + if (!tz) + return 0; + + for (i = 0; tz[i].id != -1; i++) + ret |= cb(&tz[i], arg); + + return ret; +} + +struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, + const char *name) +{ + int i; + + if (!tz || !name) + return NULL; + + for (i = 0; tz[i].id != -1; i++) { + if (!strcmp(tz[i].name, name)) + return &tz[i]; + } + + return NULL; +} + +struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id) +{ + int i; + + if (!tz || id < 0) + return NULL; + + for (i = 0; tz[i].id != -1; i++) { + if (tz[i].id == id) + return &tz[i]; + } + + return NULL; +} + +static int __thermal_zone_discover(struct thermal_zone *tz, void *th) +{ + if (thermal_cmd_get_trip(th, tz) < 0) + return -1; + + if (thermal_cmd_threshold_get(th, tz)) + return -1; + + if (thermal_cmd_get_governor(th, tz)) + return -1; + + return 0; +} + +struct thermal_zone *thermal_zone_discover(struct thermal_handler *th) +{ + struct thermal_zone *tz; + + if (thermal_cmd_get_tz(th, &tz) < 0) + return NULL; + + if (for_each_thermal_zone(tz, __thermal_zone_discover, th)) + return NULL; + + return tz; +} + +void thermal_exit(struct thermal_handler *th) +{ + thermal_cmd_exit(th); + thermal_events_exit(th); + thermal_sampling_exit(th); + + free(th); +} + +struct thermal_handler *thermal_init(struct thermal_ops *ops) +{ + struct thermal_handler *th; + + th = malloc(sizeof(*th)); + if (!th) + return NULL; + th->ops = ops; + + if (thermal_events_init(th)) + goto out_free; + + if (thermal_sampling_init(th)) + goto out_free; + + if (thermal_cmd_init(th)) + goto out_free; + + return th; + +out_free: + free(th); + + return NULL; +} diff --git a/tools/lib/thermal/thermal_nl.c b/tools/lib/thermal/thermal_nl.c new file mode 100644 index 000000000000..b05cf9569858 --- /dev/null +++ b/tools/lib/thermal/thermal_nl.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +struct handler_args { + const char *group; + int id; +}; + +static __thread int err; +static __thread int done; + +static int nl_seq_check_handler(struct nl_msg *msg, void *arg) +{ + return NL_OK; +} + +static int nl_error_handler(struct sockaddr_nl *nla, struct nlmsgerr *nl_err, + void *arg) +{ + int *ret = arg; + + if (ret) + *ret = nl_err->error; + + return NL_STOP; +} + +static int nl_finish_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + + if (ret) + *ret = 1; + + return NL_OK; +} + +static int nl_ack_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + + if (ret) + *ret = 1; + + return NL_OK; +} + +int nl_send_msg(struct nl_sock *sock, struct nl_cb *cb, struct nl_msg *msg, + int (*rx_handler)(struct nl_msg *, void *), void *data) +{ + if (!rx_handler) + return THERMAL_ERROR; + + err = nl_send_auto_complete(sock, msg); + if (err < 0) + return err; + + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, rx_handler, data); + + err = done = 0; + + while (err == 0 && done == 0) + nl_recvmsgs(sock, cb); + + return err; +} + +static int nl_family_handler(struct nl_msg *msg, void *arg) +{ + struct handler_args *grp = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int rem_mcgrp; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return THERMAL_ERROR; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) { + + struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX, + nla_data(mcgrp), nla_len(mcgrp), NULL); + + if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] || + !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]) + continue; + + if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]), + grp->group, + nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]))) + continue; + + grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]); + + break; + } + + return THERMAL_SUCCESS; +} + +static int nl_get_multicast_id(struct nl_sock *sock, struct nl_cb *cb, + const char *family, const char *group) +{ + struct nl_msg *msg; + int ret = 0, ctrlid; + struct handler_args grp = { + .group = group, + .id = -ENOENT, + }; + + msg = nlmsg_alloc(); + if (!msg) + return THERMAL_ERROR; + + ctrlid = genl_ctrl_resolve(sock, "nlctrl"); + + genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + + nla_put_string(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = nl_send_msg(sock, cb, msg, nl_family_handler, &grp); + if (ret) + goto nla_put_failure; + + ret = grp.id; + +nla_put_failure: + nlmsg_free(msg); + return ret; +} + +int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb) +{ + struct nl_cb *cb; + struct nl_sock *sock; + + cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!cb) + return THERMAL_ERROR; + + sock = nl_socket_alloc(); + if (!sock) + goto out_cb_free; + + if (genl_connect(sock)) + goto out_socket_free; + + if (nl_cb_err(cb, NL_CB_CUSTOM, nl_error_handler, &err) || + nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, nl_finish_handler, &done) || + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, nl_ack_handler, &done) || + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check_handler, &done)) + return THERMAL_ERROR; + + *nl_sock = sock; + *nl_cb = cb; + + return THERMAL_SUCCESS; + +out_socket_free: + nl_socket_free(sock); +out_cb_free: + nl_cb_put(cb); + return THERMAL_ERROR; +} + +void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb) +{ + nl_close(nl_sock); + nl_socket_free(nl_sock); + nl_cb_put(nl_cb); +} + +int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group) +{ + int mcid; + + mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME, + group); + if (mcid < 0) + return THERMAL_ERROR; + + if (nl_socket_drop_membership(nl_sock, mcid)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} + +int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group) +{ + int mcid; + + mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME, + group); + if (mcid < 0) + return THERMAL_ERROR; + + if (nl_socket_add_membership(nl_sock, mcid)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/thermal_nl.h b/tools/lib/thermal/thermal_nl.h new file mode 100644 index 000000000000..ddf635642f07 --- /dev/null +++ b/tools/lib/thermal/thermal_nl.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_H +#define __THERMAL_H + +#include <netlink/netlink.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/mngt.h> +#include <netlink/genl/ctrl.h> + +struct thermal_handler { + int done; + int error; + struct thermal_ops *ops; + struct nl_msg *msg; + struct nl_sock *sk_event; + struct nl_sock *sk_sampling; + struct nl_sock *sk_cmd; + struct nl_cb *cb_cmd; + struct nl_cb *cb_event; + struct nl_cb *cb_sampling; +}; + +struct thermal_handler_param { + struct thermal_handler *th; + void *arg; +}; + +/* + * Low level netlink + */ +extern int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group); + +extern int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group); + +extern int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb); + +extern void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb); + +extern int nl_send_msg(struct nl_sock *sock, struct nl_cb *nl_cb, struct nl_msg *msg, + int (*rx_handler)(struct nl_msg *, void *), + void *data); + +#endif /* __THERMAL_H */ diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore deleted file mode 100644 index 7123c70b9ebc..000000000000 --- a/tools/lib/traceevent/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -TRACEEVENT-CFLAGS -libtraceevent-dynamic-list -libtraceevent.so.* diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build deleted file mode 100644 index f9a5d79578f5..000000000000 --- a/tools/lib/traceevent/Build +++ /dev/null @@ -1,8 +0,0 @@ -libtraceevent-y += event-parse.o -libtraceevent-y += event-plugin.o -libtraceevent-y += trace-seq.o -libtraceevent-y += parse-filter.o -libtraceevent-y += parse-utils.o -libtraceevent-y += kbuffer-parse.o -libtraceevent-y += tep_strerror.o -libtraceevent-y += event-parse-api.o diff --git a/tools/lib/traceevent/Documentation/Makefile b/tools/lib/traceevent/Documentation/Makefile deleted file mode 100644 index aa72ab96c3c1..000000000000 --- a/tools/lib/traceevent/Documentation/Makefile +++ /dev/null @@ -1,207 +0,0 @@ -include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak - -# This Makefile and manpage XSL files were taken from tools/perf/Documentation -# and modified for libtraceevent. - -MAN3_TXT= \ - $(wildcard libtraceevent-*.txt) \ - libtraceevent.txt - -MAN_TXT = $(MAN3_TXT) -_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) -_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) -_DOC_MAN3=$(patsubst %.txt,%.3,$(MAN3_TXT)) - -MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) -MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) -DOC_MAN3=$(addprefix $(OUTPUT),$(_DOC_MAN3)) - -# Make the path relative to DESTDIR, not prefix -ifndef DESTDIR -prefix?=$(HOME) -endif -bindir?=$(prefix)/bin -htmldir?=$(prefix)/share/doc/libtraceevent-doc -pdfdir?=$(prefix)/share/doc/libtraceevent-doc -mandir?=$(prefix)/share/man -man3dir=$(mandir)/man3 - -ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf -ASCIIDOC_HTML = xhtml11 -MANPAGE_XSL = manpage-normal.xsl -XMLTO_EXTRA = -INSTALL?=install -RM ?= rm -f - -ifdef USE_ASCIIDOCTOR -ASCIIDOC = asciidoctor -ASCIIDOC_EXTRA = -a compat-mode -ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions -ASCIIDOC_EXTRA += -a mansource="libtraceevent" -a manmanual="libtraceevent Manual" -ASCIIDOC_HTML = xhtml5 -endif - -XMLTO=xmlto - -_tmp_tool_path := $(call get-executable,$(ASCIIDOC)) -ifeq ($(_tmp_tool_path),) - missing_tools = $(ASCIIDOC) -endif - -ifndef USE_ASCIIDOCTOR -_tmp_tool_path := $(call get-executable,$(XMLTO)) -ifeq ($(_tmp_tool_path),) - missing_tools += $(XMLTO) -endif -endif - -# -# For asciidoc ... -# -7.1.2, no extra settings are needed. -# 8.0-, set ASCIIDOC8. -# - -# -# For docbook-xsl ... -# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) -# 1.69.0, no extra settings are needed? -# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? -# 1.71.1, no extra settings are needed? -# 1.72.0, set DOCBOOK_XSL_172. -# 1.73.0-, set ASCIIDOC_NO_ROFF -# - -# -# If you had been using DOCBOOK_XSL_172 in an attempt to get rid -# of 'the ".ft C" problem' in your generated manpages, and you -# instead ended up with weird characters around callouts, try -# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). -# - -ifdef ASCIIDOC8 -ASCIIDOC_EXTRA += -a asciidoc7compatible -endif -ifdef DOCBOOK_XSL_172 -ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff -MANPAGE_XSL = manpage-1.72.xsl -else - ifdef ASCIIDOC_NO_ROFF - # docbook-xsl after 1.72 needs the regular XSL, but will not - # pass-thru raw roff codes from asciidoc.conf, so turn them off. - ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff - endif -endif -ifdef MAN_BOLD_LITERAL -XMLTO_EXTRA += -m manpage-bold-literal.xsl -endif -ifdef DOCBOOK_SUPPRESS_SP -XMLTO_EXTRA += -m manpage-suppress-sp.xsl -endif - -SHELL_PATH ?= $(SHELL) -# Shell quote; -SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) - -DESTDIR ?= -DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' - -export DESTDIR DESTDIR_SQ - -# -# Please note that there is a minor bug in asciidoc. -# The version after 6.0.3 _will_ include the patch found here: -# http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2 -# -# Until that version is released you may have to apply the patch -# yourself - yes, all 6 characters of it! -# -QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir -QUIET_SUBDIR1 = - -ifneq ($(findstring $(MAKEFLAGS),w),w) -PRINT_DIR = --no-print-directory -else # "make -w" -NO_SUBDIR = : -endif - -ifneq ($(findstring $(MAKEFLAGS),s),s) -ifneq ($(V),1) - QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@; - QUIET_XMLTO = @echo ' XMLTO '$@; - QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ - echo ' SUBDIR ' $$subdir; \ - $(MAKE) $(PRINT_DIR) -C $$subdir - export V -endif -endif - -all: html man - -man: man3 -man3: $(DOC_MAN3) - -html: $(MAN_HTML) - -$(MAN_HTML) $(DOC_MAN3): asciidoc.conf - -install: install-man - -check-man-tools: -ifdef missing_tools - $(error "You need to install $(missing_tools) for man pages") -endif - -do-install-man: man - $(call QUIET_INSTALL, Documentation-man) \ - $(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \ - $(INSTALL) -m 644 $(DOC_MAN3) $(DESTDIR)$(man3dir); - -install-man: check-man-tools man do-install-man - -uninstall: uninstall-man - -uninstall-man: - $(call QUIET_UNINST, Documentation-man) \ - $(Q)$(RM) $(addprefix $(DESTDIR)$(man3dir)/,$(DOC_MAN3)) - - -ifdef missing_tools - DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) -else - DO_INSTALL_MAN = do-install-man -endif - -CLEAN_FILES = \ - $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ - $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ - $(DOC_MAN3) *.3 - -clean: - $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES) - -ifdef USE_ASCIIDOCTOR -$(OUTPUT)%.3 : $(OUTPUT)%.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b manpage -d manpage \ - $(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ - mv $@+ $@ -endif - -$(OUTPUT)%.3 : $(OUTPUT)%.xml - $(QUIET_XMLTO)$(RM) $@ && \ - $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< - -$(OUTPUT)%.xml : %.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b docbook -d manpage \ - $(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ - mv $@+ $@ - -$(MAN_HTML): $(OUTPUT)%.html : %.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ - $(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ - mv $@+ $@ diff --git a/tools/lib/traceevent/Documentation/asciidoc.conf b/tools/lib/traceevent/Documentation/asciidoc.conf deleted file mode 100644 index 07595717f06e..000000000000 --- a/tools/lib/traceevent/Documentation/asciidoc.conf +++ /dev/null @@ -1,120 +0,0 @@ -## linktep: macro -# -# Usage: linktep:command[manpage-section] -# -# Note, {0} is the manpage section, while {target} is the command. -# -# Show TEP link as: <command>(<section>); if section is defined, else just show -# the command. - -[macros] -(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]= - -[attributes] -asterisk=* -plus=+ -caret=^ -startsb=[ -endsb=] -tilde=~ - -ifdef::backend-docbook[] -[linktep-inlinemacro] -{0%{target}} -{0#<citerefentry>} -{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>} -{0#</citerefentry>} -endif::backend-docbook[] - -ifdef::backend-docbook[] -ifndef::tep-asciidoc-no-roff[] -# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this. -# v1.72 breaks with this because it replaces dots not in roff requests. -[listingblock] -<example><title>{title}</title> -<literallayout> -ifdef::doctype-manpage[] - .ft C -endif::doctype-manpage[] -| -ifdef::doctype-manpage[] - .ft -endif::doctype-manpage[] -</literallayout> -{title#}</example> -endif::tep-asciidoc-no-roff[] - -ifdef::tep-asciidoc-no-roff[] -ifdef::doctype-manpage[] -# The following two small workarounds insert a simple paragraph after screen -[listingblock] -<example><title>{title}</title> -<literallayout> -| -</literallayout><simpara></simpara> -{title#}</example> - -[verseblock] -<formalpara{id? id="{id}"}><title>{title}</title><para> -{title%}<literallayout{id? id="{id}"}> -{title#}<literallayout> -| -</literallayout> -{title#}</para></formalpara> -{title%}<simpara></simpara> -endif::doctype-manpage[] -endif::tep-asciidoc-no-roff[] -endif::backend-docbook[] - -ifdef::doctype-manpage[] -ifdef::backend-docbook[] -[header] -template::[header-declarations] -<refentry> -<refmeta> -<refentrytitle>{mantitle}</refentrytitle> -<manvolnum>{manvolnum}</manvolnum> -<refmiscinfo class="source">libtraceevent</refmiscinfo> -<refmiscinfo class="version">{libtraceevent_version}</refmiscinfo> -<refmiscinfo class="manual">libtraceevent Manual</refmiscinfo> -</refmeta> -<refnamediv> - <refname>{manname1}</refname> - <refname>{manname2}</refname> - <refname>{manname3}</refname> - <refname>{manname4}</refname> - <refname>{manname5}</refname> - <refname>{manname6}</refname> - <refname>{manname7}</refname> - <refname>{manname8}</refname> - <refname>{manname9}</refname> - <refname>{manname10}</refname> - <refname>{manname11}</refname> - <refname>{manname12}</refname> - <refname>{manname13}</refname> - <refname>{manname14}</refname> - <refname>{manname15}</refname> - <refname>{manname16}</refname> - <refname>{manname17}</refname> - <refname>{manname18}</refname> - <refname>{manname19}</refname> - <refname>{manname20}</refname> - <refname>{manname21}</refname> - <refname>{manname22}</refname> - <refname>{manname23}</refname> - <refname>{manname24}</refname> - <refname>{manname25}</refname> - <refname>{manname26}</refname> - <refname>{manname27}</refname> - <refname>{manname28}</refname> - <refname>{manname29}</refname> - <refname>{manname30}</refname> - <refpurpose>{manpurpose}</refpurpose> -</refnamediv> -endif::backend-docbook[] -endif::doctype-manpage[] - -ifdef::backend-xhtml11[] -[linktep-inlinemacro] -<a href="{target}.html">{target}{0?({0})}</a> -endif::backend-xhtml11[] diff --git a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt b/tools/lib/traceevent/Documentation/libtraceevent-commands.txt deleted file mode 100644 index bec552001f8e..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt +++ /dev/null @@ -1,153 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_register_comm, tep_override_comm, tep_pid_is_registered, -tep_data_comm_from_pid, tep_data_pid_from_comm, tep_cmdline_pid - -Manage pid to process name mappings. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); -int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); -bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_); -const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_pevent_, int _pid_); -struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_pevent_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_); -int *tep_cmdline_pid*(struct tep_handle pass:[*]_pevent_, struct cmdline pass:[*]_cmdline_); --- - -DESCRIPTION ------------ -These functions can be used to handle the mapping between pid and process name. -The library builds a cache of these mappings, which is used to display the name -of the process, instead of its pid. This information can be retrieved from -tracefs/saved_cmdlines file. - -The _tep_register_comm()_ function registers a _pid_ / process name mapping. -If a command with the same _pid_ is already registered, an error is returned. -The _pid_ argument is the process ID, the _comm_ argument is the process name, -_tep_ is the event context. The _comm_ is duplicated internally. - -The _tep_override_comm()_ function registers a _pid_ / process name mapping. -If a process with the same pid is already registered, the process name string is -udapted with the new one. The _pid_ argument is the process ID, the _comm_ -argument is the process name, _tep_ is the event context. The _comm_ is -duplicated internally. - -The _tep_is_pid_registered()_ function checks if a pid has a process name -mapping registered. The _pid_ argument is the process ID, _tep_ is the event -context. - -The _tep_data_comm_from_pid()_ function returns the process name for a given -pid. The _pid_ argument is the process ID, _tep_ is the event context. -The returned string should not be freed, but will be freed when the _tep_ -handler is closed. - -The _tep_data_pid_from_comm()_ function returns a pid for a given process name. -The _comm_ argument is the process name, _tep_ is the event context. -The argument _next_ is the cmdline structure to search for the next pid. -As there may be more than one pid for a given process, the result of this call -can be passed back into a recurring call in the _next_ parameter, to search for -the next pid. If _next_ is NULL, it will return the first pid associated with -the _comm_. The function performs a linear search, so it may be slow. - -The _tep_cmdline_pid()_ function returns the pid associated with a given -_cmdline_. The _tep_ argument is the event context. - -RETURN VALUE ------------- -_tep_register_comm()_ function returns 0 on success. In case of an error -1 is -returned and errno is set to indicate the cause of the problem: ENOMEM, if there -is not enough memory to duplicate the _comm_ or EEXIST if a mapping for this -_pid_ is already registered. - -_tep_override_comm()_ function returns 0 on success. In case of an error -1 is -returned and errno is set to indicate the cause of the problem: ENOMEM, if there -is not enough memory to duplicate the _comm_. - -_tep_is_pid_registered()_ function returns true if the _pid_ has a process name -mapped to it, false otherwise. - -_tep_data_comm_from_pid()_ function returns the process name as string, or the -string "<...>" if there is no mapping for the given pid. - -_tep_data_pid_from_comm()_ function returns a pointer to a struct cmdline, that -holds a pid for a given process, or NULL if none is found. This result can be -passed back into a recurring call as the _next_ parameter of the function. - -_tep_cmdline_pid()_ functions returns the pid for the give cmdline. If _cmdline_ - is NULL, then -1 is returned. - -EXAMPLE -------- -The following example registers pid for command "ls", in context of event _tep_ -and performs various searches for pid / process name mappings: -[source,c] --- -#include <event-parse.h> -... -int ret; -int ls_pid = 1021; -struct tep_handle *tep = tep_alloc(); -... - ret = tep_register_comm(tep, "ls", ls_pid); - if (ret != 0 && errno == EEXIST) - ret = tep_override_comm(tep, "ls", ls_pid); - if (ret != 0) { - /* Failed to register pid / command mapping */ - } -... - if (tep_is_pid_registered(tep, ls_pid) == 0) { - /* Command mapping for ls_pid is not registered */ - } -... - const char *comm = tep_data_comm_from_pid(tep, ls_pid); - if (comm) { - /* Found process name for ls_pid */ - } -... - int pid; - struct cmdline *cmd = tep_data_pid_from_comm(tep, "ls", NULL); - while (cmd) { - pid = tep_cmdline_pid(tep, cmd); - /* Found pid for process "ls" */ - cmd = tep_data_pid_from_comm(tep, "ls", cmd); - } --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt b/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt deleted file mode 100644 index 5ad70e43b752..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt +++ /dev/null @@ -1,77 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_cpus, tep_set_cpus - Get / set the number of CPUs, which have a tracing -buffer representing it. Note, the buffer may be empty. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_get_cpus*(struct tep_handle pass:[*]_tep_); -void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_); --- - -DESCRIPTION ------------ -The _tep_get_cpus()_ function gets the number of CPUs, which have a tracing -buffer representing it. The _tep_ argument is trace event parser context. - -The _tep_set_cpus()_ function sets the number of CPUs, which have a tracing -buffer representing it. The _tep_ argument is trace event parser context. -The _cpu_ argument is the number of CPUs with tracing data. - -RETURN VALUE ------------- -The _tep_get_cpus()_ functions returns the number of CPUs, which have tracing -data recorded. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... - tep_set_cpus(tep, 5); -... - printf("We have tracing data for %d CPUs", tep_get_cpus(tep)); --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt deleted file mode 100644 index e64851b6e189..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt +++ /dev/null @@ -1,78 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_read_number - Reads a number from raw data. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_); --- - -DESCRIPTION ------------ -The _tep_read_number()_ function reads an integer from raw data, taking into -account the endianness of the raw data and the current host. The _tep_ argument -is the trace event parser context. The _ptr_ is a pointer to the raw data, where -the integer is, and the _size_ is the size of the integer. - -RETURN VALUE ------------- -The _tep_read_number()_ function returns the integer in the byte order of -the current host. In case of an error, 0 is returned. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -void process_record(struct tep_record *record) -{ - int offset = 24; - int data = tep_read_number(tep, record->data + offset, 4); - - /* Read the 4 bytes at the offset 24 of data as an integer */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt deleted file mode 100644 index 7bc062c9f76f..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt +++ /dev/null @@ -1,103 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_event,tep_find_event_by_name,tep_find_event_by_record - -Find events by given key. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_); -struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_); -struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_); --- - -DESCRIPTION ------------ -This set of functions can be used to search for an event, based on a given -criteria. All functions require a pointer to a _tep_, trace event parser -context. - -The _tep_find_event()_ function searches for an event by given event _id_. The -event ID is assigned dynamically and can be viewed in event's format file, -"ID" field. - -The tep_find_event_by_name()_ function searches for an event by given -event _name_, under the system _sys_. If the _sys_ is NULL (not specified), -the first event with _name_ is returned. - -The tep_find_event_by_record()_ function searches for an event from a given -_record_. - -RETURN VALUE ------------- -All these functions return a pointer to the found event, or NULL if there is no -such event. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_event *event; - -event = tep_find_event(tep, 1857); -if (event == NULL) { - /* There is no event with ID 1857 */ -} - -event = tep_find_event_by_name(tep, "kvm", "kvm_exit"); -if (event == NULL) { - /* There is no kvm_exit event, from kvm system */ -} - -void event_from_record(struct tep_record *record) -{ - struct tep_event *event = tep_find_event_by_record(tep, record); - if (event == NULL) { - /* There is no event from given record */ - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt deleted file mode 100644 index 6525092fc417..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt +++ /dev/null @@ -1,99 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_event, tep_get_first_event, tep_get_events_count - Access events. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_); -struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_); -int *tep_get_events_count*(struct tep_handle pass:[*]_tep_); --- - -DESCRIPTION ------------ -The _tep_get_event()_ function returns a pointer to event at the given _index_. -The _tep_ argument is trace event parser context, the _index_ is the index of -the requested event. - -The _tep_get_first_event()_ function returns a pointer to the first event. -As events are stored in an array, this function returns the pointer to the -beginning of the array. The _tep_ argument is trace event parser context. - -The _tep_get_events_count()_ function returns the number of the events -in the array. The _tep_ argument is trace event parser context. - -RETURN VALUE ------------- -The _tep_get_event()_ returns a pointer to the event located at _index_. -NULL is returned in case of error, in case there are no events or _index_ is -out of range. - -The _tep_get_first_event()_ returns a pointer to the first event. NULL is -returned in case of error, or in case there are no events. - -The _tep_get_events_count()_ returns the number of the events. 0 is -returned in case of error, or in case there are no events. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -int i,count = tep_get_events_count(tep); -struct tep_event *event, *events = tep_get_first_event(tep); - -if (events == NULL) { - /* There are no events */ -} else { - for (i = 0; i < count; i++) { - event = (events+i); - /* process events[i] */ - } - - /* Get the last event */ - event = tep_get_event(tep, count-1); -} --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt deleted file mode 100644 index fba350e5a4cb..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt +++ /dev/null @@ -1,122 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_list_events, tep_list_events_copy - -Get list of events, sorted by given criteria. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -enum *tep_event_sort_type* { - _TEP_EVENT_SORT_ID_, - _TEP_EVENT_SORT_NAME_, - _TEP_EVENT_SORT_SYSTEM_, -}; - -struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); -struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); --- - -DESCRIPTION ------------ -The _tep_list_events()_ function returns an array of pointers to the events, -sorted by the _sort_type_ criteria. The last element of the array is NULL. -The returned memory must not be freed, it is managed by the library. -The function is not thread safe. The _tep_ argument is trace event parser -context. The _sort_type_ argument is the required sort criteria: -[verse] --- - _TEP_EVENT_SORT_ID_ - sort by the event ID. - _TEP_EVENT_SORT_NAME_ - sort by the event (name, system, id) triplet. - _TEP_EVENT_SORT_SYSTEM_ - sort by the event (system, name, id) triplet. --- - -The _tep_list_events_copy()_ is a thread safe version of _tep_list_events()_. -It has the same behavior, but the returned array is allocated internally and -must be freed by the caller. Note that the content of the array must not be -freed (see the EXAMPLE below). - -RETURN VALUE ------------- -The _tep_list_events()_ function returns an array of pointers to events. -In case of an error, NULL is returned. The returned array must not be freed, -it is managed by the library. - -The _tep_list_events_copy()_ function returns an array of pointers to events. -In case of an error, NULL is returned. The returned array must be freed by -the caller. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -int i; -struct tep_event_format **events; - -i=0; -events = tep_list_events(tep, TEP_EVENT_SORT_ID); -if (events == NULL) { - /* Failed to get the events, sorted by ID */ -} else { - while(events[i]) { - /* walk through the list of the events, sorted by ID */ - i++; - } -} - -i=0; -events = tep_list_events_copy(tep, TEP_EVENT_SORT_NAME); -if (events == NULL) { - /* Failed to get the events, sorted by name */ -} else { - while(events[i]) { - /* walk through the list of the events, sorted by name */ - i++; - } - free(events); -} - -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt deleted file mode 100644 index 2c6a61811118..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt +++ /dev/null @@ -1,130 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_print_event - Writes event information into a trace sequence. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* -*#include <trace-seq.h>* - -void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seqpass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._) --- - -DESCRIPTION ------------ - -The _tep_print_event()_ function parses the event information of the given -_record_ and writes it into the trace sequence _s_, according to the format -string _fmt_. The desired information is specified after the format string. -The _fmt_ is printf-like format string, following arguments are supported: -[verse] --- - TEP_PRINT_PID, "%d" - PID of the event. - TEP_PRINT_CPU, "%d" - Event CPU. - TEP_PRINT_COMM, "%s" - Event command string. - TEP_PRINT_NAME, "%s" - Event name. - TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more - fields - interrupt state, scheduling state, - current context, and preemption count. - Field 1 is the interrupt enabled state: - d : Interrupts are disabled - . : Interrupts are enabled - X : The architecture does not support this - information - Field 2 is the "need resched" state. - N : The task is set to call the scheduler when - possible, as another higher priority task - may need to be scheduled in. - . : The task is not set to call the scheduler. - Field 3 is the context state. - . : Normal context - s : Soft interrupt context - h : Hard interrupt context - H : Hard interrupt context which triggered - during soft interrupt context. - z : NMI context - Z : NMI context which triggered during hard - interrupt context - Field 4 is the preemption count. - . : The preempt count is zero. - On preemptible kernels (where the task can be scheduled - out in arbitrary locations while in kernel context), the - preempt count, when non zero, will prevent the kernel - from scheduling out the current task. The preempt count - number is displayed when it is not zero. - Depending on the kernel, it may show other fields - (lock depth, or migration disabled, which are unique to - specialized kernels). - TEP_PRINT_TIME, %d - event time stamp. A divisor and precision can be - specified as part of this format string: - "%precision.divisord". Example: - "%3.1000d" - divide the time by 1000 and print the first - 3 digits before the dot. Thus, the time stamp - "123456000" will be printed as "123.456" - TEP_PRINT_INFO, "%s" - event information. - TEP_PRINT_INFO_RAW, "%s" - event information, in raw format. - --- -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -#include <trace-seq.h> -... -struct trace_seq seq; -trace_seq_init(&seq); -struct tep_handle *tep = tep_alloc(); -... -void print_my_event(struct tep_record *record) -{ - trace_seq_reset(&seq); - tep_print_event(tep, s, record, "%16s-%-5d [%03d] %s %6.1000d %s %s", - TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU, - TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME, - TEP_PRINT_INFO); -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. - Trace sequences are used to allow a function to call several other functions - to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt deleted file mode 100644 index 0896af5b9eff..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt +++ /dev/null @@ -1,118 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_common_field, tep_find_field, tep_find_any_field - -Search for a field in an event. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); -struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_); -struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); --- - -DESCRIPTION ------------ -These functions search for a field with given name in an event. The field -returned can be used to find the field content from within a data record. - -The _tep_find_common_field()_ function searches for a common field with _name_ -in the _event_. - -The _tep_find_field()_ function searches for an event specific field with -_name_ in the _event_. - -The _tep_find_any_field()_ function searches for any field with _name_ in the -_event_. - -RETURN VALUE ------------- -The _tep_find_common_field(), _tep_find_field()_ and _tep_find_any_field()_ -functions return a pointer to the found field, or NULL in case there is no field -with the requested name. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -void get_htimer_info(struct tep_handle *tep, struct tep_record *record) -{ - struct tep_format_field *field; - struct tep_event *event; - long long softexpires; - int mode; - int pid; - - event = tep_find_event_by_name(tep, "timer", "hrtimer_start"); - - field = tep_find_common_field(event, "common_pid"); - if (field == NULL) { - /* Cannot find "common_pid" field in the event */ - } else { - /* Get pid from the data record */ - pid = tep_read_number(tep, record->data + field->offset, - field->size); - } - - field = tep_find_field(event, "softexpires"); - if (field == NULL) { - /* Cannot find "softexpires" event specific field in the event */ - } else { - /* Get softexpires parameter from the data record */ - softexpires = tep_read_number(tep, record->data + field->offset, - field->size); - } - - field = tep_find_any_field(event, "mode"); - if (field == NULL) { - /* Cannot find "mode" field in the event */ - } else - { - /* Get mode parameter from the data record */ - mode = tep_read_number(tep, record->data + field->offset, - field->size); - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt deleted file mode 100644 index 6324f0d48aeb..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt +++ /dev/null @@ -1,122 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_any_field_val, tep_get_common_field_val, tep_get_field_val, -tep_get_field_raw - Get value of a field. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* -*#include <trace-seq.h>* - -int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); -int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); -int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); -void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_); --- - -DESCRIPTION ------------ -These functions can be used to find a field and retrieve its value. - -The _tep_get_any_field_val()_ function searches in the _record_ for a field -with _name_, part of the _event_. If the field is found, its value is stored in -_val_. If there is an error and _err_ is not zero, then an error string is -written into _s_. - -The _tep_get_common_field_val()_ function does the same as -_tep_get_any_field_val()_, but searches only in the common fields. This works -for any event as all events include the common fields. - -The _tep_get_field_val()_ function does the same as _tep_get_any_field_val()_, -but searches only in the event specific fields. - -The _tep_get_field_raw()_ function searches in the _record_ for a field with -_name_, part of the _event_. If the field is found, a pointer to where the field -exists in the record's raw data is returned. The size of the data is stored in -_len_. If there is an error and _err_ is not zero, then an error string is -written into _s_. - -RETURN VALUE ------------- -The _tep_get_any_field_val()_, _tep_get_common_field_val()_ and -_tep_get_field_val()_ functions return 0 on success, or -1 in case of an error. - -The _tep_get_field_raw()_ function returns a pointer to field's raw data, and -places the length of this data in _len_. In case of an error NULL is returned. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -#include <trace-seq.h> -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit"); -... -void process_record(struct tep_record *record) -{ - int len; - char *comm; - struct tep_event_format *event; - unsigned long long val; - - event = tep_find_event_by_record(pevent, record); - if (event != NULL) { - if (tep_get_common_field_val(NULL, event, "common_type", - record, &val, 0) == 0) { - /* Got the value of common type field */ - } - if (tep_get_field_val(NULL, event, "pid", record, &val, 0) == 0) { - /* Got the value of pid specific field */ - } - comm = tep_get_field_raw(NULL, event, "comm", record, &len, 0); - if (comm != NULL) { - /* Got a pointer to the comm event specific field */ - } - } -} --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences - related APIs. Trace sequences are used to allow a function to call - several other functions to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt deleted file mode 100644 index 9a9df98ac44d..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt +++ /dev/null @@ -1,126 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_print_field, tep_print_fields, tep_print_num_field, tep_print_func_field - -Print the field content. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* -*#include <trace-seq.h>* - -void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_); -void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_); -int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); -int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); --- - -DESCRIPTION ------------ -These functions print recorded field's data, according to the field's type. - -The _tep_print_field()_ function extracts from the recorded raw _data_ value of -the _field_ and prints it into _s_, according to the field type. - -The _tep_print_fields()_ prints each field name followed by the record's field -value according to the field's type: -[verse] --- -"field1_name=field1_value field2_name=field2_value ..." --- -It iterates all fields of the _event_, and calls _tep_print_field()_ for each of -them. - -The _tep_print_num_field()_ function prints a numeric field with given format -string. A search is performed in the _event_ for a field with _name_. If such -field is found, its value is extracted from the _record_ and is printed in the -_s_, according to the given format string _fmt_. If the argument _err_ is -non-zero, and an error occures - it is printed in the _s_. - -The _tep_print_func_field()_ function prints a function field with given format -string. A search is performed in the _event_ for a field with _name_. If such -field is found, its value is extracted from the _record_. The value is assumed -to be a function address, and a search is perform to find the name of this -function. The function name (if found) and its address are printed in the _s_, -according to the given format string _fmt_. If the argument _err_ is non-zero, -and an error occures - it is printed in _s_. - -RETURN VALUE ------------- -The _tep_print_num_field()_ and _tep_print_func_field()_ functions return 1 -on success, -1 in case of an error or 0 if the print buffer _s_ is full. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -#include <trace-seq.h> -... -struct tep_handle *tep = tep_alloc(); -... -struct trace_seq seq; -trace_seq_init(&seq); -struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start"); -... -void process_record(struct tep_record *record) -{ - struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid"); - - trace_seq_reset(&seq); - - /* Print the value of "common_pid" */ - tep_print_field(&seq, record->data, field_pid); - - /* Print all fields of the "hrtimer_start" event */ - tep_print_fields(&seq, record->data, record->size, event); - - /* Print the value of "expires" field with custom format string */ - tep_print_num_field(&seq, " timer expires in %llu ", event, "expires", record, 0); - - /* Print the address and the name of "function" field with custom format string */ - tep_print_func_field(&seq, " timer function is %s ", event, "function", record, 0); - } - ... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. - Trace sequences are used to allow a function to call several other functions - to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt deleted file mode 100644 index 64e9e25d3fd9..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt +++ /dev/null @@ -1,81 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_read_number_field - Reads a number from raw data. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_); --- - -DESCRIPTION ------------ -The _tep_read_number_field()_ function reads the value of the _field_ from the -raw _data_ and stores it in the _value_. The function sets the _value_ according -to the endianness of the raw data and the current machine and stores it in -_value_. - -RETURN VALUE ------------- -The _tep_read_number_field()_ function retunrs 0 in case of success, or -1 in -case of an error. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start"); -... -void process_record(struct tep_record *record) -{ - unsigned long long pid; - struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid"); - - if (tep_read_number_field(field_pid, record->data, &pid) != 0) { - /* Failed to get "common_pid" value */ - } -} -... --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt b/tools/lib/traceevent/Documentation/libtraceevent-fields.txt deleted file mode 100644 index 1ccb531d5114..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt +++ /dev/null @@ -1,105 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_event_common_fields, tep_event_fields - Get a list of fields for an event. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_); -struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_); --- - -DESCRIPTION ------------ -The _tep_event_common_fields()_ function returns an array of pointers to common -fields for the _event_. The array is allocated in the function and must be freed -by free(). The last element of the array is NULL. - -The _tep_event_fields()_ function returns an array of pointers to event specific -fields for the _event_. The array is allocated in the function and must be freed -by free(). The last element of the array is NULL. - -RETURN VALUE ------------- -Both _tep_event_common_fields()_ and _tep_event_fields()_ functions return -an array of pointers to tep_format_field structures in case of success, or -NULL in case of an error. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -int i; -struct tep_format_field **fields; -struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit"); -if (event != NULL) { - fields = tep_event_common_fields(event); - if (fields != NULL) { - i = 0; - while (fields[i]) { - /* - walk through the list of the common fields - of the kvm_exit event - */ - i++; - } - free(fields); - } - fields = tep_event_fields(event); - if (fields != NULL) { - i = 0; - while (fields[i]) { - /* - walk through the list of the event specific - fields of the kvm_exit event - */ - i++; - } - free(fields); - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt deleted file mode 100644 index f401ad311047..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt +++ /dev/null @@ -1,91 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_is_file_bigendian, tep_set_file_bigendian - Get / set the endianness of the -raw data being accessed by the tep handler. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -enum *tep_endian* { - TEP_LITTLE_ENDIAN = 0, - TEP_BIG_ENDIAN -}; - -bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_); -void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); - --- -DESCRIPTION ------------ -The _tep_is_file_bigendian()_ function gets the endianness of the raw data, -being accessed by the tep handler. The _tep_ argument is trace event parser -context. - -The _tep_set_file_bigendian()_ function sets the endianness of raw data being -accessed by the tep handler. The _tep_ argument is trace event parser context. -[verse] --- -The _endian_ argument is the endianness: - _TEP_LITTLE_ENDIAN_ - the raw data is in little endian format, - _TEP_BIG_ENDIAN_ - the raw data is in big endian format. --- -RETURN VALUE ------------- -The _tep_is_file_bigendian()_ function returns true if the data is in bigendian -format, false otherwise. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... - tep_set_file_bigendian(tep, TEP_LITTLE_ENDIAN); -... - if (tep_is_file_bigendian(tep)) { - /* The raw data is in big endian */ - } else { - /* The raw data is in little endian */ - } --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt b/tools/lib/traceevent/Documentation/libtraceevent-filter.txt deleted file mode 100644 index 4a9962d8cb59..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt +++ /dev/null @@ -1,209 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_filter_alloc, tep_filter_free, tep_filter_reset, tep_filter_make_string, -tep_filter_copy, tep_filter_compare, tep_filter_match, tep_event_filtered, -tep_filter_remove_event, tep_filter_strerror, tep_filter_add_filter_str - -Event filter related APIs. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_); -void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_); -void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_); -enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_); -int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_); -int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_); -enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_); -int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_); -int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_); -char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_); -int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_); --- - -DESCRIPTION ------------ -Filters can be attached to traced events. They can be used to filter out various -events when outputting them. Each event can be filtered based on its parameters, -described in the event's format file. This set of functions can be used to -create, delete, modify and attach event filters. - -The _tep_filter_alloc()_ function creates a new event filter. The _tep_ argument -is the trace event parser context. - -The _tep_filter_free()_ function frees an event filter and all resources that it -had used. - -The _tep_filter_reset()_ function removes all rules from an event filter and -resets it. - -The _tep_filter_add_filter_str()_ function adds a new rule to the _filter_. The -_filter_str_ argument is the filter string, that contains the rule. - -The _tep_event_filtered()_ function checks if the event with _event_id_ has -_filter_. - -The _tep_filter_remove_event()_ function removes a _filter_ for an event with -_event_id_. - -The _tep_filter_match()_ function tests if a _record_ matches given _filter_. - -The _tep_filter_copy()_ function copies a _source_ filter into a _dest_ filter. - -The _tep_filter_compare()_ function compares two filers - _filter1_ and _filter2_. - -The _tep_filter_make_string()_ function constructs a string, displaying -the _filter_ contents for given _event_id_. - -The _tep_filter_strerror()_ function copies the _filter_ error buffer into the -given _buf_ with the size _buflen_. If the error buffer is empty, in the _buf_ -is copied a string, describing the error _err_. - -RETURN VALUE ------------- -The _tep_filter_alloc()_ function returns a pointer to the newly created event -filter, or NULL in case of an error. - -The _tep_filter_add_filter_str()_ function returns 0 if the rule was -successfully added or a negative error code. Use _tep_filter_strerror()_ to see -actual error message in case of an error. - -The _tep_event_filtered()_ function returns 1 if the filter is found for given -event, or 0 otherwise. - -The _tep_filter_remove_event()_ function returns 1 if the vent was removed, or -0 if the event was not found. - -The _tep_filter_match()_ function returns _tep_errno_, according to the result: -[verse] --- -_pass:[TEP_ERRNO__FILTER_MATCH]_ - filter found for event, the record matches. -_pass:[TEP_ERRNO__FILTER_MISS]_ - filter found for event, the record does not match. -_pass:[TEP_ERRNO__FILTER_NOT_FOUND]_ - no filter found for record's event. -_pass:[TEP_ERRNO__NO_FILTER]_ - no rules in the filter. --- -or any other _tep_errno_, if an error occurred during the test. - -The _tep_filter_copy()_ function returns 0 on success or -1 if not all rules - were copied. - -The _tep_filter_compare()_ function returns 1 if the two filters hold the same -content, or 0 if they do not. - -The _tep_filter_make_string()_ function returns a string, which must be freed -with free(), or NULL in case of an error. - -The _tep_filter_strerror()_ function returns 0 if message was filled -successfully, or -1 in case of an error. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -char errstr[200]; -int ret; - -struct tep_event_filter *filter = tep_filter_alloc(tep); -struct tep_event_filter *filter1 = tep_filter_alloc(tep); -ret = tep_filter_add_filter_str(filter, "sched/sched_wakeup:target_cpu==1"); -if(ret < 0) { - tep_filter_strerror(filter, ret, errstr, sizeof(errstr)); - /* Failed to add a new rule to the filter, the error string is in errstr */ -} -if (tep_filter_copy(filter1, filter) != 0) { - /* Failed to copy filter in filter1 */ -} -... -if (tep_filter_compare(filter, filter1) != 1) { - /* Both filters are different */ -} -... -void process_record(struct tep_handle *tep, struct tep_record *record) -{ - struct tep_event *event; - char *fstring; - - event = tep_find_event_by_record(tep, record); - - if (tep_event_filtered(filter, event->id) == 1) { - /* The event has filter */ - fstring = tep_filter_make_string(filter, event->id); - if (fstring != NULL) { - /* The filter for the event is in fstring */ - free(fstring); - } - } - - switch (tep_filter_match(filter, record)) { - case TEP_ERRNO__FILTER_MATCH: - /* The filter matches the record */ - break; - case TEP_ERRNO__FILTER_MISS: - /* The filter does not match the record */ - break; - case TEP_ERRNO__FILTER_NOT_FOUND: - /* No filter found for record's event */ - break; - case TEP_ERRNO__NO_FILTER: - /* There are no rules in the filter */ - break - default: - /* An error occurred during the test */ - break; - } - - if (tep_filter_remove_event(filter, event->id) == 1) { - /* The event was removed from the filter */ - } -} - -... -tep_filter_reset(filter); -... -tep_filter_free(filter); -tep_filter_free(filter1); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt deleted file mode 100644 index f6aca0df2151..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt +++ /dev/null @@ -1,183 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_function, tep_find_function_address, tep_set_function_resolver, -tep_reset_function_resolver, tep_register_function, tep_register_print_string - -function related tep APIs - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -typedef char pass:[*](*tep_func_resolver_t*)(void pass:[*]_priv_, unsigned long long pass:[*]_addrp_, char pass:[**]_modp_); -int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_); -void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_); -const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); -unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); -int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_); -int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_); --- - -DESCRIPTION ------------ -Some tools may have already a way to resolve the kernel functions. These APIs -allow them to keep using it instead of duplicating all the entries inside. - -The _tep_func_resolver_t_ type is the prototype of the alternative kernel -functions resolver. This function receives a pointer to its custom context -(set with the _tep_set_function_resolver()_ call ) and the address of a kernel -function, which has to be resolved. In case of success, it should return -the name of the function and its module (if any) in _modp_. - -The _tep_set_function_resolver()_ function registers _func_ as an alternative -kernel functions resolver. The _tep_ argument is trace event parser context. -The _priv_ argument is a custom context of the _func_ function. The function -resolver is used by the APIs _tep_find_function()_, -_tep_find_function_address()_, and _tep_print_func_field()_ to resolve -a function address to a function name. - -The _tep_reset_function_resolver()_ function resets the kernel functions -resolver to the default function. The _tep_ argument is trace event parser -context. - - -These APIs can be used to find function name and start address, by given -address. The given address does not have to be exact, it will select -the function that would contain it. - -The _tep_find_function()_ function returns the function name, which contains the -given address _addr_. The _tep_ argument is the trace event parser context. - -The _tep_find_function_address()_ function returns the function start address, -by given address _addr_. The _addr_ does not have to be exact, it will select -the function that would contain it. The _tep_ argument is the trace event -parser context. - -The _tep_register_function()_ function registers a function name mapped to an -address and (optional) module. This mapping is used in case the function tracer -or events have "%pS" parameter in its format string. It is common to pass in -the kallsyms function names with their corresponding addresses with this -function. The _tep_ argument is the trace event parser context. The _name_ is -the name of the function, the string is copied internally. The _addr_ is the -start address of the function. The _mod_ is the kernel module the function may -be in (NULL for none). - -The _tep_register_print_string()_ function registers a string by the address -it was stored in the kernel. Some strings internal to the kernel with static -address are passed to certain events. The "%s" in the event's format field -which has an address needs to know what string would be at that address. The -tep_register_print_string() supplies the parsing with the mapping between kernel -addresses and those strings. The _tep_ argument is the trace event parser -context. The _fmt_ is the string to register, it is copied internally. -The _addr_ is the address the string was located at. - - -RETURN VALUE ------------- -The _tep_set_function_resolver()_ function returns 0 in case of success, or -1 -in case of an error. - -The _tep_find_function()_ function returns the function name, or NULL in case -it cannot be found. - -The _tep_find_function_address()_ function returns the function start address, -or 0 in case it cannot be found. - -The _tep_register_function()_ function returns 0 in case of success. In case of -an error -1 is returned, and errno is set to the appropriate error number. - -The _tep_register_print_string()_ function returns 0 in case of success. In case -of an error -1 is returned, and errno is set to the appropriate error number. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -char *my_resolve_kernel_addr(void *context, - unsigned long long *addrp, char **modp) -{ - struct db *function_database = context; - struct symbol *sym = sql_lookup(function_database, *addrp); - - if (!sym) - return NULL; - - *modp = sym->module_name; - return sym->name; -} - -void show_function( unsigned long long addr) -{ - unsigned long long fstart; - const char *fname; - - if (tep_set_function_resolver(tep, my_resolve_kernel_addr, - function_database) != 0) { - /* failed to register my_resolve_kernel_addr */ - } - - /* These APIs use my_resolve_kernel_addr() to resolve the addr */ - fname = tep_find_function(tep, addr); - fstart = tep_find_function_address(tep, addr); - - /* - addr is in function named fname, starting at fstart address, - at offset (addr - fstart) - */ - - tep_reset_function_resolver(tep); - -} -... - if (tep_register_function(tep, "kvm_exit", - (unsigned long long) 0x12345678, "kvm") != 0) { - /* Failed to register kvm_exit address mapping */ - } -... - if (tep_register_print_string(tep, "print string", - (unsigned long long) 0x87654321, NULL) != 0) { - /* Failed to register "print string" address mapping */ - } -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt deleted file mode 100644 index 04840e244445..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt +++ /dev/null @@ -1,88 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_find_function,tep_find_function_address - Find function name / start address. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); -unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); --- - -DESCRIPTION ------------ -These functions can be used to find function name and start address, by given -address. The given address does not have to be exact, it will select the function -that would contain it. - -The _tep_find_function()_ function returns the function name, which contains the -given address _addr_. The _tep_ argument is the trace event parser context. - -The _tep_find_function_address()_ function returns the function start address, -by given address _addr_. The _addr_ does not have to be exact, it will select the -function that would contain it. The _tep_ argument is the trace event parser context. - -RETURN VALUE ------------- -The _tep_find_function()_ function returns the function name, or NULL in case -it cannot be found. - -The _tep_find_function_address()_ function returns the function start address, -or 0 in case it cannot be found. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -void show_function( unsigned long long addr) -{ - const char *fname = tep_find_function(tep, addr); - unsigned long long fstart = tep_find_function_address(tep, addr); - - /* addr is in function named fname, starting at fstart address, at offset (addr - fstart) */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt deleted file mode 100644 index 45b20172e262..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt +++ /dev/null @@ -1,101 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_alloc, tep_free,tep_ref, tep_unref,tep_get_ref - Create, destroy, manage -references of trace event parser context. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -struct tep_handle pass:[*]*tep_alloc*(void); -void *tep_free*(struct tep_handle pass:[*]_tep_); -void *tep_ref*(struct tep_handle pass:[*]_tep_); -void *tep_unref*(struct tep_handle pass:[*]_tep_); -int *tep_get_ref*(struct tep_handle pass:[*]_tep_); --- - -DESCRIPTION ------------ -These are the main functions to create and destroy tep_handle - the main -structure, representing the trace event parser context. This context is used as -the input parameter of most library APIs. - -The _tep_alloc()_ function allocates and initializes the tep context. - -The _tep_free()_ function will decrement the reference of the _tep_ handler. -When there is no more references, then it will free the handler, as well -as clean up all its resources that it had used. The argument _tep_ is -the pointer to the trace event parser context. - -The _tep_ref()_ function adds a reference to the _tep_ handler. - -The _tep_unref()_ function removes a reference from the _tep_ handler. When -the last reference is removed, the _tep_ is destroyed, and all resources that -it had used are cleaned up. - -The _tep_ref_get()_ functions gets the current references of the _tep_ handler. - -RETURN VALUE ------------- -_tep_alloc()_ returns a pointer to a newly created tep_handle structure. -NULL is returned in case there is not enough free memory to allocate it. - -_tep_ref_get()_ returns the current references of _tep_. -If _tep_ is NULL, 0 is returned. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> - -... -struct tep_handle *tep = tep_alloc(); -... -int ref = tep_get_ref(tep); -tep_ref(tep); -if ( (ref+1) != tep_get_ref(tep)) { - /* Something wrong happened, the counter is not incremented by 1 */ -} -tep_unref(tep); -... -tep_free(tep); -... --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt b/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt deleted file mode 100644 index 615d117dc39f..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt +++ /dev/null @@ -1,102 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_header_page_size, tep_get_header_timestamp_size, tep_is_old_format - -Get the data stored in the header page, in kernel context. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_); -int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_); -bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_); --- -DESCRIPTION ------------ -These functions retrieve information from kernel context, stored in tracefs -events/header_page. Old kernels do not have header page info, so default values -from user space context are used. - -The _tep_get_header_page_size()_ function returns the size of a long integer, -in kernel context. The _tep_ argument is trace event parser context. -This information is retrieved from tracefs events/header_page, "commit" field. - -The _tep_get_header_timestamp_size()_ function returns the size of timestamps, -in kernel context. The _tep_ argument is trace event parser context. This -information is retrieved from tracefs events/header_page, "timestamp" field. - -The _tep_is_old_format()_ function returns true if the kernel predates -the addition of events/header_page, otherwise it returns false. - -RETURN VALUE ------------- -The _tep_get_header_page_size()_ function returns the size of a long integer, -in bytes. - -The _tep_get_header_timestamp_size()_ function returns the size of timestamps, -in bytes. - -The _tep_is_old_format()_ function returns true, if an old kernel is used to -generate the tracing data, which has no event/header_page. If the kernel is new, -or _tep_ is NULL, false is returned. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... - int longsize; - int timesize; - bool old; - - longsize = tep_get_header_page_size(tep); - timesize = tep_get_header_timestamp_size(tep); - old = tep_is_old_format(tep); - - printf ("%s kernel is used to generate the tracing data.\n", - old?"Old":"New"); - printf("The size of a long integer is %d bytes.\n", longsize); - printf("The timestamps size is %d bytes.\n", timesize); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt deleted file mode 100644 index d5d375eb8d1e..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt +++ /dev/null @@ -1,104 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_is_bigendian, tep_is_local_bigendian, tep_set_local_bigendian - Get / set -the endianness of the local machine. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -enum *tep_endian* { - TEP_LITTLE_ENDIAN = 0, - TEP_BIG_ENDIAN -}; - -int *tep_is_bigendian*(void); -bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_); -void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); --- - -DESCRIPTION ------------ - -The _tep_is_bigendian()_ gets the endianness of the machine, executing -the function. - -The _tep_is_local_bigendian()_ function gets the endianness of the local -machine, saved in the _tep_ handler. The _tep_ argument is the trace event -parser context. This API is a bit faster than _tep_is_bigendian()_, as it -returns cached endianness of the local machine instead of checking it each time. - -The _tep_set_local_bigendian()_ function sets the endianness of the local -machine in the _tep_ handler. The _tep_ argument is trace event parser context. -The _endian_ argument is the endianness: -[verse] --- - _TEP_LITTLE_ENDIAN_ - the machine is little endian, - _TEP_BIG_ENDIAN_ - the machine is big endian. --- - -RETURN VALUE ------------- -The _tep_is_bigendian()_ function returns non zero if the endianness of the -machine, executing the code, is big endian and zero otherwise. - -The _tep_is_local_bigendian()_ function returns true, if the endianness of the -local machine, saved in the _tep_ handler, is big endian, or false otherwise. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... - if (tep_is_bigendian()) - tep_set_local_bigendian(tep, TEP_BIG_ENDIAN); - else - tep_set_local_bigendian(tep, TEP_LITTLE_ENDIAN); -... - if (tep_is_local_bigendian(tep)) - printf("This machine you are running on is bigendian\n"); - else - printf("This machine you are running on is little endian\n"); - --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt deleted file mode 100644 index 01d78ea2519a..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt +++ /dev/null @@ -1,78 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_long_size, tep_set_long_size - Get / set the size of a long integer on -the machine, where the trace is generated, in bytes - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_); -void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_); --- - -DESCRIPTION ------------ -The _tep_get_long_size()_ function returns the size of a long integer on the machine, -where the trace is generated. The _tep_ argument is trace event parser context. - -The _tep_set_long_size()_ function sets the size of a long integer on the machine, -where the trace is generated. The _tep_ argument is trace event parser context. -The _long_size_ is the size of a long integer, in bytes. - -RETURN VALUE ------------- -The _tep_get_long_size()_ function returns the size of a long integer on the machine, -where the trace is generated, in bytes. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -tep_set_long_size(tep, 4); -... -int long_size = tep_get_long_size(tep); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt deleted file mode 100644 index 452c0cfa1822..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt +++ /dev/null @@ -1,82 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_get_page_size, tep_set_page_size - Get / set the size of a memory page on -the machine, where the trace is generated - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_get_page_size*(struct tep_handle pass:[*]_tep_); -void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_); --- - -DESCRIPTION ------------ -The _tep_get_page_size()_ function returns the size of a memory page on -the machine, where the trace is generated. The _tep_ argument is trace -event parser context. - -The _tep_set_page_size()_ function stores in the _tep_ context the size of a -memory page on the machine, where the trace is generated. -The _tep_ argument is trace event parser context. -The _page_size_ argument is the size of a memory page, in bytes. - -RETURN VALUE ------------- -The _tep_get_page_size()_ function returns size of the memory page, in bytes. - -EXAMPLE -------- -[source,c] --- -#include <unistd.h> -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... - int page_size = getpagesize(); - - tep_set_page_size(tep, page_size); - - printf("The page size for this machine is %d\n", tep_get_page_size(tep)); - --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt deleted file mode 100644 index f248114ca1ff..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt +++ /dev/null @@ -1,90 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_parse_event, tep_parse_format - Parse the event format information - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); -enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); --- - -DESCRIPTION ------------ -The _tep_parse_event()_ function parses the event format and creates an event -structure to quickly parse raw data for a given event. The _tep_ argument is -the trace event parser context. The created event structure is stored in the -_tep_ context. The _buf_ argument is a buffer with _size_, where the event -format data is. The event format data can be taken from -tracefs/events/.../.../format files. The _sys_ argument is the system of -the event. - -The _tep_parse_format()_ function does the same as _tep_parse_event()_. The only -difference is in the extra _eventp_ argument, where the newly created event -structure is returned. - -RETURN VALUE ------------- -Both _tep_parse_event()_ and _tep_parse_format()_ functions return 0 on success, -or TEP_ERRNO__... in case of an error. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -char *buf; -int size; -struct tep_event *event = NULL; -buf = read_file("/sys/kernel/tracing/events/ftrace/print/format", &size); -if (tep_parse_event(tep, buf, size, "ftrace") != 0) { - /* Failed to parse the ftrace print format */ -} - -if (tep_parse_format(tep, &event, buf, size, "ftrace") != 0) { - /* Failed to parse the ftrace print format */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt deleted file mode 100644 index c90f16c7d8e6..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt +++ /dev/null @@ -1,82 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_parse_header_page - Parses the data stored in the header page. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_); --- - -DESCRIPTION ------------ -The _tep_parse_header_page()_ function parses the header page data from _buf_, -and initializes the _tep_, trace event parser context, with it. The buffer -_buf_ is with _size_, and is supposed to be copied from -tracefs/events/header_page. - -Some old kernels do not have header page info, in this case the -_tep_parse_header_page()_ function can be called with _size_ equal to 0. The -_tep_ context is initialized with default values. The _long_size_ can be used in -this use case, to set the size of a long integer to be used. - -RETURN VALUE ------------- -The _tep_parse_header_page()_ function returns 0 in case of success, or -1 -in case of an error. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -char *buf; -int size; -buf = read_file("/sys/kernel/tracing/events/header_page", &size); -if (tep_parse_header_page(tep, buf, size, sizeof(unsigned long)) != 0) { - /* Failed to parse the header page */ -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt deleted file mode 100644 index 4d6394397d92..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt +++ /dev/null @@ -1,122 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); -void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); -void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_, - void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep, - const char pass:[*]path, - const char pass:[*]name, - void pass:[*]data), - void pass:[*]_data_); --- - -DESCRIPTION ------------ -The _tep_load_plugins()_ function loads all plugins, located in the plugin -directories. The _tep_ argument is trace event parser context. -The plugin directories are : -[verse] --- - - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST - - System's plugin directory, defined at the library compile time. It - depends on the library installation prefix and usually is - _(install_preffix)/lib/traceevent/plugins_ - - Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ - - User's plugin directory, located at _~/.local/lib/traceevent/plugins_ - - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST --- -Loading of plugins can be controlled by the _tep_flags_, using the -_tep_set_flag()_ API: -[verse] --- - _TEP_DISABLE_SYS_PLUGINS_ - do not load plugins, located in - the system's plugin directory. - _TEP_DISABLE_PLUGINS_ - do not load any plugins. --- -The _tep_set_flag()_ API needs to be called before _tep_load_plugins()_, if -loading of all plugins is not the desired case. - -The _tep_unload_plugins()_ function unloads the plugins, previously loaded by -_tep_load_plugins()_. The _tep_ argument is trace event parser context. The -_plugin_list_ is the list of loaded plugins, returned by -the _tep_load_plugins()_ function. - -The _tep_load_plugins_hook_ function walks through all directories with plugins -and calls user specified _load_plugin()_ hook for each plugin file. Only files -with given _suffix_ are considered to be plugins. The _data_ is a user specified -context, passed to _load_plugin()_. Directories and the walk order are the same -as in _tep_load_plugins()_ API. - -RETURN VALUE ------------- -The _tep_load_plugins()_ function returns a list of successfully loaded plugins, -or NULL in case no plugins are loaded. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -struct tep_plugin_list *plugins = tep_load_plugins(tep); -if (plugins == NULL) { - /* no plugins are loaded */ -} -... -tep_unload_plugins(plugins, tep); -... -void print_plugin(struct tep_handle *tep, const char *path, - const char *name, void *data) -{ - pritnf("Found libtraceevent plugin %s/%s\n", path, name); -} -... -tep_load_plugins_hook(tep, ".so", print_plugin, NULL); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt b/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt deleted file mode 100644 index e9a69116c78b..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt +++ /dev/null @@ -1,137 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_data_type, tep_data_pid,tep_data_preempt_count, tep_data_flags - -Extract common fields from a record. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -enum *trace_flag_type* { - _TRACE_FLAG_IRQS_OFF_, - _TRACE_FLAG_IRQS_NOSUPPORT_, - _TRACE_FLAG_NEED_RESCHED_, - _TRACE_FLAG_HARDIRQ_, - _TRACE_FLAG_SOFTIRQ_, -}; - -int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); -int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); -int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); -int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); --- - -DESCRIPTION ------------ -This set of functions can be used to extract common fields from a record. - -The _tep_data_type()_ function gets the event id from the record _rec_. -It reads the "common_type" field. The _tep_ argument is the trace event parser -context. - -The _tep_data_pid()_ function gets the process id from the record _rec_. -It reads the "common_pid" field. The _tep_ argument is the trace event parser -context. - -The _tep_data_preempt_count()_ function gets the preemption count from the -record _rec_. It reads the "common_preempt_count" field. The _tep_ argument is -the trace event parser context. - -The _tep_data_flags()_ function gets the latency flags from the record _rec_. -It reads the "common_flags" field. The _tep_ argument is the trace event parser -context. Supported latency flags are: -[verse] --- - _TRACE_FLAG_IRQS_OFF_, Interrupts are disabled. - _TRACE_FLAG_IRQS_NOSUPPORT_, Reading IRQ flag is not supported by the architecture. - _TRACE_FLAG_NEED_RESCHED_, Task needs rescheduling. - _TRACE_FLAG_HARDIRQ_, Hard IRQ is running. - _TRACE_FLAG_SOFTIRQ_, Soft IRQ is running. --- - -RETURN VALUE ------------- -The _tep_data_type()_ function returns an integer, representing the event id. - -The _tep_data_pid()_ function returns an integer, representing the process id - -The _tep_data_preempt_count()_ function returns an integer, representing the -preemption count. - -The _tep_data_flags()_ function returns an integer, representing the latency -flags. Look at the _trace_flag_type_ enum for supported flags. - -All these functions in case of an error return a negative integer. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -void process_record(struct tep_record *record) -{ - int data; - - data = tep_data_type(tep, record); - if (data >= 0) { - /* Got the ID of the event */ - } - - data = tep_data_pid(tep, record); - if (data >= 0) { - /* Got the process ID */ - } - - data = tep_data_preempt_count(tep, record); - if (data >= 0) { - /* Got the preemption count */ - } - - data = tep_data_flags(tep, record); - if (data >= 0) { - /* Got the latency flags */ - } -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt deleted file mode 100644 index 53d37d72a1c1..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt +++ /dev/null @@ -1,156 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_register_event_handler, tep_unregister_event_handler - Register / -unregisters a callback function to parse an event information. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -enum *tep_reg_handler* { - _TEP_REGISTER_SUCCESS_, - _TEP_REGISTER_SUCCESS_OVERWRITE_, -}; - -int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_); -int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_); - -typedef int (*pass:[*]tep_event_handler_func*)(struct trace_seq pass:[*]s, struct tep_record pass:[*]record, struct tep_event pass:[*]event, void pass:[*]context); --- - -DESCRIPTION ------------ -The _tep_register_event_handler()_ function registers a handler function, -which is going to be called to parse the information for a given event. -The _tep_ argument is the trace event parser context. The _id_ argument is -the id of the event. The _sys_name_ argument is the name of the system, -the event belongs to. The _event_name_ argument is the name of the event. -If _id_ is >= 0, it is used to find the event, otherwise _sys_name_ and -_event_name_ are used. The _func_ is a pointer to the function, which is going -to be called to parse the event information. The _context_ argument is a pointer -to the context data, which will be passed to the _func_. If a handler function -for the same event is already registered, it will be overridden with the new -one. This mechanism allows a developer to override the parsing of a given event. -If for some reason the default print format is not sufficient, the developer -can register a function for an event to be used to parse the data instead. - -The _tep_unregister_event_handler()_ function unregisters the handler function, -previously registered with _tep_register_event_handler()_. The _tep_ argument -is the trace event parser context. The _id_, _sys_name_, _event_name_, _func_, -and _context_ are the same arguments, as when the callback function _func_ was -registered. - -The _tep_event_handler_func_ is the type of the custom event handler -function. The _s_ argument is the trace sequence, it can be used to create a -custom string, describing the event. A _record_ to get the event from is passed -as input parameter and also the _event_ - the handle to the record's event. The -_context_ is custom context, set when the custom event handler is registered. - -RETURN VALUE ------------- -The _tep_register_event_handler()_ function returns _TEP_REGISTER_SUCCESS_ -if the new handler is registered successfully or -_TEP_REGISTER_SUCCESS_OVERWRITE_ if an existing handler is overwritten. -If there is not enough memory to complete the registration, -TEP_ERRNO__MEM_ALLOC_FAILED is returned. - -The _tep_unregister_event_handler()_ function returns 0 if _func_ was removed -successful or, -1 if the event was not found. - -The _tep_event_handler_func_ should return -1 in case of an error, -or 0 otherwise. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -#include <trace-seq.h> -... -struct tep_handle *tep = tep_alloc(); -... -int timer_expire_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - trace_seq_printf(s, "hrtimer="); - - if (tep_print_num_field(s, "0x%llx", event, "timer", record, 0) == -1) - tep_print_num_field(s, "0x%llx", event, "hrtimer", record, 1); - - trace_seq_printf(s, " now="); - - tep_print_num_field(s, "%llu", event, "now", record, 1); - - tep_print_func_field(s, " function=%s", event, "function", record, 0); - - return 0; -} -... - int ret; - - ret = tep_register_event_handler(tep, -1, "timer", "hrtimer_expire_entry", - timer_expire_handler, NULL); - if (ret < 0) { - char buf[32]; - - tep_strerror(tep, ret, buf, 32) - printf("Failed to register handler for hrtimer_expire_entry: %s\n", buf); - } else { - switch (ret) { - case TEP_REGISTER_SUCCESS: - printf ("Registered handler for hrtimer_expire_entry\n"); - break; - case TEP_REGISTER_SUCCESS_OVERWRITE: - printf ("Overwrote handler for hrtimer_expire_entry\n"); - break; - } - } -... - ret = tep_unregister_event_handler(tep, -1, "timer", "hrtimer_expire_entry", - timer_expire_handler, NULL); - if ( ret ) - printf ("Failed to unregister handler for hrtimer_expire_entry\n"); - --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences - related APIs. Trace sequences are used to allow a function to call - several other functions to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt deleted file mode 100644 index 708dce91ebd8..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt +++ /dev/null @@ -1,155 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_register_print_function,tep_unregister_print_function - -Registers / Unregisters a helper function. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -enum *tep_func_arg_type* { - TEP_FUNC_ARG_VOID, - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_LONG, - TEP_FUNC_ARG_STRING, - TEP_FUNC_ARG_PTR, - TEP_FUNC_ARG_MAX_TYPES -}; - -typedef unsigned long long (*pass:[*]tep_func_handler*)(struct trace_seq pass:[*]s, unsigned long long pass:[*]args); - -int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._); -int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_); --- - -DESCRIPTION ------------ -Some events may have helper functions in the print format arguments. -This allows a plugin to dynamically create a way to process one of -these functions. - -The _tep_register_print_function()_ registers such helper function. The _tep_ -argument is the trace event parser context. The _func_ argument is a pointer -to the helper function. The _ret_type_ argument is the return type of the -helper function, value from the _tep_func_arg_type_ enum. The _name_ is the name -of the helper function, as seen in the print format arguments. The _..._ is a -variable list of _tep_func_arg_type_ enums, the _func_ function arguments. -This list must end with _TEP_FUNC_ARG_VOID_. See 'EXAMPLE' section. - -The _tep_unregister_print_function()_ unregisters a helper function, previously -registered with _tep_register_print_function()_. The _tep_ argument is the -trace event parser context. The _func_ and _name_ arguments are the same, used -when the helper function was registered. - -The _tep_func_handler_ is the type of the helper function. The _s_ argument is -the trace sequence, it can be used to create a custom string. -The _args_ is a list of arguments, defined when the helper function was -registered. - -RETURN VALUE ------------- -The _tep_register_print_function()_ function returns 0 in case of success. -In case of an error, TEP_ERRNO_... code is returned. - -The _tep_unregister_print_function()_ returns 0 in case of success, or -1 in -case of an error. - -EXAMPLE -------- -Some events have internal functions calls, that appear in the print format -output. For example "tracefs/events/i915/g4x_wm/format" has: -[source,c] --- -print fmt: "pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s", - ((REC->pipe) + 'A'), REC->frame, REC->scanline, REC->primary, - REC->sprite, REC->cursor, yesno(REC->cxsr), REC->sr_plane, - REC->sr_cursor, REC->sr_fbc, yesno(REC->hpll), REC->hpll_plane, - REC->hpll_cursor, REC->hpll_fbc, yesno(REC->fbc) --- -Notice the call to function _yesno()_ in the print arguments. In the kernel -context, this function has the following implementation: -[source,c] --- -static const char *yesno(int x) -{ - static const char *yes = "yes"; - static const char *no = "no"; - - return x ? yes : no; -} --- -The user space event parser has no idea how to handle this _yesno()_ function. -The _tep_register_print_function()_ API can be used to register a user space -helper function, mapped to the kernel's _yesno()_: -[source,c] --- -#include <event-parse.h> -#include <trace-seq.h> -... -struct tep_handle *tep = tep_alloc(); -... -static const char *yes_no_helper(int x) -{ - return x ? "yes" : "no"; -} -... - if ( tep_register_print_function(tep, - yes_no_helper, - TEP_FUNC_ARG_STRING, - "yesno", - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_VOID) != 0) { - /* Failed to register yes_no_helper function */ - } - -/* - Now, when the event parser encounters this yesno() function, it will know - how to handle it. -*/ -... - if (tep_unregister_print_function(tep, yes_no_helper, "yesno") != 0) { - /* Failed to unregister yes_no_helper function */ - } --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences - related APIs. Trace sequences are used to allow a function to call - several other functions to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt b/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt deleted file mode 100644 index b0599780b9a6..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt +++ /dev/null @@ -1,104 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_set_flag, tep_clear_flag, tep_test_flag - -Manage flags of trace event parser context. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -enum *tep_flag* { - _TEP_NSEC_OUTPUT_, - _TEP_DISABLE_SYS_PLUGINS_, - _TEP_DISABLE_PLUGINS_ -}; -void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); -void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); -bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); --- - -DESCRIPTION ------------ -Trace event parser context flags are defined in *enum tep_flag*: -[verse] --- -_TEP_NSEC_OUTPUT_ - print event's timestamp in nano seconds, instead of micro seconds. -_TEP_DISABLE_SYS_PLUGINS_ - disable plugins, located in system's plugin - directory. This directory is defined at library compile - time, and usually depends on library installation - prefix: (install_preffix)/lib/traceevent/plugins -_TEP_DISABLE_PLUGINS_ - disable all library plugins: - - in system's plugin directory - - in directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ - - in user's home directory, _~/.traceevent/plugins_ --- -Note: plugin related flags must me set before calling _tep_load_plugins()_ API. - -The _tep_set_flag()_ function sets _flag_ to _tep_ context. - -The _tep_clear_flag()_ function clears _flag_ from _tep_ context. - -The _tep_test_flag()_ function tests if _flag_ is set to _tep_ context. - -RETURN VALUE ------------- -_tep_test_flag()_ function returns true if _flag_ is set, false otherwise. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -/* Print timestamps in nanoseconds */ -tep_set_flag(tep, TEP_NSEC_OUTPUT); -... -if (tep_test_flag(tep, TEP_NSEC_OUTPUT)) { - /* print timestamps in nanoseconds */ -} else { - /* print timestamps in microseconds */ -} -... -/* Print timestamps in microseconds */ -tep_clear_flag(tep, TEP_NSEC_OUTPUT); -... --- -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt b/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt deleted file mode 100644 index ee4062a00c9f..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt +++ /dev/null @@ -1,85 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -tep_strerror - Returns a string describing regular errno and tep error number. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_); - --- -DESCRIPTION ------------ -The _tep_strerror()_ function converts tep error number into a human -readable string. -The _tep_ argument is trace event parser context. The _errnum_ is a regular -errno, defined in errno.h, or a tep error number. The string, describing this -error number is copied in the _buf_ argument. The _buflen_ argument is -the size of the _buf_. - -It as a thread safe wrapper around strerror_r(). The library function has two -different behaviors - POSIX and GNU specific. The _tep_strerror()_ API always -behaves as the POSIX version - the error string is copied in the user supplied -buffer. - -RETURN VALUE ------------- -The _tep_strerror()_ function returns 0, if a valid _errnum_ is passed and the -string is copied into _buf_. If _errnum_ is not a valid error number, --1 is returned and _buf_ is not modified. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -... -struct tep_handle *tep = tep_alloc(); -... -char buf[32]; -char *pool = calloc(1, 128); -if (tep == NULL) { - tep_strerror(tep, TEP_ERRNO__MEM_ALLOC_FAILED, buf, 32); - printf ("The pool is not initialized, %s", buf); -} -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt b/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt deleted file mode 100644 index 8ac6aa174e12..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt +++ /dev/null @@ -1,158 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -trace_seq_init, trace_seq_destroy, trace_seq_reset, trace_seq_terminate, -trace_seq_putc, trace_seq_puts, trace_seq_printf, trace_seq_vprintf, -trace_seq_do_fprintf, trace_seq_do_printf - -Initialize / destroy a trace sequence. - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* -*#include <trace-seq.h>* - -void *trace_seq_init*(struct trace_seq pass:[*]_s_); -void *trace_seq_destroy*(struct trace_seq pass:[*]_s_); -void *trace_seq_reset*(struct trace_seq pass:[*]_s_); -void *trace_seq_terminate*(struct trace_seq pass:[*]_s_); -int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_); -int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_); -int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, _..._); -int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_); -int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_); -int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_); --- - -DESCRIPTION ------------ -Trace sequences are used to allow a function to call several other functions -to create a string of data to use. - -The _trace_seq_init()_ function initializes the trace sequence _s_. - -The _trace_seq_destroy()_ function destroys the trace sequence _s_ and frees -all its resources that it had used. - -The _trace_seq_reset()_ function re-initializes the trace sequence _s_. All -characters already written in _s_ will be deleted. - -The _trace_seq_terminate()_ function terminates the trace sequence _s_. It puts -the null character pass:['\0'] at the end of the buffer. - -The _trace_seq_putc()_ function puts a single character _c_ in the trace -sequence _s_. - -The _trace_seq_puts()_ function puts a NULL terminated string _str_ in the -trace sequence _s_. - -The _trace_seq_printf()_ function puts a formated string _fmt _with -variable arguments _..._ in the trace sequence _s_. - -The _trace_seq_vprintf()_ function puts a formated string _fmt _with -list of arguments _args_ in the trace sequence _s_. - -The _trace_seq_do_printf()_ function prints the buffer of trace sequence _s_ to -the standard output stdout. - -The _trace_seq_do_fprintf()_ function prints the buffer of trace sequence _s_ -to the given file _fp_. - -RETURN VALUE ------------- -Both _trace_seq_putc()_ and _trace_seq_puts()_ functions return the number of -characters put in the trace sequence, or 0 in case of an error - -Both _trace_seq_printf()_ and _trace_seq_vprintf()_ functions return 0 if the -trace oversizes the buffer's free space, the number of characters printed, or -a negative value in case of an error. - -Both _trace_seq_do_printf()_ and _trace_seq_do_fprintf()_ functions return the -number of printed characters, or -1 in case of an error. - -EXAMPLE -------- -[source,c] --- -#include <event-parse.h> -#include <trace-seq.h> -... -struct trace_seq seq; -trace_seq_init(&seq); -... -void foo_seq_print(struct trace_seq *tseq, char *format, ...) -{ - va_list ap; - va_start(ap, format); - if (trace_seq_vprintf(tseq, format, ap) <= 0) { - /* Failed to print in the trace sequence */ - } - va_end(ap); -} - -trace_seq_reset(&seq); - -char *str = " MAN page example"; -if (trace_seq_puts(&seq, str) != strlen(str)) { - /* Failed to put str in the trace sequence */ -} -if (trace_seq_putc(&seq, ':') != 1) { - /* Failed to put ':' in the trace sequence */ -} -if (trace_seq_printf(&seq, " trace sequence: %d", 1) <= 0) { - /* Failed to print in the trace sequence */ -} -foo_seq_print( &seq, " %d\n", 2); - -trace_seq_terminate(&seq); -... - -if (trace_seq_do_printf(&seq) < 0 ) { - /* Failed to print the sequence buffer to the standard output */ -} -FILE *fp = fopen("trace.txt", "w"); -if (trace_seq_do_fprintf(&seq, fp) < 0 ) [ - /* Failed to print the sequence buffer to the trace.txt file */ -} - -trace_seq_destroy(&seq); -... --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_libtraceevent(3)_, _trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/libtraceevent.txt b/tools/lib/traceevent/Documentation/libtraceevent.txt deleted file mode 100644 index d530a7ce8fb2..000000000000 --- a/tools/lib/traceevent/Documentation/libtraceevent.txt +++ /dev/null @@ -1,192 +0,0 @@ -libtraceevent(3) -================ - -NAME ----- -libtraceevent - Linux kernel trace event library - -SYNOPSIS --------- -[verse] --- -*#include <event-parse.h>* - -Management of tep handler data structure and access of its members: - struct tep_handle pass:[*]*tep_alloc*(void); - void *tep_free*(struct tep_handle pass:[*]_tep_); - void *tep_ref*(struct tep_handle pass:[*]_tep_); - void *tep_unref*(struct tep_handle pass:[*]_tep_); - int *tep_get_ref*(struct tep_handle pass:[*]_tep_); - void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); - void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); - bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_); - int *tep_get_cpus*(struct tep_handle pass:[*]_tep_); - void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_); - int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_); - void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_); - int *tep_get_page_size*(struct tep_handle pass:[*]_tep_); - void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_); - int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_); - int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_); - bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_); - int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_); - -Register / unregister APIs: - int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_); - int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_); - int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_); - int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_); - int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._); - int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_); - -Plugins management: - struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); - void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); - char pass:[*]pass:[*]*tep_plugin_list_options*(void); - void *tep_plugin_free_options_list*(char pass:[*]pass:[*]_list_); - int *tep_plugin_add_options*(const char pass:[*]_name_, struct tep_plugin_option pass:[*]_options_); - void *tep_plugin_remove_options*(struct tep_plugin_option pass:[*]_options_); - void *tep_print_plugins*(struct trace_seq pass:[*]_s_, const char pass:[*]_prefix_, const char pass:[*]_suffix_, const struct tep_plugin_list pass:[*]_list_); - -Event related APIs: - struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_); - struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_); - int *tep_get_events_count*(struct tep_handle pass:[*]_tep_); - struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); - struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); - void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._); - -Event finding: - struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_); - struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_); - struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_); - -Parsing of event files: - int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_); - enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); - enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_); - -APIs related to fields from event's format files: - struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_); - struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_); - void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_); - int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); - int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); - int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_); - int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_); - -Event fields printing: - void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_); - void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_); - int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); - int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_); - -Event fields finding: - struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); - struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_); - struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_); - -Functions resolver: - int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_); - void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_); - const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); - unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_); - -Filter management: - struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_); - enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_); - enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_); - int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_); - int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_); - void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_); - void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_); - char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_); - int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_); - int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_); - int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_); - -Parsing various data from the records: - int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); - -Command and task related APIs: - const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_tep_, int _pid_); - struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_); - int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); - int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_); - bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_); - int *tep_cmdline_pid*(struct tep_handle pass:[*]_tep_, struct cmdline pass:[*]_cmdline_); - -Endian related APIs: - int *tep_is_bigendian*(void); - unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_); - bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_); - void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); - bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_); - void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_); - -Trace sequences: -*#include <trace-seq.h>* - void *trace_seq_init*(struct trace_seq pass:[*]_s_); - void *trace_seq_reset*(struct trace_seq pass:[*]_s_); - void *trace_seq_destroy*(struct trace_seq pass:[*]_s_); - int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, ...); - int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_); - int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_); - int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_); - void *trace_seq_terminate*(struct trace_seq pass:[*]_s_); - int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_); - int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_); --- - -DESCRIPTION ------------ -The libtraceevent(3) library provides APIs to access kernel tracepoint events, -located in the tracefs file system under the events directory. - -ENVIRONMENT ------------ -[verse] --- -TRACEEVENT_PLUGIN_DIR - Additional plugin directory. All shared object files, located in this directory will be loaded as traceevent plugins. --- - -FILES ------ -[verse] --- -*event-parse.h* - Header file to include in order to have access to the library APIs. -*trace-seq.h* - Header file to include in order to have access to trace sequences related APIs. - Trace sequences are used to allow a function to call several other functions - to create a string of data to use. -*-ltraceevent* - Linker switch to add when building a program that uses the library. --- - -SEE ALSO --------- -_trace-cmd(1)_ - -AUTHOR ------- -[verse] --- -*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*. -*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page. --- -REPORTING BUGS --------------- -Report bugs to <linux-trace-devel@vger.kernel.org> - -LICENSE -------- -libtraceevent is Free Software licensed under the GNU LGPL 2.1 - -RESOURCES ---------- -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git diff --git a/tools/lib/traceevent/Documentation/manpage-1.72.xsl b/tools/lib/traceevent/Documentation/manpage-1.72.xsl deleted file mode 100644 index b4d315cb8c47..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-1.72.xsl +++ /dev/null @@ -1,14 +0,0 @@ -<!-- manpage-1.72.xsl: - special settings for manpages rendered from asciidoc+docbook - handles peculiarities in docbook-xsl 1.72.0 --> -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - version="1.0"> - -<xsl:import href="manpage-base.xsl"/> - -<!-- these are the special values for the roff control characters - needed for docbook-xsl 1.72.0 --> -<xsl:param name="git.docbook.backslash">▓</xsl:param> -<xsl:param name="git.docbook.dot" >⌂</xsl:param> - -</xsl:stylesheet> diff --git a/tools/lib/traceevent/Documentation/manpage-base.xsl b/tools/lib/traceevent/Documentation/manpage-base.xsl deleted file mode 100644 index a264fa616093..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-base.xsl +++ /dev/null @@ -1,35 +0,0 @@ -<!-- manpage-base.xsl: - special formatting for manpages rendered from asciidoc+docbook --> -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - version="1.0"> - -<!-- these params silence some output from xmlto --> -<xsl:param name="man.output.quietly" select="1"/> -<xsl:param name="refentry.meta.get.quietly" select="1"/> - -<!-- convert asciidoc callouts to man page format; - git.docbook.backslash and git.docbook.dot params - must be supplied by another XSL file or other means --> -<xsl:template match="co"> - <xsl:value-of select="concat( - $git.docbook.backslash,'fB(', - substring-after(@id,'-'),')', - $git.docbook.backslash,'fR')"/> -</xsl:template> -<xsl:template match="calloutlist"> - <xsl:value-of select="$git.docbook.dot"/> - <xsl:text>sp </xsl:text> - <xsl:apply-templates/> - <xsl:text> </xsl:text> -</xsl:template> -<xsl:template match="callout"> - <xsl:value-of select="concat( - $git.docbook.backslash,'fB', - substring-after(@arearefs,'-'), - '. ',$git.docbook.backslash,'fR')"/> - <xsl:apply-templates/> - <xsl:value-of select="$git.docbook.dot"/> - <xsl:text>br </xsl:text> -</xsl:template> - -</xsl:stylesheet> diff --git a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl b/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl deleted file mode 100644 index 608eb5df6281..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl +++ /dev/null @@ -1,17 +0,0 @@ -<!-- manpage-bold-literal.xsl: - special formatting for manpages rendered from asciidoc+docbook --> -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - version="1.0"> - -<!-- render literal text as bold (instead of plain or monospace); - this makes literal text easier to distinguish in manpages - viewed on a tty --> -<xsl:template match="literal"> - <xsl:value-of select="$git.docbook.backslash"/> - <xsl:text>fB</xsl:text> - <xsl:apply-templates/> - <xsl:value-of select="$git.docbook.backslash"/> - <xsl:text>fR</xsl:text> -</xsl:template> - -</xsl:stylesheet> diff --git a/tools/lib/traceevent/Documentation/manpage-normal.xsl b/tools/lib/traceevent/Documentation/manpage-normal.xsl deleted file mode 100644 index a48f5b11f3dc..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-normal.xsl +++ /dev/null @@ -1,13 +0,0 @@ -<!-- manpage-normal.xsl: - special settings for manpages rendered from asciidoc+docbook - handles anything we want to keep away from docbook-xsl 1.72.0 --> -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - version="1.0"> - -<xsl:import href="manpage-base.xsl"/> - -<!-- these are the normal values for the roff control characters --> -<xsl:param name="git.docbook.backslash">\</xsl:param> -<xsl:param name="git.docbook.dot" >.</xsl:param> - -</xsl:stylesheet> diff --git a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl b/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl deleted file mode 100644 index a63c7632a87d..000000000000 --- a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl +++ /dev/null @@ -1,21 +0,0 @@ -<!-- manpage-suppress-sp.xsl: - special settings for manpages rendered from asciidoc+docbook - handles erroneous, inline .sp in manpage output of some - versions of docbook-xsl --> -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - version="1.0"> - -<!-- attempt to work around spurious .sp at the tail of the line - that some versions of docbook stylesheets seem to add --> -<xsl:template match="simpara"> - <xsl:variable name="content"> - <xsl:apply-templates/> - </xsl:variable> - <xsl:value-of select="normalize-space($content)"/> - <xsl:if test="not(ancestor::authorblurb) and - not(ancestor::personblurb)"> - <xsl:text> </xsl:text> - </xsl:if> -</xsl:template> - -</xsl:stylesheet> diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile deleted file mode 100644 index c874c017c636..000000000000 --- a/tools/lib/traceevent/Makefile +++ /dev/null @@ -1,300 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# trace-cmd version -EP_VERSION = 1 -EP_PATCHLEVEL = 1 -EP_EXTRAVERSION = 0 - -# file format version -FILE_VERSION = 6 - -MAKEFLAGS += --no-print-directory - - -# Makefiles suck: This macro sets a default value of $(2) for the -# variable named by $(1), unless the variable has been set by -# environment or command line. This is necessary for CC and AR -# because make sets default values, so the simpler ?= approach -# won't work as expected. -define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) -$(call allow-override,NM,$(CROSS_COMPILE)nm) -$(call allow-override,PKG_CONFIG,pkg-config) - -EXT = -std=gnu99 -INSTALL = install - -# Use DESTDIR for installing into a different root directory. -# This is useful for building a package. The program will be -# installed in this directory as if it was the root directory. -# Then the build tool can move it later. -DESTDIR ?= -DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' - -LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) -ifeq ($(LP64), 1) - libdir_relative_temp = lib64 -else - libdir_relative_temp = lib -endif - -libdir_relative ?= $(libdir_relative_temp) -prefix ?= /usr/local -libdir = $(prefix)/$(libdir_relative) -man_dir = $(prefix)/share/man -man_dir_SQ = '$(subst ','\'',$(man_dir))' -pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) \ - --variable pc_path pkg-config | tr ":" " ")) -includedir_relative = traceevent -includedir = $(prefix)/include/$(includedir_relative) -includedir_SQ = '$(subst ','\'',$(includedir))' - -export man_dir man_dir_SQ INSTALL -export DESTDIR DESTDIR_SQ -export EVENT_PARSE_VERSION - -include ../../scripts/Makefile.include - -# copy a bit from Linux kbuild - -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -export prefix libdir src obj - -# Shell quotes -libdir_SQ = $(subst ','\'',$(libdir)) -libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) - -CONFIG_INCLUDES = -CONFIG_LIBS = -CONFIG_FLAGS = - -VERSION = $(EP_VERSION) -PATCHLEVEL = $(EP_PATCHLEVEL) -EXTRAVERSION = $(EP_EXTRAVERSION) - -OBJ = $@ -N = - -EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) - -LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION) -LIB_INSTALL = libtraceevent.a libtraceevent.so* -LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL)) - -INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) - -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - -# Append required CFLAGS -override CFLAGS += -fPIC -override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) -override CFLAGS += $(udis86-flags) -D_GNU_SOURCE - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - -# Disable command line variables (CFLAGS) override from top -# level Makefile (perf), otherwise build Makefile will get -# the same command line setup. -MAKEOVERRIDES= - -export srctree OUTPUT CC LD CFLAGS V -build := -f $(srctree)/tools/build/Makefile.build dir=. obj - -TE_IN := $(OUTPUT)libtraceevent-in.o -LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) - -CMD_TARGETS = $(LIB_TARGET) - -TARGETS = $(CMD_TARGETS) - -all: all_cmd plugins - -all_cmd: $(CMD_TARGETS) - -$(TE_IN): force - $(Q)$(MAKE) $(build)=libtraceevent - -$(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN) - $(QUIET_LINK)$(CC) --shared $(LDFLAGS) $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@ - @ln -sf $(@F) $(OUTPUT)libtraceevent.so - @ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION) - -$(OUTPUT)libtraceevent.a: $(TE_IN) - $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ - -$(OUTPUT)%.so: $(OUTPUT)%-in.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^ - -define make_version.h - (echo '/* This file is automatically generated. Do not modify. */'; \ - echo \#define VERSION_CODE $(shell \ - expr $(VERSION) \* 256 + $(PATCHLEVEL)); \ - echo '#define EXTRAVERSION ' $(EXTRAVERSION); \ - echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \ - echo '#define FILE_VERSION '$(FILE_VERSION); \ - ) > $1 -endef - -define update_version.h - ($(call make_version.h, $@.tmp); \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - -ep_version.h: force - $(Q)$(N)$(call update_version.h) - -VERSION_FILES = ep_version.h - -define update_dir - (echo $1 > $@.tmp; \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - -tags: force - $(RM) tags - find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ - --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/' - -TAGS: force - $(RM) TAGS - find . -name '*.[ch]' | xargs etags \ - --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/' - -define do_install_mkdir - if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ - fi -endef - -define do_install - $(call do_install_mkdir,$2); \ - $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' -endef - -PKG_CONFIG_SOURCE_FILE = libtraceevent.pc -PKG_CONFIG_FILE := $(addprefix $(OUTPUT),$(PKG_CONFIG_SOURCE_FILE)) -define do_install_pkgconfig_file - if [ -n "${pkgconfig_dir}" ]; then \ - cp -f ${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \ - sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \ - sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \ - sed -i "s|LIB_DIR|${libdir}|g" ${PKG_CONFIG_FILE}; \ - sed -i "s|HEADER_DIR|$(includedir)|g" ${PKG_CONFIG_FILE}; \ - $(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); \ - else \ - (echo Failed to locate pkg-config directory) 1>&2; \ - fi -endef - -install_lib: all_cmd install_plugins install_headers install_pkgconfig - $(call QUIET_INSTALL, $(LIB_TARGET)) \ - $(call do_install_mkdir,$(libdir_SQ)); \ - cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ) - -install_pkgconfig: - $(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \ - $(call do_install_pkgconfig_file,$(prefix)) - -install_headers: - $(call QUIET_INSTALL, headers) \ - $(call do_install,event-parse.h,$(includedir_SQ),644); \ - $(call do_install,event-utils.h,$(includedir_SQ),644); \ - $(call do_install,trace-seq.h,$(includedir_SQ),644); \ - $(call do_install,kbuffer.h,$(includedir_SQ),644) - -install: install_lib - -clean: clean_plugins - $(call QUIET_CLEAN, libtraceevent) \ - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \ - $(RM) TRACEEVENT-CFLAGS tags TAGS; \ - $(RM) $(PKG_CONFIG_FILE) - -PHONY += doc -doc: - $(call descend,Documentation) - -PHONY += doc-clean -doc-clean: - $(call descend,Documentation,clean) - -PHONY += doc-install -doc-install: - $(call descend,Documentation,install) - -PHONY += doc-uninstall -doc-uninstall: - $(call descend,Documentation,uninstall) - -PHONY += help -help: - @echo 'Possible targets:' - @echo'' - @echo ' all - default, compile the library and the'\ - 'plugins' - @echo ' plugins - compile the plugins' - @echo ' install - install the library, the plugins,'\ - 'the header and pkgconfig files' - @echo ' clean - clean the library and the plugins object files' - @echo ' doc - compile the documentation files - man'\ - 'and html pages, in the Documentation directory' - @echo ' doc-clean - clean the documentation files' - @echo ' doc-install - install the man pages' - @echo ' doc-uninstall - uninstall the man pages' - @echo'' - -PHONY += plugins -plugins: - $(call descend,plugins) - -PHONY += install_plugins -install_plugins: - $(call descend,plugins,install) - -PHONY += clean_plugins -clean_plugins: - $(call descend,plugins,clean) - -force: - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable so we can use it in if_changed and friends. -.PHONY: $(PHONY) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c deleted file mode 100644 index f8361e45d446..000000000000 --- a/tools/lib/traceevent/event-parse-api.c +++ /dev/null @@ -1,333 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ - -#include "event-parse.h" -#include "event-parse-local.h" -#include "event-utils.h" - -/** - * tep_get_event - returns the event with the given index - * @tep: a handle to the tep_handle - * @index: index of the requested event, in the range 0 .. nr_events - * - * This returns pointer to the element of the events array with the given index - * If @tep is NULL, or @index is not in the range 0 .. nr_events, NULL is returned. - */ -struct tep_event *tep_get_event(struct tep_handle *tep, int index) -{ - if (tep && tep->events && index < tep->nr_events) - return tep->events[index]; - - return NULL; -} - -/** - * tep_get_first_event - returns the first event in the events array - * @tep: a handle to the tep_handle - * - * This returns pointer to the first element of the events array - * If @tep is NULL, NULL is returned. - */ -struct tep_event *tep_get_first_event(struct tep_handle *tep) -{ - return tep_get_event(tep, 0); -} - -/** - * tep_get_events_count - get the number of defined events - * @tep: a handle to the tep_handle - * - * This returns number of elements in event array - * If @tep is NULL, 0 is returned. - */ -int tep_get_events_count(struct tep_handle *tep) -{ - if (tep) - return tep->nr_events; - return 0; -} - -/** - * tep_set_flag - set event parser flag - * @tep: a handle to the tep_handle - * @flag: flag, or combination of flags to be set - * can be any combination from enum tep_flag - * - * This sets a flag or combination of flags from enum tep_flag - */ -void tep_set_flag(struct tep_handle *tep, int flag) -{ - if (tep) - tep->flags |= flag; -} - -/** - * tep_clear_flag - clear event parser flag - * @tep: a handle to the tep_handle - * @flag: flag to be cleared - * - * This clears a tep flag - */ -void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag) -{ - if (tep) - tep->flags &= ~flag; -} - -/** - * tep_test_flag - check the state of event parser flag - * @tep: a handle to the tep_handle - * @flag: flag to be checked - * - * This returns the state of the requested tep flag. - * Returns: true if the flag is set, false otherwise. - */ -bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag) -{ - if (tep) - return tep->flags & flag; - return false; -} - -__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data) -{ - unsigned short swap; - - if (!tep || tep->host_bigendian == tep->file_bigendian) - return data; - - swap = ((data & 0xffULL) << 8) | - ((data & (0xffULL << 8)) >> 8); - - return swap; -} - -__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data) -{ - unsigned int swap; - - if (!tep || tep->host_bigendian == tep->file_bigendian) - return data; - - swap = ((data & 0xffULL) << 24) | - ((data & (0xffULL << 8)) << 8) | - ((data & (0xffULL << 16)) >> 8) | - ((data & (0xffULL << 24)) >> 24); - - return swap; -} - -__hidden unsigned long long -data2host8(struct tep_handle *tep, unsigned long long data) -{ - unsigned long long swap; - - if (!tep || tep->host_bigendian == tep->file_bigendian) - return data; - - swap = ((data & 0xffULL) << 56) | - ((data & (0xffULL << 8)) << 40) | - ((data & (0xffULL << 16)) << 24) | - ((data & (0xffULL << 24)) << 8) | - ((data & (0xffULL << 32)) >> 8) | - ((data & (0xffULL << 40)) >> 24) | - ((data & (0xffULL << 48)) >> 40) | - ((data & (0xffULL << 56)) >> 56); - - return swap; -} - -/** - * tep_get_header_page_size - get size of the header page - * @tep: a handle to the tep_handle - * - * This returns size of the header page - * If @tep is NULL, 0 is returned. - */ -int tep_get_header_page_size(struct tep_handle *tep) -{ - if (tep) - return tep->header_page_size_size; - return 0; -} - -/** - * tep_get_header_timestamp_size - get size of the timestamp in the header page - * @tep: a handle to the tep_handle - * - * This returns size of the timestamp in the header page - * If @tep is NULL, 0 is returned. - */ -int tep_get_header_timestamp_size(struct tep_handle *tep) -{ - if (tep) - return tep->header_page_ts_size; - return 0; -} - -/** - * tep_get_cpus - get the number of CPUs - * @tep: a handle to the tep_handle - * - * This returns the number of CPUs - * If @tep is NULL, 0 is returned. - */ -int tep_get_cpus(struct tep_handle *tep) -{ - if (tep) - return tep->cpus; - return 0; -} - -/** - * tep_set_cpus - set the number of CPUs - * @tep: a handle to the tep_handle - * - * This sets the number of CPUs - */ -void tep_set_cpus(struct tep_handle *tep, int cpus) -{ - if (tep) - tep->cpus = cpus; -} - -/** - * tep_get_long_size - get the size of a long integer on the traced machine - * @tep: a handle to the tep_handle - * - * This returns the size of a long integer on the traced machine - * If @tep is NULL, 0 is returned. - */ -int tep_get_long_size(struct tep_handle *tep) -{ - if (tep) - return tep->long_size; - return 0; -} - -/** - * tep_set_long_size - set the size of a long integer on the traced machine - * @tep: a handle to the tep_handle - * @size: size, in bytes, of a long integer - * - * This sets the size of a long integer on the traced machine - */ -void tep_set_long_size(struct tep_handle *tep, int long_size) -{ - if (tep) - tep->long_size = long_size; -} - -/** - * tep_get_page_size - get the size of a memory page on the traced machine - * @tep: a handle to the tep_handle - * - * This returns the size of a memory page on the traced machine - * If @tep is NULL, 0 is returned. - */ -int tep_get_page_size(struct tep_handle *tep) -{ - if (tep) - return tep->page_size; - return 0; -} - -/** - * tep_set_page_size - set the size of a memory page on the traced machine - * @tep: a handle to the tep_handle - * @_page_size: size of a memory page, in bytes - * - * This sets the size of a memory page on the traced machine - */ -void tep_set_page_size(struct tep_handle *tep, int _page_size) -{ - if (tep) - tep->page_size = _page_size; -} - -/** - * tep_is_file_bigendian - return the endian of the file - * @tep: a handle to the tep_handle - * - * This returns true if the file is in big endian order - * If @tep is NULL, false is returned. - */ -bool tep_is_file_bigendian(struct tep_handle *tep) -{ - if (tep) - return (tep->file_bigendian == TEP_BIG_ENDIAN); - return false; -} - -/** - * tep_set_file_bigendian - set if the file is in big endian order - * @tep: a handle to the tep_handle - * @endian: non zero, if the file is in big endian order - * - * This sets if the file is in big endian order - */ -void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian) -{ - if (tep) - tep->file_bigendian = endian; -} - -/** - * tep_is_local_bigendian - return the endian of the saved local machine - * @tep: a handle to the tep_handle - * - * This returns true if the saved local machine in @tep is big endian. - * If @tep is NULL, false is returned. - */ -bool tep_is_local_bigendian(struct tep_handle *tep) -{ - if (tep) - return (tep->host_bigendian == TEP_BIG_ENDIAN); - return 0; -} - -/** - * tep_set_local_bigendian - set the stored local machine endian order - * @tep: a handle to the tep_handle - * @endian: non zero, if the local host has big endian order - * - * This sets the endian order for the local machine. - */ -void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian) -{ - if (tep) - tep->host_bigendian = endian; -} - -/** - * tep_is_old_format - get if an old kernel is used - * @tep: a handle to the tep_handle - * - * This returns true, if an old kernel is used to generate the tracing events or - * false if a new kernel is used. Old kernels did not have header page info. - * If @tep is NULL, false is returned. - */ -bool tep_is_old_format(struct tep_handle *tep) -{ - if (tep) - return tep->old_format; - return false; -} - -/** - * tep_set_test_filters - set a flag to test a filter string - * @tep: a handle to the tep_handle - * @test_filters: the new value of the test_filters flag - * - * This sets a flag to test a filter string. If this flag is set, when - * tep_filter_add_filter_str() API as called,it will print the filter string - * instead of adding it. - */ -void tep_set_test_filters(struct tep_handle *tep, int test_filters) -{ - if (tep) - tep->test_filters = test_filters; -} diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h deleted file mode 100644 index fd4bbcfbb849..000000000000 --- a/tools/lib/traceevent/event-parse-local.h +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ - -#ifndef _PARSE_EVENTS_INT_H -#define _PARSE_EVENTS_INT_H - -struct tep_cmdline; -struct cmdline_list; -struct func_map; -struct func_list; -struct event_handler; -struct func_resolver; -struct tep_plugins_dir; - -#define __hidden __attribute__((visibility ("hidden"))) - -struct tep_handle { - int ref_count; - - int header_page_ts_offset; - int header_page_ts_size; - int header_page_size_offset; - int header_page_size_size; - int header_page_data_offset; - int header_page_data_size; - int header_page_overwrite; - - enum tep_endian file_bigendian; - enum tep_endian host_bigendian; - - int old_format; - - int cpus; - int long_size; - int page_size; - - struct tep_cmdline *cmdlines; - struct cmdline_list *cmdlist; - int cmdline_count; - - struct func_map *func_map; - struct func_resolver *func_resolver; - struct func_list *funclist; - unsigned int func_count; - - struct printk_map *printk_map; - struct printk_list *printklist; - unsigned int printk_count; - - struct tep_event **events; - int nr_events; - struct tep_event **sort_events; - enum tep_event_sort_type last_type; - - int type_offset; - int type_size; - - int pid_offset; - int pid_size; - - int pc_offset; - int pc_size; - - int flags_offset; - int flags_size; - - int ld_offset; - int ld_size; - - int test_filters; - - int flags; - - struct tep_format_field *bprint_ip_field; - struct tep_format_field *bprint_fmt_field; - struct tep_format_field *bprint_buf_field; - - struct event_handler *handlers; - struct tep_function_handler *func_handlers; - - /* cache */ - struct tep_event *last_event; - - struct tep_plugins_dir *plugins_dir; -}; - -enum tep_print_parse_type { - PRINT_FMT_STRING, - PRINT_FMT_ARG_DIGIT, - PRINT_FMT_ARG_POINTER, - PRINT_FMT_ARG_STRING, -}; - -struct tep_print_parse { - struct tep_print_parse *next; - - char *format; - int ls; - enum tep_print_parse_type type; - struct tep_print_arg *arg; - struct tep_print_arg *len_as_arg; -}; - -void free_tep_event(struct tep_event *event); -void free_tep_format_field(struct tep_format_field *field); -void free_tep_plugin_paths(struct tep_handle *tep); - -unsigned short data2host2(struct tep_handle *tep, unsigned short data); -unsigned int data2host4(struct tep_handle *tep, unsigned int data); -unsigned long long data2host8(struct tep_handle *tep, unsigned long long data); - -/* access to the internal parser */ -int peek_char(void); -void init_input_buf(const char *buf, unsigned long long size); -unsigned long long get_input_buf_ptr(void); -const char *get_input_buf(void); -enum tep_event_type read_token(char **tok); -void free_token(char *tok); - -#endif /* _PARSE_EVENTS_INT_H */ diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c deleted file mode 100644 index 8e24c4c78c7f..000000000000 --- a/tools/lib/traceevent/event-parse.c +++ /dev/null @@ -1,7624 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * - * The parts for function graph printing was taken and modified from the - * Linux Kernel that were written by - * - Copyright (C) 2009 Frederic Weisbecker, - * Frederic Weisbecker gave his permission to relicense the code to - * the Lesser General Public License. - */ -#include <inttypes.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <ctype.h> -#include <errno.h> -#include <stdint.h> -#include <limits.h> -#include <linux/time64.h> - -#include <netinet/in.h> -#include "event-parse.h" - -#include "event-parse-local.h" -#include "event-utils.h" -#include "trace-seq.h" - -static const char *input_buf; -static unsigned long long input_buf_ptr; -static unsigned long long input_buf_siz; - -static int is_flag_field; -static int is_symbolic_field; - -static int show_warning = 1; - -#define do_warning(fmt, ...) \ - do { \ - if (show_warning) \ - warning(fmt, ##__VA_ARGS__); \ - } while (0) - -#define do_warning_event(event, fmt, ...) \ - do { \ - if (!show_warning) \ - continue; \ - \ - if (event) \ - warning("[%s:%s] " fmt, event->system, \ - event->name, ##__VA_ARGS__); \ - else \ - warning(fmt, ##__VA_ARGS__); \ - } while (0) - -/** - * init_input_buf - init buffer for parsing - * @buf: buffer to parse - * @size: the size of the buffer - * - * Initializes the internal buffer that tep_read_token() will parse. - */ -__hidden void init_input_buf(const char *buf, unsigned long long size) -{ - input_buf = buf; - input_buf_siz = size; - input_buf_ptr = 0; -} - -__hidden const char *get_input_buf(void) -{ - return input_buf; -} - -__hidden unsigned long long get_input_buf_ptr(void) -{ - return input_buf_ptr; -} - -struct event_handler { - struct event_handler *next; - int id; - const char *sys_name; - const char *event_name; - tep_event_handler_func func; - void *context; -}; - -struct func_params { - struct func_params *next; - enum tep_func_arg_type type; -}; - -struct tep_function_handler { - struct tep_function_handler *next; - enum tep_func_arg_type ret_type; - char *name; - tep_func_handler func; - struct func_params *params; - int nr_args; -}; - -static unsigned long long -process_defined_func(struct trace_seq *s, void *data, int size, - struct tep_event *event, struct tep_print_arg *arg); - -static void free_func_handle(struct tep_function_handler *func); - -void breakpoint(void) -{ - static int x; - x++; -} - -static struct tep_print_arg *alloc_arg(void) -{ - return calloc(1, sizeof(struct tep_print_arg)); -} - -struct tep_cmdline { - char *comm; - int pid; -}; - -static int cmdline_cmp(const void *a, const void *b) -{ - const struct tep_cmdline *ca = a; - const struct tep_cmdline *cb = b; - - if (ca->pid < cb->pid) - return -1; - if (ca->pid > cb->pid) - return 1; - - return 0; -} - -/* Looking for where to place the key */ -static int cmdline_slot_cmp(const void *a, const void *b) -{ - const struct tep_cmdline *ca = a; - const struct tep_cmdline *cb = b; - const struct tep_cmdline *cb1 = cb + 1; - - if (ca->pid < cb->pid) - return -1; - - if (ca->pid > cb->pid) { - if (ca->pid <= cb1->pid) - return 0; - return 1; - } - - return 0; -} - -struct cmdline_list { - struct cmdline_list *next; - char *comm; - int pid; -}; - -static int cmdline_init(struct tep_handle *tep) -{ - struct cmdline_list *cmdlist = tep->cmdlist; - struct cmdline_list *item; - struct tep_cmdline *cmdlines; - int i; - - cmdlines = malloc(sizeof(*cmdlines) * tep->cmdline_count); - if (!cmdlines) - return -1; - - i = 0; - while (cmdlist) { - cmdlines[i].pid = cmdlist->pid; - cmdlines[i].comm = cmdlist->comm; - i++; - item = cmdlist; - cmdlist = cmdlist->next; - free(item); - } - - qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); - - tep->cmdlines = cmdlines; - tep->cmdlist = NULL; - - return 0; -} - -static const char *find_cmdline(struct tep_handle *tep, int pid) -{ - const struct tep_cmdline *comm; - struct tep_cmdline key; - - if (!pid) - return "<idle>"; - - if (!tep->cmdlines && cmdline_init(tep)) - return "<not enough memory for cmdlines!>"; - - key.pid = pid; - - comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, - sizeof(*tep->cmdlines), cmdline_cmp); - - if (comm) - return comm->comm; - return "<...>"; -} - -/** - * tep_is_pid_registered - return if a pid has a cmdline registered - * @tep: a handle to the trace event parser context - * @pid: The pid to check if it has a cmdline registered with. - * - * Returns true if the pid has a cmdline mapped to it - * false otherwise. - */ -bool tep_is_pid_registered(struct tep_handle *tep, int pid) -{ - const struct tep_cmdline *comm; - struct tep_cmdline key; - - if (!pid) - return true; - - if (!tep->cmdlines && cmdline_init(tep)) - return false; - - key.pid = pid; - - comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, - sizeof(*tep->cmdlines), cmdline_cmp); - - if (comm) - return true; - return false; -} - -/* - * If the command lines have been converted to an array, then - * we must add this pid. This is much slower than when cmdlines - * are added before the array is initialized. - */ -static int add_new_comm(struct tep_handle *tep, - const char *comm, int pid, bool override) -{ - struct tep_cmdline *cmdlines = tep->cmdlines; - struct tep_cmdline *cmdline; - struct tep_cmdline key; - char *new_comm; - int cnt; - - if (!pid) - return 0; - - /* avoid duplicates */ - key.pid = pid; - - cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count, - sizeof(*tep->cmdlines), cmdline_cmp); - if (cmdline) { - if (!override) { - errno = EEXIST; - return -1; - } - new_comm = strdup(comm); - if (!new_comm) { - errno = ENOMEM; - return -1; - } - free(cmdline->comm); - cmdline->comm = new_comm; - - return 0; - } - - cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (tep->cmdline_count + 1)); - if (!cmdlines) { - errno = ENOMEM; - return -1; - } - tep->cmdlines = cmdlines; - - key.comm = strdup(comm); - if (!key.comm) { - errno = ENOMEM; - return -1; - } - - if (!tep->cmdline_count) { - /* no entries yet */ - tep->cmdlines[0] = key; - tep->cmdline_count++; - return 0; - } - - /* Now find where we want to store the new cmdline */ - cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count - 1, - sizeof(*tep->cmdlines), cmdline_slot_cmp); - - cnt = tep->cmdline_count; - if (cmdline) { - /* cmdline points to the one before the spot we want */ - cmdline++; - cnt -= cmdline - tep->cmdlines; - - } else { - /* The new entry is either before or after the list */ - if (key.pid > tep->cmdlines[tep->cmdline_count - 1].pid) { - tep->cmdlines[tep->cmdline_count++] = key; - return 0; - } - cmdline = &tep->cmdlines[0]; - } - memmove(cmdline + 1, cmdline, (cnt * sizeof(*cmdline))); - *cmdline = key; - - tep->cmdline_count++; - - return 0; -} - -static int _tep_register_comm(struct tep_handle *tep, - const char *comm, int pid, bool override) -{ - struct cmdline_list *item; - - if (tep->cmdlines) - return add_new_comm(tep, comm, pid, override); - - item = malloc(sizeof(*item)); - if (!item) - return -1; - - if (comm) - item->comm = strdup(comm); - else - item->comm = strdup("<...>"); - if (!item->comm) { - free(item); - return -1; - } - item->pid = pid; - item->next = tep->cmdlist; - - tep->cmdlist = item; - tep->cmdline_count++; - - return 0; -} - -/** - * tep_register_comm - register a pid / comm mapping - * @tep: a handle to the trace event parser context - * @comm: the command line to register - * @pid: the pid to map the command line to - * - * This adds a mapping to search for command line names with - * a given pid. The comm is duplicated. If a command with the same pid - * already exist, -1 is returned and errno is set to EEXIST - */ -int tep_register_comm(struct tep_handle *tep, const char *comm, int pid) -{ - return _tep_register_comm(tep, comm, pid, false); -} - -/** - * tep_override_comm - register a pid / comm mapping - * @tep: a handle to the trace event parser context - * @comm: the command line to register - * @pid: the pid to map the command line to - * - * This adds a mapping to search for command line names with - * a given pid. The comm is duplicated. If a command with the same pid - * already exist, the command string is udapted with the new one - */ -int tep_override_comm(struct tep_handle *tep, const char *comm, int pid) -{ - if (!tep->cmdlines && cmdline_init(tep)) { - errno = ENOMEM; - return -1; - } - return _tep_register_comm(tep, comm, pid, true); -} - -struct func_map { - unsigned long long addr; - char *func; - char *mod; -}; - -struct func_list { - struct func_list *next; - unsigned long long addr; - char *func; - char *mod; -}; - -static int func_cmp(const void *a, const void *b) -{ - const struct func_map *fa = a; - const struct func_map *fb = b; - - if (fa->addr < fb->addr) - return -1; - if (fa->addr > fb->addr) - return 1; - - return 0; -} - -/* - * We are searching for a record in between, not an exact - * match. - */ -static int func_bcmp(const void *a, const void *b) -{ - const struct func_map *fa = a; - const struct func_map *fb = b; - - if ((fa->addr == fb->addr) || - - (fa->addr > fb->addr && - fa->addr < (fb+1)->addr)) - return 0; - - if (fa->addr < fb->addr) - return -1; - - return 1; -} - -static int func_map_init(struct tep_handle *tep) -{ - struct func_list *funclist; - struct func_list *item; - struct func_map *func_map; - int i; - - func_map = malloc(sizeof(*func_map) * (tep->func_count + 1)); - if (!func_map) - return -1; - - funclist = tep->funclist; - - i = 0; - while (funclist) { - func_map[i].func = funclist->func; - func_map[i].addr = funclist->addr; - func_map[i].mod = funclist->mod; - i++; - item = funclist; - funclist = funclist->next; - free(item); - } - - qsort(func_map, tep->func_count, sizeof(*func_map), func_cmp); - - /* - * Add a special record at the end. - */ - func_map[tep->func_count].func = NULL; - func_map[tep->func_count].addr = 0; - func_map[tep->func_count].mod = NULL; - - tep->func_map = func_map; - tep->funclist = NULL; - - return 0; -} - -static struct func_map * -__find_func(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *func; - struct func_map key; - - if (!tep->func_map) - func_map_init(tep); - - key.addr = addr; - - func = bsearch(&key, tep->func_map, tep->func_count, - sizeof(*tep->func_map), func_bcmp); - - return func; -} - -struct func_resolver { - tep_func_resolver_t *func; - void *priv; - struct func_map map; -}; - -/** - * tep_set_function_resolver - set an alternative function resolver - * @tep: a handle to the trace event parser context - * @resolver: function to be used - * @priv: resolver function private state. - * - * Some tools may have already a way to resolve kernel functions, allow them to - * keep using it instead of duplicating all the entries inside tep->funclist. - */ -int tep_set_function_resolver(struct tep_handle *tep, - tep_func_resolver_t *func, void *priv) -{ - struct func_resolver *resolver = malloc(sizeof(*resolver)); - - if (resolver == NULL) - return -1; - - resolver->func = func; - resolver->priv = priv; - - free(tep->func_resolver); - tep->func_resolver = resolver; - - return 0; -} - -/** - * tep_reset_function_resolver - reset alternative function resolver - * @tep: a handle to the trace event parser context - * - * Stop using whatever alternative resolver was set, use the default - * one instead. - */ -void tep_reset_function_resolver(struct tep_handle *tep) -{ - free(tep->func_resolver); - tep->func_resolver = NULL; -} - -static struct func_map * -find_func(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *map; - - if (!tep->func_resolver) - return __find_func(tep, addr); - - map = &tep->func_resolver->map; - map->mod = NULL; - map->addr = addr; - map->func = tep->func_resolver->func(tep->func_resolver->priv, - &map->addr, &map->mod); - if (map->func == NULL) - return NULL; - - return map; -} - -/** - * tep_find_function - find a function by a given address - * @tep: a handle to the trace event parser context - * @addr: the address to find the function with - * - * Returns a pointer to the function stored that has the given - * address. Note, the address does not have to be exact, it - * will select the function that would contain the address. - */ -const char *tep_find_function(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *map; - - map = find_func(tep, addr); - if (!map) - return NULL; - - return map->func; -} - -/** - * tep_find_function_address - find a function address by a given address - * @tep: a handle to the trace event parser context - * @addr: the address to find the function with - * - * Returns the address the function starts at. This can be used in - * conjunction with tep_find_function to print both the function - * name and the function offset. - */ -unsigned long long -tep_find_function_address(struct tep_handle *tep, unsigned long long addr) -{ - struct func_map *map; - - map = find_func(tep, addr); - if (!map) - return 0; - - return map->addr; -} - -/** - * tep_register_function - register a function with a given address - * @tep: a handle to the trace event parser context - * @function: the function name to register - * @addr: the address the function starts at - * @mod: the kernel module the function may be in (NULL for none) - * - * This registers a function name with an address and module. - * The @func passed in is duplicated. - */ -int tep_register_function(struct tep_handle *tep, char *func, - unsigned long long addr, char *mod) -{ - struct func_list *item = malloc(sizeof(*item)); - - if (!item) - return -1; - - item->next = tep->funclist; - item->func = strdup(func); - if (!item->func) - goto out_free; - - if (mod) { - item->mod = strdup(mod); - if (!item->mod) - goto out_free_func; - } else - item->mod = NULL; - item->addr = addr; - - tep->funclist = item; - tep->func_count++; - - return 0; - -out_free_func: - free(item->func); - item->func = NULL; -out_free: - free(item); - errno = ENOMEM; - return -1; -} - -/** - * tep_print_funcs - print out the stored functions - * @tep: a handle to the trace event parser context - * - * This prints out the stored functions. - */ -void tep_print_funcs(struct tep_handle *tep) -{ - int i; - - if (!tep->func_map) - func_map_init(tep); - - for (i = 0; i < (int)tep->func_count; i++) { - printf("%016llx %s", - tep->func_map[i].addr, - tep->func_map[i].func); - if (tep->func_map[i].mod) - printf(" [%s]\n", tep->func_map[i].mod); - else - printf("\n"); - } -} - -struct printk_map { - unsigned long long addr; - char *printk; -}; - -struct printk_list { - struct printk_list *next; - unsigned long long addr; - char *printk; -}; - -static int printk_cmp(const void *a, const void *b) -{ - const struct printk_map *pa = a; - const struct printk_map *pb = b; - - if (pa->addr < pb->addr) - return -1; - if (pa->addr > pb->addr) - return 1; - - return 0; -} - -static int printk_map_init(struct tep_handle *tep) -{ - struct printk_list *printklist; - struct printk_list *item; - struct printk_map *printk_map; - int i; - - printk_map = malloc(sizeof(*printk_map) * (tep->printk_count + 1)); - if (!printk_map) - return -1; - - printklist = tep->printklist; - - i = 0; - while (printklist) { - printk_map[i].printk = printklist->printk; - printk_map[i].addr = printklist->addr; - i++; - item = printklist; - printklist = printklist->next; - free(item); - } - - qsort(printk_map, tep->printk_count, sizeof(*printk_map), printk_cmp); - - tep->printk_map = printk_map; - tep->printklist = NULL; - - return 0; -} - -static struct printk_map * -find_printk(struct tep_handle *tep, unsigned long long addr) -{ - struct printk_map *printk; - struct printk_map key; - - if (!tep->printk_map && printk_map_init(tep)) - return NULL; - - key.addr = addr; - - printk = bsearch(&key, tep->printk_map, tep->printk_count, - sizeof(*tep->printk_map), printk_cmp); - - return printk; -} - -/** - * tep_register_print_string - register a string by its address - * @tep: a handle to the trace event parser context - * @fmt: the string format to register - * @addr: the address the string was located at - * - * This registers a string by the address it was stored in the kernel. - * The @fmt passed in is duplicated. - */ -int tep_register_print_string(struct tep_handle *tep, const char *fmt, - unsigned long long addr) -{ - struct printk_list *item = malloc(sizeof(*item)); - char *p; - - if (!item) - return -1; - - item->next = tep->printklist; - item->addr = addr; - - /* Strip off quotes and '\n' from the end */ - if (fmt[0] == '"') - fmt++; - item->printk = strdup(fmt); - if (!item->printk) - goto out_free; - - p = item->printk + strlen(item->printk) - 1; - if (*p == '"') - *p = 0; - - p -= 2; - if (strcmp(p, "\\n") == 0) - *p = 0; - - tep->printklist = item; - tep->printk_count++; - - return 0; - -out_free: - free(item); - errno = ENOMEM; - return -1; -} - -/** - * tep_print_printk - print out the stored strings - * @tep: a handle to the trace event parser context - * - * This prints the string formats that were stored. - */ -void tep_print_printk(struct tep_handle *tep) -{ - int i; - - if (!tep->printk_map) - printk_map_init(tep); - - for (i = 0; i < (int)tep->printk_count; i++) { - printf("%016llx %s\n", - tep->printk_map[i].addr, - tep->printk_map[i].printk); - } -} - -static struct tep_event *alloc_event(void) -{ - return calloc(1, sizeof(struct tep_event)); -} - -static int add_event(struct tep_handle *tep, struct tep_event *event) -{ - int i; - struct tep_event **events = realloc(tep->events, sizeof(event) * - (tep->nr_events + 1)); - if (!events) - return -1; - - tep->events = events; - - for (i = 0; i < tep->nr_events; i++) { - if (tep->events[i]->id > event->id) - break; - } - if (i < tep->nr_events) - memmove(&tep->events[i + 1], - &tep->events[i], - sizeof(event) * (tep->nr_events - i)); - - tep->events[i] = event; - tep->nr_events++; - - event->tep = tep; - - return 0; -} - -static int event_item_type(enum tep_event_type type) -{ - switch (type) { - case TEP_EVENT_ITEM ... TEP_EVENT_SQUOTE: - return 1; - case TEP_EVENT_ERROR ... TEP_EVENT_DELIM: - default: - return 0; - } -} - -static void free_flag_sym(struct tep_print_flag_sym *fsym) -{ - struct tep_print_flag_sym *next; - - while (fsym) { - next = fsym->next; - free(fsym->value); - free(fsym->str); - free(fsym); - fsym = next; - } -} - -static void free_arg(struct tep_print_arg *arg) -{ - struct tep_print_arg *farg; - - if (!arg) - return; - - switch (arg->type) { - case TEP_PRINT_ATOM: - free(arg->atom.atom); - break; - case TEP_PRINT_FIELD: - free(arg->field.name); - break; - case TEP_PRINT_FLAGS: - free_arg(arg->flags.field); - free(arg->flags.delim); - free_flag_sym(arg->flags.flags); - break; - case TEP_PRINT_SYMBOL: - free_arg(arg->symbol.field); - free_flag_sym(arg->symbol.symbols); - break; - case TEP_PRINT_HEX: - case TEP_PRINT_HEX_STR: - free_arg(arg->hex.field); - free_arg(arg->hex.size); - break; - case TEP_PRINT_INT_ARRAY: - free_arg(arg->int_array.field); - free_arg(arg->int_array.count); - free_arg(arg->int_array.el_size); - break; - case TEP_PRINT_TYPE: - free(arg->typecast.type); - free_arg(arg->typecast.item); - break; - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - free(arg->string.string); - break; - case TEP_PRINT_BITMASK: - free(arg->bitmask.bitmask); - break; - case TEP_PRINT_DYNAMIC_ARRAY: - case TEP_PRINT_DYNAMIC_ARRAY_LEN: - free(arg->dynarray.index); - break; - case TEP_PRINT_OP: - free(arg->op.op); - free_arg(arg->op.left); - free_arg(arg->op.right); - break; - case TEP_PRINT_FUNC: - while (arg->func.args) { - farg = arg->func.args; - arg->func.args = farg->next; - free_arg(farg); - } - break; - - case TEP_PRINT_NULL: - default: - break; - } - - free(arg); -} - -static enum tep_event_type get_type(int ch) -{ - if (ch == '\n') - return TEP_EVENT_NEWLINE; - if (isspace(ch)) - return TEP_EVENT_SPACE; - if (isalnum(ch) || ch == '_') - return TEP_EVENT_ITEM; - if (ch == '\'') - return TEP_EVENT_SQUOTE; - if (ch == '"') - return TEP_EVENT_DQUOTE; - if (!isprint(ch)) - return TEP_EVENT_NONE; - if (ch == '(' || ch == ')' || ch == ',') - return TEP_EVENT_DELIM; - - return TEP_EVENT_OP; -} - -static int __read_char(void) -{ - if (input_buf_ptr >= input_buf_siz) - return -1; - - return input_buf[input_buf_ptr++]; -} - -/** - * peek_char - peek at the next character that will be read - * - * Returns the next character read, or -1 if end of buffer. - */ -__hidden int peek_char(void) -{ - if (input_buf_ptr >= input_buf_siz) - return -1; - - return input_buf[input_buf_ptr]; -} - -static int extend_token(char **tok, char *buf, int size) -{ - char *newtok = realloc(*tok, size); - - if (!newtok) { - free(*tok); - *tok = NULL; - return -1; - } - - if (!*tok) - strcpy(newtok, buf); - else - strcat(newtok, buf); - *tok = newtok; - - return 0; -} - -static enum tep_event_type force_token(const char *str, char **tok); - -static enum tep_event_type __read_token(char **tok) -{ - char buf[BUFSIZ]; - int ch, last_ch, quote_ch, next_ch; - int i = 0; - int tok_size = 0; - enum tep_event_type type; - - *tok = NULL; - - - ch = __read_char(); - if (ch < 0) - return TEP_EVENT_NONE; - - type = get_type(ch); - if (type == TEP_EVENT_NONE) - return type; - - buf[i++] = ch; - - switch (type) { - case TEP_EVENT_NEWLINE: - case TEP_EVENT_DELIM: - if (asprintf(tok, "%c", ch) < 0) - return TEP_EVENT_ERROR; - - return type; - - case TEP_EVENT_OP: - switch (ch) { - case '-': - next_ch = peek_char(); - if (next_ch == '>') { - buf[i++] = __read_char(); - break; - } - /* fall through */ - case '+': - case '|': - case '&': - case '>': - case '<': - last_ch = ch; - ch = peek_char(); - if (ch != last_ch) - goto test_equal; - buf[i++] = __read_char(); - switch (last_ch) { - case '>': - case '<': - goto test_equal; - default: - break; - } - break; - case '!': - case '=': - goto test_equal; - default: /* what should we do instead? */ - break; - } - buf[i] = 0; - *tok = strdup(buf); - return type; - - test_equal: - ch = peek_char(); - if (ch == '=') - buf[i++] = __read_char(); - goto out; - - case TEP_EVENT_DQUOTE: - case TEP_EVENT_SQUOTE: - /* don't keep quotes */ - i--; - quote_ch = ch; - last_ch = 0; - concat: - do { - if (i == (BUFSIZ - 1)) { - buf[i] = 0; - tok_size += BUFSIZ; - - if (extend_token(tok, buf, tok_size) < 0) - return TEP_EVENT_NONE; - i = 0; - } - last_ch = ch; - ch = __read_char(); - buf[i++] = ch; - /* the '\' '\' will cancel itself */ - if (ch == '\\' && last_ch == '\\') - last_ch = 0; - } while (ch != quote_ch || last_ch == '\\'); - /* remove the last quote */ - i--; - - /* - * For strings (double quotes) check the next token. - * If it is another string, concatinate the two. - */ - if (type == TEP_EVENT_DQUOTE) { - unsigned long long save_input_buf_ptr = input_buf_ptr; - - do { - ch = __read_char(); - } while (isspace(ch)); - if (ch == '"') - goto concat; - input_buf_ptr = save_input_buf_ptr; - } - - goto out; - - case TEP_EVENT_ERROR ... TEP_EVENT_SPACE: - case TEP_EVENT_ITEM: - default: - break; - } - - while (get_type(peek_char()) == type) { - if (i == (BUFSIZ - 1)) { - buf[i] = 0; - tok_size += BUFSIZ; - - if (extend_token(tok, buf, tok_size) < 0) - return TEP_EVENT_NONE; - i = 0; - } - ch = __read_char(); - buf[i++] = ch; - } - - out: - buf[i] = 0; - if (extend_token(tok, buf, tok_size + i + 1) < 0) - return TEP_EVENT_NONE; - - if (type == TEP_EVENT_ITEM) { - /* - * Older versions of the kernel has a bug that - * creates invalid symbols and will break the mac80211 - * parsing. This is a work around to that bug. - * - * See Linux kernel commit: - * 811cb50baf63461ce0bdb234927046131fc7fa8b - */ - if (strcmp(*tok, "LOCAL_PR_FMT") == 0) { - free(*tok); - *tok = NULL; - return force_token("\"%s\" ", tok); - } else if (strcmp(*tok, "STA_PR_FMT") == 0) { - free(*tok); - *tok = NULL; - return force_token("\" sta:%pM\" ", tok); - } else if (strcmp(*tok, "VIF_PR_FMT") == 0) { - free(*tok); - *tok = NULL; - return force_token("\" vif:%p(%d)\" ", tok); - } - } - - return type; -} - -static enum tep_event_type force_token(const char *str, char **tok) -{ - const char *save_input_buf; - unsigned long long save_input_buf_ptr; - unsigned long long save_input_buf_siz; - enum tep_event_type type; - - /* save off the current input pointers */ - save_input_buf = input_buf; - save_input_buf_ptr = input_buf_ptr; - save_input_buf_siz = input_buf_siz; - - init_input_buf(str, strlen(str)); - - type = __read_token(tok); - - /* reset back to original token */ - input_buf = save_input_buf; - input_buf_ptr = save_input_buf_ptr; - input_buf_siz = save_input_buf_siz; - - return type; -} - -/** - * free_token - free a token returned by tep_read_token - * @token: the token to free - */ -__hidden void free_token(char *tok) -{ - if (tok) - free(tok); -} - -/** - * read_token - access to utilities to use the tep parser - * @tok: The token to return - * - * This will parse tokens from the string given by - * tep_init_data(). - * - * Returns the token type. - */ -__hidden enum tep_event_type read_token(char **tok) -{ - enum tep_event_type type; - - for (;;) { - type = __read_token(tok); - if (type != TEP_EVENT_SPACE) - return type; - - free_token(*tok); - } - - /* not reached */ - *tok = NULL; - return TEP_EVENT_NONE; -} - -/* no newline */ -static enum tep_event_type read_token_item(char **tok) -{ - enum tep_event_type type; - - for (;;) { - type = __read_token(tok); - if (type != TEP_EVENT_SPACE && type != TEP_EVENT_NEWLINE) - return type; - free_token(*tok); - *tok = NULL; - } - - /* not reached */ - *tok = NULL; - return TEP_EVENT_NONE; -} - -static int test_type(enum tep_event_type type, enum tep_event_type expect) -{ - if (type != expect) { - do_warning("Error: expected type %d but read %d", - expect, type); - return -1; - } - return 0; -} - -static int test_type_token(enum tep_event_type type, const char *token, - enum tep_event_type expect, const char *expect_tok) -{ - if (type != expect) { - do_warning("Error: expected type %d but read %d", - expect, type); - return -1; - } - - if (strcmp(token, expect_tok) != 0) { - do_warning("Error: expected '%s' but read '%s'", - expect_tok, token); - return -1; - } - return 0; -} - -static int __read_expect_type(enum tep_event_type expect, char **tok, int newline_ok) -{ - enum tep_event_type type; - - if (newline_ok) - type = read_token(tok); - else - type = read_token_item(tok); - return test_type(type, expect); -} - -static int read_expect_type(enum tep_event_type expect, char **tok) -{ - return __read_expect_type(expect, tok, 1); -} - -static int __read_expected(enum tep_event_type expect, const char *str, - int newline_ok) -{ - enum tep_event_type type; - char *token; - int ret; - - if (newline_ok) - type = read_token(&token); - else - type = read_token_item(&token); - - ret = test_type_token(type, token, expect, str); - - free_token(token); - - return ret; -} - -static int read_expected(enum tep_event_type expect, const char *str) -{ - return __read_expected(expect, str, 1); -} - -static int read_expected_item(enum tep_event_type expect, const char *str) -{ - return __read_expected(expect, str, 0); -} - -static char *event_read_name(void) -{ - char *token; - - if (read_expected(TEP_EVENT_ITEM, "name") < 0) - return NULL; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return NULL; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - - return token; - - fail: - free_token(token); - return NULL; -} - -static int event_read_id(void) -{ - char *token; - int id; - - if (read_expected_item(TEP_EVENT_ITEM, "ID") < 0) - return -1; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - - id = strtoul(token, NULL, 0); - free_token(token); - return id; - - fail: - free_token(token); - return -1; -} - -static int field_is_string(struct tep_format_field *field) -{ - if ((field->flags & TEP_FIELD_IS_ARRAY) && - (strstr(field->type, "char") || strstr(field->type, "u8") || - strstr(field->type, "s8"))) - return 1; - - return 0; -} - -static int field_is_dynamic(struct tep_format_field *field) -{ - if (strncmp(field->type, "__data_loc", 10) == 0) - return 1; - - return 0; -} - -static int field_is_relative_dynamic(struct tep_format_field *field) -{ - if (strncmp(field->type, "__rel_loc", 9) == 0) - return 1; - - return 0; -} - -static int field_is_long(struct tep_format_field *field) -{ - /* includes long long */ - if (strstr(field->type, "long")) - return 1; - - return 0; -} - -static unsigned int type_size(const char *name) -{ - /* This covers all TEP_FIELD_IS_STRING types. */ - static struct { - const char *type; - unsigned int size; - } table[] = { - { "u8", 1 }, - { "u16", 2 }, - { "u32", 4 }, - { "u64", 8 }, - { "s8", 1 }, - { "s16", 2 }, - { "s32", 4 }, - { "s64", 8 }, - { "char", 1 }, - { }, - }; - int i; - - for (i = 0; table[i].type; i++) { - if (!strcmp(table[i].type, name)) - return table[i].size; - } - - return 0; -} - -static int append(char **buf, const char *delim, const char *str) -{ - char *new_buf; - - new_buf = realloc(*buf, strlen(*buf) + strlen(delim) + strlen(str) + 1); - if (!new_buf) - return -1; - strcat(new_buf, delim); - strcat(new_buf, str); - *buf = new_buf; - return 0; -} - -static int event_read_fields(struct tep_event *event, struct tep_format_field **fields) -{ - struct tep_format_field *field = NULL; - enum tep_event_type type; - char *token; - char *last_token; - char *delim = " "; - int count = 0; - int ret; - - do { - unsigned int size_dynamic = 0; - - type = read_token(&token); - if (type == TEP_EVENT_NEWLINE) { - free_token(token); - return count; - } - - count++; - - if (test_type_token(type, token, TEP_EVENT_ITEM, "field")) - goto fail; - free_token(token); - - type = read_token(&token); - /* - * The ftrace fields may still use the "special" name. - * Just ignore it. - */ - if (event->flags & TEP_EVENT_FL_ISFTRACE && - type == TEP_EVENT_ITEM && strcmp(token, "special") == 0) { - free_token(token); - type = read_token(&token); - } - - if (test_type_token(type, token, TEP_EVENT_OP, ":") < 0) - goto fail; - - free_token(token); - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - - last_token = token; - - field = calloc(1, sizeof(*field)); - if (!field) - goto fail; - - field->event = event; - - /* read the rest of the type */ - for (;;) { - type = read_token(&token); - if (type == TEP_EVENT_ITEM || - (type == TEP_EVENT_OP && strcmp(token, "*") == 0) || - /* - * Some of the ftrace fields are broken and have - * an illegal "." in them. - */ - (event->flags & TEP_EVENT_FL_ISFTRACE && - type == TEP_EVENT_OP && strcmp(token, ".") == 0)) { - - if (strcmp(token, "*") == 0) - field->flags |= TEP_FIELD_IS_POINTER; - - if (field->type) { - ret = append(&field->type, delim, last_token); - free(last_token); - if (ret < 0) - goto fail; - } else - field->type = last_token; - last_token = token; - delim = " "; - continue; - } - - /* Handle __attribute__((user)) */ - if ((type == TEP_EVENT_DELIM) && - strcmp("__attribute__", last_token) == 0 && - token[0] == '(') { - int depth = 1; - int ret; - - ret = append(&field->type, " ", last_token); - ret |= append(&field->type, "", "("); - if (ret < 0) - goto fail; - - delim = " "; - while ((type = read_token(&token)) != TEP_EVENT_NONE) { - if (type == TEP_EVENT_DELIM) { - if (token[0] == '(') - depth++; - else if (token[0] == ')') - depth--; - if (!depth) - break; - ret = append(&field->type, "", token); - delim = ""; - } else { - ret = append(&field->type, delim, token); - delim = " "; - } - if (ret < 0) - goto fail; - free(last_token); - last_token = token; - } - continue; - } - break; - } - - if (!field->type) { - do_warning_event(event, "%s: no type found", __func__); - goto fail; - } - field->name = field->alias = last_token; - - if (test_type(type, TEP_EVENT_OP)) - goto fail; - - if (strcmp(token, "[") == 0) { - enum tep_event_type last_type = type; - char *brackets = token; - - field->flags |= TEP_FIELD_IS_ARRAY; - - type = read_token(&token); - - if (type == TEP_EVENT_ITEM) - field->arraylen = strtoul(token, NULL, 0); - else - field->arraylen = 0; - - while (strcmp(token, "]") != 0) { - const char *delim; - - if (last_type == TEP_EVENT_ITEM && - type == TEP_EVENT_ITEM) - delim = " "; - else - delim = ""; - - last_type = type; - - ret = append(&brackets, delim, token); - if (ret < 0) { - free(brackets); - goto fail; - } - /* We only care about the last token */ - field->arraylen = strtoul(token, NULL, 0); - free_token(token); - type = read_token(&token); - if (type == TEP_EVENT_NONE) { - free(brackets); - do_warning_event(event, "failed to find token"); - goto fail; - } - } - - free_token(token); - - ret = append(&brackets, "", "]"); - if (ret < 0) { - free(brackets); - goto fail; - } - - /* add brackets to type */ - - type = read_token(&token); - /* - * If the next token is not an OP, then it is of - * the format: type [] item; - */ - if (type == TEP_EVENT_ITEM) { - ret = append(&field->type, " ", field->name); - if (ret < 0) { - free(brackets); - goto fail; - } - ret = append(&field->type, "", brackets); - - size_dynamic = type_size(field->name); - free_token(field->name); - field->name = field->alias = token; - type = read_token(&token); - } else { - ret = append(&field->type, "", brackets); - if (ret < 0) { - free(brackets); - goto fail; - } - } - free(brackets); - } - - if (field_is_string(field)) - field->flags |= TEP_FIELD_IS_STRING; - if (field_is_dynamic(field)) - field->flags |= TEP_FIELD_IS_DYNAMIC; - if (field_is_relative_dynamic(field)) - field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE; - if (field_is_long(field)) - field->flags |= TEP_FIELD_IS_LONG; - - if (test_type_token(type, token, TEP_EVENT_OP, ";")) - goto fail; - free_token(token); - - if (read_expected(TEP_EVENT_ITEM, "offset") < 0) - goto fail_expect; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - field->offset = strtoul(token, NULL, 0); - free_token(token); - - if (read_expected(TEP_EVENT_OP, ";") < 0) - goto fail_expect; - - if (read_expected(TEP_EVENT_ITEM, "size") < 0) - goto fail_expect; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - field->size = strtoul(token, NULL, 0); - free_token(token); - - if (read_expected(TEP_EVENT_OP, ";") < 0) - goto fail_expect; - - type = read_token(&token); - if (type != TEP_EVENT_NEWLINE) { - /* newer versions of the kernel have a "signed" type */ - if (test_type_token(type, token, TEP_EVENT_ITEM, "signed")) - goto fail; - - free_token(token); - - if (read_expected(TEP_EVENT_OP, ":") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - - if (strtoul(token, NULL, 0)) - field->flags |= TEP_FIELD_IS_SIGNED; - - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - goto fail_expect; - - if (read_expect_type(TEP_EVENT_NEWLINE, &token)) - goto fail; - } - - free_token(token); - - if (field->flags & TEP_FIELD_IS_ARRAY) { - if (field->arraylen) - field->elementsize = field->size / field->arraylen; - else if (field->flags & TEP_FIELD_IS_DYNAMIC) - field->elementsize = size_dynamic; - else if (field->flags & TEP_FIELD_IS_STRING) - field->elementsize = 1; - else if (field->flags & TEP_FIELD_IS_LONG) - field->elementsize = event->tep ? - event->tep->long_size : - sizeof(long); - } else - field->elementsize = field->size; - - *fields = field; - fields = &field->next; - - } while (1); - - return 0; - -fail: - free_token(token); -fail_expect: - if (field) { - free(field->type); - free(field->name); - free(field); - } - return -1; -} - -static int event_read_format(struct tep_event *event) -{ - char *token; - int ret; - - if (read_expected_item(TEP_EVENT_ITEM, "format") < 0) - return -1; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(TEP_EVENT_NEWLINE, &token)) - goto fail; - free_token(token); - - ret = event_read_fields(event, &event->format.common_fields); - if (ret < 0) - return ret; - event->format.nr_common = ret; - - ret = event_read_fields(event, &event->format.fields); - if (ret < 0) - return ret; - event->format.nr_fields = ret; - - return 0; - - fail: - free_token(token); - return -1; -} - -static enum tep_event_type -process_arg_token(struct tep_event *event, struct tep_print_arg *arg, - char **tok, enum tep_event_type type); - -static enum tep_event_type -process_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - enum tep_event_type type; - char *token; - - type = read_token(&token); - *tok = token; - - return process_arg_token(event, arg, tok, type); -} - -static enum tep_event_type -process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok); - -/* - * For __print_symbolic() and __print_flags, we need to completely - * evaluate the first argument, which defines what to print next. - */ -static enum tep_event_type -process_field_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - enum tep_event_type type; - - type = process_arg(event, arg, tok); - - while (type == TEP_EVENT_OP) { - type = process_op(event, arg, tok); - } - - return type; -} - -static enum tep_event_type -process_cond(struct tep_event *event, struct tep_print_arg *top, char **tok) -{ - struct tep_print_arg *arg, *left, *right; - enum tep_event_type type; - char *token = NULL; - - arg = alloc_arg(); - left = alloc_arg(); - right = alloc_arg(); - - if (!arg || !left || !right) { - do_warning_event(event, "%s: not enough memory!", __func__); - /* arg will be freed at out_free */ - free_arg(left); - free_arg(right); - goto out_free; - } - - arg->type = TEP_PRINT_OP; - arg->op.left = left; - arg->op.right = right; - - *tok = NULL; - type = process_arg(event, left, &token); - - again: - if (type == TEP_EVENT_ERROR) - goto out_free; - - /* Handle other operations in the arguments */ - if (type == TEP_EVENT_OP && strcmp(token, ":") != 0) { - type = process_op(event, left, &token); - goto again; - } - - if (test_type_token(type, token, TEP_EVENT_OP, ":")) - goto out_free; - - arg->op.op = token; - - type = process_arg(event, right, &token); - - top->op.right = arg; - - *tok = token; - return type; - -out_free: - /* Top may point to itself */ - top->op.right = NULL; - free_token(token); - free_arg(arg); - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_array(struct tep_event *event, struct tep_print_arg *top, char **tok) -{ - struct tep_print_arg *arg; - enum tep_event_type type; - char *token = NULL; - - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s: not enough memory!", __func__); - /* '*tok' is set to top->op.op. No need to free. */ - *tok = NULL; - return TEP_EVENT_ERROR; - } - - *tok = NULL; - type = process_arg(event, arg, &token); - if (test_type_token(type, token, TEP_EVENT_OP, "]")) - goto out_free; - - top->op.right = arg; - - free_token(token); - type = read_token_item(&token); - *tok = token; - - return type; - -out_free: - free_token(token); - free_arg(arg); - return TEP_EVENT_ERROR; -} - -static int get_op_prio(char *op) -{ - if (!op[1]) { - switch (op[0]) { - case '~': - case '!': - return 4; - case '*': - case '/': - case '%': - return 6; - case '+': - case '-': - return 7; - /* '>>' and '<<' are 8 */ - case '<': - case '>': - return 9; - /* '==' and '!=' are 10 */ - case '&': - return 11; - case '^': - return 12; - case '|': - return 13; - case '?': - return 16; - default: - do_warning("unknown op '%c'", op[0]); - return -1; - } - } else { - if (strcmp(op, "++") == 0 || - strcmp(op, "--") == 0) { - return 3; - } else if (strcmp(op, ">>") == 0 || - strcmp(op, "<<") == 0) { - return 8; - } else if (strcmp(op, ">=") == 0 || - strcmp(op, "<=") == 0) { - return 9; - } else if (strcmp(op, "==") == 0 || - strcmp(op, "!=") == 0) { - return 10; - } else if (strcmp(op, "&&") == 0) { - return 14; - } else if (strcmp(op, "||") == 0) { - return 15; - } else { - do_warning("unknown op '%s'", op); - return -1; - } - } -} - -static int set_op_prio(struct tep_print_arg *arg) -{ - - /* single ops are the greatest */ - if (!arg->op.left || arg->op.left->type == TEP_PRINT_NULL) - arg->op.prio = 0; - else - arg->op.prio = get_op_prio(arg->op.op); - - return arg->op.prio; -} - -/* Note, *tok does not get freed, but will most likely be saved */ -static enum tep_event_type -process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *left, *right = NULL; - enum tep_event_type type; - char *token; - - /* the op is passed in via tok */ - token = *tok; - - if (arg->type == TEP_PRINT_OP && !arg->op.left) { - /* handle single op */ - if (token[1]) { - do_warning_event(event, "bad op token %s", token); - goto out_free; - } - switch (token[0]) { - case '~': - case '!': - case '+': - case '-': - break; - default: - do_warning_event(event, "bad op token %s", token); - goto out_free; - - } - - /* make an empty left */ - left = alloc_arg(); - if (!left) - goto out_warn_free; - - left->type = TEP_PRINT_NULL; - arg->op.left = left; - - right = alloc_arg(); - if (!right) - goto out_warn_free; - - arg->op.right = right; - - /* do not free the token, it belongs to an op */ - *tok = NULL; - type = process_arg(event, right, tok); - - } else if (strcmp(token, "?") == 0) { - - left = alloc_arg(); - if (!left) - goto out_warn_free; - - /* copy the top arg to the left */ - *left = *arg; - - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = left; - arg->op.prio = 0; - - /* it will set arg->op.right */ - type = process_cond(event, arg, tok); - - } else if (strcmp(token, ">>") == 0 || - strcmp(token, "<<") == 0 || - strcmp(token, "&") == 0 || - strcmp(token, "|") == 0 || - strcmp(token, "&&") == 0 || - strcmp(token, "||") == 0 || - strcmp(token, "-") == 0 || - strcmp(token, "+") == 0 || - strcmp(token, "*") == 0 || - strcmp(token, "^") == 0 || - strcmp(token, "/") == 0 || - strcmp(token, "%") == 0 || - strcmp(token, "<") == 0 || - strcmp(token, ">") == 0 || - strcmp(token, "<=") == 0 || - strcmp(token, ">=") == 0 || - strcmp(token, "==") == 0 || - strcmp(token, "!=") == 0) { - - left = alloc_arg(); - if (!left) - goto out_warn_free; - - /* copy the top arg to the left */ - *left = *arg; - - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = left; - arg->op.right = NULL; - - if (set_op_prio(arg) == -1) { - event->flags |= TEP_EVENT_FL_FAILED; - /* arg->op.op (= token) will be freed at out_free */ - arg->op.op = NULL; - goto out_free; - } - - type = read_token_item(&token); - *tok = token; - - /* could just be a type pointer */ - if ((strcmp(arg->op.op, "*") == 0) && - type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) { - int ret; - - if (left->type != TEP_PRINT_ATOM) { - do_warning_event(event, "bad pointer type"); - goto out_free; - } - ret = append(&left->atom.atom, " ", "*"); - if (ret < 0) - goto out_warn_free; - - free(arg->op.op); - *arg = *left; - free(left); - - return type; - } - - right = alloc_arg(); - if (!right) - goto out_warn_free; - - type = process_arg_token(event, right, tok, type); - if (type == TEP_EVENT_ERROR) { - free_arg(right); - /* token was freed in process_arg_token() via *tok */ - token = NULL; - goto out_free; - } - - if (right->type == TEP_PRINT_OP && - get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { - struct tep_print_arg tmp; - - /* rotate ops according to the priority */ - arg->op.right = right->op.left; - - tmp = *arg; - *arg = *right; - *right = tmp; - - arg->op.left = right; - } else { - arg->op.right = right; - } - - } else if (strcmp(token, "[") == 0) { - - left = alloc_arg(); - if (!left) - goto out_warn_free; - - *left = *arg; - - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = left; - - arg->op.prio = 0; - - /* it will set arg->op.right */ - type = process_array(event, arg, tok); - - } else { - do_warning_event(event, "unknown op '%s'", token); - event->flags |= TEP_EVENT_FL_FAILED; - /* the arg is now the left side */ - goto out_free; - } - - if (type == TEP_EVENT_OP && strcmp(*tok, ":") != 0) { - int prio; - - /* higher prios need to be closer to the root */ - prio = get_op_prio(*tok); - - if (prio > arg->op.prio) - return process_op(event, arg, tok); - - return process_op(event, right, tok); - } - - return type; - -out_warn_free: - do_warning_event(event, "%s: not enough memory!", __func__); -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_entry(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, - char **tok) -{ - enum tep_event_type type; - char *field; - char *token; - - if (read_expected(TEP_EVENT_OP, "->") < 0) - goto out_err; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - field = token; - - arg->type = TEP_PRINT_FIELD; - arg->field.name = field; - - if (is_flag_field) { - arg->field.field = tep_find_any_field(event, arg->field.name); - arg->field.field->flags |= TEP_FIELD_IS_FLAG; - is_flag_field = 0; - } else if (is_symbolic_field) { - arg->field.field = tep_find_any_field(event, arg->field.name); - arg->field.field->flags |= TEP_FIELD_IS_SYMBOLIC; - is_symbolic_field = 0; - } - - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static int alloc_and_process_delim(struct tep_event *event, char *next_token, - struct tep_print_arg **print_arg) -{ - struct tep_print_arg *field; - enum tep_event_type type; - char *token; - int ret = 0; - - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - errno = ENOMEM; - return -1; - } - - type = process_arg(event, field, &token); - - if (test_type_token(type, token, TEP_EVENT_DELIM, next_token)) { - errno = EINVAL; - ret = -1; - free_arg(field); - goto out_free_token; - } - - *print_arg = field; - -out_free_token: - free_token(token); - - return ret; -} - -static char *arg_eval (struct tep_print_arg *arg); - -static unsigned long long -eval_type_str(unsigned long long val, const char *type, int pointer) -{ - int sign = 0; - char *ref; - int len; - - len = strlen(type); - - if (pointer) { - - if (type[len-1] != '*') { - do_warning("pointer expected with non pointer type"); - return val; - } - - ref = malloc(len); - if (!ref) { - do_warning("%s: not enough memory!", __func__); - return val; - } - memcpy(ref, type, len); - - /* chop off the " *" */ - ref[len - 2] = 0; - - val = eval_type_str(val, ref, 0); - free(ref); - return val; - } - - /* check if this is a pointer */ - if (type[len - 1] == '*') - return val; - - /* Try to figure out the arg size*/ - if (strncmp(type, "struct", 6) == 0) - /* all bets off */ - return val; - - if (strcmp(type, "u8") == 0) - return val & 0xff; - - if (strcmp(type, "u16") == 0) - return val & 0xffff; - - if (strcmp(type, "u32") == 0) - return val & 0xffffffff; - - if (strcmp(type, "u64") == 0 || - strcmp(type, "s64") == 0) - return val; - - if (strcmp(type, "s8") == 0) - return (unsigned long long)(char)val & 0xff; - - if (strcmp(type, "s16") == 0) - return (unsigned long long)(short)val & 0xffff; - - if (strcmp(type, "s32") == 0) - return (unsigned long long)(int)val & 0xffffffff; - - if (strncmp(type, "unsigned ", 9) == 0) { - sign = 0; - type += 9; - } - - if (strcmp(type, "char") == 0) { - if (sign) - return (unsigned long long)(char)val & 0xff; - else - return val & 0xff; - } - - if (strcmp(type, "short") == 0) { - if (sign) - return (unsigned long long)(short)val & 0xffff; - else - return val & 0xffff; - } - - if (strcmp(type, "int") == 0) { - if (sign) - return (unsigned long long)(int)val & 0xffffffff; - else - return val & 0xffffffff; - } - - return val; -} - -/* - * Try to figure out the type. - */ -static unsigned long long -eval_type(unsigned long long val, struct tep_print_arg *arg, int pointer) -{ - if (arg->type != TEP_PRINT_TYPE) { - do_warning("expected type argument"); - return 0; - } - - return eval_type_str(val, arg->typecast.type, pointer); -} - -static int arg_num_eval(struct tep_print_arg *arg, long long *val) -{ - long long left, right; - int ret = 1; - - switch (arg->type) { - case TEP_PRINT_ATOM: - *val = strtoll(arg->atom.atom, NULL, 0); - break; - case TEP_PRINT_TYPE: - ret = arg_num_eval(arg->typecast.item, val); - if (!ret) - break; - *val = eval_type(*val, arg, 0); - break; - case TEP_PRINT_OP: - switch (arg->op.op[0]) { - case '|': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - if (arg->op.op[1]) - *val = left || right; - else - *val = left | right; - break; - case '&': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - if (arg->op.op[1]) - *val = left && right; - else - *val = left & right; - break; - case '<': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - switch (arg->op.op[1]) { - case 0: - *val = left < right; - break; - case '<': - *val = left << right; - break; - case '=': - *val = left <= right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - case '>': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - switch (arg->op.op[1]) { - case 0: - *val = left > right; - break; - case '>': - *val = left >> right; - break; - case '=': - *val = left >= right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - case '=': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - - if (arg->op.op[1] != '=') { - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } else - *val = left == right; - break; - case '!': - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - - switch (arg->op.op[1]) { - case '=': - *val = left != right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - case '-': - /* check for negative */ - if (arg->op.left->type == TEP_PRINT_NULL) - left = 0; - else - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - *val = left - right; - break; - case '+': - if (arg->op.left->type == TEP_PRINT_NULL) - left = 0; - else - ret = arg_num_eval(arg->op.left, &left); - if (!ret) - break; - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - *val = left + right; - break; - case '~': - ret = arg_num_eval(arg->op.right, &right); - if (!ret) - break; - *val = ~right; - break; - default: - do_warning("unknown op '%s'", arg->op.op); - ret = 0; - } - break; - - case TEP_PRINT_NULL: - case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL: - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - case TEP_PRINT_BITMASK: - default: - do_warning("invalid eval type %d", arg->type); - ret = 0; - - } - return ret; -} - -static char *arg_eval (struct tep_print_arg *arg) -{ - long long val; - static char buf[24]; - - switch (arg->type) { - case TEP_PRINT_ATOM: - return arg->atom.atom; - case TEP_PRINT_TYPE: - return arg_eval(arg->typecast.item); - case TEP_PRINT_OP: - if (!arg_num_eval(arg, &val)) - break; - sprintf(buf, "%lld", val); - return buf; - - case TEP_PRINT_NULL: - case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL: - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - case TEP_PRINT_BITMASK: - default: - do_warning("invalid eval type %d", arg->type); - break; - } - - return NULL; -} - -static enum tep_event_type -process_fields(struct tep_event *event, struct tep_print_flag_sym **list, char **tok) -{ - enum tep_event_type type; - struct tep_print_arg *arg = NULL; - struct tep_print_flag_sym *field; - char *token = *tok; - char *value; - - do { - free_token(token); - type = read_token_item(&token); - if (test_type_token(type, token, TEP_EVENT_OP, "{")) - break; - - arg = alloc_arg(); - if (!arg) - goto out_free; - - free_token(token); - type = process_arg(event, arg, &token); - - if (type == TEP_EVENT_OP) - type = process_op(event, arg, &token); - - if (type == TEP_EVENT_ERROR) - goto out_free; - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free; - - field = calloc(1, sizeof(*field)); - if (!field) - goto out_free; - - value = arg_eval(arg); - if (value == NULL) - goto out_free_field; - field->value = strdup(value); - if (field->value == NULL) - goto out_free_field; - - free_arg(arg); - arg = alloc_arg(); - if (!arg) - goto out_free; - - free_token(token); - type = process_arg(event, arg, &token); - if (test_type_token(type, token, TEP_EVENT_OP, "}")) - goto out_free_field; - - value = arg_eval(arg); - if (value == NULL) - goto out_free_field; - field->str = strdup(value); - if (field->str == NULL) - goto out_free_field; - free_arg(arg); - arg = NULL; - - *list = field; - list = &field->next; - - free_token(token); - type = read_token_item(&token); - } while (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0); - - *tok = token; - return type; - -out_free_field: - free_flag_sym(field); -out_free: - free_arg(arg); - free_token(token); - *tok = NULL; - - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_flags(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *field; - enum tep_event_type type; - char *token = NULL; - - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_FLAGS; - - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - goto out_free; - } - - type = process_field_arg(event, field, &token); - - /* Handle operations in the first argument */ - while (type == TEP_EVENT_OP) - type = process_op(event, field, &token); - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free_field; - free_token(token); - - arg->flags.field = field; - - type = read_token_item(&token); - if (event_item_type(type)) { - arg->flags.delim = token; - type = read_token_item(&token); - } - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free; - - type = process_fields(event, &arg->flags.flags, &token); - if (test_type_token(type, token, TEP_EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free_field: - free_arg(field); -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_symbols(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *field; - enum tep_event_type type; - char *token = NULL; - - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_SYMBOL; - - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - goto out_free; - } - - type = process_field_arg(event, field, &token); - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto out_free_field; - - arg->symbol.field = field; - - type = process_fields(event, &arg->symbol.symbols, &token); - if (test_type_token(type, token, TEP_EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free_field: - free_arg(field); -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_hex_common(struct tep_event *event, struct tep_print_arg *arg, - char **tok, enum tep_print_arg_type type) -{ - memset(arg, 0, sizeof(*arg)); - arg->type = type; - - if (alloc_and_process_delim(event, ",", &arg->hex.field)) - goto out; - - if (alloc_and_process_delim(event, ")", &arg->hex.size)) - goto free_field; - - return read_token_item(tok); - -free_field: - free_arg(arg->hex.field); - arg->hex.field = NULL; -out: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_hex(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - return process_hex_common(event, arg, tok, TEP_PRINT_HEX); -} - -static enum tep_event_type -process_hex_str(struct tep_event *event, struct tep_print_arg *arg, - char **tok) -{ - return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR); -} - -static enum tep_event_type -process_int_array(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_INT_ARRAY; - - if (alloc_and_process_delim(event, ",", &arg->int_array.field)) - goto out; - - if (alloc_and_process_delim(event, ",", &arg->int_array.count)) - goto free_field; - - if (alloc_and_process_delim(event, ")", &arg->int_array.el_size)) - goto free_size; - - return read_token_item(tok); - -free_size: - free_arg(arg->int_array.count); - arg->int_array.count = NULL; -free_field: - free_arg(arg->int_array.field); - arg->int_array.field = NULL; -out: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_dynamic_array(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_format_field *field; - enum tep_event_type type; - char *token; - - memset(arg, 0, sizeof(*arg)); - arg->type = TEP_PRINT_DYNAMIC_ARRAY; - - /* - * The item within the parenthesis is another field that holds - * the index into where the array starts. - */ - type = read_token(&token); - *tok = token; - if (type != TEP_EVENT_ITEM) - goto out_free; - - /* Find the field */ - - field = tep_find_field(event, token); - if (!field) - goto out_free; - - arg->dynarray.field = field; - arg->dynarray.index = 0; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_free; - - free_token(token); - type = read_token_item(&token); - *tok = token; - if (type != TEP_EVENT_OP || strcmp(token, "[") != 0) - return type; - - free_token(token); - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s: not enough memory!", __func__); - *tok = NULL; - return TEP_EVENT_ERROR; - } - - type = process_arg(event, arg, &token); - if (type == TEP_EVENT_ERROR) - goto out_free_arg; - - if (!test_type_token(type, token, TEP_EVENT_OP, "]")) - goto out_free_arg; - - free_token(token); - type = read_token_item(tok); - return type; - - out_free_arg: - free_arg(arg); - out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg, - char **tok) -{ - struct tep_format_field *field; - enum tep_event_type type; - char *token; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - arg->type = TEP_PRINT_DYNAMIC_ARRAY_LEN; - - /* Find the field */ - field = tep_find_field(event, token); - if (!field) - goto out_free; - - arg->dynarray.field = field; - arg->dynarray.index = 0; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_err; - - free_token(token); - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_paren(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg *item_arg; - enum tep_event_type type; - char *token; - - type = process_arg(event, arg, &token); - - if (type == TEP_EVENT_ERROR) - goto out_free; - - if (type == TEP_EVENT_OP) - type = process_op(event, arg, &token); - - if (type == TEP_EVENT_ERROR) - goto out_free; - - if (test_type_token(type, token, TEP_EVENT_DELIM, ")")) - goto out_free; - - free_token(token); - type = read_token_item(&token); - - /* - * If the next token is an item or another open paren, then - * this was a typecast. - */ - if (event_item_type(type) || - (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0)) { - - /* make this a typecast and contine */ - - /* prevous must be an atom */ - if (arg->type != TEP_PRINT_ATOM) { - do_warning_event(event, "previous needed to be TEP_PRINT_ATOM"); - goto out_free; - } - - item_arg = alloc_arg(); - if (!item_arg) { - do_warning_event(event, "%s: not enough memory!", - __func__); - goto out_free; - } - - arg->type = TEP_PRINT_TYPE; - arg->typecast.type = arg->atom.atom; - arg->typecast.item = item_arg; - type = process_arg_token(event, item_arg, &token, type); - - } - - *tok = token; - return type; - - out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - - -static enum tep_event_type -process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, - char **tok) -{ - enum tep_event_type type; - char *token; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - arg->type = TEP_PRINT_STRING; - arg->string.string = token; - arg->string.field = NULL; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_err; - - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, - char **tok) -{ - enum tep_event_type type; - char *token; - - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - arg->type = TEP_PRINT_BITMASK; - arg->bitmask.bitmask = token; - arg->bitmask.field = NULL; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_err; - - type = read_token(&token); - *tok = token; - - return type; - - out_free: - free_token(token); - out_err: - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static struct tep_function_handler * -find_func_handler(struct tep_handle *tep, char *func_name) -{ - struct tep_function_handler *func; - - if (!tep) - return NULL; - - for (func = tep->func_handlers; func; func = func->next) { - if (strcmp(func->name, func_name) == 0) - break; - } - - return func; -} - -static void remove_func_handler(struct tep_handle *tep, char *func_name) -{ - struct tep_function_handler *func; - struct tep_function_handler **next; - - next = &tep->func_handlers; - while ((func = *next)) { - if (strcmp(func->name, func_name) == 0) { - *next = func->next; - free_func_handle(func); - break; - } - next = &func->next; - } -} - -static enum tep_event_type -process_func_handler(struct tep_event *event, struct tep_function_handler *func, - struct tep_print_arg *arg, char **tok) -{ - struct tep_print_arg **next_arg; - struct tep_print_arg *farg; - enum tep_event_type type; - char *token; - int i; - - arg->type = TEP_PRINT_FUNC; - arg->func.func = func; - - *tok = NULL; - - next_arg = &(arg->func.args); - for (i = 0; i < func->nr_args; i++) { - farg = alloc_arg(); - if (!farg) { - do_warning_event(event, "%s: not enough memory!", - __func__); - return TEP_EVENT_ERROR; - } - - type = process_arg(event, farg, &token); - if (i < (func->nr_args - 1)) { - if (type != TEP_EVENT_DELIM || strcmp(token, ",") != 0) { - do_warning_event(event, - "Error: function '%s()' expects %d arguments but event %s only uses %d", - func->name, func->nr_args, - event->name, i + 1); - goto err; - } - } else { - if (type != TEP_EVENT_DELIM || strcmp(token, ")") != 0) { - do_warning_event(event, - "Error: function '%s()' only expects %d arguments but event %s has more", - func->name, func->nr_args, event->name); - goto err; - } - } - - *next_arg = farg; - next_arg = &(farg->next); - free_token(token); - } - - type = read_token(&token); - *tok = token; - - return type; - -err: - free_arg(farg); - free_token(token); - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_builtin_expect(struct tep_event *event, struct tep_print_arg *arg, char **tok) -{ - enum tep_event_type type; - char *token = NULL; - - /* Handle __builtin_expect( cond, #) */ - type = process_arg(event, arg, &token); - - if (type != TEP_EVENT_DELIM || token[0] != ',') - goto out_free; - - free_token(token); - - /* We don't care what the second parameter is of the __builtin_expect() */ - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto out_free; - - if (read_expected(TEP_EVENT_DELIM, ")") < 0) - goto out_free; - - free_token(token); - type = read_token_item(tok); - return type; - -out_free: - free_token(token); - *tok = NULL; - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_function(struct tep_event *event, struct tep_print_arg *arg, - char *token, char **tok) -{ - struct tep_function_handler *func; - - if (strcmp(token, "__print_flags") == 0) { - free_token(token); - is_flag_field = 1; - return process_flags(event, arg, tok); - } - if (strcmp(token, "__print_symbolic") == 0) { - free_token(token); - is_symbolic_field = 1; - return process_symbols(event, arg, tok); - } - if (strcmp(token, "__print_hex") == 0) { - free_token(token); - return process_hex(event, arg, tok); - } - if (strcmp(token, "__print_hex_str") == 0) { - free_token(token); - return process_hex_str(event, arg, tok); - } - if (strcmp(token, "__print_array") == 0) { - free_token(token); - return process_int_array(event, arg, tok); - } - if (strcmp(token, "__get_str") == 0 || - strcmp(token, "__get_rel_str") == 0) { - free_token(token); - return process_str(event, arg, tok); - } - if (strcmp(token, "__get_bitmask") == 0 || - strcmp(token, "__get_rel_bitmask") == 0) { - free_token(token); - return process_bitmask(event, arg, tok); - } - if (strcmp(token, "__get_dynamic_array") == 0 || - strcmp(token, "__get_rel_dynamic_array") == 0) { - free_token(token); - return process_dynamic_array(event, arg, tok); - } - if (strcmp(token, "__get_dynamic_array_len") == 0 || - strcmp(token, "__get_rel_dynamic_array_len") == 0) { - free_token(token); - return process_dynamic_array_len(event, arg, tok); - } - if (strcmp(token, "__builtin_expect") == 0) { - free_token(token); - return process_builtin_expect(event, arg, tok); - } - - func = find_func_handler(event->tep, token); - if (func) { - free_token(token); - return process_func_handler(event, func, arg, tok); - } - - do_warning_event(event, "function %s not defined", token); - free_token(token); - return TEP_EVENT_ERROR; -} - -static enum tep_event_type -process_arg_token(struct tep_event *event, struct tep_print_arg *arg, - char **tok, enum tep_event_type type) -{ - char *token; - char *atom; - - token = *tok; - - switch (type) { - case TEP_EVENT_ITEM: - if (strcmp(token, "REC") == 0) { - free_token(token); - type = process_entry(event, arg, &token); - break; - } - atom = token; - /* test the next token */ - type = read_token_item(&token); - - /* - * If the next token is a parenthesis, then this - * is a function. - */ - if (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0) { - free_token(token); - token = NULL; - /* this will free atom. */ - type = process_function(event, arg, atom, &token); - break; - } - /* atoms can be more than one token long */ - while (type == TEP_EVENT_ITEM) { - int ret; - - ret = append(&atom, " ", token); - if (ret < 0) { - free(atom); - *tok = NULL; - free_token(token); - return TEP_EVENT_ERROR; - } - free_token(token); - type = read_token_item(&token); - } - - arg->type = TEP_PRINT_ATOM; - arg->atom.atom = atom; - break; - - case TEP_EVENT_DQUOTE: - case TEP_EVENT_SQUOTE: - arg->type = TEP_PRINT_ATOM; - arg->atom.atom = token; - type = read_token_item(&token); - break; - case TEP_EVENT_DELIM: - if (strcmp(token, "(") == 0) { - free_token(token); - type = process_paren(event, arg, &token); - break; - } - case TEP_EVENT_OP: - /* handle single ops */ - arg->type = TEP_PRINT_OP; - arg->op.op = token; - arg->op.left = NULL; - type = process_op(event, arg, &token); - - /* On error, the op is freed */ - if (type == TEP_EVENT_ERROR) - arg->op.op = NULL; - - /* return error type if errored */ - break; - - case TEP_EVENT_ERROR ... TEP_EVENT_NEWLINE: - default: - do_warning_event(event, "unexpected type %d", type); - return TEP_EVENT_ERROR; - } - *tok = token; - - return type; -} - -static int event_read_print_args(struct tep_event *event, struct tep_print_arg **list) -{ - enum tep_event_type type = TEP_EVENT_ERROR; - struct tep_print_arg *arg; - char *token; - int args = 0; - - do { - if (type == TEP_EVENT_NEWLINE) { - type = read_token_item(&token); - continue; - } - - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s: not enough memory!", - __func__); - return -1; - } - - type = process_arg(event, arg, &token); - - if (type == TEP_EVENT_ERROR) { - free_token(token); - free_arg(arg); - return -1; - } - - *list = arg; - args++; - - if (type == TEP_EVENT_OP) { - type = process_op(event, arg, &token); - free_token(token); - if (type == TEP_EVENT_ERROR) { - *list = NULL; - free_arg(arg); - return -1; - } - list = &arg->next; - continue; - } - - if (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0) { - free_token(token); - *list = arg; - list = &arg->next; - continue; - } - break; - } while (type != TEP_EVENT_NONE); - - if (type != TEP_EVENT_NONE && type != TEP_EVENT_ERROR) - free_token(token); - - return args; -} - -static int event_read_print(struct tep_event *event) -{ - enum tep_event_type type; - char *token; - int ret; - - if (read_expected_item(TEP_EVENT_ITEM, "print") < 0) - return -1; - - if (read_expected(TEP_EVENT_ITEM, "fmt") < 0) - return -1; - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return -1; - - if (read_expect_type(TEP_EVENT_DQUOTE, &token) < 0) - goto fail; - - concat: - event->print_fmt.format = token; - event->print_fmt.args = NULL; - - /* ok to have no arg */ - type = read_token_item(&token); - - if (type == TEP_EVENT_NONE) - return 0; - - /* Handle concatenation of print lines */ - if (type == TEP_EVENT_DQUOTE) { - char *cat; - - if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0) - goto fail; - free_token(token); - free_token(event->print_fmt.format); - event->print_fmt.format = NULL; - token = cat; - goto concat; - } - - if (test_type_token(type, token, TEP_EVENT_DELIM, ",")) - goto fail; - - free_token(token); - - ret = event_read_print_args(event, &event->print_fmt.args); - if (ret < 0) - return -1; - - return ret; - - fail: - free_token(token); - return -1; -} - -/** - * tep_find_common_field - return a common field by event - * @event: handle for the event - * @name: the name of the common field to return - * - * Returns a common field from the event by the given @name. - * This only searches the common fields and not all field. - */ -struct tep_format_field * -tep_find_common_field(struct tep_event *event, const char *name) -{ - struct tep_format_field *format; - - for (format = event->format.common_fields; - format; format = format->next) { - if (strcmp(format->name, name) == 0) - break; - } - - return format; -} - -/** - * tep_find_field - find a non-common field - * @event: handle for the event - * @name: the name of the non-common field - * - * Returns a non-common field by the given @name. - * This does not search common fields. - */ -struct tep_format_field * -tep_find_field(struct tep_event *event, const char *name) -{ - struct tep_format_field *format; - - for (format = event->format.fields; - format; format = format->next) { - if (strcmp(format->name, name) == 0) - break; - } - - return format; -} - -/** - * tep_find_any_field - find any field by name - * @event: handle for the event - * @name: the name of the field - * - * Returns a field by the given @name. - * This searches the common field names first, then - * the non-common ones if a common one was not found. - */ -struct tep_format_field * -tep_find_any_field(struct tep_event *event, const char *name) -{ - struct tep_format_field *format; - - format = tep_find_common_field(event, name); - if (format) - return format; - return tep_find_field(event, name); -} - -/** - * tep_read_number - read a number from data - * @tep: a handle to the trace event parser context - * @ptr: the raw data - * @size: the size of the data that holds the number - * - * Returns the number (converted to host) from the - * raw data. - */ -unsigned long long tep_read_number(struct tep_handle *tep, - const void *ptr, int size) -{ - unsigned long long val; - - switch (size) { - case 1: - return *(unsigned char *)ptr; - case 2: - return data2host2(tep, *(unsigned short *)ptr); - case 4: - return data2host4(tep, *(unsigned int *)ptr); - case 8: - memcpy(&val, (ptr), sizeof(unsigned long long)); - return data2host8(tep, val); - default: - /* BUG! */ - return 0; - } -} - -/** - * tep_read_number_field - read a number from data - * @field: a handle to the field - * @data: the raw data to read - * @value: the value to place the number in - * - * Reads raw data according to a field offset and size, - * and translates it into @value. - * - * Returns 0 on success, -1 otherwise. - */ -int tep_read_number_field(struct tep_format_field *field, const void *data, - unsigned long long *value) -{ - if (!field) - return -1; - switch (field->size) { - case 1: - case 2: - case 4: - case 8: - *value = tep_read_number(field->event->tep, - data + field->offset, field->size); - return 0; - default: - return -1; - } -} - -static int get_common_info(struct tep_handle *tep, - const char *type, int *offset, int *size) -{ - struct tep_event *event; - struct tep_format_field *field; - - /* - * All events should have the same common elements. - * Pick any event to find where the type is; - */ - if (!tep->events) { - do_warning("no event_list!"); - return -1; - } - - event = tep->events[0]; - field = tep_find_common_field(event, type); - if (!field) - return -1; - - *offset = field->offset; - *size = field->size; - - return 0; -} - -static int __parse_common(struct tep_handle *tep, void *data, - int *size, int *offset, const char *name) -{ - int ret; - - if (!*size) { - ret = get_common_info(tep, name, offset, size); - if (ret < 0) - return ret; - } - return tep_read_number(tep, data + *offset, *size); -} - -static int trace_parse_common_type(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->type_size, &tep->type_offset, - "common_type"); -} - -static int parse_common_pid(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->pid_size, &tep->pid_offset, - "common_pid"); -} - -static int parse_common_pc(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->pc_size, &tep->pc_offset, - "common_preempt_count"); -} - -static int parse_common_flags(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->flags_size, &tep->flags_offset, - "common_flags"); -} - -static int parse_common_lock_depth(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->ld_size, &tep->ld_offset, - "common_lock_depth"); -} - -static int parse_common_migrate_disable(struct tep_handle *tep, void *data) -{ - return __parse_common(tep, data, - &tep->ld_size, &tep->ld_offset, - "common_migrate_disable"); -} - -static int events_id_cmp(const void *a, const void *b); - -/** - * tep_find_event - find an event by given id - * @tep: a handle to the trace event parser context - * @id: the id of the event - * - * Returns an event that has a given @id. - */ -struct tep_event *tep_find_event(struct tep_handle *tep, int id) -{ - struct tep_event **eventptr; - struct tep_event key; - struct tep_event *pkey = &key; - - /* Check cache first */ - if (tep->last_event && tep->last_event->id == id) - return tep->last_event; - - key.id = id; - - eventptr = bsearch(&pkey, tep->events, tep->nr_events, - sizeof(*tep->events), events_id_cmp); - - if (eventptr) { - tep->last_event = *eventptr; - return *eventptr; - } - - return NULL; -} - -/** - * tep_find_event_by_name - find an event by given name - * @tep: a handle to the trace event parser context - * @sys: the system name to search for - * @name: the name of the event to search for - * - * This returns an event with a given @name and under the system - * @sys. If @sys is NULL the first event with @name is returned. - */ -struct tep_event * -tep_find_event_by_name(struct tep_handle *tep, - const char *sys, const char *name) -{ - struct tep_event *event = NULL; - int i; - - if (tep->last_event && - strcmp(tep->last_event->name, name) == 0 && - (!sys || strcmp(tep->last_event->system, sys) == 0)) - return tep->last_event; - - for (i = 0; i < tep->nr_events; i++) { - event = tep->events[i]; - if (strcmp(event->name, name) == 0) { - if (!sys) - break; - if (strcmp(event->system, sys) == 0) - break; - } - } - if (i == tep->nr_events) - event = NULL; - - tep->last_event = event; - return event; -} - -static unsigned long long -eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg) -{ - struct tep_handle *tep = event->tep; - unsigned long long val = 0; - unsigned long long left, right; - struct tep_print_arg *typearg = NULL; - struct tep_print_arg *larg; - unsigned long offset; - unsigned int field_size; - - switch (arg->type) { - case TEP_PRINT_NULL: - /* ?? */ - return 0; - case TEP_PRINT_ATOM: - return strtoull(arg->atom.atom, NULL, 0); - case TEP_PRINT_FIELD: - if (!arg->field.field) { - arg->field.field = tep_find_any_field(event, arg->field.name); - if (!arg->field.field) - goto out_warning_field; - - } - /* must be a number */ - val = tep_read_number(tep, data + arg->field.field->offset, - arg->field.field->size); - break; - case TEP_PRINT_FLAGS: - case TEP_PRINT_SYMBOL: - case TEP_PRINT_INT_ARRAY: - case TEP_PRINT_HEX: - case TEP_PRINT_HEX_STR: - break; - case TEP_PRINT_TYPE: - val = eval_num_arg(data, size, event, arg->typecast.item); - return eval_type(val, arg, 0); - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - case TEP_PRINT_BITMASK: - return 0; - case TEP_PRINT_FUNC: { - struct trace_seq s; - trace_seq_init(&s); - val = process_defined_func(&s, data, size, event, arg); - trace_seq_destroy(&s); - return val; - } - case TEP_PRINT_OP: - if (strcmp(arg->op.op, "[") == 0) { - /* - * Arrays are special, since we don't want - * to read the arg as is. - */ - right = eval_num_arg(data, size, event, arg->op.right); - - /* handle typecasts */ - larg = arg->op.left; - while (larg->type == TEP_PRINT_TYPE) { - if (!typearg) - typearg = larg; - larg = larg->typecast.item; - } - - /* Default to long size */ - field_size = tep->long_size; - - switch (larg->type) { - case TEP_PRINT_DYNAMIC_ARRAY: - offset = tep_read_number(tep, - data + larg->dynarray.field->offset, - larg->dynarray.field->size); - if (larg->dynarray.field->elementsize) - field_size = larg->dynarray.field->elementsize; - /* - * The actual length of the dynamic array is stored - * in the top half of the field, and the offset - * is in the bottom half of the 32 bit field. - */ - offset &= 0xffff; - offset += right; - break; - case TEP_PRINT_FIELD: - if (!larg->field.field) { - larg->field.field = - tep_find_any_field(event, larg->field.name); - if (!larg->field.field) { - arg = larg; - goto out_warning_field; - } - } - field_size = larg->field.field->elementsize; - offset = larg->field.field->offset + - right * larg->field.field->elementsize; - break; - default: - goto default_op; /* oops, all bets off */ - } - val = tep_read_number(tep, - data + offset, field_size); - if (typearg) - val = eval_type(val, typearg, 1); - break; - } else if (strcmp(arg->op.op, "?") == 0) { - left = eval_num_arg(data, size, event, arg->op.left); - arg = arg->op.right; - if (left) - val = eval_num_arg(data, size, event, arg->op.left); - else - val = eval_num_arg(data, size, event, arg->op.right); - break; - } - default_op: - left = eval_num_arg(data, size, event, arg->op.left); - right = eval_num_arg(data, size, event, arg->op.right); - switch (arg->op.op[0]) { - case '!': - switch (arg->op.op[1]) { - case 0: - val = !right; - break; - case '=': - val = left != right; - break; - default: - goto out_warning_op; - } - break; - case '~': - val = ~right; - break; - case '|': - if (arg->op.op[1]) - val = left || right; - else - val = left | right; - break; - case '&': - if (arg->op.op[1]) - val = left && right; - else - val = left & right; - break; - case '<': - switch (arg->op.op[1]) { - case 0: - val = left < right; - break; - case '<': - val = left << right; - break; - case '=': - val = left <= right; - break; - default: - goto out_warning_op; - } - break; - case '>': - switch (arg->op.op[1]) { - case 0: - val = left > right; - break; - case '>': - val = left >> right; - break; - case '=': - val = left >= right; - break; - default: - goto out_warning_op; - } - break; - case '=': - if (arg->op.op[1] != '=') - goto out_warning_op; - - val = left == right; - break; - case '-': - val = left - right; - break; - case '+': - val = left + right; - break; - case '/': - val = left / right; - break; - case '%': - val = left % right; - break; - case '*': - val = left * right; - break; - default: - goto out_warning_op; - } - break; - case TEP_PRINT_DYNAMIC_ARRAY_LEN: - offset = tep_read_number(tep, - data + arg->dynarray.field->offset, - arg->dynarray.field->size); - /* - * The total allocated length of the dynamic array is - * stored in the top half of the field, and the offset - * is in the bottom half of the 32 bit field. - */ - val = (unsigned long long)(offset >> 16); - break; - case TEP_PRINT_DYNAMIC_ARRAY: - /* Without [], we pass the address to the dynamic data */ - offset = tep_read_number(tep, - data + arg->dynarray.field->offset, - arg->dynarray.field->size); - /* - * The total allocated length of the dynamic array is - * stored in the top half of the field, and the offset - * is in the bottom half of the 32 bit field. - */ - offset &= 0xffff; - val = (unsigned long long)((unsigned long)data + offset); - break; - default: /* not sure what to do there */ - return 0; - } - return val; - -out_warning_op: - do_warning_event(event, "%s: unknown op '%s'", __func__, arg->op.op); - return 0; - -out_warning_field: - do_warning_event(event, "%s: field %s not found", - __func__, arg->field.name); - return 0; -} - -struct flag { - const char *name; - unsigned long long value; -}; - -static const struct flag flags[] = { - { "HI_SOFTIRQ", 0 }, - { "TIMER_SOFTIRQ", 1 }, - { "NET_TX_SOFTIRQ", 2 }, - { "NET_RX_SOFTIRQ", 3 }, - { "BLOCK_SOFTIRQ", 4 }, - { "IRQ_POLL_SOFTIRQ", 5 }, - { "TASKLET_SOFTIRQ", 6 }, - { "SCHED_SOFTIRQ", 7 }, - { "HRTIMER_SOFTIRQ", 8 }, - { "RCU_SOFTIRQ", 9 }, - - { "HRTIMER_NORESTART", 0 }, - { "HRTIMER_RESTART", 1 }, -}; - -static long long eval_flag(const char *flag) -{ - int i; - - /* - * Some flags in the format files do not get converted. - * If the flag is not numeric, see if it is something that - * we already know about. - */ - if (isdigit(flag[0])) - return strtoull(flag, NULL, 0); - - for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++) - if (strcmp(flags[i].name, flag) == 0) - return flags[i].value; - - return -1LL; -} - -static void print_str_to_seq(struct trace_seq *s, const char *format, - int len_arg, const char *str) -{ - if (len_arg >= 0) - trace_seq_printf(s, format, len_arg, str); - else - trace_seq_printf(s, format, str); -} - -static void print_bitmask_to_seq(struct tep_handle *tep, - struct trace_seq *s, const char *format, - int len_arg, const void *data, int size) -{ - int nr_bits = size * 8; - int str_size = (nr_bits + 3) / 4; - int len = 0; - char buf[3]; - char *str; - int index; - int i; - - /* - * The kernel likes to put in commas every 32 bits, we - * can do the same. - */ - str_size += (nr_bits - 1) / 32; - - str = malloc(str_size + 1); - if (!str) { - do_warning("%s: not enough memory!", __func__); - return; - } - str[str_size] = 0; - - /* Start out with -2 for the two chars per byte */ - for (i = str_size - 2; i >= 0; i -= 2) { - /* - * data points to a bit mask of size bytes. - * In the kernel, this is an array of long words, thus - * endianness is very important. - */ - if (tep->file_bigendian) - index = size - (len + 1); - else - index = len; - - snprintf(buf, 3, "%02x", *((unsigned char *)data + index)); - memcpy(str + i, buf, 2); - len++; - if (!(len & 3) && i > 0) { - i--; - str[i] = ','; - } - } - - if (len_arg >= 0) - trace_seq_printf(s, format, len_arg, str); - else - trace_seq_printf(s, format, str); - - free(str); -} - -static void print_str_arg(struct trace_seq *s, void *data, int size, - struct tep_event *event, const char *format, - int len_arg, struct tep_print_arg *arg) -{ - struct tep_handle *tep = event->tep; - struct tep_print_flag_sym *flag; - struct tep_format_field *field; - struct printk_map *printk; - long long val, fval; - unsigned long long addr; - char *str; - unsigned char *hex; - int print; - int i, len; - - switch (arg->type) { - case TEP_PRINT_NULL: - /* ?? */ - return; - case TEP_PRINT_ATOM: - print_str_to_seq(s, format, len_arg, arg->atom.atom); - return; - case TEP_PRINT_FIELD: - field = arg->field.field; - if (!field) { - field = tep_find_any_field(event, arg->field.name); - if (!field) { - str = arg->field.name; - goto out_warning_field; - } - arg->field.field = field; - } - /* Zero sized fields, mean the rest of the data */ - len = field->size ? : size - field->offset; - - /* - * Some events pass in pointers. If this is not an array - * and the size is the same as long_size, assume that it - * is a pointer. - */ - if (!(field->flags & TEP_FIELD_IS_ARRAY) && - field->size == tep->long_size) { - - /* Handle heterogeneous recording and processing - * architectures - * - * CASE I: - * Traces recorded on 32-bit devices (32-bit - * addressing) and processed on 64-bit devices: - * In this case, only 32 bits should be read. - * - * CASE II: - * Traces recorded on 64 bit devices and processed - * on 32-bit devices: - * In this case, 64 bits must be read. - */ - addr = (tep->long_size == 8) ? - *(unsigned long long *)(data + field->offset) : - (unsigned long long)*(unsigned int *)(data + field->offset); - - /* Check if it matches a print format */ - printk = find_printk(tep, addr); - if (printk) - trace_seq_puts(s, printk->printk); - else - trace_seq_printf(s, "%llx", addr); - break; - } - str = malloc(len + 1); - if (!str) { - do_warning_event(event, "%s: not enough memory!", - __func__); - return; - } - memcpy(str, data + field->offset, len); - str[len] = 0; - print_str_to_seq(s, format, len_arg, str); - free(str); - break; - case TEP_PRINT_FLAGS: - val = eval_num_arg(data, size, event, arg->flags.field); - print = 0; - for (flag = arg->flags.flags; flag; flag = flag->next) { - fval = eval_flag(flag->value); - if (!val && fval < 0) { - print_str_to_seq(s, format, len_arg, flag->str); - break; - } - if (fval > 0 && (val & fval) == fval) { - if (print && arg->flags.delim) - trace_seq_puts(s, arg->flags.delim); - print_str_to_seq(s, format, len_arg, flag->str); - print = 1; - val &= ~fval; - } - } - if (val) { - if (print && arg->flags.delim) - trace_seq_puts(s, arg->flags.delim); - trace_seq_printf(s, "0x%llx", val); - } - break; - case TEP_PRINT_SYMBOL: - val = eval_num_arg(data, size, event, arg->symbol.field); - for (flag = arg->symbol.symbols; flag; flag = flag->next) { - fval = eval_flag(flag->value); - if (val == fval) { - print_str_to_seq(s, format, len_arg, flag->str); - break; - } - } - if (!flag) - trace_seq_printf(s, "0x%llx", val); - break; - case TEP_PRINT_HEX: - case TEP_PRINT_HEX_STR: - if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) { - unsigned long offset; - offset = tep_read_number(tep, - data + arg->hex.field->dynarray.field->offset, - arg->hex.field->dynarray.field->size); - hex = data + (offset & 0xffff); - } else { - field = arg->hex.field->field.field; - if (!field) { - str = arg->hex.field->field.name; - field = tep_find_any_field(event, str); - if (!field) - goto out_warning_field; - arg->hex.field->field.field = field; - } - hex = data + field->offset; - } - len = eval_num_arg(data, size, event, arg->hex.size); - for (i = 0; i < len; i++) { - if (i && arg->type == TEP_PRINT_HEX) - trace_seq_putc(s, ' '); - trace_seq_printf(s, "%02x", hex[i]); - } - break; - - case TEP_PRINT_INT_ARRAY: { - void *num; - int el_size; - - if (arg->int_array.field->type == TEP_PRINT_DYNAMIC_ARRAY) { - unsigned long offset; - struct tep_format_field *field = - arg->int_array.field->dynarray.field; - offset = tep_read_number(tep, - data + field->offset, - field->size); - num = data + (offset & 0xffff); - } else { - field = arg->int_array.field->field.field; - if (!field) { - str = arg->int_array.field->field.name; - field = tep_find_any_field(event, str); - if (!field) - goto out_warning_field; - arg->int_array.field->field.field = field; - } - num = data + field->offset; - } - len = eval_num_arg(data, size, event, arg->int_array.count); - el_size = eval_num_arg(data, size, event, - arg->int_array.el_size); - for (i = 0; i < len; i++) { - if (i) - trace_seq_putc(s, ' '); - - if (el_size == 1) { - trace_seq_printf(s, "%u", *(uint8_t *)num); - } else if (el_size == 2) { - trace_seq_printf(s, "%u", *(uint16_t *)num); - } else if (el_size == 4) { - trace_seq_printf(s, "%u", *(uint32_t *)num); - } else if (el_size == 8) { - trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num); - } else { - trace_seq_printf(s, "BAD SIZE:%d 0x%x", - el_size, *(uint8_t *)num); - el_size = 1; - } - - num += el_size; - } - break; - } - case TEP_PRINT_TYPE: - break; - case TEP_PRINT_STRING: { - int str_offset; - - if (!arg->string.field) - arg->string.field = tep_find_any_field(event, arg->string.string); - if (!arg->string.field) - break; - - str_offset = data2host4(tep, - *(unsigned int *)(data + arg->string.field->offset)); - str_offset &= 0xffff; - if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE) - str_offset += arg->string.field->offset + arg->string.field->size; - print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); - break; - } - case TEP_PRINT_BSTRING: - print_str_to_seq(s, format, len_arg, arg->string.string); - break; - case TEP_PRINT_BITMASK: { - int bitmask_offset; - int bitmask_size; - - if (!arg->bitmask.field) - arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask); - if (!arg->bitmask.field) - break; - bitmask_offset = data2host4(tep, - *(unsigned int *)(data + arg->bitmask.field->offset)); - bitmask_size = bitmask_offset >> 16; - bitmask_offset &= 0xffff; - if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE) - bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size; - print_bitmask_to_seq(tep, s, format, len_arg, - data + bitmask_offset, bitmask_size); - break; - } - case TEP_PRINT_OP: - /* - * The only op for string should be ? : - */ - if (arg->op.op[0] != '?') - return; - val = eval_num_arg(data, size, event, arg->op.left); - if (val) - print_str_arg(s, data, size, event, - format, len_arg, arg->op.right->op.left); - else - print_str_arg(s, data, size, event, - format, len_arg, arg->op.right->op.right); - break; - case TEP_PRINT_FUNC: - process_defined_func(s, data, size, event, arg); - break; - default: - /* well... */ - break; - } - - return; - -out_warning_field: - do_warning_event(event, "%s: field %s not found", - __func__, arg->field.name); -} - -static unsigned long long -process_defined_func(struct trace_seq *s, void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) -{ - struct tep_function_handler *func_handle = arg->func.func; - struct func_params *param; - unsigned long long *args; - unsigned long long ret; - struct tep_print_arg *farg; - struct trace_seq str; - struct save_str { - struct save_str *next; - char *str; - } *strings = NULL, *string; - int i; - - if (!func_handle->nr_args) { - ret = (*func_handle->func)(s, NULL); - goto out; - } - - farg = arg->func.args; - param = func_handle->params; - - ret = ULLONG_MAX; - args = malloc(sizeof(*args) * func_handle->nr_args); - if (!args) - goto out; - - for (i = 0; i < func_handle->nr_args; i++) { - switch (param->type) { - case TEP_FUNC_ARG_INT: - case TEP_FUNC_ARG_LONG: - case TEP_FUNC_ARG_PTR: - args[i] = eval_num_arg(data, size, event, farg); - break; - case TEP_FUNC_ARG_STRING: - trace_seq_init(&str); - print_str_arg(&str, data, size, event, "%s", -1, farg); - trace_seq_terminate(&str); - string = malloc(sizeof(*string)); - if (!string) { - do_warning_event(event, "%s(%d): malloc str", - __func__, __LINE__); - goto out_free; - } - string->next = strings; - string->str = strdup(str.buffer); - if (!string->str) { - free(string); - do_warning_event(event, "%s(%d): malloc str", - __func__, __LINE__); - goto out_free; - } - args[i] = (uintptr_t)string->str; - strings = string; - trace_seq_destroy(&str); - break; - default: - /* - * Something went totally wrong, this is not - * an input error, something in this code broke. - */ - do_warning_event(event, "Unexpected end of arguments\n"); - goto out_free; - } - farg = farg->next; - param = param->next; - } - - ret = (*func_handle->func)(s, args); -out_free: - free(args); - while (strings) { - string = strings; - strings = string->next; - free(string->str); - free(string); - } - - out: - /* TBD : handle return type here */ - return ret; -} - -static void free_args(struct tep_print_arg *args) -{ - struct tep_print_arg *next; - - while (args) { - next = args->next; - - free_arg(args); - args = next; - } -} - -static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event) -{ - struct tep_handle *tep = event->tep; - struct tep_format_field *field, *ip_field; - struct tep_print_arg *args, *arg, **next; - unsigned long long ip, val; - char *ptr; - void *bptr; - int vsize = 0; - - field = tep->bprint_buf_field; - ip_field = tep->bprint_ip_field; - - if (!field) { - field = tep_find_field(event, "buf"); - if (!field) { - do_warning_event(event, "can't find buffer field for binary printk"); - return NULL; - } - ip_field = tep_find_field(event, "ip"); - if (!ip_field) { - do_warning_event(event, "can't find ip field for binary printk"); - return NULL; - } - tep->bprint_buf_field = field; - tep->bprint_ip_field = ip_field; - } - - ip = tep_read_number(tep, data + ip_field->offset, ip_field->size); - - /* - * The first arg is the IP pointer. - */ - args = alloc_arg(); - if (!args) { - do_warning_event(event, "%s(%d): not enough memory!", - __func__, __LINE__); - return NULL; - } - arg = args; - arg->next = NULL; - next = &arg->next; - - arg->type = TEP_PRINT_ATOM; - - if (asprintf(&arg->atom.atom, "%lld", ip) < 0) - goto out_free; - - /* skip the first "%ps: " */ - for (ptr = fmt + 5, bptr = data + field->offset; - bptr < data + size && *ptr; ptr++) { - int ls = 0; - - if (*ptr == '%') { - process_again: - ptr++; - switch (*ptr) { - case '%': - break; - case 'l': - ls++; - goto process_again; - case 'L': - ls = 2; - goto process_again; - case '0' ... '9': - goto process_again; - case '.': - goto process_again; - case 'z': - case 'Z': - ls = 1; - goto process_again; - case 'p': - ls = 1; - if (isalnum(ptr[1])) { - ptr++; - /* Check for special pointers */ - switch (*ptr) { - case 's': - case 'S': - case 'x': - break; - case 'f': - case 'F': - /* - * Pre-5.5 kernels use %pf and - * %pF for printing symbols - * while kernels since 5.5 use - * %pfw for fwnodes. So check - * %p[fF] isn't followed by 'w'. - */ - if (ptr[1] != 'w') - break; - /* fall through */ - default: - /* - * Older kernels do not process - * dereferenced pointers. - * Only process if the pointer - * value is a printable. - */ - if (isprint(*(char *)bptr)) - goto process_string; - } - } - /* fall through */ - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - switch (ls) { - case 0: - vsize = 4; - break; - case 1: - vsize = tep->long_size; - break; - case 2: - vsize = 8; - break; - default: - vsize = ls; /* ? */ - break; - } - /* fall through */ - case '*': - if (*ptr == '*') - vsize = 4; - - /* the pointers are always 4 bytes aligned */ - bptr = (void *)(((unsigned long)bptr + 3) & - ~3); - val = tep_read_number(tep, bptr, vsize); - bptr += vsize; - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s(%d): not enough memory!", - __func__, __LINE__); - goto out_free; - } - arg->next = NULL; - arg->type = TEP_PRINT_ATOM; - if (asprintf(&arg->atom.atom, "%lld", val) < 0) { - free(arg); - goto out_free; - } - *next = arg; - next = &arg->next; - /* - * The '*' case means that an arg is used as the length. - * We need to continue to figure out for what. - */ - if (*ptr == '*') - goto process_again; - - break; - case 's': - process_string: - arg = alloc_arg(); - if (!arg) { - do_warning_event(event, "%s(%d): not enough memory!", - __func__, __LINE__); - goto out_free; - } - arg->next = NULL; - arg->type = TEP_PRINT_BSTRING; - arg->string.string = strdup(bptr); - if (!arg->string.string) - goto out_free; - bptr += strlen(bptr) + 1; - *next = arg; - next = &arg->next; - default: - break; - } - } - } - - return args; - -out_free: - free_args(args); - return NULL; -} - -static char * -get_bprint_format(void *data, int size __maybe_unused, - struct tep_event *event) -{ - struct tep_handle *tep = event->tep; - unsigned long long addr; - struct tep_format_field *field; - struct printk_map *printk; - char *format; - - field = tep->bprint_fmt_field; - - if (!field) { - field = tep_find_field(event, "fmt"); - if (!field) { - do_warning_event(event, "can't find format field for binary printk"); - return NULL; - } - tep->bprint_fmt_field = field; - } - - addr = tep_read_number(tep, data + field->offset, field->size); - - printk = find_printk(tep, addr); - if (!printk) { - if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0) - return NULL; - return format; - } - - if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0) - return NULL; - - return format; -} - -static int print_mac_arg(struct trace_seq *s, const char *format, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"; - bool reverse = false; - unsigned char *buf; - int ret = 0; - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return 0; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", - arg->type); - return 0; - } - - if (format[0] == 'm') { - fmt = "%.2x%.2x%.2x%.2x%.2x%.2x"; - } else if (format[0] == 'M' && format[1] == 'F') { - fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x"; - ret++; - } - if (format[1] == 'R') { - reverse = true; - ret++; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning_event(event, "%s: field %s not found", - __func__, arg->field.name); - return ret; - } - } - if (arg->field.field->size != 6) { - trace_seq_printf(s, "INVALIDMAC"); - return ret; - } - - buf = data + arg->field.field->offset; - if (reverse) - trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]); - else - trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); - - return ret; -} - -static int parse_ip4_print_args(struct tep_handle *tep, - const char *ptr, bool *reverse) -{ - int ret = 0; - - *reverse = false; - - /* hnbl */ - switch (*ptr) { - case 'h': - if (tep->file_bigendian) - *reverse = false; - else - *reverse = true; - ret++; - break; - case 'l': - *reverse = true; - ret++; - break; - case 'n': - case 'b': - ret++; - /* fall through */ - default: - *reverse = false; - break; - } - - return ret; -} - -static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf) -{ - const char *fmt; - - if (i == 'i') - fmt = "%03d.%03d.%03d.%03d"; - else - fmt = "%d.%d.%d.%d"; - - if (reverse) - trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]); - else - trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); - -} - -static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) -{ - return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) | - (unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL; -} - -static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) -{ - return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); -} - -static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr) -{ - int i, j, range; - unsigned char zerolength[8]; - int longest = 1; - int colonpos = -1; - uint16_t word; - uint8_t hi, lo; - bool needcolon = false; - bool useIPv4; - struct in6_addr in6; - - memcpy(&in6, addr, sizeof(struct in6_addr)); - - useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); - - memset(zerolength, 0, sizeof(zerolength)); - - if (useIPv4) - range = 6; - else - range = 8; - - /* find position of longest 0 run */ - for (i = 0; i < range; i++) { - for (j = i; j < range; j++) { - if (in6.s6_addr16[j] != 0) - break; - zerolength[i]++; - } - } - for (i = 0; i < range; i++) { - if (zerolength[i] > longest) { - longest = zerolength[i]; - colonpos = i; - } - } - if (longest == 1) /* don't compress a single 0 */ - colonpos = -1; - - /* emit address */ - for (i = 0; i < range; i++) { - if (i == colonpos) { - if (needcolon || i == 0) - trace_seq_printf(s, ":"); - trace_seq_printf(s, ":"); - needcolon = false; - i += longest - 1; - continue; - } - if (needcolon) { - trace_seq_printf(s, ":"); - needcolon = false; - } - /* hex u16 without leading 0s */ - word = ntohs(in6.s6_addr16[i]); - hi = word >> 8; - lo = word & 0xff; - if (hi) - trace_seq_printf(s, "%x%02x", hi, lo); - else - trace_seq_printf(s, "%x", lo); - - needcolon = true; - } - - if (useIPv4) { - if (needcolon) - trace_seq_printf(s, ":"); - print_ip4_addr(s, 'I', false, &in6.s6_addr[12]); - } - - return; -} - -static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf) -{ - int j; - - for (j = 0; j < 16; j += 2) { - trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]); - if (i == 'I' && j < 14) - trace_seq_printf(s, ":"); - } -} - -/* - * %pi4 print an IPv4 address with leading zeros - * %pI4 print an IPv4 address without leading zeros - * %pi6 print an IPv6 address without colons - * %pI6 print an IPv6 address with colons - * %pI6c print an IPv6 address in compressed form with colons - * %pISpc print an IP address based on sockaddr; p adds port. - */ -static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - bool reverse = false; - unsigned char *buf; - int ret; - - ret = parse_ip4_print_args(event->tep, ptr, &reverse); - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return ret; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return ret; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return ret; - } - } - - buf = data + arg->field.field->offset; - - if (arg->field.field->size != 4) { - trace_seq_printf(s, "INVALIDIPv4"); - return ret; - } - - print_ip4_addr(s, i, reverse, buf); - return ret; - -} - -static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - char have_c = 0; - unsigned char *buf; - int rc = 0; - - /* pI6c */ - if (i == 'I' && *ptr == 'c') { - have_c = 1; - ptr++; - rc++; - } - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return rc; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return rc; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return rc; - } - } - - buf = data + arg->field.field->offset; - - if (arg->field.field->size != 16) { - trace_seq_printf(s, "INVALIDIPv6"); - return rc; - } - - if (have_c) - print_ip6c_addr(s, buf); - else - print_ip6_addr(s, i, buf); - - return rc; -} - -static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - char have_c = 0, have_p = 0; - unsigned char *buf; - struct sockaddr_storage *sa; - bool reverse = false; - int rc = 0; - int ret; - - /* pISpc */ - if (i == 'I') { - if (*ptr == 'p') { - have_p = 1; - ptr++; - rc++; - } - if (*ptr == 'c') { - have_c = 1; - ptr++; - rc++; - } - } - ret = parse_ip4_print_args(event->tep, ptr, &reverse); - ptr += ret; - rc += ret; - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return rc; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return rc; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return rc; - } - } - - sa = (struct sockaddr_storage *) (data + arg->field.field->offset); - - if (sa->ss_family == AF_INET) { - struct sockaddr_in *sa4 = (struct sockaddr_in *) sa; - - if (arg->field.field->size < sizeof(struct sockaddr_in)) { - trace_seq_printf(s, "INVALIDIPv4"); - return rc; - } - - print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr); - if (have_p) - trace_seq_printf(s, ":%d", ntohs(sa4->sin_port)); - - - } else if (sa->ss_family == AF_INET6) { - struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa; - - if (arg->field.field->size < sizeof(struct sockaddr_in6)) { - trace_seq_printf(s, "INVALIDIPv6"); - return rc; - } - - if (have_p) - trace_seq_printf(s, "["); - - buf = (unsigned char *) &sa6->sin6_addr; - if (have_c) - print_ip6c_addr(s, buf); - else - print_ip6_addr(s, i, buf); - - if (have_p) - trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port)); - } - - return rc; -} - -static int print_ip_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - char i = *ptr; /* 'i' or 'I' */ - int rc = 1; - - /* IP version */ - ptr++; - - switch (*ptr) { - case '4': - rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg); - break; - case '6': - rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg); - break; - case 'S': - rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg); - break; - default: - return 0; - } - - return rc; -} - -static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; -static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - -static int print_uuid_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - const int *index = uuid_index; - char *format = "%02x"; - int ret = 0; - char *buf; - int i; - - switch (*(ptr + 1)) { - case 'L': - format = "%02X"; - /* fall through */ - case 'l': - index = guid_index; - ret++; - break; - case 'B': - format = "%02X"; - /* fall through */ - case 'b': - ret++; - break; - } - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return ret; - } - - if (arg->type != TEP_PRINT_FIELD) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return ret; - } - - if (!arg->field.field) { - arg->field.field = - tep_find_any_field(event, arg->field.name); - if (!arg->field.field) { - do_warning("%s: field %s not found", - __func__, arg->field.name); - return ret; - } - } - - if (arg->field.field->size != 16) { - trace_seq_printf(s, "INVALIDUUID"); - return ret; - } - - buf = data + arg->field.field->offset; - - for (i = 0; i < 16; i++) { - trace_seq_printf(s, format, buf[index[i]] & 0xff); - switch (i) { - case 3: - case 5: - case 7: - case 9: - trace_seq_printf(s, "-"); - break; - } - } - - return ret; -} - -static int print_raw_buff_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg, int print_len) -{ - int plen = print_len; - char *delim = " "; - int ret = 0; - char *buf; - int i; - unsigned long offset; - int arr_len; - - switch (*(ptr + 1)) { - case 'C': - delim = ":"; - ret++; - break; - case 'D': - delim = "-"; - ret++; - break; - case 'N': - delim = ""; - ret++; - break; - } - - if (arg->type == TEP_PRINT_FUNC) { - process_defined_func(s, data, size, event, arg); - return ret; - } - - if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) { - trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return ret; - } - - offset = tep_read_number(event->tep, - data + arg->dynarray.field->offset, - arg->dynarray.field->size); - arr_len = (unsigned long long)(offset >> 16); - buf = data + (offset & 0xffff); - - if (arr_len < plen) - plen = arr_len; - - if (plen < 1) - return ret; - - trace_seq_printf(s, "%02x", buf[0] & 0xff); - for (i = 1; i < plen; i++) - trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff); - - return ret; -} - -static int is_printable_array(char *p, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len && p[i]; i++) - if (!isprint(p[i]) && !isspace(p[i])) - return 0; - return 1; -} - -void tep_print_field(struct trace_seq *s, void *data, - struct tep_format_field *field) -{ - unsigned long long val; - unsigned int offset, len, i; - struct tep_handle *tep = field->event->tep; - - if (field->flags & TEP_FIELD_IS_ARRAY) { - offset = field->offset; - len = field->size; - if (field->flags & TEP_FIELD_IS_DYNAMIC) { - val = tep_read_number(tep, data + offset, len); - offset = val; - len = offset >> 16; - offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; - } - if (field->flags & TEP_FIELD_IS_STRING && - is_printable_array(data + offset, len)) { - trace_seq_printf(s, "%s", (char *)data + offset); - } else { - trace_seq_puts(s, "ARRAY["); - for (i = 0; i < len; i++) { - if (i) - trace_seq_puts(s, ", "); - trace_seq_printf(s, "%02x", - *((unsigned char *)data + offset + i)); - } - trace_seq_putc(s, ']'); - field->flags &= ~TEP_FIELD_IS_STRING; - } - } else { - val = tep_read_number(tep, data + field->offset, - field->size); - if (field->flags & TEP_FIELD_IS_POINTER) { - trace_seq_printf(s, "0x%llx", val); - } else if (field->flags & TEP_FIELD_IS_SIGNED) { - switch (field->size) { - case 4: - /* - * If field is long then print it in hex. - * A long usually stores pointers. - */ - if (field->flags & TEP_FIELD_IS_LONG) - trace_seq_printf(s, "0x%x", (int)val); - else - trace_seq_printf(s, "%d", (int)val); - break; - case 2: - trace_seq_printf(s, "%2d", (short)val); - break; - case 1: - trace_seq_printf(s, "%1d", (char)val); - break; - default: - trace_seq_printf(s, "%lld", val); - } - } else { - if (field->flags & TEP_FIELD_IS_LONG) - trace_seq_printf(s, "0x%llx", val); - else - trace_seq_printf(s, "%llu", val); - } - } -} - -void tep_print_fields(struct trace_seq *s, void *data, - int size __maybe_unused, struct tep_event *event) -{ - struct tep_format_field *field; - - field = event->format.fields; - while (field) { - trace_seq_printf(s, " %s=", field->name); - tep_print_field(s, data, field); - field = field->next; - } -} - -static int print_function(struct trace_seq *s, const char *format, - void *data, int size, struct tep_event *event, - struct tep_print_arg *arg) -{ - struct func_map *func; - unsigned long long val; - - val = eval_num_arg(data, size, event, arg); - func = find_func(event->tep, val); - if (func) { - trace_seq_puts(s, func->func); - if (*format == 'F' || *format == 'S') - trace_seq_printf(s, "+0x%llx", val - func->addr); - } else { - if (event->tep->long_size == 4) - trace_seq_printf(s, "0x%lx", (long)val); - else - trace_seq_printf(s, "0x%llx", (long long)val); - } - - return 0; -} - -static int print_arg_pointer(struct trace_seq *s, const char *format, int plen, - void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) -{ - unsigned long long val; - int ret = 1; - - if (arg->type == TEP_PRINT_BSTRING) { - trace_seq_puts(s, arg->string.string); - return 0; - } - while (*format) { - if (*format == 'p') { - format++; - break; - } - format++; - } - - switch (*format) { - case 'F': - case 'f': - case 'S': - case 's': - ret += print_function(s, format, data, size, event, arg); - break; - case 'M': - case 'm': - ret += print_mac_arg(s, format, data, size, event, arg); - break; - case 'I': - case 'i': - ret += print_ip_arg(s, format, data, size, event, arg); - break; - case 'U': - ret += print_uuid_arg(s, format, data, size, event, arg); - break; - case 'h': - ret += print_raw_buff_arg(s, format, data, size, event, arg, plen); - break; - default: - ret = 0; - val = eval_num_arg(data, size, event, arg); - trace_seq_printf(s, "%p", (void *)(intptr_t)val); - break; - } - - return ret; - -} - -static int print_arg_number(struct trace_seq *s, const char *format, int plen, - void *data, int size, int ls, - struct tep_event *event, struct tep_print_arg *arg) -{ - unsigned long long val; - - val = eval_num_arg(data, size, event, arg); - - switch (ls) { - case -2: - if (plen >= 0) - trace_seq_printf(s, format, plen, (char)val); - else - trace_seq_printf(s, format, (char)val); - break; - case -1: - if (plen >= 0) - trace_seq_printf(s, format, plen, (short)val); - else - trace_seq_printf(s, format, (short)val); - break; - case 0: - if (plen >= 0) - trace_seq_printf(s, format, plen, (int)val); - else - trace_seq_printf(s, format, (int)val); - break; - case 1: - if (plen >= 0) - trace_seq_printf(s, format, plen, (long)val); - else - trace_seq_printf(s, format, (long)val); - break; - case 2: - if (plen >= 0) - trace_seq_printf(s, format, plen, (long long)val); - else - trace_seq_printf(s, format, (long long)val); - break; - default: - do_warning_event(event, "bad count (%d)", ls); - event->flags |= TEP_EVENT_FL_FAILED; - } - return 0; -} - - -static void print_arg_string(struct trace_seq *s, const char *format, int plen, - void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) -{ - struct trace_seq p; - - /* Use helper trace_seq */ - trace_seq_init(&p); - print_str_arg(&p, data, size, event, - format, plen, arg); - trace_seq_terminate(&p); - trace_seq_puts(s, p.buffer); - trace_seq_destroy(&p); -} - -static int parse_arg_format_pointer(const char *format) -{ - int ret = 0; - int index; - int loop; - - switch (*format) { - case 'F': - case 'S': - case 'f': - case 's': - ret++; - break; - case 'M': - case 'm': - /* [mM]R , [mM]F */ - switch (format[1]) { - case 'R': - case 'F': - ret++; - break; - } - ret++; - break; - case 'I': - case 'i': - index = 2; - loop = 1; - switch (format[1]) { - case 'S': - /*[S][pfs]*/ - while (loop) { - switch (format[index]) { - case 'p': - case 'f': - case 's': - ret++; - index++; - break; - default: - loop = 0; - break; - } - } - /* fall through */ - case '4': - /* [4S][hnbl] */ - switch (format[index]) { - case 'h': - case 'n': - case 'l': - case 'b': - ret++; - index++; - break; - } - if (format[1] == '4') { - ret++; - break; - } - /* fall through */ - case '6': - /* [6S]c */ - if (format[index] == 'c') - ret++; - ret++; - break; - } - ret++; - break; - case 'U': - switch (format[1]) { - case 'L': - case 'l': - case 'B': - case 'b': - ret++; - break; - } - ret++; - break; - case 'h': - switch (format[1]) { - case 'C': - case 'D': - case 'N': - ret++; - break; - } - ret++; - break; - default: - break; - } - - return ret; -} - -static void free_parse_args(struct tep_print_parse *arg) -{ - struct tep_print_parse *del; - - while (arg) { - del = arg; - arg = del->next; - free(del->format); - free(del); - } -} - -static int parse_arg_add(struct tep_print_parse **parse, char *format, - enum tep_print_parse_type type, - struct tep_print_arg *arg, - struct tep_print_arg *len_as_arg, - int ls) -{ - struct tep_print_parse *parg = NULL; - - parg = calloc(1, sizeof(*parg)); - if (!parg) - goto error; - parg->format = strdup(format); - if (!parg->format) - goto error; - parg->type = type; - parg->arg = arg; - parg->len_as_arg = len_as_arg; - parg->ls = ls; - *parse = parg; - return 0; -error: - if (parg) { - free(parg->format); - free(parg); - } - return -1; -} - -static int parse_arg_format(struct tep_print_parse **parse, - struct tep_event *event, - const char *format, struct tep_print_arg **arg) -{ - struct tep_print_arg *len_arg = NULL; - char print_format[32]; - const char *start = format; - int ret = 0; - int ls = 0; - int res; - int len; - - format++; - ret++; - for (; *format; format++) { - switch (*format) { - case '#': - /* FIXME: need to handle properly */ - break; - case 'h': - ls--; - break; - case 'l': - ls++; - break; - case 'L': - ls = 2; - break; - case '.': - case 'z': - case 'Z': - case '0' ... '9': - case '-': - break; - case '*': - /* The argument is the length. */ - if (!*arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - if (len_arg) { - do_warning_event(event, "argument already matched"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - len_arg = *arg; - *arg = (*arg)->next; - break; - case 'p': - if (!*arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - res = parse_arg_format_pointer(format + 1); - if (res > 0) { - format += res; - ret += res; - } - len = ((unsigned long)format + 1) - - (unsigned long)start; - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } - memcpy(print_format, start, len); - print_format[len] = 0; - - parse_arg_add(parse, print_format, - PRINT_FMT_ARG_POINTER, *arg, len_arg, ls); - *arg = (*arg)->next; - ret++; - return ret; - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - if (!*arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - - len = ((unsigned long)format + 1) - - (unsigned long)start; - - /* should never happen */ - if (len > 30) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } - memcpy(print_format, start, len); - print_format[len] = 0; - - if (event->tep->long_size == 8 && ls == 1 && - sizeof(long) != 8) { - char *p; - - /* make %l into %ll */ - if (ls == 1 && (p = strchr(print_format, 'l'))) - memmove(p+1, p, strlen(p)+1); - ls = 2; - } - if (ls < -2 || ls > 2) { - do_warning_event(event, "bad count (%d)", ls); - event->flags |= TEP_EVENT_FL_FAILED; - } - parse_arg_add(parse, print_format, - PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls); - *arg = (*arg)->next; - ret++; - return ret; - case 's': - if (!*arg) { - do_warning_event(event, "no matching argument"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - - len = ((unsigned long)format + 1) - - (unsigned long)start; - - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } - - memcpy(print_format, start, len); - print_format[len] = 0; - - parse_arg_add(parse, print_format, - PRINT_FMT_ARG_STRING, *arg, len_arg, 0); - *arg = (*arg)->next; - ret++; - return ret; - default: - snprintf(print_format, 32, ">%c<", *format); - parse_arg_add(parse, print_format, - PRINT_FMT_STRING, NULL, NULL, 0); - ret++; - return ret; - } - ret++; - } - -out_failed: - return ret; - -} - -static int parse_arg_string(struct tep_print_parse **parse, const char *format) -{ - struct trace_seq s; - int ret = 0; - - trace_seq_init(&s); - for (; *format; format++) { - if (*format == '\\') { - format++; - ret++; - switch (*format) { - case 'n': - trace_seq_putc(&s, '\n'); - break; - case 't': - trace_seq_putc(&s, '\t'); - break; - case 'r': - trace_seq_putc(&s, '\r'); - break; - case '\\': - trace_seq_putc(&s, '\\'); - break; - default: - trace_seq_putc(&s, *format); - break; - } - } else if (*format == '%') { - if (*(format + 1) == '%') { - trace_seq_putc(&s, '%'); - format++; - ret++; - } else - break; - } else - trace_seq_putc(&s, *format); - - ret++; - } - trace_seq_terminate(&s); - parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0); - trace_seq_destroy(&s); - - return ret; -} - -static struct tep_print_parse * -parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg) -{ - struct tep_print_parse *parse_ret = NULL; - struct tep_print_parse **parse = NULL; - int ret; - int len; - - len = strlen(format); - while (*format) { - if (!parse_ret) - parse = &parse_ret; - if (*format == '%' && *(format + 1) != '%') - ret = parse_arg_format(parse, event, format, &arg); - else - ret = parse_arg_string(parse, format); - if (*parse) - parse = &((*parse)->next); - - len -= ret; - if (len > 0) - format += ret; - else - break; - } - return parse_ret; -} - -static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s, - void *data, int size, struct tep_event *event) -{ - int len_arg; - - while (parse) { - if (parse->len_as_arg) - len_arg = eval_num_arg(data, size, event, parse->len_as_arg); - switch (parse->type) { - case PRINT_FMT_ARG_DIGIT: - print_arg_number(s, parse->format, - parse->len_as_arg ? len_arg : -1, data, - size, parse->ls, event, parse->arg); - break; - case PRINT_FMT_ARG_POINTER: - print_arg_pointer(s, parse->format, - parse->len_as_arg ? len_arg : 1, - data, size, event, parse->arg); - break; - case PRINT_FMT_ARG_STRING: - print_arg_string(s, parse->format, - parse->len_as_arg ? len_arg : -1, - data, size, event, parse->arg); - break; - case PRINT_FMT_STRING: - default: - trace_seq_printf(s, "%s", parse->format); - break; - } - parse = parse->next; - } -} - -static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) -{ - struct tep_print_parse *parse = event->print_fmt.print_cache; - struct tep_print_arg *args = NULL; - char *bprint_fmt = NULL; - - if (event->flags & TEP_EVENT_FL_FAILED) { - trace_seq_printf(s, "[FAILED TO PARSE]"); - tep_print_fields(s, data, size, event); - return; - } - - if (event->flags & TEP_EVENT_FL_ISBPRINT) { - bprint_fmt = get_bprint_format(data, size, event); - args = make_bprint_args(bprint_fmt, data, size, event); - parse = parse_args(event, bprint_fmt, args); - } - - print_event_cache(parse, s, data, size, event); - - if (event->flags & TEP_EVENT_FL_ISBPRINT) { - free_parse_args(parse); - free_args(args); - free(bprint_fmt); - } -} - -/* - * This parses out the Latency format (interrupts disabled, - * need rescheduling, in hard/soft interrupt, preempt count - * and lock depth) and places it into the trace_seq. - */ -static void data_latency_format(struct tep_handle *tep, struct trace_seq *s, - char *format, struct tep_record *record) -{ - static int check_lock_depth = 1; - static int check_migrate_disable = 1; - static int lock_depth_exists; - static int migrate_disable_exists; - unsigned int lat_flags; - struct trace_seq sq; - unsigned int pc; - int lock_depth = 0; - int migrate_disable = 0; - int hardirq; - int softirq; - void *data = record->data; - - trace_seq_init(&sq); - lat_flags = parse_common_flags(tep, data); - pc = parse_common_pc(tep, data); - /* lock_depth may not always exist */ - if (lock_depth_exists) - lock_depth = parse_common_lock_depth(tep, data); - else if (check_lock_depth) { - lock_depth = parse_common_lock_depth(tep, data); - if (lock_depth < 0) - check_lock_depth = 0; - else - lock_depth_exists = 1; - } - - /* migrate_disable may not always exist */ - if (migrate_disable_exists) - migrate_disable = parse_common_migrate_disable(tep, data); - else if (check_migrate_disable) { - migrate_disable = parse_common_migrate_disable(tep, data); - if (migrate_disable < 0) - check_migrate_disable = 0; - else - migrate_disable_exists = 1; - } - - hardirq = lat_flags & TRACE_FLAG_HARDIRQ; - softirq = lat_flags & TRACE_FLAG_SOFTIRQ; - - trace_seq_printf(&sq, "%c%c%c", - (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ? - 'X' : '.', - (lat_flags & TRACE_FLAG_NEED_RESCHED) ? - 'N' : '.', - (hardirq && softirq) ? 'H' : - hardirq ? 'h' : softirq ? 's' : '.'); - - if (pc) - trace_seq_printf(&sq, "%x", pc); - else - trace_seq_printf(&sq, "."); - - if (migrate_disable_exists) { - if (migrate_disable < 0) - trace_seq_printf(&sq, "."); - else - trace_seq_printf(&sq, "%d", migrate_disable); - } - - if (lock_depth_exists) { - if (lock_depth < 0) - trace_seq_printf(&sq, "."); - else - trace_seq_printf(&sq, "%d", lock_depth); - } - - if (sq.state == TRACE_SEQ__MEM_ALLOC_FAILED) { - s->state = TRACE_SEQ__MEM_ALLOC_FAILED; - return; - } - - trace_seq_terminate(&sq); - trace_seq_puts(s, sq.buffer); - trace_seq_destroy(&sq); - trace_seq_terminate(s); -} - -/** - * tep_data_type - parse out the given event type - * @tep: a handle to the trace event parser context - * @rec: the record to read from - * - * This returns the event id from the @rec. - */ -int tep_data_type(struct tep_handle *tep, struct tep_record *rec) -{ - return trace_parse_common_type(tep, rec->data); -} - -/** - * tep_data_pid - parse the PID from record - * @tep: a handle to the trace event parser context - * @rec: the record to parse - * - * This returns the PID from a record. - */ -int tep_data_pid(struct tep_handle *tep, struct tep_record *rec) -{ - return parse_common_pid(tep, rec->data); -} - -/** - * tep_data_preempt_count - parse the preempt count from the record - * @tep: a handle to the trace event parser context - * @rec: the record to parse - * - * This returns the preempt count from a record. - */ -int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec) -{ - return parse_common_pc(tep, rec->data); -} - -/** - * tep_data_flags - parse the latency flags from the record - * @tep: a handle to the trace event parser context - * @rec: the record to parse - * - * This returns the latency flags from a record. - * - * Use trace_flag_type enum for the flags (see event-parse.h). - */ -int tep_data_flags(struct tep_handle *tep, struct tep_record *rec) -{ - return parse_common_flags(tep, rec->data); -} - -/** - * tep_data_comm_from_pid - return the command line from PID - * @tep: a handle to the trace event parser context - * @pid: the PID of the task to search for - * - * This returns a pointer to the command line that has the given - * @pid. - */ -const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid) -{ - const char *comm; - - comm = find_cmdline(tep, pid); - return comm; -} - -static struct tep_cmdline * -pid_from_cmdlist(struct tep_handle *tep, const char *comm, struct tep_cmdline *next) -{ - struct cmdline_list *cmdlist = (struct cmdline_list *)next; - - if (cmdlist) - cmdlist = cmdlist->next; - else - cmdlist = tep->cmdlist; - - while (cmdlist && strcmp(cmdlist->comm, comm) != 0) - cmdlist = cmdlist->next; - - return (struct tep_cmdline *)cmdlist; -} - -/** - * tep_data_pid_from_comm - return the pid from a given comm - * @tep: a handle to the trace event parser context - * @comm: the cmdline to find the pid from - * @next: the cmdline structure to find the next comm - * - * This returns the cmdline structure that holds a pid for a given - * comm, or NULL if none found. As there may be more than one pid for - * a given comm, the result of this call can be passed back into - * a recurring call in the @next parameter, and then it will find the - * next pid. - * Also, it does a linear search, so it may be slow. - */ -struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, - struct tep_cmdline *next) -{ - struct tep_cmdline *cmdline; - - /* - * If the cmdlines have not been converted yet, then use - * the list. - */ - if (!tep->cmdlines) - return pid_from_cmdlist(tep, comm, next); - - if (next) { - /* - * The next pointer could have been still from - * a previous call before cmdlines were created - */ - if (next < tep->cmdlines || - next >= tep->cmdlines + tep->cmdline_count) - next = NULL; - else - cmdline = next++; - } - - if (!next) - cmdline = tep->cmdlines; - - while (cmdline < tep->cmdlines + tep->cmdline_count) { - if (strcmp(cmdline->comm, comm) == 0) - return cmdline; - cmdline++; - } - return NULL; -} - -/** - * tep_cmdline_pid - return the pid associated to a given cmdline - * @tep: a handle to the trace event parser context - * @cmdline: The cmdline structure to get the pid from - * - * Returns the pid for a give cmdline. If @cmdline is NULL, then - * -1 is returned. - */ -int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline) -{ - struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; - - if (!cmdline) - return -1; - - /* - * If cmdlines have not been created yet, or cmdline is - * not part of the array, then treat it as a cmdlist instead. - */ - if (!tep->cmdlines || - cmdline < tep->cmdlines || - cmdline >= tep->cmdlines + tep->cmdline_count) - return cmdlist->pid; - - return cmdline->pid; -} - -/* - * This parses the raw @data using the given @event information and - * writes the print format into the trace_seq. - */ -static void print_event_info(struct trace_seq *s, char *format, bool raw, - struct tep_event *event, struct tep_record *record) -{ - int print_pretty = 1; - - if (raw || (event->flags & TEP_EVENT_FL_PRINTRAW)) - tep_print_fields(s, record->data, record->size, event); - else { - - if (event->handler && !(event->flags & TEP_EVENT_FL_NOHANDLE)) - print_pretty = event->handler(s, record, event, - event->context); - - if (print_pretty) - pretty_print(s, record->data, record->size, event); - } - - trace_seq_terminate(s); -} - -/** - * tep_find_event_by_record - return the event from a given record - * @tep: a handle to the trace event parser context - * @record: The record to get the event from - * - * Returns the associated event for a given record, or NULL if non is - * is found. - */ -struct tep_event * -tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record) -{ - int type; - - if (record->size < 0) { - do_warning("ug! negative record size %d", record->size); - return NULL; - } - - type = trace_parse_common_type(tep, record->data); - - return tep_find_event(tep, type); -} - -/* - * Writes the timestamp of the record into @s. Time divisor and precision can be - * specified as part of printf @format string. Example: - * "%3.1000d" - divide the time by 1000 and print the first 3 digits - * before the dot. Thus, the timestamp "123456000" will be printed as - * "123.456" - */ -static void print_event_time(struct tep_handle *tep, struct trace_seq *s, - char *format, struct tep_event *event, - struct tep_record *record) -{ - unsigned long long time; - char *divstr; - int prec = 0, pr; - int div = 0; - int p10 = 1; - - if (isdigit(*(format + 1))) - prec = atoi(format + 1); - divstr = strchr(format, '.'); - if (divstr && isdigit(*(divstr + 1))) - div = atoi(divstr + 1); - time = record->ts; - if (div) { - time += div / 2; - time /= div; - } - pr = prec; - while (pr--) - p10 *= 10; - - if (p10 > 1 && p10 < time) - trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10); - else - trace_seq_printf(s, "%12llu", time); -} - -struct print_event_type { - enum { - EVENT_TYPE_INT = 1, - EVENT_TYPE_STRING, - EVENT_TYPE_UNKNOWN, - } type; - char format[32]; -}; - -static void print_string(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, struct tep_event *event, - const char *arg, struct print_event_type *type) -{ - const char *comm; - int pid; - - if (strncmp(arg, TEP_PRINT_LATENCY, strlen(TEP_PRINT_LATENCY)) == 0) { - data_latency_format(tep, s, type->format, record); - } else if (strncmp(arg, TEP_PRINT_COMM, strlen(TEP_PRINT_COMM)) == 0) { - pid = parse_common_pid(tep, record->data); - comm = find_cmdline(tep, pid); - trace_seq_printf(s, type->format, comm); - } else if (strncmp(arg, TEP_PRINT_INFO_RAW, strlen(TEP_PRINT_INFO_RAW)) == 0) { - print_event_info(s, type->format, true, event, record); - } else if (strncmp(arg, TEP_PRINT_INFO, strlen(TEP_PRINT_INFO)) == 0) { - print_event_info(s, type->format, false, event, record); - } else if (strncmp(arg, TEP_PRINT_NAME, strlen(TEP_PRINT_NAME)) == 0) { - trace_seq_printf(s, type->format, event->name); - } else { - trace_seq_printf(s, "[UNKNOWN TEP TYPE %s]", arg); - } - -} - -static void print_int(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, struct tep_event *event, - int arg, struct print_event_type *type) -{ - int param; - - switch (arg) { - case TEP_PRINT_CPU: - param = record->cpu; - break; - case TEP_PRINT_PID: - param = parse_common_pid(tep, record->data); - break; - case TEP_PRINT_TIME: - return print_event_time(tep, s, type->format, event, record); - default: - return; - } - trace_seq_printf(s, type->format, param); -} - -static int tep_print_event_param_type(char *format, - struct print_event_type *type) -{ - char *str = format + 1; - int i = 1; - - type->type = EVENT_TYPE_UNKNOWN; - while (*str) { - switch (*str) { - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - type->type = EVENT_TYPE_INT; - break; - case 's': - type->type = EVENT_TYPE_STRING; - break; - } - str++; - i++; - if (type->type != EVENT_TYPE_UNKNOWN) - break; - } - memset(type->format, 0, 32); - memcpy(type->format, format, i < 32 ? i : 31); - return i; -} - -/** - * tep_print_event - Write various event information - * @tep: a handle to the trace event parser context - * @s: the trace_seq to write to - * @record: The record to get the event from - * @format: a printf format string. Supported event fileds: - * TEP_PRINT_PID, "%d" - event PID - * TEP_PRINT_CPU, "%d" - event CPU - * TEP_PRINT_COMM, "%s" - event command string - * TEP_PRINT_NAME, "%s" - event name - * TEP_PRINT_LATENCY, "%s" - event latency - * TEP_PRINT_TIME, %d - event time stamp. A divisor and precision - * can be specified as part of this format string: - * "%precision.divisord". Example: - * "%3.1000d" - divide the time by 1000 and print the first - * 3 digits before the dot. Thus, the time stamp - * "123456000" will be printed as "123.456" - * TEP_PRINT_INFO, "%s" - event information. If any width is specified in - * the format string, the event information will be printed - * in raw format. - * Writes the specified event information into @s. - */ -void tep_print_event(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, const char *fmt, ...) -{ - struct print_event_type type; - char *format = strdup(fmt); - char *current = format; - char *str = format; - int offset; - va_list args; - struct tep_event *event; - - if (!format) - return; - - event = tep_find_event_by_record(tep, record); - va_start(args, fmt); - while (*current) { - current = strchr(str, '%'); - if (!current) { - trace_seq_puts(s, str); - break; - } - memset(&type, 0, sizeof(type)); - offset = tep_print_event_param_type(current, &type); - *current = '\0'; - trace_seq_puts(s, str); - current += offset; - switch (type.type) { - case EVENT_TYPE_STRING: - print_string(tep, s, record, event, - va_arg(args, char*), &type); - break; - case EVENT_TYPE_INT: - print_int(tep, s, record, event, - va_arg(args, int), &type); - break; - case EVENT_TYPE_UNKNOWN: - default: - trace_seq_printf(s, "[UNKNOWN TYPE]"); - break; - } - str = current; - - } - va_end(args); - free(format); -} - -static int events_id_cmp(const void *a, const void *b) -{ - struct tep_event * const * ea = a; - struct tep_event * const * eb = b; - - if ((*ea)->id < (*eb)->id) - return -1; - - if ((*ea)->id > (*eb)->id) - return 1; - - return 0; -} - -static int events_name_cmp(const void *a, const void *b) -{ - struct tep_event * const * ea = a; - struct tep_event * const * eb = b; - int res; - - res = strcmp((*ea)->name, (*eb)->name); - if (res) - return res; - - res = strcmp((*ea)->system, (*eb)->system); - if (res) - return res; - - return events_id_cmp(a, b); -} - -static int events_system_cmp(const void *a, const void *b) -{ - struct tep_event * const * ea = a; - struct tep_event * const * eb = b; - int res; - - res = strcmp((*ea)->system, (*eb)->system); - if (res) - return res; - - res = strcmp((*ea)->name, (*eb)->name); - if (res) - return res; - - return events_id_cmp(a, b); -} - -static struct tep_event **list_events_copy(struct tep_handle *tep) -{ - struct tep_event **events; - - if (!tep) - return NULL; - - events = malloc(sizeof(*events) * (tep->nr_events + 1)); - if (!events) - return NULL; - - memcpy(events, tep->events, sizeof(*events) * tep->nr_events); - events[tep->nr_events] = NULL; - return events; -} - -static void list_events_sort(struct tep_event **events, int nr_events, - enum tep_event_sort_type sort_type) -{ - int (*sort)(const void *a, const void *b); - - switch (sort_type) { - case TEP_EVENT_SORT_ID: - sort = events_id_cmp; - break; - case TEP_EVENT_SORT_NAME: - sort = events_name_cmp; - break; - case TEP_EVENT_SORT_SYSTEM: - sort = events_system_cmp; - break; - default: - sort = NULL; - } - - if (sort) - qsort(events, nr_events, sizeof(*events), sort); -} - -/** - * tep_list_events - Get events, sorted by given criteria. - * @tep: a handle to the tep context - * @sort_type: desired sort order of the events in the array - * - * Returns an array of pointers to all events, sorted by the given - * @sort_type criteria. The last element of the array is NULL. The returned - * memory must not be freed, it is managed by the library. - * The function is not thread safe. - */ -struct tep_event **tep_list_events(struct tep_handle *tep, - enum tep_event_sort_type sort_type) -{ - struct tep_event **events; - - if (!tep) - return NULL; - - events = tep->sort_events; - if (events && tep->last_type == sort_type) - return events; - - if (!events) { - events = list_events_copy(tep); - if (!events) - return NULL; - - tep->sort_events = events; - - /* the internal events are sorted by id */ - if (sort_type == TEP_EVENT_SORT_ID) { - tep->last_type = sort_type; - return events; - } - } - - list_events_sort(events, tep->nr_events, sort_type); - tep->last_type = sort_type; - - return events; -} - - -/** - * tep_list_events_copy - Thread safe version of tep_list_events() - * @tep: a handle to the tep context - * @sort_type: desired sort order of the events in the array - * - * Returns an array of pointers to all events, sorted by the given - * @sort_type criteria. The last element of the array is NULL. The returned - * array is newly allocated inside the function and must be freed by the caller - */ -struct tep_event **tep_list_events_copy(struct tep_handle *tep, - enum tep_event_sort_type sort_type) -{ - struct tep_event **events; - - if (!tep) - return NULL; - - events = list_events_copy(tep); - if (!events) - return NULL; - - /* the internal events are sorted by id */ - if (sort_type == TEP_EVENT_SORT_ID) - return events; - - list_events_sort(events, tep->nr_events, sort_type); - - return events; -} - -static struct tep_format_field ** -get_event_fields(const char *type, const char *name, - int count, struct tep_format_field *list) -{ - struct tep_format_field **fields; - struct tep_format_field *field; - int i = 0; - - fields = malloc(sizeof(*fields) * (count + 1)); - if (!fields) - return NULL; - - for (field = list; field; field = field->next) { - fields[i++] = field; - if (i == count + 1) { - do_warning("event %s has more %s fields than specified", - name, type); - i--; - break; - } - } - - if (i != count) - do_warning("event %s has less %s fields than specified", - name, type); - - fields[i] = NULL; - - return fields; -} - -/** - * tep_event_common_fields - return a list of common fields for an event - * @event: the event to return the common fields of. - * - * Returns an allocated array of fields. The last item in the array is NULL. - * The array must be freed with free(). - */ -struct tep_format_field **tep_event_common_fields(struct tep_event *event) -{ - return get_event_fields("common", event->name, - event->format.nr_common, - event->format.common_fields); -} - -/** - * tep_event_fields - return a list of event specific fields for an event - * @event: the event to return the fields of. - * - * Returns an allocated array of fields. The last item in the array is NULL. - * The array must be freed with free(). - */ -struct tep_format_field **tep_event_fields(struct tep_event *event) -{ - return get_event_fields("event", event->name, - event->format.nr_fields, - event->format.fields); -} - -static void print_fields(struct trace_seq *s, struct tep_print_flag_sym *field) -{ - trace_seq_printf(s, "{ %s, %s }", field->value, field->str); - if (field->next) { - trace_seq_puts(s, ", "); - print_fields(s, field->next); - } -} - -/* for debugging */ -static void print_args(struct tep_print_arg *args) -{ - int print_paren = 1; - struct trace_seq s; - - switch (args->type) { - case TEP_PRINT_NULL: - printf("null"); - break; - case TEP_PRINT_ATOM: - printf("%s", args->atom.atom); - break; - case TEP_PRINT_FIELD: - printf("REC->%s", args->field.name); - break; - case TEP_PRINT_FLAGS: - printf("__print_flags("); - print_args(args->flags.field); - printf(", %s, ", args->flags.delim); - trace_seq_init(&s); - print_fields(&s, args->flags.flags); - trace_seq_do_printf(&s); - trace_seq_destroy(&s); - printf(")"); - break; - case TEP_PRINT_SYMBOL: - printf("__print_symbolic("); - print_args(args->symbol.field); - printf(", "); - trace_seq_init(&s); - print_fields(&s, args->symbol.symbols); - trace_seq_do_printf(&s); - trace_seq_destroy(&s); - printf(")"); - break; - case TEP_PRINT_HEX: - printf("__print_hex("); - print_args(args->hex.field); - printf(", "); - print_args(args->hex.size); - printf(")"); - break; - case TEP_PRINT_HEX_STR: - printf("__print_hex_str("); - print_args(args->hex.field); - printf(", "); - print_args(args->hex.size); - printf(")"); - break; - case TEP_PRINT_INT_ARRAY: - printf("__print_array("); - print_args(args->int_array.field); - printf(", "); - print_args(args->int_array.count); - printf(", "); - print_args(args->int_array.el_size); - printf(")"); - break; - case TEP_PRINT_STRING: - case TEP_PRINT_BSTRING: - printf("__get_str(%s)", args->string.string); - break; - case TEP_PRINT_BITMASK: - printf("__get_bitmask(%s)", args->bitmask.bitmask); - break; - case TEP_PRINT_TYPE: - printf("(%s)", args->typecast.type); - print_args(args->typecast.item); - break; - case TEP_PRINT_OP: - if (strcmp(args->op.op, ":") == 0) - print_paren = 0; - if (print_paren) - printf("("); - print_args(args->op.left); - printf(" %s ", args->op.op); - print_args(args->op.right); - if (print_paren) - printf(")"); - break; - default: - /* we should warn... */ - return; - } - if (args->next) { - printf("\n"); - print_args(args->next); - } -} - -static void parse_header_field(const char *field, - int *offset, int *size, int mandatory) -{ - unsigned long long save_input_buf_ptr; - unsigned long long save_input_buf_siz; - char *token; - int type; - - save_input_buf_ptr = input_buf_ptr; - save_input_buf_siz = input_buf_siz; - - if (read_expected(TEP_EVENT_ITEM, "field") < 0) - return; - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - - /* type */ - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - free_token(token); - - /* - * If this is not a mandatory field, then test it first. - */ - if (mandatory) { - if (read_expected(TEP_EVENT_ITEM, field) < 0) - return; - } else { - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - if (strcmp(token, field) != 0) - goto discard; - free_token(token); - } - - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - if (read_expected(TEP_EVENT_ITEM, "offset") < 0) - return; - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - *offset = atoi(token); - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - if (read_expected(TEP_EVENT_ITEM, "size") < 0) - return; - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - if (read_expect_type(TEP_EVENT_ITEM, &token) < 0) - goto fail; - *size = atoi(token); - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - type = read_token(&token); - if (type != TEP_EVENT_NEWLINE) { - /* newer versions of the kernel have a "signed" type */ - if (type != TEP_EVENT_ITEM) - goto fail; - - if (strcmp(token, "signed") != 0) - goto fail; - - free_token(token); - - if (read_expected(TEP_EVENT_OP, ":") < 0) - return; - - if (read_expect_type(TEP_EVENT_ITEM, &token)) - goto fail; - - free_token(token); - if (read_expected(TEP_EVENT_OP, ";") < 0) - return; - - if (read_expect_type(TEP_EVENT_NEWLINE, &token)) - goto fail; - } - fail: - free_token(token); - return; - - discard: - input_buf_ptr = save_input_buf_ptr; - input_buf_siz = save_input_buf_siz; - *offset = 0; - *size = 0; - free_token(token); -} - -/** - * tep_parse_header_page - parse the data stored in the header page - * @tep: a handle to the trace event parser context - * @buf: the buffer storing the header page format string - * @size: the size of @buf - * @long_size: the long size to use if there is no header - * - * This parses the header page format for information on the - * ring buffer used. The @buf should be copied from - * - * /sys/kernel/debug/tracing/events/header_page - */ -int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, - int long_size) -{ - int ignore; - - if (!size) { - /* - * Old kernels did not have header page info. - * Sorry but we just use what we find here in user space. - */ - tep->header_page_ts_size = sizeof(long long); - tep->header_page_size_size = long_size; - tep->header_page_data_offset = sizeof(long long) + long_size; - tep->old_format = 1; - return -1; - } - init_input_buf(buf, size); - - parse_header_field("timestamp", &tep->header_page_ts_offset, - &tep->header_page_ts_size, 1); - parse_header_field("commit", &tep->header_page_size_offset, - &tep->header_page_size_size, 1); - parse_header_field("overwrite", &tep->header_page_overwrite, - &ignore, 0); - parse_header_field("data", &tep->header_page_data_offset, - &tep->header_page_data_size, 1); - - return 0; -} - -static int event_matches(struct tep_event *event, - int id, const char *sys_name, - const char *event_name) -{ - if (id >= 0 && id != event->id) - return 0; - - if (event_name && (strcmp(event_name, event->name) != 0)) - return 0; - - if (sys_name && (strcmp(sys_name, event->system) != 0)) - return 0; - - return 1; -} - -static void free_handler(struct event_handler *handle) -{ - free((void *)handle->sys_name); - free((void *)handle->event_name); - free(handle); -} - -static int find_event_handle(struct tep_handle *tep, struct tep_event *event) -{ - struct event_handler *handle, **next; - - for (next = &tep->handlers; *next; - next = &(*next)->next) { - handle = *next; - if (event_matches(event, handle->id, - handle->sys_name, - handle->event_name)) - break; - } - - if (!(*next)) - return 0; - - pr_stat("overriding event (%d) %s:%s with new print handler", - event->id, event->system, event->name); - - event->handler = handle->func; - event->context = handle->context; - - *next = handle->next; - free_handler(handle); - - return 1; -} - -/** - * parse_format - parse the event format - * @buf: the buffer storing the event format string - * @size: the size of @buf - * @sys: the system the event belongs to - * - * This parses the event format and creates an event structure - * to quickly parse raw data for a given event. - * - * These files currently come from: - * - * /sys/kernel/debug/tracing/events/.../.../format - */ -static enum tep_errno parse_format(struct tep_event **eventp, - struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys) -{ - struct tep_event *event; - int ret; - - init_input_buf(buf, size); - - *eventp = event = alloc_event(); - if (!event) - return TEP_ERRNO__MEM_ALLOC_FAILED; - - event->name = event_read_name(); - if (!event->name) { - /* Bad event? */ - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto event_alloc_failed; - } - - if (strcmp(sys, "ftrace") == 0) { - event->flags |= TEP_EVENT_FL_ISFTRACE; - - if (strcmp(event->name, "bprint") == 0) - event->flags |= TEP_EVENT_FL_ISBPRINT; - } - - event->id = event_read_id(); - if (event->id < 0) { - ret = TEP_ERRNO__READ_ID_FAILED; - /* - * This isn't an allocation error actually. - * But as the ID is critical, just bail out. - */ - goto event_alloc_failed; - } - - event->system = strdup(sys); - if (!event->system) { - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto event_alloc_failed; - } - - /* Add tep to event so that it can be referenced */ - event->tep = tep; - - ret = event_read_format(event); - if (ret < 0) { - ret = TEP_ERRNO__READ_FORMAT_FAILED; - goto event_parse_failed; - } - - /* - * If the event has an override, don't print warnings if the event - * print format fails to parse. - */ - if (tep && find_event_handle(tep, event)) - show_warning = 0; - - ret = event_read_print(event); - show_warning = 1; - - if (ret < 0) { - ret = TEP_ERRNO__READ_PRINT_FAILED; - goto event_parse_failed; - } - - if (!ret && (event->flags & TEP_EVENT_FL_ISFTRACE)) { - struct tep_format_field *field; - struct tep_print_arg *arg, **list; - - /* old ftrace had no args */ - list = &event->print_fmt.args; - for (field = event->format.fields; field; field = field->next) { - arg = alloc_arg(); - if (!arg) { - event->flags |= TEP_EVENT_FL_FAILED; - return TEP_ERRNO__OLD_FTRACE_ARG_FAILED; - } - arg->type = TEP_PRINT_FIELD; - arg->field.name = strdup(field->name); - if (!arg->field.name) { - event->flags |= TEP_EVENT_FL_FAILED; - free_arg(arg); - return TEP_ERRNO__OLD_FTRACE_ARG_FAILED; - } - arg->field.field = field; - *list = arg; - list = &arg->next; - } - } - - if (!(event->flags & TEP_EVENT_FL_ISBPRINT)) - event->print_fmt.print_cache = parse_args(event, - event->print_fmt.format, - event->print_fmt.args); - - return 0; - - event_parse_failed: - event->flags |= TEP_EVENT_FL_FAILED; - return ret; - - event_alloc_failed: - free(event->system); - free(event->name); - free(event); - *eventp = NULL; - return ret; -} - -static enum tep_errno -__parse_event(struct tep_handle *tep, - struct tep_event **eventp, - const char *buf, unsigned long size, - const char *sys) -{ - int ret = parse_format(eventp, tep, buf, size, sys); - struct tep_event *event = *eventp; - - if (event == NULL) - return ret; - - if (tep && add_event(tep, event)) { - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto event_add_failed; - } - -#define PRINT_ARGS 0 - if (PRINT_ARGS && event->print_fmt.args) - print_args(event->print_fmt.args); - - return 0; - -event_add_failed: - free_tep_event(event); - return ret; -} - -/** - * tep_parse_format - parse the event format - * @tep: a handle to the trace event parser context - * @eventp: returned format - * @buf: the buffer storing the event format string - * @size: the size of @buf - * @sys: the system the event belongs to - * - * This parses the event format and creates an event structure - * to quickly parse raw data for a given event. - * - * These files currently come from: - * - * /sys/kernel/debug/tracing/events/.../.../format - */ -enum tep_errno tep_parse_format(struct tep_handle *tep, - struct tep_event **eventp, - const char *buf, - unsigned long size, const char *sys) -{ - return __parse_event(tep, eventp, buf, size, sys); -} - -/** - * tep_parse_event - parse the event format - * @tep: a handle to the trace event parser context - * @buf: the buffer storing the event format string - * @size: the size of @buf - * @sys: the system the event belongs to - * - * This parses the event format and creates an event structure - * to quickly parse raw data for a given event. - * - * These files currently come from: - * - * /sys/kernel/debug/tracing/events/.../.../format - */ -enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys) -{ - struct tep_event *event = NULL; - return __parse_event(tep, &event, buf, size, sys); -} - -int get_field_val(struct trace_seq *s, struct tep_format_field *field, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - if (!field) { - if (err) - trace_seq_printf(s, "<CANT FIND FIELD %s>", name); - return -1; - } - - if (tep_read_number_field(field, record->data, val)) { - if (err) - trace_seq_printf(s, " %s=INVALID", name); - return -1; - } - - return 0; -} - -/** - * tep_get_field_raw - return the raw pointer into the data field - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @len: place to store the field length. - * @err: print default error if failed. - * - * Returns a pointer into record->data of the field and places - * the length of the field in @len. - * - * On failure, it returns NULL. - */ -void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - int *len, int err) -{ - struct tep_format_field *field; - void *data = record->data; - unsigned offset; - int dummy; - - if (!event) - return NULL; - - field = tep_find_field(event, name); - - if (!field) { - if (err) - trace_seq_printf(s, "<CANT FIND FIELD %s>", name); - return NULL; - } - - /* Allow @len to be NULL */ - if (!len) - len = &dummy; - - offset = field->offset; - if (field->flags & TEP_FIELD_IS_DYNAMIC) { - offset = tep_read_number(event->tep, - data + offset, field->size); - *len = offset >> 16; - offset &= 0xffff; - if (field->flags & TEP_FIELD_IS_RELATIVE) - offset += field->offset + field->size; - } else - *len = field->size; - - return data + offset; -} - -/** - * tep_get_field_val - find a field and return its value - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @val: place to store the value of the field. - * @err: print default error if failed. - * - * Returns 0 on success -1 on field not found. - */ -int tep_get_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - struct tep_format_field *field; - - if (!event) - return -1; - - field = tep_find_field(event, name); - - return get_field_val(s, field, name, record, val, err); -} - -/** - * tep_get_common_field_val - find a common field and return its value - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @val: place to store the value of the field. - * @err: print default error if failed. - * - * Returns 0 on success -1 on field not found. - */ -int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - struct tep_format_field *field; - - if (!event) - return -1; - - field = tep_find_common_field(event, name); - - return get_field_val(s, field, name, record, val, err); -} - -/** - * tep_get_any_field_val - find a any field and return its value - * @s: The seq to print to on error - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @val: place to store the value of the field. - * @err: print default error if failed. - * - * Returns 0 on success -1 on field not found. - */ -int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err) -{ - struct tep_format_field *field; - - if (!event) - return -1; - - field = tep_find_any_field(event, name); - - return get_field_val(s, field, name, record, val, err); -} - -/** - * tep_print_num_field - print a field and a format - * @s: The seq to print to - * @fmt: The printf format to print the field with. - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @err: print default error if failed. - * - * Returns positive value on success, negative in case of an error, - * or 0 if buffer is full. - */ -int tep_print_num_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err) -{ - struct tep_format_field *field = tep_find_field(event, name); - unsigned long long val; - - if (!field) - goto failed; - - if (tep_read_number_field(field, record->data, &val)) - goto failed; - - return trace_seq_printf(s, fmt, val); - - failed: - if (err) - trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); - return -1; -} - -/** - * tep_print_func_field - print a field and a format for function pointers - * @s: The seq to print to - * @fmt: The printf format to print the field with. - * @event: the event that the field is for - * @name: The name of the field - * @record: The record with the field name. - * @err: print default error if failed. - * - * Returns positive value on success, negative in case of an error, - * or 0 if buffer is full. - */ -int tep_print_func_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err) -{ - struct tep_format_field *field = tep_find_field(event, name); - struct tep_handle *tep = event->tep; - unsigned long long val; - struct func_map *func; - char tmp[128]; - - if (!field) - goto failed; - - if (tep_read_number_field(field, record->data, &val)) - goto failed; - - func = find_func(tep, val); - - if (func) - snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val); - else - sprintf(tmp, "0x%08llx", val); - - return trace_seq_printf(s, fmt, tmp); - - failed: - if (err) - trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); - return -1; -} - -static void free_func_handle(struct tep_function_handler *func) -{ - struct func_params *params; - - free(func->name); - - while (func->params) { - params = func->params; - func->params = params->next; - free(params); - } - - free(func); -} - -/** - * tep_register_print_function - register a helper function - * @tep: a handle to the trace event parser context - * @func: the function to process the helper function - * @ret_type: the return type of the helper function - * @name: the name of the helper function - * @parameters: A list of enum tep_func_arg_type - * - * Some events may have helper functions in the print format arguments. - * This allows a plugin to dynamically create a way to process one - * of these functions. - * - * The @parameters is a variable list of tep_func_arg_type enums that - * must end with TEP_FUNC_ARG_VOID. - */ -int tep_register_print_function(struct tep_handle *tep, - tep_func_handler func, - enum tep_func_arg_type ret_type, - char *name, ...) -{ - struct tep_function_handler *func_handle; - struct func_params **next_param; - struct func_params *param; - enum tep_func_arg_type type; - va_list ap; - int ret; - - func_handle = find_func_handler(tep, name); - if (func_handle) { - /* - * This is most like caused by the users own - * plugins updating the function. This overrides the - * system defaults. - */ - pr_stat("override of function helper '%s'", name); - remove_func_handler(tep, name); - } - - func_handle = calloc(1, sizeof(*func_handle)); - if (!func_handle) { - do_warning("Failed to allocate function handler"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - func_handle->ret_type = ret_type; - func_handle->name = strdup(name); - func_handle->func = func; - if (!func_handle->name) { - do_warning("Failed to allocate function name"); - free(func_handle); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - next_param = &(func_handle->params); - va_start(ap, name); - for (;;) { - type = va_arg(ap, enum tep_func_arg_type); - if (type == TEP_FUNC_ARG_VOID) - break; - - if (type >= TEP_FUNC_ARG_MAX_TYPES) { - do_warning("Invalid argument type %d", type); - ret = TEP_ERRNO__INVALID_ARG_TYPE; - goto out_free; - } - - param = malloc(sizeof(*param)); - if (!param) { - do_warning("Failed to allocate function param"); - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto out_free; - } - param->type = type; - param->next = NULL; - - *next_param = param; - next_param = &(param->next); - - func_handle->nr_args++; - } - va_end(ap); - - func_handle->next = tep->func_handlers; - tep->func_handlers = func_handle; - - return 0; - out_free: - va_end(ap); - free_func_handle(func_handle); - return ret; -} - -/** - * tep_unregister_print_function - unregister a helper function - * @tep: a handle to the trace event parser context - * @func: the function to process the helper function - * @name: the name of the helper function - * - * This function removes existing print handler for function @name. - * - * Returns 0 if the handler was removed successully, -1 otherwise. - */ -int tep_unregister_print_function(struct tep_handle *tep, - tep_func_handler func, char *name) -{ - struct tep_function_handler *func_handle; - - func_handle = find_func_handler(tep, name); - if (func_handle && func_handle->func == func) { - remove_func_handler(tep, name); - return 0; - } - return -1; -} - -static struct tep_event *search_event(struct tep_handle *tep, int id, - const char *sys_name, - const char *event_name) -{ - struct tep_event *event; - - if (id >= 0) { - /* search by id */ - event = tep_find_event(tep, id); - if (!event) - return NULL; - if (event_name && (strcmp(event_name, event->name) != 0)) - return NULL; - if (sys_name && (strcmp(sys_name, event->system) != 0)) - return NULL; - } else { - event = tep_find_event_by_name(tep, sys_name, event_name); - if (!event) - return NULL; - } - return event; -} - -/** - * tep_register_event_handler - register a way to parse an event - * @tep: a handle to the trace event parser context - * @id: the id of the event to register - * @sys_name: the system name the event belongs to - * @event_name: the name of the event - * @func: the function to call to parse the event information - * @context: the data to be passed to @func - * - * This function allows a developer to override the parsing of - * a given event. If for some reason the default print format - * is not sufficient, this function will register a function - * for an event to be used to parse the data instead. - * - * If @id is >= 0, then it is used to find the event. - * else @sys_name and @event_name are used. - * - * Returns: - * TEP_REGISTER_SUCCESS_OVERWRITE if an existing handler is overwritten - * TEP_REGISTER_SUCCESS if a new handler is registered successfully - * negative TEP_ERRNO_... in case of an error - * - */ -int tep_register_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context) -{ - struct tep_event *event; - struct event_handler *handle; - - event = search_event(tep, id, sys_name, event_name); - if (event == NULL) - goto not_found; - - pr_stat("overriding event (%d) %s:%s with new print handler", - event->id, event->system, event->name); - - event->handler = func; - event->context = context; - return TEP_REGISTER_SUCCESS_OVERWRITE; - - not_found: - /* Save for later use. */ - handle = calloc(1, sizeof(*handle)); - if (!handle) { - do_warning("Failed to allocate event handler"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - handle->id = id; - if (event_name) - handle->event_name = strdup(event_name); - if (sys_name) - handle->sys_name = strdup(sys_name); - - if ((event_name && !handle->event_name) || - (sys_name && !handle->sys_name)) { - do_warning("Failed to allocate event/sys name"); - free((void *)handle->event_name); - free((void *)handle->sys_name); - free(handle); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - handle->func = func; - handle->next = tep->handlers; - tep->handlers = handle; - handle->context = context; - - return TEP_REGISTER_SUCCESS; -} - -static int handle_matches(struct event_handler *handler, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context) -{ - if (id >= 0 && id != handler->id) - return 0; - - if (event_name && (strcmp(event_name, handler->event_name) != 0)) - return 0; - - if (sys_name && (strcmp(sys_name, handler->sys_name) != 0)) - return 0; - - if (func != handler->func || context != handler->context) - return 0; - - return 1; -} - -/** - * tep_unregister_event_handler - unregister an existing event handler - * @tep: a handle to the trace event parser context - * @id: the id of the event to unregister - * @sys_name: the system name the handler belongs to - * @event_name: the name of the event handler - * @func: the function to call to parse the event information - * @context: the data to be passed to @func - * - * This function removes existing event handler (parser). - * - * If @id is >= 0, then it is used to find the event. - * else @sys_name and @event_name are used. - * - * Returns 0 if handler was removed successfully, -1 if event was not found. - */ -int tep_unregister_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context) -{ - struct tep_event *event; - struct event_handler *handle; - struct event_handler **next; - - event = search_event(tep, id, sys_name, event_name); - if (event == NULL) - goto not_found; - - if (event->handler == func && event->context == context) { - pr_stat("removing override handler for event (%d) %s:%s. Going back to default handler.", - event->id, event->system, event->name); - - event->handler = NULL; - event->context = NULL; - return 0; - } - -not_found: - for (next = &tep->handlers; *next; next = &(*next)->next) { - handle = *next; - if (handle_matches(handle, id, sys_name, event_name, - func, context)) - break; - } - - if (!(*next)) - return -1; - - *next = handle->next; - free_handler(handle); - - return 0; -} - -/** - * tep_alloc - create a tep handle - */ -struct tep_handle *tep_alloc(void) -{ - struct tep_handle *tep = calloc(1, sizeof(*tep)); - - if (tep) { - tep->ref_count = 1; - tep->host_bigendian = tep_is_bigendian(); - } - - return tep; -} - -void tep_ref(struct tep_handle *tep) -{ - tep->ref_count++; -} - -int tep_get_ref(struct tep_handle *tep) -{ - if (tep) - return tep->ref_count; - return 0; -} - -__hidden void free_tep_format_field(struct tep_format_field *field) -{ - free(field->type); - if (field->alias != field->name) - free(field->alias); - free(field->name); - free(field); -} - -static void free_format_fields(struct tep_format_field *field) -{ - struct tep_format_field *next; - - while (field) { - next = field->next; - free_tep_format_field(field); - field = next; - } -} - -static void free_formats(struct tep_format *format) -{ - free_format_fields(format->common_fields); - free_format_fields(format->fields); -} - -__hidden void free_tep_event(struct tep_event *event) -{ - free(event->name); - free(event->system); - - free_formats(&event->format); - - free(event->print_fmt.format); - free_args(event->print_fmt.args); - free_parse_args(event->print_fmt.print_cache); - free(event); -} - -/** - * tep_free - free a tep handle - * @tep: the tep handle to free - */ -void tep_free(struct tep_handle *tep) -{ - struct cmdline_list *cmdlist, *cmdnext; - struct func_list *funclist, *funcnext; - struct printk_list *printklist, *printknext; - struct tep_function_handler *func_handler; - struct event_handler *handle; - int i; - - if (!tep) - return; - - cmdlist = tep->cmdlist; - funclist = tep->funclist; - printklist = tep->printklist; - - tep->ref_count--; - if (tep->ref_count) - return; - - if (tep->cmdlines) { - for (i = 0; i < tep->cmdline_count; i++) - free(tep->cmdlines[i].comm); - free(tep->cmdlines); - } - - while (cmdlist) { - cmdnext = cmdlist->next; - free(cmdlist->comm); - free(cmdlist); - cmdlist = cmdnext; - } - - if (tep->func_map) { - for (i = 0; i < (int)tep->func_count; i++) { - free(tep->func_map[i].func); - free(tep->func_map[i].mod); - } - free(tep->func_map); - } - - while (funclist) { - funcnext = funclist->next; - free(funclist->func); - free(funclist->mod); - free(funclist); - funclist = funcnext; - } - - while (tep->func_handlers) { - func_handler = tep->func_handlers; - tep->func_handlers = func_handler->next; - free_func_handle(func_handler); - } - - if (tep->printk_map) { - for (i = 0; i < (int)tep->printk_count; i++) - free(tep->printk_map[i].printk); - free(tep->printk_map); - } - - while (printklist) { - printknext = printklist->next; - free(printklist->printk); - free(printklist); - printklist = printknext; - } - - for (i = 0; i < tep->nr_events; i++) - free_tep_event(tep->events[i]); - - while (tep->handlers) { - handle = tep->handlers; - tep->handlers = handle->next; - free_handler(handle); - } - - free(tep->events); - free(tep->sort_events); - free(tep->func_resolver); - free_tep_plugin_paths(tep); - - free(tep); -} - -void tep_unref(struct tep_handle *tep) -{ - tep_free(tep); -} diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h deleted file mode 100644 index 41d4f9f6a843..000000000000 --- a/tools/lib/traceevent/event-parse.h +++ /dev/null @@ -1,750 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ -#ifndef _PARSE_EVENTS_H -#define _PARSE_EVENTS_H - -#include <stdbool.h> -#include <stdarg.h> -#include <stdio.h> -#include <regex.h> -#include <string.h> - -#include "trace-seq.h" - -#ifndef __maybe_unused -#define __maybe_unused __attribute__((unused)) -#endif - -#ifndef DEBUG_RECORD -#define DEBUG_RECORD 0 -#endif - -struct tep_record { - unsigned long long ts; - unsigned long long offset; - long long missed_events; /* buffer dropped events before */ - int record_size; /* size of binary record */ - int size; /* size of data */ - void *data; - int cpu; - int ref_count; - int locked; /* Do not free, even if ref_count is zero */ - void *priv; -#if DEBUG_RECORD - struct tep_record *prev; - struct tep_record *next; - long alloc_addr; -#endif -}; - -/* ----------------------- tep ----------------------- */ - -struct tep_handle; -struct tep_event; - -typedef int (*tep_event_handler_func)(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, - void *context); - -typedef int (*tep_plugin_load_func)(struct tep_handle *tep); -typedef int (*tep_plugin_unload_func)(struct tep_handle *tep); - -struct tep_plugin_option { - struct tep_plugin_option *next; - void *handle; - char *file; - char *name; - char *plugin_alias; - char *description; - const char *value; - void *priv; - int set; -}; - -/* - * Plugin hooks that can be called: - * - * TEP_PLUGIN_LOADER: (required) - * The function name to initialized the plugin. - * - * int TEP_PLUGIN_LOADER(struct tep_handle *tep) - * - * TEP_PLUGIN_UNLOADER: (optional) - * The function called just before unloading - * - * int TEP_PLUGIN_UNLOADER(struct tep_handle *tep) - * - * TEP_PLUGIN_OPTIONS: (optional) - * Plugin options that can be set before loading - * - * struct tep_plugin_option TEP_PLUGIN_OPTIONS[] = { - * { - * .name = "option-name", - * .plugin_alias = "override-file-name", (optional) - * .description = "description of option to show users", - * }, - * { - * .name = NULL, - * }, - * }; - * - * Array must end with .name = NULL; - * - * - * .plugin_alias is used to give a shorter name to access - * the vairable. Useful if a plugin handles more than one event. - * - * If .value is not set, then it is considered a boolean and only - * .set will be processed. If .value is defined, then it is considered - * a string option and .set will be ignored. - * - * TEP_PLUGIN_ALIAS: (optional) - * The name to use for finding options (uses filename if not defined) - */ -#define TEP_PLUGIN_LOADER tep_plugin_loader -#define TEP_PLUGIN_UNLOADER tep_plugin_unloader -#define TEP_PLUGIN_OPTIONS tep_plugin_options -#define TEP_PLUGIN_ALIAS tep_plugin_alias -#define _MAKE_STR(x) #x -#define MAKE_STR(x) _MAKE_STR(x) -#define TEP_PLUGIN_LOADER_NAME MAKE_STR(TEP_PLUGIN_LOADER) -#define TEP_PLUGIN_UNLOADER_NAME MAKE_STR(TEP_PLUGIN_UNLOADER) -#define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS) -#define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS) - -enum tep_format_flags { - TEP_FIELD_IS_ARRAY = 1, - TEP_FIELD_IS_POINTER = 2, - TEP_FIELD_IS_SIGNED = 4, - TEP_FIELD_IS_STRING = 8, - TEP_FIELD_IS_DYNAMIC = 16, - TEP_FIELD_IS_LONG = 32, - TEP_FIELD_IS_FLAG = 64, - TEP_FIELD_IS_SYMBOLIC = 128, - TEP_FIELD_IS_RELATIVE = 256, -}; - -struct tep_format_field { - struct tep_format_field *next; - struct tep_event *event; - char *type; - char *name; - char *alias; - int offset; - int size; - unsigned int arraylen; - unsigned int elementsize; - unsigned long flags; -}; - -struct tep_format { - int nr_common; - int nr_fields; - struct tep_format_field *common_fields; - struct tep_format_field *fields; -}; - -struct tep_print_arg_atom { - char *atom; -}; - -struct tep_print_arg_string { - char *string; - struct tep_format_field *field; -}; - -struct tep_print_arg_bitmask { - char *bitmask; - struct tep_format_field *field; -}; - -struct tep_print_arg_field { - char *name; - struct tep_format_field *field; -}; - -struct tep_print_flag_sym { - struct tep_print_flag_sym *next; - char *value; - char *str; -}; - -struct tep_print_arg_typecast { - char *type; - struct tep_print_arg *item; -}; - -struct tep_print_arg_flags { - struct tep_print_arg *field; - char *delim; - struct tep_print_flag_sym *flags; -}; - -struct tep_print_arg_symbol { - struct tep_print_arg *field; - struct tep_print_flag_sym *symbols; -}; - -struct tep_print_arg_hex { - struct tep_print_arg *field; - struct tep_print_arg *size; -}; - -struct tep_print_arg_int_array { - struct tep_print_arg *field; - struct tep_print_arg *count; - struct tep_print_arg *el_size; -}; - -struct tep_print_arg_dynarray { - struct tep_format_field *field; - struct tep_print_arg *index; -}; - -struct tep_print_arg; - -struct tep_print_arg_op { - char *op; - int prio; - struct tep_print_arg *left; - struct tep_print_arg *right; -}; - -struct tep_function_handler; - -struct tep_print_arg_func { - struct tep_function_handler *func; - struct tep_print_arg *args; -}; - -enum tep_print_arg_type { - TEP_PRINT_NULL, - TEP_PRINT_ATOM, - TEP_PRINT_FIELD, - TEP_PRINT_FLAGS, - TEP_PRINT_SYMBOL, - TEP_PRINT_HEX, - TEP_PRINT_INT_ARRAY, - TEP_PRINT_TYPE, - TEP_PRINT_STRING, - TEP_PRINT_BSTRING, - TEP_PRINT_DYNAMIC_ARRAY, - TEP_PRINT_OP, - TEP_PRINT_FUNC, - TEP_PRINT_BITMASK, - TEP_PRINT_DYNAMIC_ARRAY_LEN, - TEP_PRINT_HEX_STR, -}; - -struct tep_print_arg { - struct tep_print_arg *next; - enum tep_print_arg_type type; - union { - struct tep_print_arg_atom atom; - struct tep_print_arg_field field; - struct tep_print_arg_typecast typecast; - struct tep_print_arg_flags flags; - struct tep_print_arg_symbol symbol; - struct tep_print_arg_hex hex; - struct tep_print_arg_int_array int_array; - struct tep_print_arg_func func; - struct tep_print_arg_string string; - struct tep_print_arg_bitmask bitmask; - struct tep_print_arg_op op; - struct tep_print_arg_dynarray dynarray; - }; -}; - -struct tep_print_parse; - -struct tep_print_fmt { - char *format; - struct tep_print_arg *args; - struct tep_print_parse *print_cache; -}; - -struct tep_event { - struct tep_handle *tep; - char *name; - int id; - int flags; - struct tep_format format; - struct tep_print_fmt print_fmt; - char *system; - tep_event_handler_func handler; - void *context; -}; - -enum { - TEP_EVENT_FL_ISFTRACE = 0x01, - TEP_EVENT_FL_ISPRINT = 0x02, - TEP_EVENT_FL_ISBPRINT = 0x04, - TEP_EVENT_FL_ISFUNCENT = 0x10, - TEP_EVENT_FL_ISFUNCRET = 0x20, - TEP_EVENT_FL_NOHANDLE = 0x40, - TEP_EVENT_FL_PRINTRAW = 0x80, - - TEP_EVENT_FL_FAILED = 0x80000000 -}; - -enum tep_event_sort_type { - TEP_EVENT_SORT_ID, - TEP_EVENT_SORT_NAME, - TEP_EVENT_SORT_SYSTEM, -}; - -enum tep_event_type { - TEP_EVENT_ERROR, - TEP_EVENT_NONE, - TEP_EVENT_SPACE, - TEP_EVENT_NEWLINE, - TEP_EVENT_OP, - TEP_EVENT_DELIM, - TEP_EVENT_ITEM, - TEP_EVENT_DQUOTE, - TEP_EVENT_SQUOTE, -}; - -typedef unsigned long long (*tep_func_handler)(struct trace_seq *s, - unsigned long long *args); - -enum tep_func_arg_type { - TEP_FUNC_ARG_VOID, - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_LONG, - TEP_FUNC_ARG_STRING, - TEP_FUNC_ARG_PTR, - TEP_FUNC_ARG_MAX_TYPES -}; - -enum tep_flag { - TEP_NSEC_OUTPUT = 1, /* output in NSECS */ - TEP_DISABLE_SYS_PLUGINS = 1 << 1, - TEP_DISABLE_PLUGINS = 1 << 2, -}; - -#define TEP_ERRORS \ - _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \ - _PE(PARSE_EVENT_FAILED, "failed to parse event"), \ - _PE(READ_ID_FAILED, "failed to read event id"), \ - _PE(READ_FORMAT_FAILED, "failed to read event format"), \ - _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \ - _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\ - _PE(INVALID_ARG_TYPE, "invalid argument type"), \ - _PE(INVALID_EXP_TYPE, "invalid expression type"), \ - _PE(INVALID_OP_TYPE, "invalid operator type"), \ - _PE(INVALID_EVENT_NAME, "invalid event name"), \ - _PE(EVENT_NOT_FOUND, "no event found"), \ - _PE(SYNTAX_ERROR, "syntax error"), \ - _PE(ILLEGAL_RVALUE, "illegal rvalue"), \ - _PE(ILLEGAL_LVALUE, "illegal lvalue for string comparison"), \ - _PE(INVALID_REGEX, "regex did not compute"), \ - _PE(ILLEGAL_STRING_CMP, "illegal comparison for string"), \ - _PE(ILLEGAL_INTEGER_CMP,"illegal comparison for integer"), \ - _PE(REPARENT_NOT_OP, "cannot reparent other than OP"), \ - _PE(REPARENT_FAILED, "failed to reparent filter OP"), \ - _PE(BAD_FILTER_ARG, "bad arg in filter tree"), \ - _PE(UNEXPECTED_TYPE, "unexpected type (not a value)"), \ - _PE(ILLEGAL_TOKEN, "illegal token"), \ - _PE(INVALID_PAREN, "open parenthesis cannot come here"), \ - _PE(UNBALANCED_PAREN, "unbalanced number of parenthesis"), \ - _PE(UNKNOWN_TOKEN, "unknown token"), \ - _PE(FILTER_NOT_FOUND, "no filter found"), \ - _PE(NOT_A_NUMBER, "must have number field"), \ - _PE(NO_FILTER, "no filters exists"), \ - _PE(FILTER_MISS, "record does not match to filter") - -#undef _PE -#define _PE(__code, __str) TEP_ERRNO__ ## __code -enum tep_errno { - TEP_ERRNO__SUCCESS = 0, - TEP_ERRNO__FILTER_MATCH = TEP_ERRNO__SUCCESS, - - /* - * Choose an arbitrary negative big number not to clash with standard - * errno since SUS requires the errno has distinct positive values. - * See 'Issue 6' in the link below. - * - * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html - */ - __TEP_ERRNO__START = -100000, - - TEP_ERRORS, - - __TEP_ERRNO__END, -}; -#undef _PE - -struct tep_plugin_list; - -#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1)) - -enum tep_plugin_load_priority { - TEP_PLUGIN_FIRST, - TEP_PLUGIN_LAST, -}; - -int tep_add_plugin_path(struct tep_handle *tep, char *path, - enum tep_plugin_load_priority prio); -struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep); -void tep_unload_plugins(struct tep_plugin_list *plugin_list, - struct tep_handle *tep); -void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix, - void (*load_plugin)(struct tep_handle *tep, - const char *path, - const char *name, - void *data), - void *data); -char **tep_plugin_list_options(void); -void tep_plugin_free_options_list(char **list); -int tep_plugin_add_options(const char *name, - struct tep_plugin_option *options); -int tep_plugin_add_option(const char *name, const char *val); -void tep_plugin_remove_options(struct tep_plugin_option *options); -void tep_plugin_print_options(struct trace_seq *s); -void tep_print_plugins(struct trace_seq *s, - const char *prefix, const char *suffix, - const struct tep_plugin_list *list); - -/* tep_handle */ -typedef char *(tep_func_resolver_t)(void *priv, - unsigned long long *addrp, char **modp); -void tep_set_flag(struct tep_handle *tep, int flag); -void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag); -bool tep_test_flag(struct tep_handle *tep, enum tep_flag flags); - -static inline int tep_is_bigendian(void) -{ - unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; - unsigned int val; - - memcpy(&val, str, 4); - return val == 0x01020304; -} - -/* taken from kernel/trace/trace.h */ -enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, - TRACE_FLAG_NEED_RESCHED = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, -}; - -int tep_set_function_resolver(struct tep_handle *tep, - tep_func_resolver_t *func, void *priv); -void tep_reset_function_resolver(struct tep_handle *tep); -int tep_register_comm(struct tep_handle *tep, const char *comm, int pid); -int tep_override_comm(struct tep_handle *tep, const char *comm, int pid); -int tep_register_function(struct tep_handle *tep, char *name, - unsigned long long addr, char *mod); -int tep_register_print_string(struct tep_handle *tep, const char *fmt, - unsigned long long addr); -bool tep_is_pid_registered(struct tep_handle *tep, int pid); - -struct tep_event *tep_get_event(struct tep_handle *tep, int index); - -#define TEP_PRINT_INFO "INFO" -#define TEP_PRINT_INFO_RAW "INFO_RAW" -#define TEP_PRINT_COMM "COMM" -#define TEP_PRINT_LATENCY "LATENCY" -#define TEP_PRINT_NAME "NAME" -#define TEP_PRINT_PID 1U -#define TEP_PRINT_TIME 2U -#define TEP_PRINT_CPU 3U - -void tep_print_event(struct tep_handle *tep, struct trace_seq *s, - struct tep_record *record, const char *fmt, ...) - __attribute__ ((format (printf, 4, 5))); - -int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, - int long_size); - -enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys); -enum tep_errno tep_parse_format(struct tep_handle *tep, - struct tep_event **eventp, - const char *buf, - unsigned long size, const char *sys); - -void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - int *len, int err); - -int tep_get_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err); -int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err); -int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, - const char *name, struct tep_record *record, - unsigned long long *val, int err); - -int tep_print_num_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err); - -int tep_print_func_field(struct trace_seq *s, const char *fmt, - struct tep_event *event, const char *name, - struct tep_record *record, int err); - -enum tep_reg_handler { - TEP_REGISTER_SUCCESS = 0, - TEP_REGISTER_SUCCESS_OVERWRITE, -}; - -int tep_register_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context); -int tep_unregister_event_handler(struct tep_handle *tep, int id, - const char *sys_name, const char *event_name, - tep_event_handler_func func, void *context); -int tep_register_print_function(struct tep_handle *tep, - tep_func_handler func, - enum tep_func_arg_type ret_type, - char *name, ...); -int tep_unregister_print_function(struct tep_handle *tep, - tep_func_handler func, char *name); - -struct tep_format_field *tep_find_common_field(struct tep_event *event, const char *name); -struct tep_format_field *tep_find_field(struct tep_event *event, const char *name); -struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name); - -const char *tep_find_function(struct tep_handle *tep, unsigned long long addr); -unsigned long long -tep_find_function_address(struct tep_handle *tep, unsigned long long addr); -unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size); -int tep_read_number_field(struct tep_format_field *field, const void *data, - unsigned long long *value); - -struct tep_event *tep_get_first_event(struct tep_handle *tep); -int tep_get_events_count(struct tep_handle *tep); -struct tep_event *tep_find_event(struct tep_handle *tep, int id); - -struct tep_event * -tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name); -struct tep_event * -tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record); - -int tep_data_type(struct tep_handle *tep, struct tep_record *rec); -int tep_data_pid(struct tep_handle *tep, struct tep_record *rec); -int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec); -int tep_data_flags(struct tep_handle *tep, struct tep_record *rec); -const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid); -struct tep_cmdline; -struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, - struct tep_cmdline *next); -int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline); - -void tep_print_field(struct trace_seq *s, void *data, - struct tep_format_field *field); -void tep_print_fields(struct trace_seq *s, void *data, - int size __maybe_unused, struct tep_event *event); -int tep_strerror(struct tep_handle *tep, enum tep_errno errnum, - char *buf, size_t buflen); - -struct tep_event **tep_list_events(struct tep_handle *tep, enum tep_event_sort_type); -struct tep_event **tep_list_events_copy(struct tep_handle *tep, - enum tep_event_sort_type); -struct tep_format_field **tep_event_common_fields(struct tep_event *event); -struct tep_format_field **tep_event_fields(struct tep_event *event); - -enum tep_endian { - TEP_LITTLE_ENDIAN = 0, - TEP_BIG_ENDIAN -}; -int tep_get_cpus(struct tep_handle *tep); -void tep_set_cpus(struct tep_handle *tep, int cpus); -int tep_get_long_size(struct tep_handle *tep); -void tep_set_long_size(struct tep_handle *tep, int long_size); -int tep_get_page_size(struct tep_handle *tep); -void tep_set_page_size(struct tep_handle *tep, int _page_size); -bool tep_is_file_bigendian(struct tep_handle *tep); -void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian); -bool tep_is_local_bigendian(struct tep_handle *tep); -void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian); -int tep_get_header_page_size(struct tep_handle *tep); -int tep_get_header_timestamp_size(struct tep_handle *tep); -bool tep_is_old_format(struct tep_handle *tep); -void tep_set_test_filters(struct tep_handle *tep, int test_filters); - -struct tep_handle *tep_alloc(void); -void tep_free(struct tep_handle *tep); -void tep_ref(struct tep_handle *tep); -void tep_unref(struct tep_handle *tep); -int tep_get_ref(struct tep_handle *tep); - -/* for debugging */ -void tep_print_funcs(struct tep_handle *tep); -void tep_print_printk(struct tep_handle *tep); - -/* ----------------------- filtering ----------------------- */ - -enum tep_filter_boolean_type { - TEP_FILTER_FALSE, - TEP_FILTER_TRUE, -}; - -enum tep_filter_op_type { - TEP_FILTER_OP_AND = 1, - TEP_FILTER_OP_OR, - TEP_FILTER_OP_NOT, -}; - -enum tep_filter_cmp_type { - TEP_FILTER_CMP_NONE, - TEP_FILTER_CMP_EQ, - TEP_FILTER_CMP_NE, - TEP_FILTER_CMP_GT, - TEP_FILTER_CMP_LT, - TEP_FILTER_CMP_GE, - TEP_FILTER_CMP_LE, - TEP_FILTER_CMP_MATCH, - TEP_FILTER_CMP_NOT_MATCH, - TEP_FILTER_CMP_REGEX, - TEP_FILTER_CMP_NOT_REGEX, -}; - -enum tep_filter_exp_type { - TEP_FILTER_EXP_NONE, - TEP_FILTER_EXP_ADD, - TEP_FILTER_EXP_SUB, - TEP_FILTER_EXP_MUL, - TEP_FILTER_EXP_DIV, - TEP_FILTER_EXP_MOD, - TEP_FILTER_EXP_RSHIFT, - TEP_FILTER_EXP_LSHIFT, - TEP_FILTER_EXP_AND, - TEP_FILTER_EXP_OR, - TEP_FILTER_EXP_XOR, - TEP_FILTER_EXP_NOT, -}; - -enum tep_filter_arg_type { - TEP_FILTER_ARG_NONE, - TEP_FILTER_ARG_BOOLEAN, - TEP_FILTER_ARG_VALUE, - TEP_FILTER_ARG_FIELD, - TEP_FILTER_ARG_EXP, - TEP_FILTER_ARG_OP, - TEP_FILTER_ARG_NUM, - TEP_FILTER_ARG_STR, -}; - -enum tep_filter_value_type { - TEP_FILTER_NUMBER, - TEP_FILTER_STRING, - TEP_FILTER_CHAR -}; - -struct tep_filter_arg; - -struct tep_filter_arg_boolean { - enum tep_filter_boolean_type value; -}; - -struct tep_filter_arg_field { - struct tep_format_field *field; -}; - -struct tep_filter_arg_value { - enum tep_filter_value_type type; - union { - char *str; - unsigned long long val; - }; -}; - -struct tep_filter_arg_op { - enum tep_filter_op_type type; - struct tep_filter_arg *left; - struct tep_filter_arg *right; -}; - -struct tep_filter_arg_exp { - enum tep_filter_exp_type type; - struct tep_filter_arg *left; - struct tep_filter_arg *right; -}; - -struct tep_filter_arg_num { - enum tep_filter_cmp_type type; - struct tep_filter_arg *left; - struct tep_filter_arg *right; -}; - -struct tep_filter_arg_str { - enum tep_filter_cmp_type type; - struct tep_format_field *field; - char *val; - char *buffer; - regex_t reg; -}; - -struct tep_filter_arg { - enum tep_filter_arg_type type; - union { - struct tep_filter_arg_boolean boolean; - struct tep_filter_arg_field field; - struct tep_filter_arg_value value; - struct tep_filter_arg_op op; - struct tep_filter_arg_exp exp; - struct tep_filter_arg_num num; - struct tep_filter_arg_str str; - }; -}; - -struct tep_filter_type { - int event_id; - struct tep_event *event; - struct tep_filter_arg *filter; -}; - -#define TEP_FILTER_ERROR_BUFSZ 1024 - -struct tep_event_filter { - struct tep_handle *tep; - int filters; - struct tep_filter_type *event_filters; - char error_buffer[TEP_FILTER_ERROR_BUFSZ]; -}; - -struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep); - -/* for backward compatibility */ -#define FILTER_NONE TEP_ERRNO__NO_FILTER -#define FILTER_NOEXIST TEP_ERRNO__FILTER_NOT_FOUND -#define FILTER_MISS TEP_ERRNO__FILTER_MISS -#define FILTER_MATCH TEP_ERRNO__FILTER_MATCH - -enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, - const char *filter_str); - -enum tep_errno tep_filter_match(struct tep_event_filter *filter, - struct tep_record *record); - -int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err, - char *buf, size_t buflen); - -int tep_event_filtered(struct tep_event_filter *filter, - int event_id); - -void tep_filter_reset(struct tep_event_filter *filter); - -void tep_filter_free(struct tep_event_filter *filter); - -char *tep_filter_make_string(struct tep_event_filter *filter, int event_id); - -int tep_filter_remove_event(struct tep_event_filter *filter, - int event_id); - -int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source); - -int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2); - -#endif /* _PARSE_EVENTS_H */ diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c deleted file mode 100644 index e7f93d5fe4fd..000000000000 --- a/tools/lib/traceevent/event-plugin.c +++ /dev/null @@ -1,711 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ - -#include <ctype.h> -#include <stdio.h> -#include <string.h> -#include <dlfcn.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <dirent.h> -#include <errno.h> -#include "event-parse.h" -#include "event-parse-local.h" -#include "event-utils.h" -#include "trace-seq.h" - -#define LOCAL_PLUGIN_DIR ".local/lib/traceevent/plugins/" - -static struct registered_plugin_options { - struct registered_plugin_options *next; - struct tep_plugin_option *options; -} *registered_options; - -static struct trace_plugin_options { - struct trace_plugin_options *next; - char *plugin; - char *option; - char *value; -} *trace_plugin_options; - -struct tep_plugin_list { - struct tep_plugin_list *next; - char *name; - void *handle; -}; - -struct tep_plugins_dir { - struct tep_plugins_dir *next; - char *path; - enum tep_plugin_load_priority prio; -}; - -static void lower_case(char *str) -{ - if (!str) - return; - for (; *str; str++) - *str = tolower(*str); -} - -static int update_option_value(struct tep_plugin_option *op, const char *val) -{ - char *op_val; - - if (!val) { - /* toggle, only if option is boolean */ - if (op->value) - /* Warn? */ - return 0; - op->set ^= 1; - return 0; - } - - /* - * If the option has a value then it takes a string - * otherwise the option is a boolean. - */ - if (op->value) { - op->value = val; - return 0; - } - - /* Option is boolean, must be either "1", "0", "true" or "false" */ - - op_val = strdup(val); - if (!op_val) - return -1; - lower_case(op_val); - - if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0) - op->set = 1; - else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0) - op->set = 0; - free(op_val); - - return 0; -} - -/** - * tep_plugin_list_options - get list of plugin options - * - * Returns an array of char strings that list the currently registered - * plugin options in the format of <plugin>:<option>. This list can be - * used by toggling the option. - * - * Returns NULL if there's no options registered. On error it returns - * INVALID_PLUGIN_LIST_OPTION - * - * Must be freed with tep_plugin_free_options_list(). - */ -char **tep_plugin_list_options(void) -{ - struct registered_plugin_options *reg; - struct tep_plugin_option *op; - char **list = NULL; - char *name; - int count = 0; - - for (reg = registered_options; reg; reg = reg->next) { - for (op = reg->options; op->name; op++) { - char *alias = op->plugin_alias ? op->plugin_alias : op->file; - char **temp = list; - int ret; - - ret = asprintf(&name, "%s:%s", alias, op->name); - if (ret < 0) - goto err; - - list = realloc(list, count + 2); - if (!list) { - list = temp; - free(name); - goto err; - } - list[count++] = name; - list[count] = NULL; - } - } - return list; - - err: - while (--count >= 0) - free(list[count]); - free(list); - - return INVALID_PLUGIN_LIST_OPTION; -} - -void tep_plugin_free_options_list(char **list) -{ - int i; - - if (!list) - return; - - if (list == INVALID_PLUGIN_LIST_OPTION) - return; - - for (i = 0; list[i]; i++) - free(list[i]); - - free(list); -} - -static int -update_option(const char *file, struct tep_plugin_option *option) -{ - struct trace_plugin_options *op; - char *plugin; - int ret = 0; - - if (option->plugin_alias) { - plugin = strdup(option->plugin_alias); - if (!plugin) - return -1; - } else { - char *p; - plugin = strdup(file); - if (!plugin) - return -1; - p = strstr(plugin, "."); - if (p) - *p = '\0'; - } - - /* first look for named options */ - for (op = trace_plugin_options; op; op = op->next) { - if (!op->plugin) - continue; - if (strcmp(op->plugin, plugin) != 0) - continue; - if (strcmp(op->option, option->name) != 0) - continue; - - ret = update_option_value(option, op->value); - if (ret) - goto out; - break; - } - - /* first look for unnamed options */ - for (op = trace_plugin_options; op; op = op->next) { - if (op->plugin) - continue; - if (strcmp(op->option, option->name) != 0) - continue; - - ret = update_option_value(option, op->value); - break; - } - - out: - free(plugin); - return ret; -} - -/** - * tep_plugin_add_options - Add a set of options by a plugin - * @name: The name of the plugin adding the options - * @options: The set of options being loaded - * - * Sets the options with the values that have been added by user. - */ -int tep_plugin_add_options(const char *name, - struct tep_plugin_option *options) -{ - struct registered_plugin_options *reg; - - reg = malloc(sizeof(*reg)); - if (!reg) - return -1; - reg->next = registered_options; - reg->options = options; - registered_options = reg; - - while (options->name) { - update_option(name, options); - options++; - } - return 0; -} - -/** - * tep_plugin_remove_options - remove plugin options that were registered - * @options: Options to removed that were registered with tep_plugin_add_options - */ -void tep_plugin_remove_options(struct tep_plugin_option *options) -{ - struct registered_plugin_options **last; - struct registered_plugin_options *reg; - - for (last = ®istered_options; *last; last = &(*last)->next) { - if ((*last)->options == options) { - reg = *last; - *last = reg->next; - free(reg); - return; - } - } -} - -static int parse_option_name(char **option, char **plugin) -{ - char *p; - - *plugin = NULL; - - if ((p = strstr(*option, ":"))) { - *plugin = *option; - *p = '\0'; - *option = strdup(p + 1); - if (!*option) - return -1; - } - return 0; -} - -static struct tep_plugin_option * -find_registered_option(const char *plugin, const char *option) -{ - struct registered_plugin_options *reg; - struct tep_plugin_option *op; - const char *op_plugin; - - for (reg = registered_options; reg; reg = reg->next) { - for (op = reg->options; op->name; op++) { - if (op->plugin_alias) - op_plugin = op->plugin_alias; - else - op_plugin = op->file; - - if (plugin && strcmp(plugin, op_plugin) != 0) - continue; - if (strcmp(option, op->name) != 0) - continue; - - return op; - } - } - - return NULL; -} - -static int process_option(const char *plugin, const char *option, const char *val) -{ - struct tep_plugin_option *op; - - op = find_registered_option(plugin, option); - if (!op) - return 0; - - return update_option_value(op, val); -} - -/** - * tep_plugin_add_option - add an option/val pair to set plugin options - * @name: The name of the option (format: <plugin>:<option> or just <option>) - * @val: (optional) the value for the option - * - * Modify a plugin option. If @val is given than the value of the option - * is set (note, some options just take a boolean, so @val must be either - * "1" or "0" or "true" or "false"). - */ -int tep_plugin_add_option(const char *name, const char *val) -{ - struct trace_plugin_options *op; - char *option_str; - char *plugin; - - option_str = strdup(name); - if (!option_str) - return -ENOMEM; - - if (parse_option_name(&option_str, &plugin) < 0) - return -ENOMEM; - - /* If the option exists, update the val */ - for (op = trace_plugin_options; op; op = op->next) { - /* Both must be NULL or not NULL */ - if ((!plugin || !op->plugin) && plugin != op->plugin) - continue; - if (plugin && strcmp(plugin, op->plugin) != 0) - continue; - if (strcmp(op->option, option_str) != 0) - continue; - - /* update option */ - free(op->value); - if (val) { - op->value = strdup(val); - if (!op->value) - goto out_free; - } else - op->value = NULL; - - /* plugin and option_str don't get freed at the end */ - free(plugin); - free(option_str); - - plugin = op->plugin; - option_str = op->option; - break; - } - - /* If not found, create */ - if (!op) { - op = malloc(sizeof(*op)); - if (!op) - goto out_free; - memset(op, 0, sizeof(*op)); - op->plugin = plugin; - op->option = option_str; - if (val) { - op->value = strdup(val); - if (!op->value) { - free(op); - goto out_free; - } - } - op->next = trace_plugin_options; - trace_plugin_options = op; - } - - return process_option(plugin, option_str, val); - -out_free: - free(plugin); - free(option_str); - return -ENOMEM; -} - -static void print_op_data(struct trace_seq *s, const char *name, - const char *op) -{ - if (op) - trace_seq_printf(s, "%8s:\t%s\n", name, op); -} - -/** - * tep_plugin_print_options - print out the registered plugin options - * @s: The trace_seq descriptor to write the plugin options into - * - * Writes a list of options into trace_seq @s. - */ -void tep_plugin_print_options(struct trace_seq *s) -{ - struct registered_plugin_options *reg; - struct tep_plugin_option *op; - - for (reg = registered_options; reg; reg = reg->next) { - if (reg != registered_options) - trace_seq_printf(s, "============\n"); - for (op = reg->options; op->name; op++) { - if (op != reg->options) - trace_seq_printf(s, "------------\n"); - print_op_data(s, "file", op->file); - print_op_data(s, "plugin", op->plugin_alias); - print_op_data(s, "option", op->name); - print_op_data(s, "desc", op->description); - print_op_data(s, "value", op->value); - trace_seq_printf(s, "%8s:\t%d\n", "set", op->set); - } - } -} - -/** - * tep_print_plugins - print out the list of plugins loaded - * @s: the trace_seq descripter to write to - * @prefix: The prefix string to add before listing the option name - * @suffix: The suffix string ot append after the option name - * @list: The list of plugins (usually returned by tep_load_plugins() - * - * Writes to the trace_seq @s the list of plugins (files) that is - * returned by tep_load_plugins(). Use @prefix and @suffix for formating: - * @prefix = " ", @suffix = "\n". - */ -void tep_print_plugins(struct trace_seq *s, - const char *prefix, const char *suffix, - const struct tep_plugin_list *list) -{ - while (list) { - trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix); - list = list->next; - } -} - -static void -load_plugin(struct tep_handle *tep, const char *path, - const char *file, void *data) -{ - struct tep_plugin_list **plugin_list = data; - struct tep_plugin_option *options; - tep_plugin_load_func func; - struct tep_plugin_list *list; - const char *alias; - char *plugin; - void *handle; - int ret; - - ret = asprintf(&plugin, "%s/%s", path, file); - if (ret < 0) { - warning("could not allocate plugin memory\n"); - return; - } - - handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL); - if (!handle) { - warning("could not load plugin '%s'\n%s\n", - plugin, dlerror()); - goto out_free; - } - - alias = dlsym(handle, TEP_PLUGIN_ALIAS_NAME); - if (!alias) - alias = file; - - options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME); - if (options) { - while (options->name) { - ret = update_option(alias, options); - if (ret < 0) - goto out_free; - options++; - } - } - - func = dlsym(handle, TEP_PLUGIN_LOADER_NAME); - if (!func) { - warning("could not find func '%s' in plugin '%s'\n%s\n", - TEP_PLUGIN_LOADER_NAME, plugin, dlerror()); - goto out_free; - } - - list = malloc(sizeof(*list)); - if (!list) { - warning("could not allocate plugin memory\n"); - goto out_free; - } - - list->next = *plugin_list; - list->handle = handle; - list->name = plugin; - *plugin_list = list; - - pr_stat("registering plugin: %s", plugin); - func(tep); - return; - - out_free: - free(plugin); -} - -static void -load_plugins_dir(struct tep_handle *tep, const char *suffix, - const char *path, - void (*load_plugin)(struct tep_handle *tep, - const char *path, - const char *name, - void *data), - void *data) -{ - struct dirent *dent; - struct stat st; - DIR *dir; - int ret; - - ret = stat(path, &st); - if (ret < 0) - return; - - if (!S_ISDIR(st.st_mode)) - return; - - dir = opendir(path); - if (!dir) - return; - - while ((dent = readdir(dir))) { - const char *name = dent->d_name; - - if (strcmp(name, ".") == 0 || - strcmp(name, "..") == 0) - continue; - - /* Only load plugins that end in suffix */ - if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0) - continue; - - load_plugin(tep, path, name, data); - } - - closedir(dir); -} - -/** - * tep_load_plugins_hook - call a user specified callback to load a plugin - * @tep: handler to traceevent context - * @suffix: filter only plugin files with given suffix - * @load_plugin: user specified callback, called for each plugin file - * @data: custom context, passed to @load_plugin - * - * Searches for traceevent plugin files and calls @load_plugin for each - * The order of plugins search is: - * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST - * - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR - * - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR - * - In user's home: ~/.local/lib/traceevent/plugins/ - * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST - * - */ -void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix, - void (*load_plugin)(struct tep_handle *tep, - const char *path, - const char *name, - void *data), - void *data) -{ - struct tep_plugins_dir *dir = NULL; - char *home; - char *path; - char *envdir; - int ret; - - if (tep && tep->flags & TEP_DISABLE_PLUGINS) - return; - - if (tep) - dir = tep->plugins_dir; - while (dir) { - if (dir->prio == TEP_PLUGIN_FIRST) - load_plugins_dir(tep, suffix, dir->path, - load_plugin, data); - dir = dir->next; - } - - /* - * If a system plugin directory was defined, - * check that first. - */ -#ifdef PLUGIN_DIR - if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS)) - load_plugins_dir(tep, suffix, PLUGIN_DIR, - load_plugin, data); -#endif - - /* - * Next let the environment-set plugin directory - * override the system defaults. - */ - envdir = getenv("TRACEEVENT_PLUGIN_DIR"); - if (envdir) - load_plugins_dir(tep, suffix, envdir, load_plugin, data); - - /* - * Now let the home directory override the environment - * or system defaults. - */ - home = getenv("HOME"); - if (!home) - return; - - ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR); - if (ret < 0) { - warning("could not allocate plugin memory\n"); - return; - } - - load_plugins_dir(tep, suffix, path, load_plugin, data); - - if (tep) - dir = tep->plugins_dir; - while (dir) { - if (dir->prio == TEP_PLUGIN_LAST) - load_plugins_dir(tep, suffix, dir->path, - load_plugin, data); - dir = dir->next; - } - - free(path); -} - -struct tep_plugin_list* -tep_load_plugins(struct tep_handle *tep) -{ - struct tep_plugin_list *list = NULL; - - tep_load_plugins_hook(tep, ".so", load_plugin, &list); - return list; -} - -/** - * tep_add_plugin_path - Add a new plugin directory. - * @tep: Trace event handler. - * @path: Path to a directory. All plugin files in that - * directory will be loaded. - *@prio: Load priority of the plugins in that directory. - * - * Returns -1 in case of an error, 0 otherwise. - */ -int tep_add_plugin_path(struct tep_handle *tep, char *path, - enum tep_plugin_load_priority prio) -{ - struct tep_plugins_dir *dir; - - if (!tep || !path) - return -1; - - dir = calloc(1, sizeof(*dir)); - if (!dir) - return -1; - - dir->path = strdup(path); - if (!dir->path) { - free(dir); - return -1; - } - dir->prio = prio; - dir->next = tep->plugins_dir; - tep->plugins_dir = dir; - - return 0; -} - -__hidden void free_tep_plugin_paths(struct tep_handle *tep) -{ - struct tep_plugins_dir *dir; - - if (!tep) - return; - - dir = tep->plugins_dir; - while (dir) { - tep->plugins_dir = tep->plugins_dir->next; - free(dir->path); - free(dir); - dir = tep->plugins_dir; - } -} - -void -tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep) -{ - tep_plugin_unload_func func; - struct tep_plugin_list *list; - - while (plugin_list) { - list = plugin_list; - plugin_list = list->next; - func = dlsym(list->handle, TEP_PLUGIN_UNLOADER_NAME); - if (func) - func(tep); - dlclose(list->handle); - free(list->name); - free(list); - } -} diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h deleted file mode 100644 index 0560b96a31d1..000000000000 --- a/tools/lib/traceevent/event-utils.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ -/* - * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ -#ifndef __UTIL_H -#define __UTIL_H - -#include <ctype.h> - -/* Can be overridden */ -void warning(const char *fmt, ...); -void pr_stat(const char *fmt, ...); -void vpr_stat(const char *fmt, va_list ap); - -/* Always available */ -void __warning(const char *fmt, ...); -void __pr_stat(const char *fmt, ...); - -void __vwarning(const char *fmt, ...); -void __vpr_stat(const char *fmt, ...); - -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) - -static inline char *strim(char *string) -{ - char *ret; - - if (!string) - return NULL; - while (*string) { - if (!isspace(*string)) - break; - string++; - } - ret = string; - - string = ret + strlen(ret) - 1; - while (string > ret) { - if (!isspace(*string)) - break; - string--; - } - string[1] = 0; - - return ret; -} - -static inline int has_text(const char *text) -{ - if (!text) - return 0; - - while (*text) { - if (!isspace(*text)) - return 1; - text++; - } - - return 0; -} - -#endif diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c deleted file mode 100644 index f1640d651c8a..000000000000 --- a/tools/lib/traceevent/kbuffer-parse.c +++ /dev/null @@ -1,809 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "kbuffer.h" - -#define MISSING_EVENTS (1UL << 31) -#define MISSING_STORED (1UL << 30) - -#define COMMIT_MASK ((1 << 27) - 1) - -enum { - KBUFFER_FL_HOST_BIG_ENDIAN = (1<<0), - KBUFFER_FL_BIG_ENDIAN = (1<<1), - KBUFFER_FL_LONG_8 = (1<<2), - KBUFFER_FL_OLD_FORMAT = (1<<3), -}; - -#define ENDIAN_MASK (KBUFFER_FL_HOST_BIG_ENDIAN | KBUFFER_FL_BIG_ENDIAN) - -/** kbuffer - * @timestamp - timestamp of current event - * @lost_events - # of lost events between this subbuffer and previous - * @flags - special flags of the kbuffer - * @subbuffer - pointer to the sub-buffer page - * @data - pointer to the start of data on the sub-buffer page - * @index - index from @data to the @curr event data - * @curr - offset from @data to the start of current event - * (includes metadata) - * @next - offset from @data to the start of next event - * @size - The size of data on @data - * @start - The offset from @subbuffer where @data lives - * - * @read_4 - Function to read 4 raw bytes (may swap) - * @read_8 - Function to read 8 raw bytes (may swap) - * @read_long - Function to read a long word (4 or 8 bytes with needed swap) - */ -struct kbuffer { - unsigned long long timestamp; - long long lost_events; - unsigned long flags; - void *subbuffer; - void *data; - unsigned int index; - unsigned int curr; - unsigned int next; - unsigned int size; - unsigned int start; - - unsigned int (*read_4)(void *ptr); - unsigned long long (*read_8)(void *ptr); - unsigned long long (*read_long)(struct kbuffer *kbuf, void *ptr); - int (*next_event)(struct kbuffer *kbuf); -}; - -static void *zmalloc(size_t size) -{ - return calloc(1, size); -} - -static int host_is_bigendian(void) -{ - unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; - unsigned int *ptr; - - ptr = (unsigned int *)str; - return *ptr == 0x01020304; -} - -static int do_swap(struct kbuffer *kbuf) -{ - return ((kbuf->flags & KBUFFER_FL_HOST_BIG_ENDIAN) + kbuf->flags) & - ENDIAN_MASK; -} - -static unsigned long long __read_8(void *ptr) -{ - unsigned long long data = *(unsigned long long *)ptr; - - return data; -} - -static unsigned long long __read_8_sw(void *ptr) -{ - unsigned long long data = *(unsigned long long *)ptr; - unsigned long long swap; - - swap = ((data & 0xffULL) << 56) | - ((data & (0xffULL << 8)) << 40) | - ((data & (0xffULL << 16)) << 24) | - ((data & (0xffULL << 24)) << 8) | - ((data & (0xffULL << 32)) >> 8) | - ((data & (0xffULL << 40)) >> 24) | - ((data & (0xffULL << 48)) >> 40) | - ((data & (0xffULL << 56)) >> 56); - - return swap; -} - -static unsigned int __read_4(void *ptr) -{ - unsigned int data = *(unsigned int *)ptr; - - return data; -} - -static unsigned int __read_4_sw(void *ptr) -{ - unsigned int data = *(unsigned int *)ptr; - unsigned int swap; - - swap = ((data & 0xffULL) << 24) | - ((data & (0xffULL << 8)) << 8) | - ((data & (0xffULL << 16)) >> 8) | - ((data & (0xffULL << 24)) >> 24); - - return swap; -} - -static unsigned long long read_8(struct kbuffer *kbuf, void *ptr) -{ - return kbuf->read_8(ptr); -} - -static unsigned int read_4(struct kbuffer *kbuf, void *ptr) -{ - return kbuf->read_4(ptr); -} - -static unsigned long long __read_long_8(struct kbuffer *kbuf, void *ptr) -{ - return kbuf->read_8(ptr); -} - -static unsigned long long __read_long_4(struct kbuffer *kbuf, void *ptr) -{ - return kbuf->read_4(ptr); -} - -static unsigned long long read_long(struct kbuffer *kbuf, void *ptr) -{ - return kbuf->read_long(kbuf, ptr); -} - -static int calc_index(struct kbuffer *kbuf, void *ptr) -{ - return (unsigned long)ptr - (unsigned long)kbuf->data; -} - -static int __next_event(struct kbuffer *kbuf); - -/** - * kbuffer_alloc - allocat a new kbuffer - * @size; enum to denote size of word - * @endian: enum to denote endianness - * - * Allocates and returns a new kbuffer. - */ -struct kbuffer * -kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian) -{ - struct kbuffer *kbuf; - int flags = 0; - - switch (size) { - case KBUFFER_LSIZE_4: - break; - case KBUFFER_LSIZE_8: - flags |= KBUFFER_FL_LONG_8; - break; - default: - return NULL; - } - - switch (endian) { - case KBUFFER_ENDIAN_LITTLE: - break; - case KBUFFER_ENDIAN_BIG: - flags |= KBUFFER_FL_BIG_ENDIAN; - break; - default: - return NULL; - } - - kbuf = zmalloc(sizeof(*kbuf)); - if (!kbuf) - return NULL; - - kbuf->flags = flags; - - if (host_is_bigendian()) - kbuf->flags |= KBUFFER_FL_HOST_BIG_ENDIAN; - - if (do_swap(kbuf)) { - kbuf->read_8 = __read_8_sw; - kbuf->read_4 = __read_4_sw; - } else { - kbuf->read_8 = __read_8; - kbuf->read_4 = __read_4; - } - - if (kbuf->flags & KBUFFER_FL_LONG_8) - kbuf->read_long = __read_long_8; - else - kbuf->read_long = __read_long_4; - - /* May be changed by kbuffer_set_old_format() */ - kbuf->next_event = __next_event; - - return kbuf; -} - -/** kbuffer_free - free an allocated kbuffer - * @kbuf: The kbuffer to free - * - * Can take NULL as a parameter. - */ -void kbuffer_free(struct kbuffer *kbuf) -{ - free(kbuf); -} - -static unsigned int type4host(struct kbuffer *kbuf, - unsigned int type_len_ts) -{ - if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN) - return (type_len_ts >> 29) & 3; - else - return type_len_ts & 3; -} - -static unsigned int len4host(struct kbuffer *kbuf, - unsigned int type_len_ts) -{ - if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN) - return (type_len_ts >> 27) & 7; - else - return (type_len_ts >> 2) & 7; -} - -static unsigned int type_len4host(struct kbuffer *kbuf, - unsigned int type_len_ts) -{ - if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN) - return (type_len_ts >> 27) & ((1 << 5) - 1); - else - return type_len_ts & ((1 << 5) - 1); -} - -static unsigned int ts4host(struct kbuffer *kbuf, - unsigned int type_len_ts) -{ - if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN) - return type_len_ts & ((1 << 27) - 1); - else - return type_len_ts >> 5; -} - -/* - * Linux 2.6.30 and earlier (not much ealier) had a different - * ring buffer format. It should be obsolete, but we handle it anyway. - */ -enum old_ring_buffer_type { - OLD_RINGBUF_TYPE_PADDING, - OLD_RINGBUF_TYPE_TIME_EXTEND, - OLD_RINGBUF_TYPE_TIME_STAMP, - OLD_RINGBUF_TYPE_DATA, -}; - -static unsigned int old_update_pointers(struct kbuffer *kbuf) -{ - unsigned long long extend; - unsigned int type_len_ts; - unsigned int type; - unsigned int len; - unsigned int delta; - unsigned int length; - void *ptr = kbuf->data + kbuf->curr; - - type_len_ts = read_4(kbuf, ptr); - ptr += 4; - - type = type4host(kbuf, type_len_ts); - len = len4host(kbuf, type_len_ts); - delta = ts4host(kbuf, type_len_ts); - - switch (type) { - case OLD_RINGBUF_TYPE_PADDING: - kbuf->next = kbuf->size; - return 0; - - case OLD_RINGBUF_TYPE_TIME_EXTEND: - extend = read_4(kbuf, ptr); - extend <<= TS_SHIFT; - extend += delta; - delta = extend; - ptr += 4; - length = 0; - break; - - case OLD_RINGBUF_TYPE_TIME_STAMP: - /* should never happen! */ - kbuf->curr = kbuf->size; - kbuf->next = kbuf->size; - kbuf->index = kbuf->size; - return -1; - default: - if (len) - length = len * 4; - else { - length = read_4(kbuf, ptr); - length -= 4; - ptr += 4; - } - break; - } - - kbuf->timestamp += delta; - kbuf->index = calc_index(kbuf, ptr); - kbuf->next = kbuf->index + length; - - return type; -} - -static int __old_next_event(struct kbuffer *kbuf) -{ - int type; - - do { - kbuf->curr = kbuf->next; - if (kbuf->next >= kbuf->size) - return -1; - type = old_update_pointers(kbuf); - } while (type == OLD_RINGBUF_TYPE_TIME_EXTEND || type == OLD_RINGBUF_TYPE_PADDING); - - return 0; -} - -static unsigned int -translate_data(struct kbuffer *kbuf, void *data, void **rptr, - unsigned long long *delta, int *length) -{ - unsigned long long extend; - unsigned int type_len_ts; - unsigned int type_len; - - type_len_ts = read_4(kbuf, data); - data += 4; - - type_len = type_len4host(kbuf, type_len_ts); - *delta = ts4host(kbuf, type_len_ts); - - switch (type_len) { - case KBUFFER_TYPE_PADDING: - *length = read_4(kbuf, data); - break; - - case KBUFFER_TYPE_TIME_EXTEND: - case KBUFFER_TYPE_TIME_STAMP: - extend = read_4(kbuf, data); - data += 4; - extend <<= TS_SHIFT; - extend += *delta; - *delta = extend; - *length = 0; - break; - - case 0: - *length = read_4(kbuf, data) - 4; - *length = (*length + 3) & ~3; - data += 4; - break; - default: - *length = type_len * 4; - break; - } - - *rptr = data; - - return type_len; -} - -static unsigned int update_pointers(struct kbuffer *kbuf) -{ - unsigned long long delta; - unsigned int type_len; - int length; - void *ptr = kbuf->data + kbuf->curr; - - type_len = translate_data(kbuf, ptr, &ptr, &delta, &length); - - if (type_len == KBUFFER_TYPE_TIME_STAMP) - kbuf->timestamp = delta; - else - kbuf->timestamp += delta; - - kbuf->index = calc_index(kbuf, ptr); - kbuf->next = kbuf->index + length; - - return type_len; -} - -/** - * kbuffer_translate_data - read raw data to get a record - * @swap: Set to 1 if bytes in words need to be swapped when read - * @data: The raw data to read - * @size: Address to store the size of the event data. - * - * Returns a pointer to the event data. To determine the entire - * record size (record metadata + data) just add the difference between - * @data and the returned value to @size. - */ -void *kbuffer_translate_data(int swap, void *data, unsigned int *size) -{ - unsigned long long delta; - struct kbuffer kbuf; - int type_len; - int length; - void *ptr; - - if (swap) { - kbuf.read_8 = __read_8_sw; - kbuf.read_4 = __read_4_sw; - kbuf.flags = host_is_bigendian() ? 0 : KBUFFER_FL_BIG_ENDIAN; - } else { - kbuf.read_8 = __read_8; - kbuf.read_4 = __read_4; - kbuf.flags = host_is_bigendian() ? KBUFFER_FL_BIG_ENDIAN: 0; - } - - type_len = translate_data(&kbuf, data, &ptr, &delta, &length); - switch (type_len) { - case KBUFFER_TYPE_PADDING: - case KBUFFER_TYPE_TIME_EXTEND: - case KBUFFER_TYPE_TIME_STAMP: - return NULL; - } - - *size = length; - - return ptr; -} - -static int __next_event(struct kbuffer *kbuf) -{ - int type; - - do { - kbuf->curr = kbuf->next; - if (kbuf->next >= kbuf->size) - return -1; - type = update_pointers(kbuf); - } while (type == KBUFFER_TYPE_TIME_EXTEND || - type == KBUFFER_TYPE_TIME_STAMP || - type == KBUFFER_TYPE_PADDING); - - return 0; -} - -static int next_event(struct kbuffer *kbuf) -{ - return kbuf->next_event(kbuf); -} - -/** - * kbuffer_next_event - increment the current pointer - * @kbuf: The kbuffer to read - * @ts: Address to store the next record's timestamp (may be NULL to ignore) - * - * Increments the pointers into the subbuffer of the kbuffer to point to the - * next event so that the next kbuffer_read_event() will return a - * new event. - * - * Returns the data of the next event if a new event exists on the subbuffer, - * NULL otherwise. - */ -void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts) -{ - int ret; - - if (!kbuf || !kbuf->subbuffer) - return NULL; - - ret = next_event(kbuf); - if (ret < 0) - return NULL; - - if (ts) - *ts = kbuf->timestamp; - - return kbuf->data + kbuf->index; -} - -/** - * kbuffer_load_subbuffer - load a new subbuffer into the kbuffer - * @kbuf: The kbuffer to load - * @subbuffer: The subbuffer to load into @kbuf. - * - * Load a new subbuffer (page) into @kbuf. This will reset all - * the pointers and update the @kbuf timestamp. The next read will - * return the first event on @subbuffer. - * - * Returns 0 on succes, -1 otherwise. - */ -int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer) -{ - unsigned long long flags; - void *ptr = subbuffer; - - if (!kbuf || !subbuffer) - return -1; - - kbuf->subbuffer = subbuffer; - - kbuf->timestamp = read_8(kbuf, ptr); - ptr += 8; - - kbuf->curr = 0; - - if (kbuf->flags & KBUFFER_FL_LONG_8) - kbuf->start = 16; - else - kbuf->start = 12; - - kbuf->data = subbuffer + kbuf->start; - - flags = read_long(kbuf, ptr); - kbuf->size = (unsigned int)flags & COMMIT_MASK; - - if (flags & MISSING_EVENTS) { - if (flags & MISSING_STORED) { - ptr = kbuf->data + kbuf->size; - kbuf->lost_events = read_long(kbuf, ptr); - } else - kbuf->lost_events = -1; - } else - kbuf->lost_events = 0; - - kbuf->index = 0; - kbuf->next = 0; - - next_event(kbuf); - - return 0; -} - -/** - * kbuffer_subbuf_timestamp - read the timestamp from a sub buffer - * @kbuf: The kbuffer to load - * @subbuf: The subbuffer to read from. - * - * Return the timestamp from a subbuffer. - */ -unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf) -{ - return kbuf->read_8(subbuf); -} - -/** - * kbuffer_ptr_delta - read the delta field from a record - * @kbuf: The kbuffer to load - * @ptr: The record in the buffe. - * - * Return the timestamp delta from a record - */ -unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr) -{ - unsigned int type_len_ts; - - type_len_ts = read_4(kbuf, ptr); - return ts4host(kbuf, type_len_ts); -} - - -/** - * kbuffer_read_event - read the next event in the kbuffer subbuffer - * @kbuf: The kbuffer to read from - * @ts: The address to store the timestamp of the event (may be NULL to ignore) - * - * Returns a pointer to the data part of the current event. - * NULL if no event is left on the subbuffer. - */ -void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts) -{ - if (!kbuf || !kbuf->subbuffer) - return NULL; - - if (kbuf->curr >= kbuf->size) - return NULL; - - if (ts) - *ts = kbuf->timestamp; - return kbuf->data + kbuf->index; -} - -/** - * kbuffer_timestamp - Return the timestamp of the current event - * @kbuf: The kbuffer to read from - * - * Returns the timestamp of the current (next) event. - */ -unsigned long long kbuffer_timestamp(struct kbuffer *kbuf) -{ - return kbuf->timestamp; -} - -/** - * kbuffer_read_at_offset - read the event that is at offset - * @kbuf: The kbuffer to read from - * @offset: The offset into the subbuffer - * @ts: The address to store the timestamp of the event (may be NULL to ignore) - * - * The @offset must be an index from the @kbuf subbuffer beginning. - * If @offset is bigger than the stored subbuffer, NULL will be returned. - * - * Returns the data of the record that is at @offset. Note, @offset does - * not need to be the start of the record, the offset just needs to be - * in the record (or beginning of it). - * - * Note, the kbuf timestamp and pointers are updated to the - * returned record. That is, kbuffer_read_event() will return the same - * data and timestamp, and kbuffer_next_event() will increment from - * this record. - */ -void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, - unsigned long long *ts) -{ - void *data; - - if (offset < kbuf->start) - offset = 0; - else - offset -= kbuf->start; - - /* Reset the buffer */ - kbuffer_load_subbuffer(kbuf, kbuf->subbuffer); - data = kbuffer_read_event(kbuf, ts); - - while (kbuf->curr < offset) { - data = kbuffer_next_event(kbuf, ts); - if (!data) - break; - } - - return data; -} - -/** - * kbuffer_subbuffer_size - the size of the loaded subbuffer - * @kbuf: The kbuffer to read from - * - * Returns the size of the subbuffer. Note, this size is - * where the last event resides. The stored subbuffer may actually be - * bigger due to padding and such. - */ -int kbuffer_subbuffer_size(struct kbuffer *kbuf) -{ - return kbuf->size; -} - -/** - * kbuffer_curr_index - Return the index of the record - * @kbuf: The kbuffer to read from - * - * Returns the index from the start of the data part of - * the subbuffer to the current location. Note this is not - * from the start of the subbuffer. An index of zero will - * point to the first record. Use kbuffer_curr_offset() for - * the actually offset (that can be used by kbuffer_read_at_offset()) - */ -int kbuffer_curr_index(struct kbuffer *kbuf) -{ - return kbuf->curr; -} - -/** - * kbuffer_curr_offset - Return the offset of the record - * @kbuf: The kbuffer to read from - * - * Returns the offset from the start of the subbuffer to the - * current location. - */ -int kbuffer_curr_offset(struct kbuffer *kbuf) -{ - return kbuf->curr + kbuf->start; -} - -/** - * kbuffer_event_size - return the size of the event data - * @kbuf: The kbuffer to read - * - * Returns the size of the event data (the payload not counting - * the meta data of the record) of the current event. - */ -int kbuffer_event_size(struct kbuffer *kbuf) -{ - return kbuf->next - kbuf->index; -} - -/** - * kbuffer_curr_size - return the size of the entire record - * @kbuf: The kbuffer to read - * - * Returns the size of the entire record (meta data and payload) - * of the current event. - */ -int kbuffer_curr_size(struct kbuffer *kbuf) -{ - return kbuf->next - kbuf->curr; -} - -/** - * kbuffer_missed_events - return the # of missed events from last event. - * @kbuf: The kbuffer to read from - * - * Returns the # of missed events (if recorded) before the current - * event. Note, only events on the beginning of a subbuffer can - * have missed events, all other events within the buffer will be - * zero. - */ -int kbuffer_missed_events(struct kbuffer *kbuf) -{ - /* Only the first event can have missed events */ - if (kbuf->curr) - return 0; - - return kbuf->lost_events; -} - -/** - * kbuffer_set_old_forma - set the kbuffer to use the old format parsing - * @kbuf: The kbuffer to set - * - * This is obsolete (or should be). The first kernels to use the - * new ring buffer had a slightly different ring buffer format - * (2.6.30 and earlier). It is still somewhat supported by kbuffer, - * but should not be counted on in the future. - */ -void kbuffer_set_old_format(struct kbuffer *kbuf) -{ - kbuf->flags |= KBUFFER_FL_OLD_FORMAT; - - kbuf->next_event = __old_next_event; -} - -/** - * kbuffer_start_of_data - return offset of where data starts on subbuffer - * @kbuf: The kbuffer - * - * Returns the location on the subbuffer where the data starts. - */ -int kbuffer_start_of_data(struct kbuffer *kbuf) -{ - return kbuf->start; -} - -/** - * kbuffer_raw_get - get raw buffer info - * @kbuf: The kbuffer - * @subbuf: Start of mapped subbuffer - * @info: Info descriptor to fill in - * - * For debugging. This can return internals of the ring buffer. - * Expects to have info->next set to what it will read. - * The type, length and timestamp delta will be filled in, and - * @info->next will be updated to the next element. - * The @subbuf is used to know if the info is passed the end of - * data and NULL will be returned if it is. - */ -struct kbuffer_raw_info * -kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf, struct kbuffer_raw_info *info) -{ - unsigned long long flags; - unsigned long long delta; - unsigned int type_len; - unsigned int size; - int start; - int length; - void *ptr = info->next; - - if (!kbuf || !subbuf) - return NULL; - - if (kbuf->flags & KBUFFER_FL_LONG_8) - start = 16; - else - start = 12; - - flags = read_long(kbuf, subbuf + 8); - size = (unsigned int)flags & COMMIT_MASK; - - if (ptr < subbuf || ptr >= subbuf + start + size) - return NULL; - - type_len = translate_data(kbuf, ptr, &ptr, &delta, &length); - - info->next = ptr + length; - - info->type = type_len; - info->delta = delta; - info->length = length; - - return info; -} diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h deleted file mode 100644 index a2b522093cfd..000000000000 --- a/tools/lib/traceevent/kbuffer.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ -/* - * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ -#ifndef _KBUFFER_H -#define _KBUFFER_H - -#ifndef TS_SHIFT -#define TS_SHIFT 27 -#endif - -enum kbuffer_endian { - KBUFFER_ENDIAN_BIG, - KBUFFER_ENDIAN_LITTLE, -}; - -enum kbuffer_long_size { - KBUFFER_LSIZE_4, - KBUFFER_LSIZE_8, -}; - -enum { - KBUFFER_TYPE_PADDING = 29, - KBUFFER_TYPE_TIME_EXTEND = 30, - KBUFFER_TYPE_TIME_STAMP = 31, -}; - -struct kbuffer; - -struct kbuffer *kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian); -void kbuffer_free(struct kbuffer *kbuf); -int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer); -void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts); -void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts); -unsigned long long kbuffer_timestamp(struct kbuffer *kbuf); -unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf); -unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr); - -void *kbuffer_translate_data(int swap, void *data, unsigned int *size); - -void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, unsigned long long *ts); - -int kbuffer_curr_index(struct kbuffer *kbuf); - -int kbuffer_curr_offset(struct kbuffer *kbuf); -int kbuffer_curr_size(struct kbuffer *kbuf); -int kbuffer_event_size(struct kbuffer *kbuf); -int kbuffer_missed_events(struct kbuffer *kbuf); -int kbuffer_subbuffer_size(struct kbuffer *kbuf); - -void kbuffer_set_old_format(struct kbuffer *kbuf); -int kbuffer_start_of_data(struct kbuffer *kbuf); - -/* Debugging */ - -struct kbuffer_raw_info { - int type; - int length; - unsigned long long delta; - void *next; -}; - -/* Read raw data */ -struct kbuffer_raw_info *kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf, - struct kbuffer_raw_info *info); - -#endif /* _K_BUFFER_H */ diff --git a/tools/lib/traceevent/libtraceevent.pc.template b/tools/lib/traceevent/libtraceevent.pc.template deleted file mode 100644 index 86384fcd57f1..000000000000 --- a/tools/lib/traceevent/libtraceevent.pc.template +++ /dev/null @@ -1,10 +0,0 @@ -prefix=INSTALL_PREFIX -libdir=LIB_DIR -includedir=HEADER_DIR - -Name: libtraceevent -URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git -Description: Linux kernel trace event library -Version: LIB_VERSION -Cflags: -I${includedir} -Libs: -L${libdir} -ltraceevent diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c deleted file mode 100644 index 5df177070d53..000000000000 --- a/tools/lib/traceevent/parse-filter.c +++ /dev/null @@ -1,2281 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <errno.h> -#include <sys/types.h> - -#include "event-parse.h" -#include "event-parse-local.h" -#include "event-utils.h" - -#define COMM "COMM" -#define CPU "CPU" - -static struct tep_format_field comm = { - .name = "COMM", -}; - -static struct tep_format_field cpu = { - .name = "CPU", -}; - -struct event_list { - struct event_list *next; - struct tep_event *event; -}; - -static void show_error(char *error_buf, const char *fmt, ...) -{ - unsigned long long index; - const char *input; - va_list ap; - int len; - int i; - - input = get_input_buf(); - index = get_input_buf_ptr(); - len = input ? strlen(input) : 0; - - if (len) { - strcpy(error_buf, input); - error_buf[len] = '\n'; - for (i = 1; i < len && i < index; i++) - error_buf[len+i] = ' '; - error_buf[len + i] = '^'; - error_buf[len + i + 1] = '\n'; - len += i+2; - } - - va_start(ap, fmt); - vsnprintf(error_buf + len, TEP_FILTER_ERROR_BUFSZ - len, fmt, ap); - va_end(ap); -} - -static enum tep_event_type filter_read_token(char **tok) -{ - enum tep_event_type type; - char *token = NULL; - - do { - free_token(token); - type = read_token(&token); - } while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE); - - /* If token is = or ! check to see if the next char is ~ */ - if (token && - (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) && - peek_char() == '~') { - /* append it */ - *tok = malloc(3); - if (*tok == NULL) { - free_token(token); - return TEP_EVENT_ERROR; - } - sprintf(*tok, "%c%c", *token, '~'); - free_token(token); - /* Now remove the '~' from the buffer */ - read_token(&token); - free_token(token); - } else - *tok = token; - - return type; -} - -static int filter_cmp(const void *a, const void *b) -{ - const struct tep_filter_type *ea = a; - const struct tep_filter_type *eb = b; - - if (ea->event_id < eb->event_id) - return -1; - - if (ea->event_id > eb->event_id) - return 1; - - return 0; -} - -static struct tep_filter_type * -find_filter_type(struct tep_event_filter *filter, int id) -{ - struct tep_filter_type *filter_type; - struct tep_filter_type key; - - key.event_id = id; - - filter_type = bsearch(&key, filter->event_filters, - filter->filters, - sizeof(*filter->event_filters), - filter_cmp); - - return filter_type; -} - -static struct tep_filter_type * -add_filter_type(struct tep_event_filter *filter, int id) -{ - struct tep_filter_type *filter_type; - int i; - - filter_type = find_filter_type(filter, id); - if (filter_type) - return filter_type; - - filter_type = realloc(filter->event_filters, - sizeof(*filter->event_filters) * - (filter->filters + 1)); - if (!filter_type) - return NULL; - - filter->event_filters = filter_type; - - for (i = 0; i < filter->filters; i++) { - if (filter->event_filters[i].event_id > id) - break; - } - - if (i < filter->filters) - memmove(&filter->event_filters[i+1], - &filter->event_filters[i], - sizeof(*filter->event_filters) * - (filter->filters - i)); - - filter_type = &filter->event_filters[i]; - filter_type->event_id = id; - filter_type->event = tep_find_event(filter->tep, id); - filter_type->filter = NULL; - - filter->filters++; - - return filter_type; -} - -/** - * tep_filter_alloc - create a new event filter - * @tep: The tep that this filter is associated with - */ -struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep) -{ - struct tep_event_filter *filter; - - filter = malloc(sizeof(*filter)); - if (filter == NULL) - return NULL; - - memset(filter, 0, sizeof(*filter)); - filter->tep = tep; - tep_ref(tep); - - return filter; -} - -static struct tep_filter_arg *allocate_arg(void) -{ - return calloc(1, sizeof(struct tep_filter_arg)); -} - -static void free_arg(struct tep_filter_arg *arg) -{ - if (!arg) - return; - - switch (arg->type) { - case TEP_FILTER_ARG_NONE: - case TEP_FILTER_ARG_BOOLEAN: - break; - - case TEP_FILTER_ARG_NUM: - free_arg(arg->num.left); - free_arg(arg->num.right); - break; - - case TEP_FILTER_ARG_EXP: - free_arg(arg->exp.left); - free_arg(arg->exp.right); - break; - - case TEP_FILTER_ARG_STR: - free(arg->str.val); - regfree(&arg->str.reg); - free(arg->str.buffer); - break; - - case TEP_FILTER_ARG_VALUE: - if (arg->value.type == TEP_FILTER_STRING || - arg->value.type == TEP_FILTER_CHAR) - free(arg->value.str); - break; - - case TEP_FILTER_ARG_OP: - free_arg(arg->op.left); - free_arg(arg->op.right); - default: - break; - } - - free(arg); -} - -static int add_event(struct event_list **events, - struct tep_event *event) -{ - struct event_list *list; - - list = malloc(sizeof(*list)); - if (list == NULL) - return -1; - - list->next = *events; - *events = list; - list->event = event; - return 0; -} - -static int event_match(struct tep_event *event, - regex_t *sreg, regex_t *ereg) -{ - if (sreg) { - return !regexec(sreg, event->system, 0, NULL, 0) && - !regexec(ereg, event->name, 0, NULL, 0); - } - - return !regexec(ereg, event->system, 0, NULL, 0) || - !regexec(ereg, event->name, 0, NULL, 0); -} - -static enum tep_errno -find_event(struct tep_handle *tep, struct event_list **events, - char *sys_name, char *event_name) -{ - struct tep_event *event; - regex_t ereg; - regex_t sreg; - int match = 0; - int fail = 0; - char *reg; - int ret; - int i; - - if (!event_name) { - /* if no name is given, then swap sys and name */ - event_name = sys_name; - sys_name = NULL; - } - - ret = asprintf(®, "^%s$", event_name); - if (ret < 0) - return TEP_ERRNO__MEM_ALLOC_FAILED; - - ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB); - free(reg); - - if (ret) - return TEP_ERRNO__INVALID_EVENT_NAME; - - if (sys_name) { - ret = asprintf(®, "^%s$", sys_name); - if (ret < 0) { - regfree(&ereg); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB); - free(reg); - if (ret) { - regfree(&ereg); - return TEP_ERRNO__INVALID_EVENT_NAME; - } - } - - for (i = 0; i < tep->nr_events; i++) { - event = tep->events[i]; - if (event_match(event, sys_name ? &sreg : NULL, &ereg)) { - match = 1; - if (add_event(events, event) < 0) { - fail = 1; - break; - } - } - } - - regfree(&ereg); - if (sys_name) - regfree(&sreg); - - if (!match) - return TEP_ERRNO__EVENT_NOT_FOUND; - if (fail) - return TEP_ERRNO__MEM_ALLOC_FAILED; - - return 0; -} - -static void free_events(struct event_list *events) -{ - struct event_list *event; - - while (events) { - event = events; - events = events->next; - free(event); - } -} - -static enum tep_errno -create_arg_item(struct tep_event *event, const char *token, - enum tep_event_type type, struct tep_filter_arg **parg, char *error_str) -{ - struct tep_format_field *field; - struct tep_filter_arg *arg; - - arg = allocate_arg(); - if (arg == NULL) { - show_error(error_str, "failed to allocate filter arg"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - switch (type) { - - case TEP_EVENT_SQUOTE: - case TEP_EVENT_DQUOTE: - arg->type = TEP_FILTER_ARG_VALUE; - arg->value.type = - type == TEP_EVENT_DQUOTE ? TEP_FILTER_STRING : TEP_FILTER_CHAR; - arg->value.str = strdup(token); - if (!arg->value.str) { - free_arg(arg); - show_error(error_str, "failed to allocate string filter arg"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - break; - case TEP_EVENT_ITEM: - /* if it is a number, then convert it */ - if (isdigit(token[0])) { - arg->type = TEP_FILTER_ARG_VALUE; - arg->value.type = TEP_FILTER_NUMBER; - arg->value.val = strtoull(token, NULL, 0); - break; - } - /* Consider this a field */ - field = tep_find_any_field(event, token); - if (!field) { - /* If token is 'COMM' or 'CPU' then it is special */ - if (strcmp(token, COMM) == 0) { - field = &comm; - } else if (strcmp(token, CPU) == 0) { - field = &cpu; - } else { - /* not a field, Make it false */ - arg->type = TEP_FILTER_ARG_BOOLEAN; - arg->boolean.value = TEP_FILTER_FALSE; - break; - } - } - arg->type = TEP_FILTER_ARG_FIELD; - arg->field.field = field; - break; - default: - free_arg(arg); - show_error(error_str, "expected a value but found %s", token); - return TEP_ERRNO__UNEXPECTED_TYPE; - } - *parg = arg; - return 0; -} - -static struct tep_filter_arg * -create_arg_op(enum tep_filter_op_type btype) -{ - struct tep_filter_arg *arg; - - arg = allocate_arg(); - if (!arg) - return NULL; - - arg->type = TEP_FILTER_ARG_OP; - arg->op.type = btype; - - return arg; -} - -static struct tep_filter_arg * -create_arg_exp(enum tep_filter_exp_type etype) -{ - struct tep_filter_arg *arg; - - arg = allocate_arg(); - if (!arg) - return NULL; - - arg->type = TEP_FILTER_ARG_EXP; - arg->exp.type = etype; - - return arg; -} - -static struct tep_filter_arg * -create_arg_cmp(enum tep_filter_cmp_type ctype) -{ - struct tep_filter_arg *arg; - - arg = allocate_arg(); - if (!arg) - return NULL; - - /* Use NUM and change if necessary */ - arg->type = TEP_FILTER_ARG_NUM; - arg->num.type = ctype; - - return arg; -} - -static enum tep_errno -add_right(struct tep_filter_arg *op, struct tep_filter_arg *arg, char *error_str) -{ - struct tep_filter_arg *left; - char *str; - int op_type; - int ret; - - switch (op->type) { - case TEP_FILTER_ARG_EXP: - if (op->exp.right) - goto out_fail; - op->exp.right = arg; - break; - - case TEP_FILTER_ARG_OP: - if (op->op.right) - goto out_fail; - op->op.right = arg; - break; - - case TEP_FILTER_ARG_NUM: - if (op->op.right) - goto out_fail; - /* - * The arg must be num, str, or field - */ - switch (arg->type) { - case TEP_FILTER_ARG_VALUE: - case TEP_FILTER_ARG_FIELD: - break; - default: - show_error(error_str, "Illegal rvalue"); - return TEP_ERRNO__ILLEGAL_RVALUE; - } - - /* - * Depending on the type, we may need to - * convert this to a string or regex. - */ - switch (arg->value.type) { - case TEP_FILTER_CHAR: - /* - * A char should be converted to number if - * the string is 1 byte, and the compare - * is not a REGEX. - */ - if (strlen(arg->value.str) == 1 && - op->num.type != TEP_FILTER_CMP_REGEX && - op->num.type != TEP_FILTER_CMP_NOT_REGEX) { - arg->value.type = TEP_FILTER_NUMBER; - goto do_int; - } - /* fall through */ - case TEP_FILTER_STRING: - - /* convert op to a string arg */ - op_type = op->num.type; - left = op->num.left; - str = arg->value.str; - - /* reset the op for the new field */ - memset(op, 0, sizeof(*op)); - - /* - * If left arg was a field not found then - * NULL the entire op. - */ - if (left->type == TEP_FILTER_ARG_BOOLEAN) { - free_arg(left); - free_arg(arg); - op->type = TEP_FILTER_ARG_BOOLEAN; - op->boolean.value = TEP_FILTER_FALSE; - break; - } - - /* Left arg must be a field */ - if (left->type != TEP_FILTER_ARG_FIELD) { - show_error(error_str, - "Illegal lvalue for string comparison"); - return TEP_ERRNO__ILLEGAL_LVALUE; - } - - /* Make sure this is a valid string compare */ - switch (op_type) { - case TEP_FILTER_CMP_EQ: - op_type = TEP_FILTER_CMP_MATCH; - break; - case TEP_FILTER_CMP_NE: - op_type = TEP_FILTER_CMP_NOT_MATCH; - break; - - case TEP_FILTER_CMP_REGEX: - case TEP_FILTER_CMP_NOT_REGEX: - ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB); - if (ret) { - show_error(error_str, - "RegEx '%s' did not compute", - str); - return TEP_ERRNO__INVALID_REGEX; - } - break; - default: - show_error(error_str, - "Illegal comparison for string"); - return TEP_ERRNO__ILLEGAL_STRING_CMP; - } - - op->type = TEP_FILTER_ARG_STR; - op->str.type = op_type; - op->str.field = left->field.field; - op->str.val = strdup(str); - if (!op->str.val) { - show_error(error_str, "Failed to allocate string filter"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - /* - * Need a buffer to copy data for tests - */ - op->str.buffer = malloc(op->str.field->size + 1); - if (!op->str.buffer) { - show_error(error_str, "Failed to allocate string filter"); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - /* Null terminate this buffer */ - op->str.buffer[op->str.field->size] = 0; - - /* We no longer have left or right args */ - free_arg(arg); - free_arg(left); - - break; - - case TEP_FILTER_NUMBER: - - do_int: - switch (op->num.type) { - case TEP_FILTER_CMP_REGEX: - case TEP_FILTER_CMP_NOT_REGEX: - show_error(error_str, - "Op not allowed with integers"); - return TEP_ERRNO__ILLEGAL_INTEGER_CMP; - - default: - break; - } - - /* numeric compare */ - op->num.right = arg; - break; - default: - goto out_fail; - } - break; - default: - goto out_fail; - } - - return 0; - - out_fail: - show_error(error_str, "Syntax error"); - return TEP_ERRNO__SYNTAX_ERROR; -} - -static struct tep_filter_arg * -rotate_op_right(struct tep_filter_arg *a, struct tep_filter_arg *b) -{ - struct tep_filter_arg *arg; - - arg = a->op.right; - a->op.right = b; - return arg; -} - -static enum tep_errno add_left(struct tep_filter_arg *op, struct tep_filter_arg *arg) -{ - switch (op->type) { - case TEP_FILTER_ARG_EXP: - if (arg->type == TEP_FILTER_ARG_OP) - arg = rotate_op_right(arg, op); - op->exp.left = arg; - break; - - case TEP_FILTER_ARG_OP: - op->op.left = arg; - break; - case TEP_FILTER_ARG_NUM: - if (arg->type == TEP_FILTER_ARG_OP) - arg = rotate_op_right(arg, op); - - /* left arg of compares must be a field */ - if (arg->type != TEP_FILTER_ARG_FIELD && - arg->type != TEP_FILTER_ARG_BOOLEAN) - return TEP_ERRNO__INVALID_ARG_TYPE; - op->num.left = arg; - break; - default: - return TEP_ERRNO__INVALID_ARG_TYPE; - } - return 0; -} - -enum op_type { - OP_NONE, - OP_BOOL, - OP_NOT, - OP_EXP, - OP_CMP, -}; - -static enum op_type process_op(const char *token, - enum tep_filter_op_type *btype, - enum tep_filter_cmp_type *ctype, - enum tep_filter_exp_type *etype) -{ - *btype = TEP_FILTER_OP_NOT; - *etype = TEP_FILTER_EXP_NONE; - *ctype = TEP_FILTER_CMP_NONE; - - if (strcmp(token, "&&") == 0) - *btype = TEP_FILTER_OP_AND; - else if (strcmp(token, "||") == 0) - *btype = TEP_FILTER_OP_OR; - else if (strcmp(token, "!") == 0) - return OP_NOT; - - if (*btype != TEP_FILTER_OP_NOT) - return OP_BOOL; - - /* Check for value expressions */ - if (strcmp(token, "+") == 0) { - *etype = TEP_FILTER_EXP_ADD; - } else if (strcmp(token, "-") == 0) { - *etype = TEP_FILTER_EXP_SUB; - } else if (strcmp(token, "*") == 0) { - *etype = TEP_FILTER_EXP_MUL; - } else if (strcmp(token, "/") == 0) { - *etype = TEP_FILTER_EXP_DIV; - } else if (strcmp(token, "%") == 0) { - *etype = TEP_FILTER_EXP_MOD; - } else if (strcmp(token, ">>") == 0) { - *etype = TEP_FILTER_EXP_RSHIFT; - } else if (strcmp(token, "<<") == 0) { - *etype = TEP_FILTER_EXP_LSHIFT; - } else if (strcmp(token, "&") == 0) { - *etype = TEP_FILTER_EXP_AND; - } else if (strcmp(token, "|") == 0) { - *etype = TEP_FILTER_EXP_OR; - } else if (strcmp(token, "^") == 0) { - *etype = TEP_FILTER_EXP_XOR; - } else if (strcmp(token, "~") == 0) - *etype = TEP_FILTER_EXP_NOT; - - if (*etype != TEP_FILTER_EXP_NONE) - return OP_EXP; - - /* Check for compares */ - if (strcmp(token, "==") == 0) - *ctype = TEP_FILTER_CMP_EQ; - else if (strcmp(token, "!=") == 0) - *ctype = TEP_FILTER_CMP_NE; - else if (strcmp(token, "<") == 0) - *ctype = TEP_FILTER_CMP_LT; - else if (strcmp(token, ">") == 0) - *ctype = TEP_FILTER_CMP_GT; - else if (strcmp(token, "<=") == 0) - *ctype = TEP_FILTER_CMP_LE; - else if (strcmp(token, ">=") == 0) - *ctype = TEP_FILTER_CMP_GE; - else if (strcmp(token, "=~") == 0) - *ctype = TEP_FILTER_CMP_REGEX; - else if (strcmp(token, "!~") == 0) - *ctype = TEP_FILTER_CMP_NOT_REGEX; - else - return OP_NONE; - - return OP_CMP; -} - -static int check_op_done(struct tep_filter_arg *arg) -{ - switch (arg->type) { - case TEP_FILTER_ARG_EXP: - return arg->exp.right != NULL; - - case TEP_FILTER_ARG_OP: - return arg->op.right != NULL; - - case TEP_FILTER_ARG_NUM: - return arg->num.right != NULL; - - case TEP_FILTER_ARG_STR: - /* A string conversion is always done */ - return 1; - - case TEP_FILTER_ARG_BOOLEAN: - /* field not found, is ok */ - return 1; - - default: - return 0; - } -} - -enum filter_vals { - FILTER_VAL_NORM, - FILTER_VAL_FALSE, - FILTER_VAL_TRUE, -}; - -static enum tep_errno -reparent_op_arg(struct tep_filter_arg *parent, struct tep_filter_arg *old_child, - struct tep_filter_arg *arg, char *error_str) -{ - struct tep_filter_arg *other_child; - struct tep_filter_arg **ptr; - - if (parent->type != TEP_FILTER_ARG_OP && - arg->type != TEP_FILTER_ARG_OP) { - show_error(error_str, "can not reparent other than OP"); - return TEP_ERRNO__REPARENT_NOT_OP; - } - - /* Get the sibling */ - if (old_child->op.right == arg) { - ptr = &old_child->op.right; - other_child = old_child->op.left; - } else if (old_child->op.left == arg) { - ptr = &old_child->op.left; - other_child = old_child->op.right; - } else { - show_error(error_str, "Error in reparent op, find other child"); - return TEP_ERRNO__REPARENT_FAILED; - } - - /* Detach arg from old_child */ - *ptr = NULL; - - /* Check for root */ - if (parent == old_child) { - free_arg(other_child); - *parent = *arg; - /* Free arg without recussion */ - free(arg); - return 0; - } - - if (parent->op.right == old_child) - ptr = &parent->op.right; - else if (parent->op.left == old_child) - ptr = &parent->op.left; - else { - show_error(error_str, "Error in reparent op"); - return TEP_ERRNO__REPARENT_FAILED; - } - - *ptr = arg; - - free_arg(old_child); - return 0; -} - -/* Returns either filter_vals (success) or tep_errno (failfure) */ -static int test_arg(struct tep_filter_arg *parent, struct tep_filter_arg *arg, - char *error_str) -{ - int lval, rval; - - switch (arg->type) { - - /* bad case */ - case TEP_FILTER_ARG_BOOLEAN: - return FILTER_VAL_FALSE + arg->boolean.value; - - /* good cases: */ - case TEP_FILTER_ARG_STR: - case TEP_FILTER_ARG_VALUE: - case TEP_FILTER_ARG_FIELD: - return FILTER_VAL_NORM; - - case TEP_FILTER_ARG_EXP: - lval = test_arg(arg, arg->exp.left, error_str); - if (lval != FILTER_VAL_NORM) - return lval; - rval = test_arg(arg, arg->exp.right, error_str); - if (rval != FILTER_VAL_NORM) - return rval; - return FILTER_VAL_NORM; - - case TEP_FILTER_ARG_NUM: - lval = test_arg(arg, arg->num.left, error_str); - if (lval != FILTER_VAL_NORM) - return lval; - rval = test_arg(arg, arg->num.right, error_str); - if (rval != FILTER_VAL_NORM) - return rval; - return FILTER_VAL_NORM; - - case TEP_FILTER_ARG_OP: - if (arg->op.type != TEP_FILTER_OP_NOT) { - lval = test_arg(arg, arg->op.left, error_str); - switch (lval) { - case FILTER_VAL_NORM: - break; - case FILTER_VAL_TRUE: - if (arg->op.type == TEP_FILTER_OP_OR) - return FILTER_VAL_TRUE; - rval = test_arg(arg, arg->op.right, error_str); - if (rval != FILTER_VAL_NORM) - return rval; - - return reparent_op_arg(parent, arg, arg->op.right, - error_str); - - case FILTER_VAL_FALSE: - if (arg->op.type == TEP_FILTER_OP_AND) - return FILTER_VAL_FALSE; - rval = test_arg(arg, arg->op.right, error_str); - if (rval != FILTER_VAL_NORM) - return rval; - - return reparent_op_arg(parent, arg, arg->op.right, - error_str); - - default: - return lval; - } - } - - rval = test_arg(arg, arg->op.right, error_str); - switch (rval) { - case FILTER_VAL_NORM: - default: - break; - - case FILTER_VAL_TRUE: - if (arg->op.type == TEP_FILTER_OP_OR) - return FILTER_VAL_TRUE; - if (arg->op.type == TEP_FILTER_OP_NOT) - return FILTER_VAL_FALSE; - - return reparent_op_arg(parent, arg, arg->op.left, - error_str); - - case FILTER_VAL_FALSE: - if (arg->op.type == TEP_FILTER_OP_AND) - return FILTER_VAL_FALSE; - if (arg->op.type == TEP_FILTER_OP_NOT) - return FILTER_VAL_TRUE; - - return reparent_op_arg(parent, arg, arg->op.left, - error_str); - } - - return rval; - default: - show_error(error_str, "bad arg in filter tree"); - return TEP_ERRNO__BAD_FILTER_ARG; - } - return FILTER_VAL_NORM; -} - -/* Remove any unknown event fields */ -static int collapse_tree(struct tep_filter_arg *arg, - struct tep_filter_arg **arg_collapsed, char *error_str) -{ - int ret; - - ret = test_arg(arg, arg, error_str); - switch (ret) { - case FILTER_VAL_NORM: - break; - - case FILTER_VAL_TRUE: - case FILTER_VAL_FALSE: - free_arg(arg); - arg = allocate_arg(); - if (arg) { - arg->type = TEP_FILTER_ARG_BOOLEAN; - arg->boolean.value = ret == FILTER_VAL_TRUE; - } else { - show_error(error_str, "Failed to allocate filter arg"); - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - } - break; - - default: - /* test_arg() already set the error_str */ - free_arg(arg); - arg = NULL; - break; - } - - *arg_collapsed = arg; - return ret; -} - -static enum tep_errno -process_filter(struct tep_event *event, struct tep_filter_arg **parg, - char *error_str, int not) -{ - enum tep_event_type type; - char *token = NULL; - struct tep_filter_arg *current_op = NULL; - struct tep_filter_arg *current_exp = NULL; - struct tep_filter_arg *left_item = NULL; - struct tep_filter_arg *arg = NULL; - enum op_type op_type; - enum tep_filter_op_type btype; - enum tep_filter_exp_type etype; - enum tep_filter_cmp_type ctype; - enum tep_errno ret; - - *parg = NULL; - - do { - free(token); - type = filter_read_token(&token); - switch (type) { - case TEP_EVENT_SQUOTE: - case TEP_EVENT_DQUOTE: - case TEP_EVENT_ITEM: - ret = create_arg_item(event, token, type, &arg, error_str); - if (ret < 0) - goto fail; - if (!left_item) - left_item = arg; - else if (current_exp) { - ret = add_right(current_exp, arg, error_str); - if (ret < 0) - goto fail; - left_item = NULL; - /* Not's only one one expression */ - if (not) { - arg = NULL; - if (current_op) - goto fail_syntax; - free(token); - *parg = current_exp; - return 0; - } - } else - goto fail_syntax; - arg = NULL; - break; - - case TEP_EVENT_DELIM: - if (*token == ',') { - show_error(error_str, "Illegal token ','"); - ret = TEP_ERRNO__ILLEGAL_TOKEN; - goto fail; - } - - if (*token == '(') { - if (left_item) { - show_error(error_str, - "Open paren can not come after item"); - ret = TEP_ERRNO__INVALID_PAREN; - goto fail; - } - if (current_exp) { - show_error(error_str, - "Open paren can not come after expression"); - ret = TEP_ERRNO__INVALID_PAREN; - goto fail; - } - - ret = process_filter(event, &arg, error_str, 0); - if (ret != TEP_ERRNO__UNBALANCED_PAREN) { - if (ret == 0) { - show_error(error_str, - "Unbalanced number of '('"); - ret = TEP_ERRNO__UNBALANCED_PAREN; - } - goto fail; - } - ret = 0; - - /* A not wants just one expression */ - if (not) { - if (current_op) - goto fail_syntax; - *parg = arg; - return 0; - } - - if (current_op) - ret = add_right(current_op, arg, error_str); - else - current_exp = arg; - - if (ret < 0) - goto fail; - - } else { /* ')' */ - if (!current_op && !current_exp) - goto fail_syntax; - - /* Make sure everything is finished at this level */ - if (current_exp && !check_op_done(current_exp)) - goto fail_syntax; - if (current_op && !check_op_done(current_op)) - goto fail_syntax; - - if (current_op) - *parg = current_op; - else - *parg = current_exp; - free(token); - return TEP_ERRNO__UNBALANCED_PAREN; - } - break; - - case TEP_EVENT_OP: - op_type = process_op(token, &btype, &ctype, &etype); - - /* All expect a left arg except for NOT */ - switch (op_type) { - case OP_BOOL: - /* Logic ops need a left expression */ - if (!current_exp && !current_op) - goto fail_syntax; - /* fall through */ - case OP_NOT: - /* logic only processes ops and exp */ - if (left_item) - goto fail_syntax; - break; - case OP_EXP: - case OP_CMP: - if (!left_item) - goto fail_syntax; - break; - case OP_NONE: - show_error(error_str, - "Unknown op token %s", token); - ret = TEP_ERRNO__UNKNOWN_TOKEN; - goto fail; - } - - ret = 0; - switch (op_type) { - case OP_BOOL: - arg = create_arg_op(btype); - if (arg == NULL) - goto fail_alloc; - if (current_op) - ret = add_left(arg, current_op); - else - ret = add_left(arg, current_exp); - current_op = arg; - current_exp = NULL; - break; - - case OP_NOT: - arg = create_arg_op(btype); - if (arg == NULL) - goto fail_alloc; - if (current_op) - ret = add_right(current_op, arg, error_str); - if (ret < 0) - goto fail; - current_exp = arg; - ret = process_filter(event, &arg, error_str, 1); - if (ret < 0) - goto fail; - ret = add_right(current_exp, arg, error_str); - if (ret < 0) - goto fail; - break; - - case OP_EXP: - case OP_CMP: - if (op_type == OP_EXP) - arg = create_arg_exp(etype); - else - arg = create_arg_cmp(ctype); - if (arg == NULL) - goto fail_alloc; - - if (current_op) - ret = add_right(current_op, arg, error_str); - if (ret < 0) - goto fail; - ret = add_left(arg, left_item); - if (ret < 0) { - arg = NULL; - goto fail_syntax; - } - current_exp = arg; - break; - default: - break; - } - arg = NULL; - if (ret < 0) - goto fail_syntax; - break; - case TEP_EVENT_NONE: - break; - case TEP_EVENT_ERROR: - goto fail_alloc; - default: - goto fail_syntax; - } - } while (type != TEP_EVENT_NONE); - - if (!current_op && !current_exp) - goto fail_syntax; - - if (!current_op) - current_op = current_exp; - - ret = collapse_tree(current_op, parg, error_str); - /* collapse_tree() may free current_op, and updates parg accordingly */ - current_op = NULL; - if (ret < 0) - goto fail; - - free(token); - return 0; - - fail_alloc: - show_error(error_str, "failed to allocate filter arg"); - ret = TEP_ERRNO__MEM_ALLOC_FAILED; - goto fail; - fail_syntax: - show_error(error_str, "Syntax error"); - ret = TEP_ERRNO__SYNTAX_ERROR; - fail: - free_arg(current_op); - free_arg(current_exp); - free_arg(arg); - free(token); - return ret; -} - -static enum tep_errno -process_event(struct tep_event *event, const char *filter_str, - struct tep_filter_arg **parg, char *error_str) -{ - int ret; - - init_input_buf(filter_str, strlen(filter_str)); - - ret = process_filter(event, parg, error_str, 0); - if (ret < 0) - return ret; - - /* If parg is NULL, then make it into FALSE */ - if (!*parg) { - *parg = allocate_arg(); - if (*parg == NULL) - return TEP_ERRNO__MEM_ALLOC_FAILED; - - (*parg)->type = TEP_FILTER_ARG_BOOLEAN; - (*parg)->boolean.value = TEP_FILTER_FALSE; - } - - return 0; -} - -static enum tep_errno -filter_event(struct tep_event_filter *filter, struct tep_event *event, - const char *filter_str, char *error_str) -{ - struct tep_filter_type *filter_type; - struct tep_filter_arg *arg; - enum tep_errno ret; - - if (filter_str) { - ret = process_event(event, filter_str, &arg, error_str); - if (ret < 0) - return ret; - - } else { - /* just add a TRUE arg */ - arg = allocate_arg(); - if (arg == NULL) - return TEP_ERRNO__MEM_ALLOC_FAILED; - - arg->type = TEP_FILTER_ARG_BOOLEAN; - arg->boolean.value = TEP_FILTER_TRUE; - } - - filter_type = add_filter_type(filter, event->id); - if (filter_type == NULL) { - free_arg(arg); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - - if (filter_type->filter) - free_arg(filter_type->filter); - filter_type->filter = arg; - - return 0; -} - -static void filter_init_error_buf(struct tep_event_filter *filter) -{ - /* clear buffer to reset show error */ - init_input_buf("", 0); - filter->error_buffer[0] = '\0'; -} - -/** - * tep_filter_add_filter_str - add a new filter - * @filter: the event filter to add to - * @filter_str: the filter string that contains the filter - * - * Returns 0 if the filter was successfully added or a - * negative error code. Use tep_filter_strerror() to see - * actual error message in case of error. - */ -enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, - const char *filter_str) -{ - struct tep_handle *tep = filter->tep; - struct event_list *event; - struct event_list *events = NULL; - const char *filter_start; - const char *next_event; - char *this_event; - char *event_name = NULL; - char *sys_name = NULL; - char *sp; - enum tep_errno rtn = 0; /* TEP_ERRNO__SUCCESS */ - int len; - int ret; - - filter_init_error_buf(filter); - - filter_start = strchr(filter_str, ':'); - if (filter_start) - len = filter_start - filter_str; - else - len = strlen(filter_str); - - do { - next_event = strchr(filter_str, ','); - if (next_event && - (!filter_start || next_event < filter_start)) - len = next_event - filter_str; - else if (filter_start) - len = filter_start - filter_str; - else - len = strlen(filter_str); - - this_event = malloc(len + 1); - if (this_event == NULL) { - /* This can only happen when events is NULL, but still */ - free_events(events); - return TEP_ERRNO__MEM_ALLOC_FAILED; - } - memcpy(this_event, filter_str, len); - this_event[len] = 0; - - if (next_event) - next_event++; - - filter_str = next_event; - - sys_name = strtok_r(this_event, "/", &sp); - event_name = strtok_r(NULL, "/", &sp); - - if (!sys_name) { - /* This can only happen when events is NULL, but still */ - free_events(events); - free(this_event); - return TEP_ERRNO__FILTER_NOT_FOUND; - } - - /* Find this event */ - ret = find_event(tep, &events, strim(sys_name), strim(event_name)); - if (ret < 0) { - free_events(events); - free(this_event); - return ret; - } - free(this_event); - } while (filter_str); - - /* Skip the ':' */ - if (filter_start) - filter_start++; - - /* filter starts here */ - for (event = events; event; event = event->next) { - ret = filter_event(filter, event->event, filter_start, - filter->error_buffer); - /* Failures are returned if a parse error happened */ - if (ret < 0) - rtn = ret; - - if (ret >= 0 && tep->test_filters) { - char *test; - test = tep_filter_make_string(filter, event->event->id); - if (test) { - printf(" '%s: %s'\n", event->event->name, test); - free(test); - } - } - } - - free_events(events); - - return rtn; -} - -static void free_filter_type(struct tep_filter_type *filter_type) -{ - free_arg(filter_type->filter); -} - -/** - * tep_filter_strerror - fill error message in a buffer - * @filter: the event filter contains error - * @err: the error code - * @buf: the buffer to be filled in - * @buflen: the size of the buffer - * - * Returns 0 if message was filled successfully, -1 if error - */ -int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err, - char *buf, size_t buflen) -{ - if (err <= __TEP_ERRNO__START || err >= __TEP_ERRNO__END) - return -1; - - if (strlen(filter->error_buffer) > 0) { - size_t len = snprintf(buf, buflen, "%s", filter->error_buffer); - - if (len > buflen) - return -1; - return 0; - } - - return tep_strerror(filter->tep, err, buf, buflen); -} - -/** - * tep_filter_remove_event - remove a filter for an event - * @filter: the event filter to remove from - * @event_id: the event to remove a filter for - * - * Removes the filter saved for an event defined by @event_id - * from the @filter. - * - * Returns 1: if an event was removed - * 0: if the event was not found - */ -int tep_filter_remove_event(struct tep_event_filter *filter, - int event_id) -{ - struct tep_filter_type *filter_type; - unsigned long len; - - if (!filter->filters) - return 0; - - filter_type = find_filter_type(filter, event_id); - - if (!filter_type) - return 0; - - free_filter_type(filter_type); - - /* The filter_type points into the event_filters array */ - len = (unsigned long)(filter->event_filters + filter->filters) - - (unsigned long)(filter_type + 1); - - memmove(filter_type, filter_type + 1, len); - filter->filters--; - - memset(&filter->event_filters[filter->filters], 0, - sizeof(*filter_type)); - - return 1; -} - -/** - * tep_filter_reset - clear all filters in a filter - * @filter: the event filter to reset - * - * Removes all filters from a filter and resets it. - */ -void tep_filter_reset(struct tep_event_filter *filter) -{ - int i; - - for (i = 0; i < filter->filters; i++) - free_filter_type(&filter->event_filters[i]); - - free(filter->event_filters); - filter->filters = 0; - filter->event_filters = NULL; -} - -void tep_filter_free(struct tep_event_filter *filter) -{ - tep_unref(filter->tep); - - tep_filter_reset(filter); - - free(filter); -} - -static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg); - -static int copy_filter_type(struct tep_event_filter *filter, - struct tep_event_filter *source, - struct tep_filter_type *filter_type) -{ - struct tep_filter_arg *arg; - struct tep_event *event; - const char *sys; - const char *name; - char *str; - - /* Can't assume that the tep's are the same */ - sys = filter_type->event->system; - name = filter_type->event->name; - event = tep_find_event_by_name(filter->tep, sys, name); - if (!event) - return -1; - - str = arg_to_str(source, filter_type->filter); - if (!str) - return -1; - - if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) { - /* Add trivial event */ - arg = allocate_arg(); - if (arg == NULL) { - free(str); - return -1; - } - - arg->type = TEP_FILTER_ARG_BOOLEAN; - if (strcmp(str, "TRUE") == 0) - arg->boolean.value = 1; - else - arg->boolean.value = 0; - - filter_type = add_filter_type(filter, event->id); - if (filter_type == NULL) { - free(str); - free_arg(arg); - return -1; - } - - filter_type->filter = arg; - - free(str); - return 0; - } - - filter_event(filter, event, str, NULL); - free(str); - - return 0; -} - -/** - * tep_filter_copy - copy a filter using another filter - * @dest - the filter to copy to - * @source - the filter to copy from - * - * Returns 0 on success and -1 if not all filters were copied - */ -int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source) -{ - int ret = 0; - int i; - - tep_filter_reset(dest); - - for (i = 0; i < source->filters; i++) { - if (copy_filter_type(dest, source, &source->event_filters[i])) - ret = -1; - } - return ret; -} - -static int test_filter(struct tep_event *event, struct tep_filter_arg *arg, - struct tep_record *record, enum tep_errno *err); - -static const char * -get_comm(struct tep_event *event, struct tep_record *record) -{ - const char *comm; - int pid; - - pid = tep_data_pid(event->tep, record); - comm = tep_data_comm_from_pid(event->tep, pid); - return comm; -} - -static unsigned long long -get_value(struct tep_event *event, - struct tep_format_field *field, struct tep_record *record) -{ - unsigned long long val; - - /* Handle our dummy "comm" field */ - if (field == &comm) { - const char *name; - - name = get_comm(event, record); - return (unsigned long)name; - } - - /* Handle our dummy "cpu" field */ - if (field == &cpu) - return record->cpu; - - tep_read_number_field(field, record->data, &val); - - if (!(field->flags & TEP_FIELD_IS_SIGNED)) - return val; - - switch (field->size) { - case 1: - return (char)val; - case 2: - return (short)val; - case 4: - return (int)val; - case 8: - return (long long)val; - } - return val; -} - -static unsigned long long -get_arg_value(struct tep_event *event, struct tep_filter_arg *arg, - struct tep_record *record, enum tep_errno *err); - -static unsigned long long -get_exp_value(struct tep_event *event, struct tep_filter_arg *arg, - struct tep_record *record, enum tep_errno *err) -{ - unsigned long long lval, rval; - - lval = get_arg_value(event, arg->exp.left, record, err); - rval = get_arg_value(event, arg->exp.right, record, err); - - if (*err) { - /* - * There was an error, no need to process anymore. - */ - return 0; - } - - switch (arg->exp.type) { - case TEP_FILTER_EXP_ADD: - return lval + rval; - - case TEP_FILTER_EXP_SUB: - return lval - rval; - - case TEP_FILTER_EXP_MUL: - return lval * rval; - - case TEP_FILTER_EXP_DIV: - return lval / rval; - - case TEP_FILTER_EXP_MOD: - return lval % rval; - - case TEP_FILTER_EXP_RSHIFT: - return lval >> rval; - - case TEP_FILTER_EXP_LSHIFT: - return lval << rval; - - case TEP_FILTER_EXP_AND: - return lval & rval; - - case TEP_FILTER_EXP_OR: - return lval | rval; - - case TEP_FILTER_EXP_XOR: - return lval ^ rval; - - case TEP_FILTER_EXP_NOT: - default: - if (!*err) - *err = TEP_ERRNO__INVALID_EXP_TYPE; - } - return 0; -} - -static unsigned long long -get_arg_value(struct tep_event *event, struct tep_filter_arg *arg, - struct tep_record *record, enum tep_errno *err) -{ - switch (arg->type) { - case TEP_FILTER_ARG_FIELD: - return get_value(event, arg->field.field, record); - - case TEP_FILTER_ARG_VALUE: - if (arg->value.type != TEP_FILTER_NUMBER) { - if (!*err) - *err = TEP_ERRNO__NOT_A_NUMBER; - } - return arg->value.val; - - case TEP_FILTER_ARG_EXP: - return get_exp_value(event, arg, record, err); - - default: - if (!*err) - *err = TEP_ERRNO__INVALID_ARG_TYPE; - } - return 0; -} - -static int test_num(struct tep_event *event, struct tep_filter_arg *arg, - struct tep_record *record, enum tep_errno *err) -{ - unsigned long long lval, rval; - - lval = get_arg_value(event, arg->num.left, record, err); - rval = get_arg_value(event, arg->num.right, record, err); - - if (*err) { - /* - * There was an error, no need to process anymore. - */ - return 0; - } - - switch (arg->num.type) { - case TEP_FILTER_CMP_EQ: - return lval == rval; - - case TEP_FILTER_CMP_NE: - return lval != rval; - - case TEP_FILTER_CMP_GT: - return lval > rval; - - case TEP_FILTER_CMP_LT: - return lval < rval; - - case TEP_FILTER_CMP_GE: - return lval >= rval; - - case TEP_FILTER_CMP_LE: - return lval <= rval; - - default: - if (!*err) - *err = TEP_ERRNO__ILLEGAL_INTEGER_CMP; - return 0; - } -} - -static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record) -{ - struct tep_event *event; - struct tep_handle *tep; - unsigned long long addr; - const char *val = NULL; - unsigned int size; - char hex[64]; - - /* If the field is not a string convert it */ - if (arg->str.field->flags & TEP_FIELD_IS_STRING) { - val = record->data + arg->str.field->offset; - size = arg->str.field->size; - - if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) { - addr = *(unsigned int *)val; - size = addr >> 16; - addr &= 0xffff; - if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE) - addr += arg->str.field->offset + arg->str.field->size; - val = record->data + addr; - } - - /* - * We need to copy the data since we can't be sure the field - * is null terminated. - */ - if (*(val + size - 1)) { - /* copy it */ - memcpy(arg->str.buffer, val, arg->str.field->size); - /* the buffer is already NULL terminated */ - val = arg->str.buffer; - } - - } else { - event = arg->str.field->event; - tep = event->tep; - addr = get_value(event, arg->str.field, record); - - if (arg->str.field->flags & (TEP_FIELD_IS_POINTER | TEP_FIELD_IS_LONG)) - /* convert to a kernel symbol */ - val = tep_find_function(tep, addr); - - if (val == NULL) { - /* just use the hex of the string name */ - snprintf(hex, 64, "0x%llx", addr); - val = hex; - } - } - - return val; -} - -static int test_str(struct tep_event *event, struct tep_filter_arg *arg, - struct tep_record *record, enum tep_errno *err) -{ - const char *val; - - if (arg->str.field == &comm) - val = get_comm(event, record); - else - val = get_field_str(arg, record); - - switch (arg->str.type) { - case TEP_FILTER_CMP_MATCH: - return strcmp(val, arg->str.val) == 0; - - case TEP_FILTER_CMP_NOT_MATCH: - return strcmp(val, arg->str.val) != 0; - - case TEP_FILTER_CMP_REGEX: - /* Returns zero on match */ - return !regexec(&arg->str.reg, val, 0, NULL, 0); - - case TEP_FILTER_CMP_NOT_REGEX: - return regexec(&arg->str.reg, val, 0, NULL, 0); - - default: - if (!*err) - *err = TEP_ERRNO__ILLEGAL_STRING_CMP; - return 0; - } -} - -static int test_op(struct tep_event *event, struct tep_filter_arg *arg, - struct tep_record *record, enum tep_errno *err) -{ - switch (arg->op.type) { - case TEP_FILTER_OP_AND: - return test_filter(event, arg->op.left, record, err) && - test_filter(event, arg->op.right, record, err); - - case TEP_FILTER_OP_OR: - return test_filter(event, arg->op.left, record, err) || - test_filter(event, arg->op.right, record, err); - - case TEP_FILTER_OP_NOT: - return !test_filter(event, arg->op.right, record, err); - - default: - if (!*err) - *err = TEP_ERRNO__INVALID_OP_TYPE; - return 0; - } -} - -static int test_filter(struct tep_event *event, struct tep_filter_arg *arg, - struct tep_record *record, enum tep_errno *err) -{ - if (*err) { - /* - * There was an error, no need to process anymore. - */ - return 0; - } - - switch (arg->type) { - case TEP_FILTER_ARG_BOOLEAN: - /* easy case */ - return arg->boolean.value; - - case TEP_FILTER_ARG_OP: - return test_op(event, arg, record, err); - - case TEP_FILTER_ARG_NUM: - return test_num(event, arg, record, err); - - case TEP_FILTER_ARG_STR: - return test_str(event, arg, record, err); - - case TEP_FILTER_ARG_EXP: - case TEP_FILTER_ARG_VALUE: - case TEP_FILTER_ARG_FIELD: - /* - * Expressions, fields and values evaluate - * to true if they return non zero - */ - return !!get_arg_value(event, arg, record, err); - - default: - if (!*err) - *err = TEP_ERRNO__INVALID_ARG_TYPE; - return 0; - } -} - -/** - * tep_event_filtered - return true if event has filter - * @filter: filter struct with filter information - * @event_id: event id to test if filter exists - * - * Returns 1 if filter found for @event_id - * otherwise 0; - */ -int tep_event_filtered(struct tep_event_filter *filter, int event_id) -{ - struct tep_filter_type *filter_type; - - if (!filter->filters) - return 0; - - filter_type = find_filter_type(filter, event_id); - - return filter_type ? 1 : 0; -} - -/** - * tep_filter_match - test if a record matches a filter - * @filter: filter struct with filter information - * @record: the record to test against the filter - * - * Returns: match result or error code (prefixed with TEP_ERRNO__) - * FILTER_MATCH - filter found for event and @record matches - * FILTER_MISS - filter found for event and @record does not match - * FILTER_NOT_FOUND - no filter found for @record's event - * NO_FILTER - if no filters exist - * otherwise - error occurred during test - */ -enum tep_errno tep_filter_match(struct tep_event_filter *filter, - struct tep_record *record) -{ - struct tep_handle *tep = filter->tep; - struct tep_filter_type *filter_type; - int event_id; - int ret; - enum tep_errno err = 0; - - filter_init_error_buf(filter); - - if (!filter->filters) - return TEP_ERRNO__NO_FILTER; - - event_id = tep_data_type(tep, record); - - filter_type = find_filter_type(filter, event_id); - if (!filter_type) - return TEP_ERRNO__FILTER_NOT_FOUND; - - ret = test_filter(filter_type->event, filter_type->filter, record, &err); - if (err) - return err; - - return ret ? TEP_ERRNO__FILTER_MATCH : TEP_ERRNO__FILTER_MISS; -} - -static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg) -{ - char *str = NULL; - char *left = NULL; - char *right = NULL; - char *op = NULL; - int left_val = -1; - int right_val = -1; - int val; - - switch (arg->op.type) { - case TEP_FILTER_OP_AND: - op = "&&"; - /* fall through */ - case TEP_FILTER_OP_OR: - if (!op) - op = "||"; - - left = arg_to_str(filter, arg->op.left); - right = arg_to_str(filter, arg->op.right); - if (!left || !right) - break; - - /* Try to consolidate boolean values */ - if (strcmp(left, "TRUE") == 0) - left_val = 1; - else if (strcmp(left, "FALSE") == 0) - left_val = 0; - - if (strcmp(right, "TRUE") == 0) - right_val = 1; - else if (strcmp(right, "FALSE") == 0) - right_val = 0; - - if (left_val >= 0) { - if ((arg->op.type == TEP_FILTER_OP_AND && !left_val) || - (arg->op.type == TEP_FILTER_OP_OR && left_val)) { - /* Just return left value */ - str = left; - left = NULL; - break; - } - if (right_val >= 0) { - /* just evaluate this. */ - val = 0; - switch (arg->op.type) { - case TEP_FILTER_OP_AND: - val = left_val && right_val; - break; - case TEP_FILTER_OP_OR: - val = left_val || right_val; - break; - default: - break; - } - if (asprintf(&str, val ? "TRUE" : "FALSE") < 0) - str = NULL; - break; - } - } - if (right_val >= 0) { - if ((arg->op.type == TEP_FILTER_OP_AND && !right_val) || - (arg->op.type == TEP_FILTER_OP_OR && right_val)) { - /* Just return right value */ - str = right; - right = NULL; - break; - } - /* The right value is meaningless */ - str = left; - left = NULL; - break; - } - - if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0) - str = NULL; - break; - - case TEP_FILTER_OP_NOT: - op = "!"; - right = arg_to_str(filter, arg->op.right); - if (!right) - break; - - /* See if we can consolidate */ - if (strcmp(right, "TRUE") == 0) - right_val = 1; - else if (strcmp(right, "FALSE") == 0) - right_val = 0; - if (right_val >= 0) { - /* just return the opposite */ - if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0) - str = NULL; - break; - } - if (asprintf(&str, "%s(%s)", op, right) < 0) - str = NULL; - break; - - default: - /* ?? */ - break; - } - free(left); - free(right); - return str; -} - -static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg) -{ - char *str = NULL; - - if (asprintf(&str, "%lld", arg->value.val) < 0) - str = NULL; - - return str; -} - -static char *field_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg) -{ - return strdup(arg->field.field->name); -} - -static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg) -{ - char *lstr; - char *rstr; - char *op; - char *str = NULL; - - lstr = arg_to_str(filter, arg->exp.left); - rstr = arg_to_str(filter, arg->exp.right); - if (!lstr || !rstr) - goto out; - - switch (arg->exp.type) { - case TEP_FILTER_EXP_ADD: - op = "+"; - break; - case TEP_FILTER_EXP_SUB: - op = "-"; - break; - case TEP_FILTER_EXP_MUL: - op = "*"; - break; - case TEP_FILTER_EXP_DIV: - op = "/"; - break; - case TEP_FILTER_EXP_MOD: - op = "%"; - break; - case TEP_FILTER_EXP_RSHIFT: - op = ">>"; - break; - case TEP_FILTER_EXP_LSHIFT: - op = "<<"; - break; - case TEP_FILTER_EXP_AND: - op = "&"; - break; - case TEP_FILTER_EXP_OR: - op = "|"; - break; - case TEP_FILTER_EXP_XOR: - op = "^"; - break; - default: - op = "[ERROR IN EXPRESSION TYPE]"; - break; - } - - if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0) - str = NULL; -out: - free(lstr); - free(rstr); - - return str; -} - -static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg) -{ - char *lstr; - char *rstr; - char *str = NULL; - char *op = NULL; - - lstr = arg_to_str(filter, arg->num.left); - rstr = arg_to_str(filter, arg->num.right); - if (!lstr || !rstr) - goto out; - - switch (arg->num.type) { - case TEP_FILTER_CMP_EQ: - op = "=="; - /* fall through */ - case TEP_FILTER_CMP_NE: - if (!op) - op = "!="; - /* fall through */ - case TEP_FILTER_CMP_GT: - if (!op) - op = ">"; - /* fall through */ - case TEP_FILTER_CMP_LT: - if (!op) - op = "<"; - /* fall through */ - case TEP_FILTER_CMP_GE: - if (!op) - op = ">="; - /* fall through */ - case TEP_FILTER_CMP_LE: - if (!op) - op = "<="; - - if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0) - str = NULL; - break; - - default: - /* ?? */ - break; - } - -out: - free(lstr); - free(rstr); - return str; -} - -static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg) -{ - char *str = NULL; - char *op = NULL; - - switch (arg->str.type) { - case TEP_FILTER_CMP_MATCH: - op = "=="; - /* fall through */ - case TEP_FILTER_CMP_NOT_MATCH: - if (!op) - op = "!="; - /* fall through */ - case TEP_FILTER_CMP_REGEX: - if (!op) - op = "=~"; - /* fall through */ - case TEP_FILTER_CMP_NOT_REGEX: - if (!op) - op = "!~"; - - if (asprintf(&str, "%s %s \"%s\"", - arg->str.field->name, op, arg->str.val) < 0) - str = NULL; - break; - - default: - /* ?? */ - break; - } - return str; -} - -static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg) -{ - char *str = NULL; - - switch (arg->type) { - case TEP_FILTER_ARG_BOOLEAN: - if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0) - str = NULL; - return str; - - case TEP_FILTER_ARG_OP: - return op_to_str(filter, arg); - - case TEP_FILTER_ARG_NUM: - return num_to_str(filter, arg); - - case TEP_FILTER_ARG_STR: - return str_to_str(filter, arg); - - case TEP_FILTER_ARG_VALUE: - return val_to_str(filter, arg); - - case TEP_FILTER_ARG_FIELD: - return field_to_str(filter, arg); - - case TEP_FILTER_ARG_EXP: - return exp_to_str(filter, arg); - - default: - /* ?? */ - return NULL; - } - -} - -/** - * tep_filter_make_string - return a string showing the filter - * @filter: filter struct with filter information - * @event_id: the event id to return the filter string with - * - * Returns a string that displays the filter contents. - * This string must be freed with free(str). - * NULL is returned if no filter is found or allocation failed. - */ -char * -tep_filter_make_string(struct tep_event_filter *filter, int event_id) -{ - struct tep_filter_type *filter_type; - - if (!filter->filters) - return NULL; - - filter_type = find_filter_type(filter, event_id); - - if (!filter_type) - return NULL; - - return arg_to_str(filter, filter_type->filter); -} - -/** - * tep_filter_compare - compare two filters and return if they are the same - * @filter1: Filter to compare with @filter2 - * @filter2: Filter to compare with @filter1 - * - * Returns: - * 1 if the two filters hold the same content. - * 0 if they do not. - */ -int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2) -{ - struct tep_filter_type *filter_type1; - struct tep_filter_type *filter_type2; - char *str1, *str2; - int result; - int i; - - /* Do the easy checks first */ - if (filter1->filters != filter2->filters) - return 0; - if (!filter1->filters && !filter2->filters) - return 1; - - /* - * Now take a look at each of the events to see if they have the same - * filters to them. - */ - for (i = 0; i < filter1->filters; i++) { - filter_type1 = &filter1->event_filters[i]; - filter_type2 = find_filter_type(filter2, filter_type1->event_id); - if (!filter_type2) - break; - if (filter_type1->filter->type != filter_type2->filter->type) - break; - /* The best way to compare complex filters is with strings */ - str1 = arg_to_str(filter1, filter_type1->filter); - str2 = arg_to_str(filter2, filter_type2->filter); - if (str1 && str2) - result = strcmp(str1, str2) != 0; - else - /* bail out if allocation fails */ - result = 1; - - free(str1); - free(str2); - if (result) - break; - } - - if (i < filter1->filters) - return 0; - return 1; -} - diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c deleted file mode 100644 index e99867111387..000000000000 --- a/tools/lib/traceevent/parse-utils.c +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <errno.h> - -#define __weak __attribute__((weak)) - -void __vwarning(const char *fmt, va_list ap) -{ - if (errno) - perror("libtraceevent"); - errno = 0; - - fprintf(stderr, " "); - vfprintf(stderr, fmt, ap); - - fprintf(stderr, "\n"); -} - -void __warning(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - __vwarning(fmt, ap); - va_end(ap); -} - -void __weak warning(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - __vwarning(fmt, ap); - va_end(ap); -} - -void __vpr_stat(const char *fmt, va_list ap) -{ - vprintf(fmt, ap); - printf("\n"); -} - -void __pr_stat(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - __vpr_stat(fmt, ap); - va_end(ap); -} - -void __weak vpr_stat(const char *fmt, va_list ap) -{ - __vpr_stat(fmt, ap); -} - -void __weak pr_stat(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - __vpr_stat(fmt, ap); - va_end(ap); -} diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build deleted file mode 100644 index dd4da823c38f..000000000000 --- a/tools/lib/traceevent/plugins/Build +++ /dev/null @@ -1,12 +0,0 @@ -plugin_jbd2-y += plugin_jbd2.o -plugin_hrtimer-y += plugin_hrtimer.o -plugin_kmem-y += plugin_kmem.o -plugin_kvm-y += plugin_kvm.o -plugin_mac80211-y += plugin_mac80211.o -plugin_sched_switch-y += plugin_sched_switch.o -plugin_function-y += plugin_function.o -plugin_futex-y += plugin_futex.o -plugin_xen-y += plugin_xen.o -plugin_scsi-y += plugin_scsi.o -plugin_cfg80211-y += plugin_cfg80211.o -plugin_tlb-y += plugin_tlb.o
\ No newline at end of file diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile deleted file mode 100644 index 47e802553250..000000000000 --- a/tools/lib/traceevent/plugins/Makefile +++ /dev/null @@ -1,225 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -#MAKEFLAGS += --no-print-directory - - -# Makefiles suck: This macro sets a default value of $(2) for the -# variable named by $(1), unless the variable has been set by -# environment or command line. This is necessary for CC and AR -# because make sets default values, so the simpler ?= approach -# won't work as expected. -define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) -$(call allow-override,NM,$(CROSS_COMPILE)nm) -$(call allow-override,PKG_CONFIG,pkg-config) - -EXT = -std=gnu99 -INSTALL = install - -# Use DESTDIR for installing into a different root directory. -# This is useful for building a package. The program will be -# installed in this directory as if it was the root directory. -# Then the build tool can move it later. -DESTDIR ?= -DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' - -LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) -ifeq ($(LP64), 1) - libdir_relative_tmp = lib64 -else - libdir_relative_tmp = lib -endif - -libdir_relative ?= $(libdir_relative_tmp) -prefix ?= /usr/local -libdir = $(prefix)/$(libdir_relative) - -set_plugin_dir := 1 - -# Set plugin_dir to preffered global plugin location -# If we install under $HOME directory we go under -# $(HOME)/.local/lib/traceevent/plugins -# -# We dont set PLUGIN_DIR in case we install under $HOME -# directory, because by default the code looks under: -# $(HOME)/.local/lib/traceevent/plugins by default. -# -ifeq ($(plugin_dir),) -ifeq ($(prefix),$(HOME)) -override plugin_dir = $(HOME)/.local/lib/traceevent/plugins -set_plugin_dir := 0 -else -override plugin_dir = $(libdir)/traceevent/plugins -endif -endif - -ifeq ($(set_plugin_dir),1) -PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" -PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' -endif - -include ../../../scripts/Makefile.include - -# copy a bit from Linux kbuild - -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -export prefix libdir src obj - -# Shell quotes -plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) - -CONFIG_INCLUDES = -CONFIG_LIBS = -CONFIG_FLAGS = - -OBJ = $@ -N = - -INCLUDES = -I. -I.. -I $(srctree)/tools/include $(CONFIG_INCLUDES) - -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - -# Append required CFLAGS -override CFLAGS += -fPIC -override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) -override CFLAGS += $(udis86-flags) -D_GNU_SOURCE - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - -# Disable command line variables (CFLAGS) override from top -# level Makefile (perf), otherwise build Makefile will get -# the same command line setup. -MAKEOVERRIDES= - -export srctree OUTPUT CC LD CFLAGS V - -build := -f $(srctree)/tools/build/Makefile.build dir=. obj - -DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list - -PLUGINS = plugin_jbd2.so -PLUGINS += plugin_hrtimer.so -PLUGINS += plugin_kmem.so -PLUGINS += plugin_kvm.so -PLUGINS += plugin_mac80211.so -PLUGINS += plugin_sched_switch.so -PLUGINS += plugin_function.so -PLUGINS += plugin_futex.so -PLUGINS += plugin_xen.so -PLUGINS += plugin_scsi.so -PLUGINS += plugin_cfg80211.so -PLUGINS += plugin_tlb.so - -PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) -PLUGINS_IN := $(PLUGINS:.so=-in.o) - -plugins: $(PLUGINS) $(DYNAMIC_LIST_FILE) - -__plugin_obj = $(notdir $@) - plugin_obj = $(__plugin_obj:-in.o=) - -$(PLUGINS_IN): force - $(Q)$(MAKE) $(build)=$(plugin_obj) - -$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS) - $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@) - -$(OUTPUT)%.so: $(OUTPUT)%-in.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^ - -define update_dir - (echo $1 > $@.tmp; \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - -tags: force - $(RM) tags - find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ - --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/' - -TAGS: force - $(RM) TAGS - find . -name '*.[ch]' | xargs etags \ - --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/' - -define do_install_mkdir - if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ - fi -endef - -define do_install - $(call do_install_mkdir,$2); \ - $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' -endef - -define do_install_plugins - for plugin in $1; do \ - $(call do_install,$$plugin,$(plugin_dir_SQ)); \ - done -endef - -define do_generate_dynamic_list_file - symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \ - xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\ - if [ "$$symbol_type" = "U W" ];then \ - (echo '{'; \ - $(NM) -u -D $1 | awk 'NF>1 {sub("@.*", "", $$2); print "\t"$$2";"}' | sort -u;\ - echo '};'; \ - ) > $2; \ - else \ - (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\ - fi -endef - -install: $(PLUGINS) - $(call QUIET_INSTALL, trace_plugins) \ - $(call do_install_plugins, $(PLUGINS)) - -clean: - $(call QUIET_CLEAN, trace_plugins) \ - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \ - $(RM) $(OUTPUT)libtraceevent-dynamic-list \ - $(RM) TRACEEVENT-CFLAGS tags TAGS; - -PHONY += force plugins -force: - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable so we can use it in if_changed and friends. -.PHONY: $(PHONY) diff --git a/tools/lib/traceevent/plugins/plugin_cfg80211.c b/tools/lib/traceevent/plugins/plugin_cfg80211.c deleted file mode 100644 index 3d43b56a6c98..000000000000 --- a/tools/lib/traceevent/plugins/plugin_cfg80211.c +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <string.h> -#include <inttypes.h> -#include <endian.h> -#include "event-parse.h" - -/* - * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5, - * Fedora6. - */ -#ifndef le16toh -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define le16toh(x) (x) -# else -# define le16toh(x) __bswap_16 (x) -# endif -#endif - - -static unsigned long long -process___le16_to_cpup(struct trace_seq *s, unsigned long long *args) -{ - uint16_t *val = (uint16_t *) (unsigned long) args[0]; - return val ? (long long) le16toh(*val) : 0; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_print_function(tep, - process___le16_to_cpup, - TEP_FUNC_ARG_INT, - "__le16_to_cpup", - TEP_FUNC_ARG_PTR, - TEP_FUNC_ARG_VOID); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_print_function(tep, process___le16_to_cpup, - "__le16_to_cpup"); -} diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c deleted file mode 100644 index 807b16e1bf0f..000000000000 --- a/tools/lib/traceevent/plugins/plugin_function.c +++ /dev/null @@ -1,282 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "event-parse.h" -#include "event-utils.h" -#include "trace-seq.h" - -static struct func_stack { - int size; - char **stack; -} *fstack; - -static int cpus = -1; - -#define STK_BLK 10 - -struct tep_plugin_option plugin_options[] = -{ - { - .name = "parent", - .plugin_alias = "ftrace", - .description = - "Print parent of functions for function events", - }, - { - .name = "indent", - .plugin_alias = "ftrace", - .description = - "Try to show function call indents, based on parents", - .set = 1, - }, - { - .name = "offset", - .plugin_alias = "ftrace", - .description = - "Show function names as well as their offsets", - .set = 0, - }, - { - .name = NULL, - } -}; - -static struct tep_plugin_option *ftrace_parent = &plugin_options[0]; -static struct tep_plugin_option *ftrace_indent = &plugin_options[1]; -static struct tep_plugin_option *ftrace_offset = &plugin_options[2]; - -static void add_child(struct func_stack *stack, const char *child, int pos) -{ - int i; - - if (!child) - return; - - if (pos < stack->size) - free(stack->stack[pos]); - else { - char **ptr; - - ptr = realloc(stack->stack, sizeof(char *) * - (stack->size + STK_BLK)); - if (!ptr) { - warning("could not allocate plugin memory\n"); - return; - } - - stack->stack = ptr; - - for (i = stack->size; i < stack->size + STK_BLK; i++) - stack->stack[i] = NULL; - stack->size += STK_BLK; - } - - stack->stack[pos] = strdup(child); -} - -static int add_and_get_index(const char *parent, const char *child, int cpu) -{ - int i; - - if (cpu < 0) - return 0; - - if (cpu > cpus) { - struct func_stack *ptr; - - ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1)); - if (!ptr) { - warning("could not allocate plugin memory\n"); - return 0; - } - - fstack = ptr; - - /* Account for holes in the cpu count */ - for (i = cpus + 1; i <= cpu; i++) - memset(&fstack[i], 0, sizeof(fstack[i])); - cpus = cpu; - } - - for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) { - if (strcmp(parent, fstack[cpu].stack[i]) == 0) { - add_child(&fstack[cpu], child, i+1); - return i; - } - } - - /* Not found */ - add_child(&fstack[cpu], parent, 0); - add_child(&fstack[cpu], child, 1); - return 0; -} - -static void show_function(struct trace_seq *s, struct tep_handle *tep, - const char *func, unsigned long long function) -{ - unsigned long long offset; - - trace_seq_printf(s, "%s", func); - if (ftrace_offset->set) { - offset = tep_find_function_address(tep, function); - trace_seq_printf(s, "+0x%x ", (int)(function - offset)); - } -} - -static int function_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - struct tep_handle *tep = event->tep; - unsigned long long function; - unsigned long long pfunction; - const char *func; - const char *parent; - int index = 0; - - if (tep_get_field_val(s, event, "ip", record, &function, 1)) - return trace_seq_putc(s, '!'); - - func = tep_find_function(tep, function); - - if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1)) - return trace_seq_putc(s, '!'); - - parent = tep_find_function(tep, pfunction); - - if (parent && ftrace_indent->set) - index = add_and_get_index(parent, func, record->cpu); - - trace_seq_printf(s, "%*s", index*3, ""); - - if (func) - show_function(s, tep, func, function); - else - trace_seq_printf(s, "0x%llx", function); - - if (ftrace_parent->set) { - trace_seq_printf(s, " <-- "); - if (parent) - show_function(s, tep, parent, pfunction); - else - trace_seq_printf(s, "0x%llx", pfunction); - } - - return 0; -} - -static int -trace_stack_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - struct tep_format_field *field; - unsigned long long addr; - const char *func; - int long_size; - void *data = record->data; - - field = tep_find_any_field(event, "caller"); - if (!field) { - trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller"); - return 0; - } - - trace_seq_puts(s, "<stack trace >\n"); - - long_size = tep_get_long_size(event->tep); - - for (data += field->offset; data < record->data + record->size; - data += long_size) { - addr = tep_read_number(event->tep, data, long_size); - - if ((long_size == 8 && addr == (unsigned long long)-1) || - ((int)addr == -1)) - break; - - func = tep_find_function(event->tep, addr); - if (func) - trace_seq_printf(s, "=> %s (%llx)\n", func, addr); - else - trace_seq_printf(s, "=> %llx\n", addr); - } - - return 0; -} - -static int -trace_raw_data_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - struct tep_format_field *field; - unsigned long long id; - int long_size; - void *data = record->data; - - if (tep_get_field_val(s, event, "id", record, &id, 1)) - return trace_seq_putc(s, '!'); - - trace_seq_printf(s, "# %llx", id); - - field = tep_find_any_field(event, "buf"); - if (!field) { - trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf"); - return 0; - } - - long_size = tep_get_long_size(event->tep); - - for (data += field->offset; data < record->data + record->size; - data += long_size) { - int size = sizeof(long); - int left = (record->data + record->size) - data; - int i; - - if (size > left) - size = left; - - for (i = 0; i < size; i++) - trace_seq_printf(s, " %02x", *(unsigned char *)(data + i)); - } - - return 0; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_event_handler(tep, -1, "ftrace", "function", - function_handler, NULL); - - tep_register_event_handler(tep, -1, "ftrace", "kernel_stack", - trace_stack_handler, NULL); - - tep_register_event_handler(tep, -1, "ftrace", "raw_data", - trace_raw_data_handler, NULL); - - tep_plugin_add_options("ftrace", plugin_options); - - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - int i, x; - - tep_unregister_event_handler(tep, -1, "ftrace", "function", - function_handler, NULL); - - for (i = 0; i <= cpus; i++) { - for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++) - free(fstack[i].stack[x]); - free(fstack[i].stack); - } - - tep_plugin_remove_options(plugin_options); - - free(fstack); - fstack = NULL; - cpus = -1; -} diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c deleted file mode 100644 index eb7c9f8a850a..000000000000 --- a/tools/lib/traceevent/plugins/plugin_futex.c +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2017 National Instruments Corp. - * - * Author: Julia Cartwright <julia@ni.com> - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <linux/futex.h> - -#include "event-parse.h" - -#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0])) - -struct futex_args { - unsigned long long uaddr; - unsigned long long op; - unsigned long long val; - unsigned long long utime; /* or val2 */ - unsigned long long uaddr2; - unsigned long long val3; -}; - -struct futex_op { - const char *name; - const char *fmt_val; - const char *fmt_utime; - const char *fmt_uaddr2; - const char *fmt_val3; -}; - -static const struct futex_op futex_op_tbl[] = { - { "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx", NULL, NULL }, - { "FUTEX_WAKE", " val=%llu", NULL, NULL, NULL }, - { "FUTEX_FD", " val=%llu", NULL, NULL, NULL }, - { "FUTEX_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", NULL }, - { "FUTEX_CMP_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" }, - { "FUTEX_WAKE_OP", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" }, - { "FUTEX_LOCK_PI", NULL, " utime=0x%08llx", NULL, NULL }, - { "FUTEX_UNLOCK_PI", NULL, NULL, NULL, NULL }, - { "FUTEX_TRYLOCK_PI", NULL, NULL, NULL, NULL }, - { "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx", NULL, " val3=0x%08llx" }, - { "FUTEX_WAKE_BITSET", " val=%llu", NULL, NULL, " val3=0x%08llx" }, - { "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" }, - { "FUTEX_CMP_REQUEUE_PI", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" }, -}; - - -static void futex_print(struct trace_seq *s, const struct futex_args *args, - const struct futex_op *fop) -{ - trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr); - - if (fop->fmt_val) - trace_seq_printf(s, fop->fmt_val, args->val); - - if (fop->fmt_utime) - trace_seq_printf(s,fop->fmt_utime, args->utime); - - if (fop->fmt_uaddr2) - trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2); - - if (fop->fmt_val3) - trace_seq_printf(s, fop->fmt_val3, args->val3); -} - -static int futex_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - const struct futex_op *fop; - struct futex_args args; - unsigned long long cmd; - - if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1)) - return 1; - - if (tep_get_field_val(s, event, "op", record, &args.op, 1)) - return 1; - - if (tep_get_field_val(s, event, "val", record, &args.val, 1)) - return 1; - - if (tep_get_field_val(s, event, "utime", record, &args.utime, 1)) - return 1; - - if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1)) - return 1; - - if (tep_get_field_val(s, event, "val3", record, &args.val3, 1)) - return 1; - - cmd = args.op & FUTEX_CMD_MASK; - if (cmd >= ARRAY_SIZE(futex_op_tbl)) - return 1; - - fop = &futex_op_tbl[cmd]; - - trace_seq_printf(s, "op=%s", fop->name); - - if (args.op & FUTEX_PRIVATE_FLAG) - trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG"); - - if (args.op & FUTEX_CLOCK_REALTIME) - trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME"); - - futex_print(s, &args, fop); - return 0; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex", - futex_handler, NULL); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex", - futex_handler, NULL); -} diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c deleted file mode 100644 index d98466788f14..000000000000 --- a/tools/lib/traceevent/plugins/plugin_hrtimer.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net> - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "event-parse.h" -#include "trace-seq.h" - -static int timer_expire_handler(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, void *context) -{ - trace_seq_printf(s, "hrtimer="); - - if (tep_print_num_field(s, "0x%llx", event, "timer", - record, 0) == -1) - tep_print_num_field(s, "0x%llx", event, "hrtimer", - record, 1); - - trace_seq_printf(s, " now="); - - tep_print_num_field(s, "%llu", event, "now", record, 1); - - tep_print_func_field(s, " function=%s", event, "function", - record, 0); - return 0; -} - -static int timer_start_handler(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, void *context) -{ - trace_seq_printf(s, "hrtimer="); - - if (tep_print_num_field(s, "0x%llx", event, "timer", - record, 0) == -1) - tep_print_num_field(s, "0x%llx", event, "hrtimer", - record, 1); - - tep_print_func_field(s, " function=%s", event, "function", - record, 0); - - trace_seq_printf(s, " expires="); - tep_print_num_field(s, "%llu", event, "expires", record, 1); - - trace_seq_printf(s, " softexpires="); - tep_print_num_field(s, "%llu", event, "softexpires", record, 1); - return 0; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_event_handler(tep, -1, - "timer", "hrtimer_expire_entry", - timer_expire_handler, NULL); - - tep_register_event_handler(tep, -1, "timer", "hrtimer_start", - timer_start_handler, NULL); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_event_handler(tep, -1, - "timer", "hrtimer_expire_entry", - timer_expire_handler, NULL); - - tep_unregister_event_handler(tep, -1, "timer", "hrtimer_start", - timer_start_handler, NULL); -} diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c deleted file mode 100644 index 69111a68d3cf..000000000000 --- a/tools/lib/traceevent/plugins/plugin_jbd2.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "event-parse.h" -#include "trace-seq.h" - -#define MINORBITS 20 -#define MINORMASK ((1U << MINORBITS) - 1) - -#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) -#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) - -static unsigned long long -process_jbd2_dev_to_name(struct trace_seq *s, unsigned long long *args) -{ - unsigned int dev = args[0]; - - trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev)); - return 0; -} - -static unsigned long long -process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args) -{ - unsigned long long jiffies = args[0]; - - trace_seq_printf(s, "%lld", jiffies); - return jiffies; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_print_function(tep, - process_jbd2_dev_to_name, - TEP_FUNC_ARG_STRING, - "jbd2_dev_to_name", - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_VOID); - - tep_register_print_function(tep, - process_jiffies_to_msecs, - TEP_FUNC_ARG_LONG, - "jiffies_to_msecs", - TEP_FUNC_ARG_LONG, - TEP_FUNC_ARG_VOID); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_print_function(tep, process_jbd2_dev_to_name, - "jbd2_dev_to_name"); - - tep_unregister_print_function(tep, process_jiffies_to_msecs, - "jiffies_to_msecs"); -} diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c deleted file mode 100644 index 4b4f7f9616e3..000000000000 --- a/tools/lib/traceevent/plugins/plugin_kmem.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "event-parse.h" -#include "trace-seq.h" - -static int call_site_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - struct tep_format_field *field; - unsigned long long val, addr; - void *data = record->data; - const char *func; - - field = tep_find_field(event, "call_site"); - if (!field) - return 1; - - if (tep_read_number_field(field, data, &val)) - return 1; - - func = tep_find_function(event->tep, val); - if (!func) - return 1; - - addr = tep_find_function_address(event->tep, val); - - trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr)); - return 1; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_event_handler(tep, -1, "kmem", "kfree", - call_site_handler, NULL); - - tep_register_event_handler(tep, -1, "kmem", "kmalloc", - call_site_handler, NULL); - - tep_register_event_handler(tep, -1, "kmem", "kmalloc_node", - call_site_handler, NULL); - - tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc", - call_site_handler, NULL); - - tep_register_event_handler(tep, -1, "kmem", - "kmem_cache_alloc_node", - call_site_handler, NULL); - - tep_register_event_handler(tep, -1, "kmem", "kmem_cache_free", - call_site_handler, NULL); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_event_handler(tep, -1, "kmem", "kfree", - call_site_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kmem", "kmalloc", - call_site_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kmem", "kmalloc_node", - call_site_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc", - call_site_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kmem", - "kmem_cache_alloc_node", - call_site_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_free", - call_site_handler, NULL); -} diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c deleted file mode 100644 index 9ce7b4b68e3f..000000000000 --- a/tools/lib/traceevent/plugins/plugin_kvm.c +++ /dev/null @@ -1,527 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdint.h> - -#include "event-parse.h" -#include "trace-seq.h" - -#ifdef HAVE_UDIS86 - -#include <udis86.h> - -static ud_t ud; - -static void init_disassembler(void) -{ - ud_init(&ud); - ud_set_syntax(&ud, UD_SYN_ATT); -} - -static const char *disassemble(unsigned char *insn, int len, uint64_t rip, - int cr0_pe, int eflags_vm, - int cs_d, int cs_l) -{ - int mode; - - if (!cr0_pe) - mode = 16; - else if (eflags_vm) - mode = 16; - else if (cs_l) - mode = 64; - else if (cs_d) - mode = 32; - else - mode = 16; - - ud_set_pc(&ud, rip); - ud_set_mode(&ud, mode); - ud_set_input_buffer(&ud, insn, len); - ud_disassemble(&ud); - return ud_insn_asm(&ud); -} - -#else - -static void init_disassembler(void) -{ -} - -static const char *disassemble(unsigned char *insn, int len, uint64_t rip, - int cr0_pe, int eflags_vm, - int cs_d, int cs_l) -{ - static char out[15*3+1]; - int i; - - for (i = 0; i < len; ++i) - sprintf(out + i * 3, "%02x ", insn[i]); - out[len*3-1] = '\0'; - return out; -} - -#endif - - -#define VMX_EXIT_REASONS \ - _ER(EXCEPTION_NMI, 0) \ - _ER(EXTERNAL_INTERRUPT, 1) \ - _ER(TRIPLE_FAULT, 2) \ - _ER(PENDING_INTERRUPT, 7) \ - _ER(NMI_WINDOW, 8) \ - _ER(TASK_SWITCH, 9) \ - _ER(CPUID, 10) \ - _ER(HLT, 12) \ - _ER(INVD, 13) \ - _ER(INVLPG, 14) \ - _ER(RDPMC, 15) \ - _ER(RDTSC, 16) \ - _ER(VMCALL, 18) \ - _ER(VMCLEAR, 19) \ - _ER(VMLAUNCH, 20) \ - _ER(VMPTRLD, 21) \ - _ER(VMPTRST, 22) \ - _ER(VMREAD, 23) \ - _ER(VMRESUME, 24) \ - _ER(VMWRITE, 25) \ - _ER(VMOFF, 26) \ - _ER(VMON, 27) \ - _ER(CR_ACCESS, 28) \ - _ER(DR_ACCESS, 29) \ - _ER(IO_INSTRUCTION, 30) \ - _ER(MSR_READ, 31) \ - _ER(MSR_WRITE, 32) \ - _ER(MWAIT_INSTRUCTION, 36) \ - _ER(MONITOR_INSTRUCTION, 39) \ - _ER(PAUSE_INSTRUCTION, 40) \ - _ER(MCE_DURING_VMENTRY, 41) \ - _ER(TPR_BELOW_THRESHOLD, 43) \ - _ER(APIC_ACCESS, 44) \ - _ER(EOI_INDUCED, 45) \ - _ER(EPT_VIOLATION, 48) \ - _ER(EPT_MISCONFIG, 49) \ - _ER(INVEPT, 50) \ - _ER(PREEMPTION_TIMER, 52) \ - _ER(WBINVD, 54) \ - _ER(XSETBV, 55) \ - _ER(APIC_WRITE, 56) \ - _ER(INVPCID, 58) \ - _ER(PML_FULL, 62) \ - _ER(XSAVES, 63) \ - _ER(XRSTORS, 64) - -#define SVM_EXIT_REASONS \ - _ER(EXIT_READ_CR0, 0x000) \ - _ER(EXIT_READ_CR3, 0x003) \ - _ER(EXIT_READ_CR4, 0x004) \ - _ER(EXIT_READ_CR8, 0x008) \ - _ER(EXIT_WRITE_CR0, 0x010) \ - _ER(EXIT_WRITE_CR3, 0x013) \ - _ER(EXIT_WRITE_CR4, 0x014) \ - _ER(EXIT_WRITE_CR8, 0x018) \ - _ER(EXIT_READ_DR0, 0x020) \ - _ER(EXIT_READ_DR1, 0x021) \ - _ER(EXIT_READ_DR2, 0x022) \ - _ER(EXIT_READ_DR3, 0x023) \ - _ER(EXIT_READ_DR4, 0x024) \ - _ER(EXIT_READ_DR5, 0x025) \ - _ER(EXIT_READ_DR6, 0x026) \ - _ER(EXIT_READ_DR7, 0x027) \ - _ER(EXIT_WRITE_DR0, 0x030) \ - _ER(EXIT_WRITE_DR1, 0x031) \ - _ER(EXIT_WRITE_DR2, 0x032) \ - _ER(EXIT_WRITE_DR3, 0x033) \ - _ER(EXIT_WRITE_DR4, 0x034) \ - _ER(EXIT_WRITE_DR5, 0x035) \ - _ER(EXIT_WRITE_DR6, 0x036) \ - _ER(EXIT_WRITE_DR7, 0x037) \ - _ER(EXIT_EXCP_DE, 0x040) \ - _ER(EXIT_EXCP_DB, 0x041) \ - _ER(EXIT_EXCP_BP, 0x043) \ - _ER(EXIT_EXCP_OF, 0x044) \ - _ER(EXIT_EXCP_BR, 0x045) \ - _ER(EXIT_EXCP_UD, 0x046) \ - _ER(EXIT_EXCP_NM, 0x047) \ - _ER(EXIT_EXCP_DF, 0x048) \ - _ER(EXIT_EXCP_TS, 0x04a) \ - _ER(EXIT_EXCP_NP, 0x04b) \ - _ER(EXIT_EXCP_SS, 0x04c) \ - _ER(EXIT_EXCP_GP, 0x04d) \ - _ER(EXIT_EXCP_PF, 0x04e) \ - _ER(EXIT_EXCP_MF, 0x050) \ - _ER(EXIT_EXCP_AC, 0x051) \ - _ER(EXIT_EXCP_MC, 0x052) \ - _ER(EXIT_EXCP_XF, 0x053) \ - _ER(EXIT_INTR, 0x060) \ - _ER(EXIT_NMI, 0x061) \ - _ER(EXIT_SMI, 0x062) \ - _ER(EXIT_INIT, 0x063) \ - _ER(EXIT_VINTR, 0x064) \ - _ER(EXIT_CR0_SEL_WRITE, 0x065) \ - _ER(EXIT_IDTR_READ, 0x066) \ - _ER(EXIT_GDTR_READ, 0x067) \ - _ER(EXIT_LDTR_READ, 0x068) \ - _ER(EXIT_TR_READ, 0x069) \ - _ER(EXIT_IDTR_WRITE, 0x06a) \ - _ER(EXIT_GDTR_WRITE, 0x06b) \ - _ER(EXIT_LDTR_WRITE, 0x06c) \ - _ER(EXIT_TR_WRITE, 0x06d) \ - _ER(EXIT_RDTSC, 0x06e) \ - _ER(EXIT_RDPMC, 0x06f) \ - _ER(EXIT_PUSHF, 0x070) \ - _ER(EXIT_POPF, 0x071) \ - _ER(EXIT_CPUID, 0x072) \ - _ER(EXIT_RSM, 0x073) \ - _ER(EXIT_IRET, 0x074) \ - _ER(EXIT_SWINT, 0x075) \ - _ER(EXIT_INVD, 0x076) \ - _ER(EXIT_PAUSE, 0x077) \ - _ER(EXIT_HLT, 0x078) \ - _ER(EXIT_INVLPG, 0x079) \ - _ER(EXIT_INVLPGA, 0x07a) \ - _ER(EXIT_IOIO, 0x07b) \ - _ER(EXIT_MSR, 0x07c) \ - _ER(EXIT_TASK_SWITCH, 0x07d) \ - _ER(EXIT_FERR_FREEZE, 0x07e) \ - _ER(EXIT_SHUTDOWN, 0x07f) \ - _ER(EXIT_VMRUN, 0x080) \ - _ER(EXIT_VMMCALL, 0x081) \ - _ER(EXIT_VMLOAD, 0x082) \ - _ER(EXIT_VMSAVE, 0x083) \ - _ER(EXIT_STGI, 0x084) \ - _ER(EXIT_CLGI, 0x085) \ - _ER(EXIT_SKINIT, 0x086) \ - _ER(EXIT_RDTSCP, 0x087) \ - _ER(EXIT_ICEBP, 0x088) \ - _ER(EXIT_WBINVD, 0x089) \ - _ER(EXIT_MONITOR, 0x08a) \ - _ER(EXIT_MWAIT, 0x08b) \ - _ER(EXIT_MWAIT_COND, 0x08c) \ - _ER(EXIT_XSETBV, 0x08d) \ - _ER(EXIT_NPF, 0x400) \ - _ER(EXIT_AVIC_INCOMPLETE_IPI, 0x401) \ - _ER(EXIT_AVIC_UNACCELERATED_ACCESS, 0x402) \ - _ER(EXIT_ERR, -1) - -#define _ER(reason, val) { #reason, val }, -struct str_values { - const char *str; - int val; -}; - -static struct str_values vmx_exit_reasons[] = { - VMX_EXIT_REASONS - { NULL, -1} -}; - -static struct str_values svm_exit_reasons[] = { - SVM_EXIT_REASONS - { NULL, -1} -}; - -static struct isa_exit_reasons { - unsigned isa; - struct str_values *strings; -} isa_exit_reasons[] = { - { .isa = 1, .strings = vmx_exit_reasons }, - { .isa = 2, .strings = svm_exit_reasons }, - { } -}; - -static const char *find_exit_reason(unsigned isa, int val) -{ - struct str_values *strings = NULL; - int i; - - for (i = 0; isa_exit_reasons[i].strings; ++i) - if (isa_exit_reasons[i].isa == isa) { - strings = isa_exit_reasons[i].strings; - break; - } - if (!strings) - return "UNKNOWN-ISA"; - for (i = 0; strings[i].str; i++) - if (strings[i].val == val) - break; - - return strings[i].str; -} - -static int print_exit_reason(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, const char *field) -{ - unsigned long long isa; - unsigned long long val; - const char *reason; - - if (tep_get_field_val(s, event, field, record, &val, 1) < 0) - return -1; - - if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0) - isa = 1; - - reason = find_exit_reason(isa, val); - if (reason) - trace_seq_printf(s, "reason %s", reason); - else - trace_seq_printf(s, "reason UNKNOWN (%llu)", val); - return 0; -} - -static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - unsigned long long info1 = 0, info2 = 0; - - if (print_exit_reason(s, record, event, "exit_reason") < 0) - return -1; - - tep_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1); - - if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0 - && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0) - trace_seq_printf(s, " info %llx %llx", info1, info2); - - return 0; -} - -#define KVM_EMUL_INSN_F_CR0_PE (1 << 0) -#define KVM_EMUL_INSN_F_EFL_VM (1 << 1) -#define KVM_EMUL_INSN_F_CS_D (1 << 2) -#define KVM_EMUL_INSN_F_CS_L (1 << 3) - -static int kvm_emulate_insn_handler(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, void *context) -{ - unsigned long long rip, csbase, len, flags, failed; - int llen; - uint8_t *insn; - const char *disasm; - - if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0) - return -1; - - if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0) - return -1; - - if (tep_get_field_val(s, event, "len", record, &len, 1) < 0) - return -1; - - if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0) - return -1; - - if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0) - return -1; - - insn = tep_get_field_raw(s, event, "insn", record, &llen, 1); - if (!insn) - return -1; - - disasm = disassemble(insn, len, rip, - flags & KVM_EMUL_INSN_F_CR0_PE, - flags & KVM_EMUL_INSN_F_EFL_VM, - flags & KVM_EMUL_INSN_F_CS_D, - flags & KVM_EMUL_INSN_F_CS_L); - - trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm, - failed ? " FAIL" : ""); - return 0; -} - - -static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - if (print_exit_reason(s, record, event, "exit_code") < 0) - return -1; - - tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1); - tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1); - tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1); - tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1); - - return 0; -} - -static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - tep_print_num_field(s, "rip %llx ", event, "rip", record, 1); - - return kvm_nested_vmexit_inject_handler(s, record, event, context); -} - -union kvm_mmu_page_role { - unsigned word; - struct { - unsigned level:4; - unsigned cr4_pae:1; - unsigned quadrant:2; - unsigned direct:1; - unsigned access:3; - unsigned invalid:1; - unsigned efer_nx:1; - unsigned cr0_wp:1; - unsigned smep_and_not_wp:1; - unsigned smap_and_not_wp:1; - unsigned pad_for_nice_hex_output:8; - unsigned smm:8; - }; -}; - -static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - unsigned long long val; - static const char *access_str[] = { - "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" - }; - union kvm_mmu_page_role role; - - if (tep_get_field_val(s, event, "role", record, &val, 1) < 0) - return -1; - - role.word = (int)val; - - /* - * We can only use the structure if file is of the same - * endianness. - */ - if (tep_is_file_bigendian(event->tep) == - tep_is_local_bigendian(event->tep)) { - - trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s", - role.level, - role.quadrant, - role.direct ? " direct" : "", - access_str[role.access], - role.invalid ? " invalid" : "", - role.cr4_pae ? "" : "!", - role.efer_nx ? "" : "!", - role.cr0_wp ? "" : "!", - role.smep_and_not_wp ? " smep" : "", - role.smap_and_not_wp ? " smap" : "", - role.smm ? " smm" : ""); - } else - trace_seq_printf(s, "WORD: %08x", role.word); - - tep_print_num_field(s, " root %u ", event, - "root_count", record, 1); - - if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0) - return -1; - - trace_seq_printf(s, "%s%c", val ? "unsync" : "sync", 0); - return 0; -} - -static int kvm_mmu_get_page_handler(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, void *context) -{ - unsigned long long val; - - if (tep_get_field_val(s, event, "created", record, &val, 1) < 0) - return -1; - - trace_seq_printf(s, "%s ", val ? "new" : "existing"); - - if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0) - return -1; - - trace_seq_printf(s, "sp gfn %llx ", val); - return kvm_mmu_print_role(s, record, event, context); -} - -#define PT_WRITABLE_SHIFT 1 -#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT) - -static unsigned long long -process_is_writable_pte(struct trace_seq *s, unsigned long long *args) -{ - unsigned long pte = args[0]; - return pte & PT_WRITABLE_MASK; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - init_disassembler(); - - tep_register_event_handler(tep, -1, "kvm", "kvm_exit", - kvm_exit_handler, NULL); - - tep_register_event_handler(tep, -1, "kvm", "kvm_emulate_insn", - kvm_emulate_insn_handler, NULL); - - tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit", - kvm_nested_vmexit_handler, NULL); - - tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject", - kvm_nested_vmexit_inject_handler, NULL); - - tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page", - kvm_mmu_get_page_handler, NULL); - - tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page", - kvm_mmu_print_role, NULL); - - tep_register_event_handler(tep, -1, - "kvmmmu", "kvm_mmu_unsync_page", - kvm_mmu_print_role, NULL); - - tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page", - kvm_mmu_print_role, NULL); - - tep_register_event_handler(tep, -1, "kvmmmu", - "kvm_mmu_prepare_zap_page", kvm_mmu_print_role, - NULL); - - tep_register_print_function(tep, - process_is_writable_pte, - TEP_FUNC_ARG_INT, - "is_writable_pte", - TEP_FUNC_ARG_LONG, - TEP_FUNC_ARG_VOID); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_event_handler(tep, -1, "kvm", "kvm_exit", - kvm_exit_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kvm", "kvm_emulate_insn", - kvm_emulate_insn_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit", - kvm_nested_vmexit_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject", - kvm_nested_vmexit_inject_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page", - kvm_mmu_get_page_handler, NULL); - - tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page", - kvm_mmu_print_role, NULL); - - tep_unregister_event_handler(tep, -1, - "kvmmmu", "kvm_mmu_unsync_page", - kvm_mmu_print_role, NULL); - - tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page", - kvm_mmu_print_role, NULL); - - tep_unregister_event_handler(tep, -1, "kvmmmu", - "kvm_mmu_prepare_zap_page", kvm_mmu_print_role, - NULL); - - tep_unregister_print_function(tep, process_is_writable_pte, - "is_writable_pte"); -} diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c deleted file mode 100644 index f48071e3cfb8..000000000000 --- a/tools/lib/traceevent/plugins/plugin_mac80211.c +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net> - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "event-parse.h" -#include "trace-seq.h" - -#define INDENT 65 - -static void print_string(struct trace_seq *s, struct tep_event *event, - const char *name, const void *data) -{ - struct tep_format_field *f = tep_find_field(event, name); - int offset; - int length; - - if (!f) { - trace_seq_printf(s, "NOTFOUND:%s", name); - return; - } - - offset = f->offset; - length = f->size; - - if (!strncmp(f->type, "__data_loc", 10)) { - unsigned long long v; - if (tep_read_number_field(f, data, &v)) { - trace_seq_printf(s, "invalid_data_loc"); - return; - } - offset = v & 0xffff; - length = v >> 16; - } - - trace_seq_printf(s, "%.*s", length, (char *)data + offset); -} - -#define SF(fn) tep_print_num_field(s, fn ":%d", event, fn, record, 0) -#define SFX(fn) tep_print_num_field(s, fn ":%#x", event, fn, record, 0) -#define SP() trace_seq_putc(s, ' ') - -static int drv_bss_info_changed(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, void *context) -{ - void *data = record->data; - - print_string(s, event, "wiphy_name", data); - trace_seq_printf(s, " vif:"); - print_string(s, event, "vif_name", data); - tep_print_num_field(s, "(%d)", event, "vif_type", record, 1); - - trace_seq_printf(s, "\n%*s", INDENT, ""); - SF("assoc"); SP(); - SF("aid"); SP(); - SF("cts"); SP(); - SF("shortpre"); SP(); - SF("shortslot"); SP(); - SF("dtimper"); SP(); - trace_seq_printf(s, "\n%*s", INDENT, ""); - SF("bcnint"); SP(); - SFX("assoc_cap"); SP(); - SFX("basic_rates"); SP(); - SF("enable_beacon"); - trace_seq_printf(s, "\n%*s", INDENT, ""); - SF("ht_operation_mode"); - - return 0; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_event_handler(tep, -1, "mac80211", - "drv_bss_info_changed", - drv_bss_info_changed, NULL); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_event_handler(tep, -1, "mac80211", - "drv_bss_info_changed", - drv_bss_info_changed, NULL); -} diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c deleted file mode 100644 index e12fa103820a..000000000000 --- a/tools/lib/traceevent/plugins/plugin_sched_switch.c +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "event-parse.h" -#include "trace-seq.h" - -static void write_state(struct trace_seq *s, int val) -{ - const char states[] = "SDTtZXxW"; - int found = 0; - int i; - - for (i = 0; i < (sizeof(states) - 1); i++) { - if (!(val & (1 << i))) - continue; - - if (found) - trace_seq_putc(s, '|'); - - found = 1; - trace_seq_putc(s, states[i]); - } - - if (!found) - trace_seq_putc(s, 'R'); -} - -static void write_and_save_comm(struct tep_format_field *field, - struct tep_record *record, - struct trace_seq *s, int pid) -{ - const char *comm; - int len; - - comm = (char *)(record->data + field->offset); - len = s->len; - trace_seq_printf(s, "%.*s", - field->size, comm); - - /* make sure the comm has a \0 at the end. */ - trace_seq_terminate(s); - comm = &s->buffer[len]; - - /* Help out the comm to ids. This will handle dups */ - tep_register_comm(field->event->tep, comm, pid); -} - -static int sched_wakeup_handler(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, void *context) -{ - struct tep_format_field *field; - unsigned long long val; - - if (tep_get_field_val(s, event, "pid", record, &val, 1)) - return trace_seq_putc(s, '!'); - - field = tep_find_any_field(event, "comm"); - if (field) { - write_and_save_comm(field, record, s, val); - trace_seq_putc(s, ':'); - } - trace_seq_printf(s, "%lld", val); - - if (tep_get_field_val(s, event, "prio", record, &val, 0) == 0) - trace_seq_printf(s, " [%lld]", val); - - if (tep_get_field_val(s, event, "success", record, &val, 1) == 0) - trace_seq_printf(s, " success=%lld", val); - - if (tep_get_field_val(s, event, "target_cpu", record, &val, 0) == 0) - trace_seq_printf(s, " CPU:%03llu", val); - - return 0; -} - -static int sched_switch_handler(struct trace_seq *s, - struct tep_record *record, - struct tep_event *event, void *context) -{ - struct tep_format_field *field; - unsigned long long val; - - if (tep_get_field_val(s, event, "prev_pid", record, &val, 1)) - return trace_seq_putc(s, '!'); - - field = tep_find_any_field(event, "prev_comm"); - if (field) { - write_and_save_comm(field, record, s, val); - trace_seq_putc(s, ':'); - } - trace_seq_printf(s, "%lld ", val); - - if (tep_get_field_val(s, event, "prev_prio", record, &val, 0) == 0) - trace_seq_printf(s, "[%d] ", (int) val); - - if (tep_get_field_val(s, event, "prev_state", record, &val, 0) == 0) - write_state(s, val); - - trace_seq_puts(s, " ==> "); - - if (tep_get_field_val(s, event, "next_pid", record, &val, 1)) - return trace_seq_putc(s, '!'); - - field = tep_find_any_field(event, "next_comm"); - if (field) { - write_and_save_comm(field, record, s, val); - trace_seq_putc(s, ':'); - } - trace_seq_printf(s, "%lld", val); - - if (tep_get_field_val(s, event, "next_prio", record, &val, 0) == 0) - trace_seq_printf(s, " [%d]", (int) val); - - return 0; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_event_handler(tep, -1, "sched", "sched_switch", - sched_switch_handler, NULL); - - tep_register_event_handler(tep, -1, "sched", "sched_wakeup", - sched_wakeup_handler, NULL); - - tep_register_event_handler(tep, -1, "sched", "sched_wakeup_new", - sched_wakeup_handler, NULL); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_event_handler(tep, -1, "sched", "sched_switch", - sched_switch_handler, NULL); - - tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup", - sched_wakeup_handler, NULL); - - tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup_new", - sched_wakeup_handler, NULL); -} diff --git a/tools/lib/traceevent/plugins/plugin_scsi.c b/tools/lib/traceevent/plugins/plugin_scsi.c deleted file mode 100644 index 5d0387a4b65a..000000000000 --- a/tools/lib/traceevent/plugins/plugin_scsi.c +++ /dev/null @@ -1,434 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <string.h> -#include <inttypes.h> -#include "event-parse.h" -#include "trace-seq.h" - -typedef unsigned long sector_t; -typedef uint64_t u64; -typedef unsigned int u32; - -/* - * SCSI opcodes - */ -#define TEST_UNIT_READY 0x00 -#define REZERO_UNIT 0x01 -#define REQUEST_SENSE 0x03 -#define FORMAT_UNIT 0x04 -#define READ_BLOCK_LIMITS 0x05 -#define REASSIGN_BLOCKS 0x07 -#define INITIALIZE_ELEMENT_STATUS 0x07 -#define READ_6 0x08 -#define WRITE_6 0x0a -#define SEEK_6 0x0b -#define READ_REVERSE 0x0f -#define WRITE_FILEMARKS 0x10 -#define SPACE 0x11 -#define INQUIRY 0x12 -#define RECOVER_BUFFERED_DATA 0x14 -#define MODE_SELECT 0x15 -#define RESERVE 0x16 -#define RELEASE 0x17 -#define COPY 0x18 -#define ERASE 0x19 -#define MODE_SENSE 0x1a -#define START_STOP 0x1b -#define RECEIVE_DIAGNOSTIC 0x1c -#define SEND_DIAGNOSTIC 0x1d -#define ALLOW_MEDIUM_REMOVAL 0x1e - -#define READ_FORMAT_CAPACITIES 0x23 -#define SET_WINDOW 0x24 -#define READ_CAPACITY 0x25 -#define READ_10 0x28 -#define WRITE_10 0x2a -#define SEEK_10 0x2b -#define POSITION_TO_ELEMENT 0x2b -#define WRITE_VERIFY 0x2e -#define VERIFY 0x2f -#define SEARCH_HIGH 0x30 -#define SEARCH_EQUAL 0x31 -#define SEARCH_LOW 0x32 -#define SET_LIMITS 0x33 -#define PRE_FETCH 0x34 -#define READ_POSITION 0x34 -#define SYNCHRONIZE_CACHE 0x35 -#define LOCK_UNLOCK_CACHE 0x36 -#define READ_DEFECT_DATA 0x37 -#define MEDIUM_SCAN 0x38 -#define COMPARE 0x39 -#define COPY_VERIFY 0x3a -#define WRITE_BUFFER 0x3b -#define READ_BUFFER 0x3c -#define UPDATE_BLOCK 0x3d -#define READ_LONG 0x3e -#define WRITE_LONG 0x3f -#define CHANGE_DEFINITION 0x40 -#define WRITE_SAME 0x41 -#define UNMAP 0x42 -#define READ_TOC 0x43 -#define READ_HEADER 0x44 -#define GET_EVENT_STATUS_NOTIFICATION 0x4a -#define LOG_SELECT 0x4c -#define LOG_SENSE 0x4d -#define XDWRITEREAD_10 0x53 -#define MODE_SELECT_10 0x55 -#define RESERVE_10 0x56 -#define RELEASE_10 0x57 -#define MODE_SENSE_10 0x5a -#define PERSISTENT_RESERVE_IN 0x5e -#define PERSISTENT_RESERVE_OUT 0x5f -#define VARIABLE_LENGTH_CMD 0x7f -#define REPORT_LUNS 0xa0 -#define SECURITY_PROTOCOL_IN 0xa2 -#define MAINTENANCE_IN 0xa3 -#define MAINTENANCE_OUT 0xa4 -#define MOVE_MEDIUM 0xa5 -#define EXCHANGE_MEDIUM 0xa6 -#define READ_12 0xa8 -#define SERVICE_ACTION_OUT_12 0xa9 -#define WRITE_12 0xaa -#define SERVICE_ACTION_IN_12 0xab -#define WRITE_VERIFY_12 0xae -#define VERIFY_12 0xaf -#define SEARCH_HIGH_12 0xb0 -#define SEARCH_EQUAL_12 0xb1 -#define SEARCH_LOW_12 0xb2 -#define SECURITY_PROTOCOL_OUT 0xb5 -#define READ_ELEMENT_STATUS 0xb8 -#define SEND_VOLUME_TAG 0xb6 -#define WRITE_LONG_2 0xea -#define EXTENDED_COPY 0x83 -#define RECEIVE_COPY_RESULTS 0x84 -#define ACCESS_CONTROL_IN 0x86 -#define ACCESS_CONTROL_OUT 0x87 -#define READ_16 0x88 -#define WRITE_16 0x8a -#define READ_ATTRIBUTE 0x8c -#define WRITE_ATTRIBUTE 0x8d -#define VERIFY_16 0x8f -#define SYNCHRONIZE_CACHE_16 0x91 -#define WRITE_SAME_16 0x93 -#define SERVICE_ACTION_BIDIRECTIONAL 0x9d -#define SERVICE_ACTION_IN_16 0x9e -#define SERVICE_ACTION_OUT_16 0x9f -/* values for service action in */ -#define SAI_READ_CAPACITY_16 0x10 -#define SAI_GET_LBA_STATUS 0x12 -/* values for VARIABLE_LENGTH_CMD service action codes - * see spc4r17 Section D.3.5, table D.7 and D.8 */ -#define VLC_SA_RECEIVE_CREDENTIAL 0x1800 -/* values for maintenance in */ -#define MI_REPORT_IDENTIFYING_INFORMATION 0x05 -#define MI_REPORT_TARGET_PGS 0x0a -#define MI_REPORT_ALIASES 0x0b -#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c -#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d -#define MI_REPORT_PRIORITY 0x0e -#define MI_REPORT_TIMESTAMP 0x0f -#define MI_MANAGEMENT_PROTOCOL_IN 0x10 -/* value for MI_REPORT_TARGET_PGS ext header */ -#define MI_EXT_HDR_PARAM_FMT 0x20 -/* values for maintenance out */ -#define MO_SET_IDENTIFYING_INFORMATION 0x06 -#define MO_SET_TARGET_PGS 0x0a -#define MO_CHANGE_ALIASES 0x0b -#define MO_SET_PRIORITY 0x0e -#define MO_SET_TIMESTAMP 0x0f -#define MO_MANAGEMENT_PROTOCOL_OUT 0x10 -/* values for variable length command */ -#define XDREAD_32 0x03 -#define XDWRITE_32 0x04 -#define XPWRITE_32 0x06 -#define XDWRITEREAD_32 0x07 -#define READ_32 0x09 -#define VERIFY_32 0x0a -#define WRITE_32 0x0b -#define WRITE_SAME_32 0x0d - -#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f) -#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9]) - -static const char * -scsi_trace_misc(struct trace_seq *, unsigned char *, int); - -static const char * -scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len) -{ - const char *ret = p->buffer + p->len; - sector_t lba = 0, txlen = 0; - - lba |= ((cdb[1] & 0x1F) << 16); - lba |= (cdb[2] << 8); - lba |= cdb[3]; - txlen = cdb[4]; - - trace_seq_printf(p, "lba=%llu txlen=%llu", - (unsigned long long)lba, (unsigned long long)txlen); - trace_seq_putc(p, 0); - return ret; -} - -static const char * -scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len) -{ - const char *ret = p->buffer + p->len; - sector_t lba = 0, txlen = 0; - - lba |= (cdb[2] << 24); - lba |= (cdb[3] << 16); - lba |= (cdb[4] << 8); - lba |= cdb[5]; - txlen |= (cdb[7] << 8); - txlen |= cdb[8]; - - trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", - (unsigned long long)lba, (unsigned long long)txlen, - cdb[1] >> 5); - - if (cdb[0] == WRITE_SAME) - trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1); - - trace_seq_putc(p, 0); - return ret; -} - -static const char * -scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len) -{ - const char *ret = p->buffer + p->len; - sector_t lba = 0, txlen = 0; - - lba |= (cdb[2] << 24); - lba |= (cdb[3] << 16); - lba |= (cdb[4] << 8); - lba |= cdb[5]; - txlen |= (cdb[6] << 24); - txlen |= (cdb[7] << 16); - txlen |= (cdb[8] << 8); - txlen |= cdb[9]; - - trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", - (unsigned long long)lba, (unsigned long long)txlen, - cdb[1] >> 5); - trace_seq_putc(p, 0); - return ret; -} - -static const char * -scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len) -{ - const char *ret = p->buffer + p->len; - sector_t lba = 0, txlen = 0; - - lba |= ((u64)cdb[2] << 56); - lba |= ((u64)cdb[3] << 48); - lba |= ((u64)cdb[4] << 40); - lba |= ((u64)cdb[5] << 32); - lba |= (cdb[6] << 24); - lba |= (cdb[7] << 16); - lba |= (cdb[8] << 8); - lba |= cdb[9]; - txlen |= (cdb[10] << 24); - txlen |= (cdb[11] << 16); - txlen |= (cdb[12] << 8); - txlen |= cdb[13]; - - trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", - (unsigned long long)lba, (unsigned long long)txlen, - cdb[1] >> 5); - - if (cdb[0] == WRITE_SAME_16) - trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1); - - trace_seq_putc(p, 0); - return ret; -} - -static const char * -scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len) -{ - const char *ret = p->buffer + p->len, *cmd; - sector_t lba = 0, txlen = 0; - u32 ei_lbrt = 0; - - switch (SERVICE_ACTION32(cdb)) { - case READ_32: - cmd = "READ"; - break; - case VERIFY_32: - cmd = "VERIFY"; - break; - case WRITE_32: - cmd = "WRITE"; - break; - case WRITE_SAME_32: - cmd = "WRITE_SAME"; - break; - default: - trace_seq_printf(p, "UNKNOWN"); - goto out; - } - - lba |= ((u64)cdb[12] << 56); - lba |= ((u64)cdb[13] << 48); - lba |= ((u64)cdb[14] << 40); - lba |= ((u64)cdb[15] << 32); - lba |= (cdb[16] << 24); - lba |= (cdb[17] << 16); - lba |= (cdb[18] << 8); - lba |= cdb[19]; - ei_lbrt |= (cdb[20] << 24); - ei_lbrt |= (cdb[21] << 16); - ei_lbrt |= (cdb[22] << 8); - ei_lbrt |= cdb[23]; - txlen |= (cdb[28] << 24); - txlen |= (cdb[29] << 16); - txlen |= (cdb[30] << 8); - txlen |= cdb[31]; - - trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u", - cmd, (unsigned long long)lba, - (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt); - - if (SERVICE_ACTION32(cdb) == WRITE_SAME_32) - trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1); - -out: - trace_seq_putc(p, 0); - return ret; -} - -static const char * -scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len) -{ - const char *ret = p->buffer + p->len; - unsigned int regions = cdb[7] << 8 | cdb[8]; - - trace_seq_printf(p, "regions=%u", (regions - 8) / 16); - trace_seq_putc(p, 0); - return ret; -} - -static const char * -scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len) -{ - const char *ret = p->buffer + p->len, *cmd; - sector_t lba = 0; - u32 alloc_len = 0; - - switch (SERVICE_ACTION16(cdb)) { - case SAI_READ_CAPACITY_16: - cmd = "READ_CAPACITY_16"; - break; - case SAI_GET_LBA_STATUS: - cmd = "GET_LBA_STATUS"; - break; - default: - trace_seq_printf(p, "UNKNOWN"); - goto out; - } - - lba |= ((u64)cdb[2] << 56); - lba |= ((u64)cdb[3] << 48); - lba |= ((u64)cdb[4] << 40); - lba |= ((u64)cdb[5] << 32); - lba |= (cdb[6] << 24); - lba |= (cdb[7] << 16); - lba |= (cdb[8] << 8); - lba |= cdb[9]; - alloc_len |= (cdb[10] << 24); - alloc_len |= (cdb[11] << 16); - alloc_len |= (cdb[12] << 8); - alloc_len |= cdb[13]; - - trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd, - (unsigned long long)lba, alloc_len); - -out: - trace_seq_putc(p, 0); - return ret; -} - -static const char * -scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len) -{ - switch (SERVICE_ACTION32(cdb)) { - case READ_32: - case VERIFY_32: - case WRITE_32: - case WRITE_SAME_32: - return scsi_trace_rw32(p, cdb, len); - default: - return scsi_trace_misc(p, cdb, len); - } -} - -static const char * -scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len) -{ - const char *ret = p->buffer + p->len; - - trace_seq_printf(p, "-"); - trace_seq_putc(p, 0); - return ret; -} - -const char * -scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len) -{ - switch (cdb[0]) { - case READ_6: - case WRITE_6: - return scsi_trace_rw6(p, cdb, len); - case READ_10: - case VERIFY: - case WRITE_10: - case WRITE_SAME: - return scsi_trace_rw10(p, cdb, len); - case READ_12: - case VERIFY_12: - case WRITE_12: - return scsi_trace_rw12(p, cdb, len); - case READ_16: - case VERIFY_16: - case WRITE_16: - case WRITE_SAME_16: - return scsi_trace_rw16(p, cdb, len); - case UNMAP: - return scsi_trace_unmap(p, cdb, len); - case SERVICE_ACTION_IN_16: - return scsi_trace_service_action_in(p, cdb, len); - case VARIABLE_LENGTH_CMD: - return scsi_trace_varlen(p, cdb, len); - default: - return scsi_trace_misc(p, cdb, len); - } -} - -unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s, - unsigned long long *args) -{ - scsi_trace_parse_cdb(s, (unsigned char *) (unsigned long) args[1], args[2]); - return 0; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_print_function(tep, - process_scsi_trace_parse_cdb, - TEP_FUNC_ARG_STRING, - "scsi_trace_parse_cdb", - TEP_FUNC_ARG_PTR, - TEP_FUNC_ARG_PTR, - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_VOID); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_print_function(tep, process_scsi_trace_parse_cdb, - "scsi_trace_parse_cdb"); -} diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c deleted file mode 100644 index 43657fb60504..000000000000 --- a/tools/lib/traceevent/plugins/plugin_tlb.c +++ /dev/null @@ -1,66 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "event-parse.h" - -enum tlb_flush_reason { - TLB_FLUSH_ON_TASK_SWITCH, - TLB_REMOTE_SHOOTDOWN, - TLB_LOCAL_SHOOTDOWN, - TLB_LOCAL_MM_SHOOTDOWN, - NR_TLB_FLUSH_REASONS, -}; - -static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context) -{ - unsigned long long val; - - trace_seq_printf(s, "pages="); - - tep_print_num_field(s, "%ld", event, "pages", record, 1); - - if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0) - return -1; - - trace_seq_puts(s, " reason="); - - switch (val) { - case TLB_FLUSH_ON_TASK_SWITCH: - trace_seq_puts(s, "flush on task switch"); - break; - case TLB_REMOTE_SHOOTDOWN: - trace_seq_puts(s, "remote shootdown"); - break; - case TLB_LOCAL_SHOOTDOWN: - trace_seq_puts(s, "local shootdown"); - break; - case TLB_LOCAL_MM_SHOOTDOWN: - trace_seq_puts(s, "local mm shootdown"); - break; - } - - trace_seq_printf(s, " (%lld)", val); - - return 0; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_event_handler(tep, -1, "tlb", "tlb_flush", - tlb_flush_handler, NULL); - - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_event_handler(tep, -1, - "tlb", "tlb_flush", - tlb_flush_handler, NULL); -} diff --git a/tools/lib/traceevent/plugins/plugin_xen.c b/tools/lib/traceevent/plugins/plugin_xen.c deleted file mode 100644 index 993b208d0323..000000000000 --- a/tools/lib/traceevent/plugins/plugin_xen.c +++ /dev/null @@ -1,138 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "event-parse.h" -#include "trace-seq.h" - -#define __HYPERVISOR_set_trap_table 0 -#define __HYPERVISOR_mmu_update 1 -#define __HYPERVISOR_set_gdt 2 -#define __HYPERVISOR_stack_switch 3 -#define __HYPERVISOR_set_callbacks 4 -#define __HYPERVISOR_fpu_taskswitch 5 -#define __HYPERVISOR_sched_op_compat 6 -#define __HYPERVISOR_dom0_op 7 -#define __HYPERVISOR_set_debugreg 8 -#define __HYPERVISOR_get_debugreg 9 -#define __HYPERVISOR_update_descriptor 10 -#define __HYPERVISOR_memory_op 12 -#define __HYPERVISOR_multicall 13 -#define __HYPERVISOR_update_va_mapping 14 -#define __HYPERVISOR_set_timer_op 15 -#define __HYPERVISOR_event_channel_op_compat 16 -#define __HYPERVISOR_xen_version 17 -#define __HYPERVISOR_console_io 18 -#define __HYPERVISOR_physdev_op_compat 19 -#define __HYPERVISOR_grant_table_op 20 -#define __HYPERVISOR_vm_assist 21 -#define __HYPERVISOR_update_va_mapping_otherdomain 22 -#define __HYPERVISOR_iret 23 /* x86 only */ -#define __HYPERVISOR_vcpu_op 24 -#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ -#define __HYPERVISOR_mmuext_op 26 -#define __HYPERVISOR_acm_op 27 -#define __HYPERVISOR_nmi_op 28 -#define __HYPERVISOR_sched_op 29 -#define __HYPERVISOR_callback_op 30 -#define __HYPERVISOR_xenoprof_op 31 -#define __HYPERVISOR_event_channel_op 32 -#define __HYPERVISOR_physdev_op 33 -#define __HYPERVISOR_hvm_op 34 -#define __HYPERVISOR_tmem_op 38 - -/* Architecture-specific hypercall definitions. */ -#define __HYPERVISOR_arch_0 48 -#define __HYPERVISOR_arch_1 49 -#define __HYPERVISOR_arch_2 50 -#define __HYPERVISOR_arch_3 51 -#define __HYPERVISOR_arch_4 52 -#define __HYPERVISOR_arch_5 53 -#define __HYPERVISOR_arch_6 54 -#define __HYPERVISOR_arch_7 55 - -#define N(x) [__HYPERVISOR_##x] = "("#x")" -static const char *xen_hypercall_names[] = { - N(set_trap_table), - N(mmu_update), - N(set_gdt), - N(stack_switch), - N(set_callbacks), - N(fpu_taskswitch), - N(sched_op_compat), - N(dom0_op), - N(set_debugreg), - N(get_debugreg), - N(update_descriptor), - N(memory_op), - N(multicall), - N(update_va_mapping), - N(set_timer_op), - N(event_channel_op_compat), - N(xen_version), - N(console_io), - N(physdev_op_compat), - N(grant_table_op), - N(vm_assist), - N(update_va_mapping_otherdomain), - N(iret), - N(vcpu_op), - N(set_segment_base), - N(mmuext_op), - N(acm_op), - N(nmi_op), - N(sched_op), - N(callback_op), - N(xenoprof_op), - N(event_channel_op), - N(physdev_op), - N(hvm_op), - -/* Architecture-specific hypercall definitions. */ - N(arch_0), - N(arch_1), - N(arch_2), - N(arch_3), - N(arch_4), - N(arch_5), - N(arch_6), - N(arch_7), -}; -#undef N - -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - -static const char *xen_hypercall_name(unsigned op) -{ - if (op < ARRAY_SIZE(xen_hypercall_names) && - xen_hypercall_names[op] != NULL) - return xen_hypercall_names[op]; - - return ""; -} - -unsigned long long process_xen_hypercall_name(struct trace_seq *s, - unsigned long long *args) -{ - unsigned int op = args[0]; - - trace_seq_printf(s, "%s", xen_hypercall_name(op)); - return 0; -} - -int TEP_PLUGIN_LOADER(struct tep_handle *tep) -{ - tep_register_print_function(tep, - process_xen_hypercall_name, - TEP_FUNC_ARG_STRING, - "xen_hypercall_name", - TEP_FUNC_ARG_INT, - TEP_FUNC_ARG_VOID); - return 0; -} - -void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) -{ - tep_unregister_print_function(tep, process_xen_hypercall_name, - "xen_hypercall_name"); -} diff --git a/tools/lib/traceevent/tep_strerror.c b/tools/lib/traceevent/tep_strerror.c deleted file mode 100644 index 4ac26445b2f6..000000000000 --- a/tools/lib/traceevent/tep_strerror.c +++ /dev/null @@ -1,53 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -#undef _GNU_SOURCE -#include <string.h> -#include <stdio.h> - -#include "event-parse.h" - -#undef _PE -#define _PE(code, str) str -static const char * const tep_error_str[] = { - TEP_ERRORS -}; -#undef _PE - -/* - * The tools so far have been using the strerror_r() GNU variant, that returns - * a string, be it the buffer passed or something else. - * - * But that, besides being tricky in cases where we expect that the function - * using strerror_r() returns the error formatted in a provided buffer (we have - * to check if it returned something else and copy that instead), breaks the - * build on systems not using glibc, like Alpine Linux, where musl libc is - * used. - * - * So, introduce yet another wrapper, str_error_r(), that has the GNU - * interface, but uses the portable XSI variant of strerror_r(), so that users - * rest asured that the provided buffer is used and it is what is returned. - */ -int tep_strerror(struct tep_handle *tep __maybe_unused, - enum tep_errno errnum, char *buf, size_t buflen) -{ - const char *msg; - int idx; - - if (!buflen) - return 0; - - if (errnum >= 0) { - int err = strerror_r(errnum, buf, buflen); - buf[buflen - 1] = 0; - return err; - } - - if (errnum <= __TEP_ERRNO__START || - errnum >= __TEP_ERRNO__END) - return -1; - - idx = errnum - __TEP_ERRNO__START - 1; - msg = tep_error_str[idx]; - snprintf(buf, buflen, "%s", msg); - - return 0; -} diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c deleted file mode 100644 index 8d5ecd2bf877..000000000000 --- a/tools/lib/traceevent/trace-seq.c +++ /dev/null @@ -1,249 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ -#include "trace-seq.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> - -#include <asm/bug.h> -#include "event-parse.h" -#include "event-utils.h" - -/* - * The TRACE_SEQ_POISON is to catch the use of using - * a trace_seq structure after it was destroyed. - */ -#define TRACE_SEQ_POISON ((void *)0xdeadbeef) -#define TRACE_SEQ_CHECK(s) \ -do { \ - if (WARN_ONCE((s)->buffer == TRACE_SEQ_POISON, \ - "Usage of trace_seq after it was destroyed")) \ - (s)->state = TRACE_SEQ__BUFFER_POISONED; \ -} while (0) - -#define TRACE_SEQ_CHECK_RET_N(s, n) \ -do { \ - TRACE_SEQ_CHECK(s); \ - if ((s)->state != TRACE_SEQ__GOOD) \ - return n; \ -} while (0) - -#define TRACE_SEQ_CHECK_RET(s) TRACE_SEQ_CHECK_RET_N(s, ) -#define TRACE_SEQ_CHECK_RET0(s) TRACE_SEQ_CHECK_RET_N(s, 0) - -/** - * trace_seq_init - initialize the trace_seq structure - * @s: a pointer to the trace_seq structure to initialize - */ -void trace_seq_init(struct trace_seq *s) -{ - s->len = 0; - s->readpos = 0; - s->buffer_size = TRACE_SEQ_BUF_SIZE; - s->buffer = malloc(s->buffer_size); - if (s->buffer != NULL) - s->state = TRACE_SEQ__GOOD; - else - s->state = TRACE_SEQ__MEM_ALLOC_FAILED; -} - -/** - * trace_seq_reset - re-initialize the trace_seq structure - * @s: a pointer to the trace_seq structure to reset - */ -void trace_seq_reset(struct trace_seq *s) -{ - if (!s) - return; - TRACE_SEQ_CHECK(s); - s->len = 0; - s->readpos = 0; -} - -/** - * trace_seq_destroy - free up memory of a trace_seq - * @s: a pointer to the trace_seq to free the buffer - * - * Only frees the buffer, not the trace_seq struct itself. - */ -void trace_seq_destroy(struct trace_seq *s) -{ - if (!s) - return; - TRACE_SEQ_CHECK_RET(s); - free(s->buffer); - s->buffer = TRACE_SEQ_POISON; -} - -static void expand_buffer(struct trace_seq *s) -{ - char *buf; - - buf = realloc(s->buffer, s->buffer_size + TRACE_SEQ_BUF_SIZE); - if (WARN_ONCE(!buf, "Can't allocate trace_seq buffer memory")) { - s->state = TRACE_SEQ__MEM_ALLOC_FAILED; - return; - } - - s->buffer = buf; - s->buffer_size += TRACE_SEQ_BUF_SIZE; -} - -/** - * trace_seq_printf - sequence printing of trace information - * @s: trace sequence descriptor - * @fmt: printf format string - * - * It returns 0 if the trace oversizes the buffer's free - * space, the number of characters printed, or a negative - * value in case of an error. - * - * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special - * buffer (@s). Then the output may be either used by - * the sequencer or pulled into another buffer. - */ -int -trace_seq_printf(struct trace_seq *s, const char *fmt, ...) -{ - va_list ap; - int len; - int ret; - - try_again: - TRACE_SEQ_CHECK_RET0(s); - - len = (s->buffer_size - 1) - s->len; - - va_start(ap, fmt); - ret = vsnprintf(s->buffer + s->len, len, fmt, ap); - va_end(ap); - - if (ret >= len) { - expand_buffer(s); - goto try_again; - } - - if (ret > 0) - s->len += ret; - - return ret; -} - -/** - * trace_seq_vprintf - sequence printing of trace information - * @s: trace sequence descriptor - * @fmt: printf format string - * - * It returns 0 if the trace oversizes the buffer's free - * space, the number of characters printed, or a negative - * value in case of an error. - * * - * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special - * buffer (@s). Then the output may be either used by - * the sequencer or pulled into another buffer. - */ -int -trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) -{ - int len; - int ret; - - try_again: - TRACE_SEQ_CHECK_RET0(s); - - len = (s->buffer_size - 1) - s->len; - - ret = vsnprintf(s->buffer + s->len, len, fmt, args); - - if (ret >= len) { - expand_buffer(s); - goto try_again; - } - - if (ret > 0) - s->len += ret; - - return ret; -} - -/** - * trace_seq_puts - trace sequence printing of simple string - * @s: trace sequence descriptor - * @str: simple string to record - * - * The tracer may use either the sequence operations or its own - * copy to user routines. This function records a simple string - * into a special buffer (@s) for later retrieval by a sequencer - * or other mechanism. - */ -int trace_seq_puts(struct trace_seq *s, const char *str) -{ - int len; - - TRACE_SEQ_CHECK_RET0(s); - - len = strlen(str); - - while (len > ((s->buffer_size - 1) - s->len)) - expand_buffer(s); - - TRACE_SEQ_CHECK_RET0(s); - - memcpy(s->buffer + s->len, str, len); - s->len += len; - - return len; -} - -int trace_seq_putc(struct trace_seq *s, unsigned char c) -{ - TRACE_SEQ_CHECK_RET0(s); - - while (s->len >= (s->buffer_size - 1)) - expand_buffer(s); - - TRACE_SEQ_CHECK_RET0(s); - - s->buffer[s->len++] = c; - - return 1; -} - -void trace_seq_terminate(struct trace_seq *s) -{ - TRACE_SEQ_CHECK_RET(s); - - /* There's always one character left on the buffer */ - s->buffer[s->len] = 0; -} - -int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp) -{ - TRACE_SEQ_CHECK(s); - - switch (s->state) { - case TRACE_SEQ__GOOD: - return fprintf(fp, "%.*s", s->len, s->buffer); - case TRACE_SEQ__BUFFER_POISONED: - fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed"); - break; - case TRACE_SEQ__MEM_ALLOC_FAILED: - fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory"); - break; - } - return -1; -} - -int trace_seq_do_printf(struct trace_seq *s) -{ - return trace_seq_do_fprintf(s, stdout); -} diff --git a/tools/lib/traceevent/trace-seq.h b/tools/lib/traceevent/trace-seq.h deleted file mode 100644 index d68ec69f8d1a..000000000000 --- a/tools/lib/traceevent/trace-seq.h +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - */ - -#ifndef _TRACE_SEQ_H -#define _TRACE_SEQ_H - -#include <stdarg.h> -#include <stdio.h> - -/* ----------------------- trace_seq ----------------------- */ - -#ifndef TRACE_SEQ_BUF_SIZE -#define TRACE_SEQ_BUF_SIZE 4096 -#endif - -enum trace_seq_fail { - TRACE_SEQ__GOOD, - TRACE_SEQ__BUFFER_POISONED, - TRACE_SEQ__MEM_ALLOC_FAILED, -}; - -/* - * Trace sequences are used to allow a function to call several other functions - * to create a string of data to use (up to a max of PAGE_SIZE). - */ - -struct trace_seq { - char *buffer; - unsigned int buffer_size; - unsigned int len; - unsigned int readpos; - enum trace_seq_fail state; -}; - -void trace_seq_init(struct trace_seq *s); -void trace_seq_reset(struct trace_seq *s); -void trace_seq_destroy(struct trace_seq *s); - -extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) - __attribute__ ((format (printf, 2, 0))); - -extern int trace_seq_puts(struct trace_seq *s, const char *str); -extern int trace_seq_putc(struct trace_seq *s, unsigned char c); - -extern void trace_seq_terminate(struct trace_seq *s); - -extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp); -extern int trace_seq_do_printf(struct trace_seq *s); - -#endif /* _TRACE_SEQ_H */ |
