diff options
Diffstat (limited to 'tools/perf/builtin-trace.c')
-rw-r--r-- | tools/perf/builtin-trace.c | 772 |
1 files changed, 607 insertions, 165 deletions
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 109b8e64fe69..d466447ae928 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -19,6 +19,7 @@ #ifdef HAVE_LIBBPF_SUPPORT #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include <bpf/btf.h> #ifdef HAVE_BPF_SKEL #include "bpf_skel/augmented_raw_syscalls.skel.h" #endif @@ -64,6 +65,7 @@ #include "syscalltbl.h" #include "rb_resort.h" #include "../perf.h" +#include "trace_augment.h" #include <errno.h> #include <inttypes.h> @@ -74,6 +76,7 @@ #include <linux/err.h> #include <linux/filter.h> #include <linux/kernel.h> +#include <linux/list_sort.h> #include <linux/random.h> #include <linux/stringify.h> #include <linux/time64.h> @@ -85,7 +88,7 @@ #include <perf/mmap.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #ifndef O_CLOEXEC @@ -100,6 +103,12 @@ /* * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100 + * + * We have to explicitly mark the direction of the flow of data, if from the + * kernel to user space or the other way around, since the BPF collector we + * have so far copies only from user to kernel space, mark the arguments that + * go that direction, so that we don't end up collecting the previous contents + * for syscall args that go from kernel to user space. 
*/ struct syscall_arg_fmt { size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); @@ -108,7 +117,12 @@ struct syscall_arg_fmt { void *parm; const char *name; u16 nr_entries; // for arrays + bool from_user; bool show_zero; +#ifdef HAVE_LIBBPF_SUPPORT + const struct btf_type *type; + int type_id; /* used in btf_dump */ +#endif }; struct syscall_fmt { @@ -139,6 +153,9 @@ struct trace { #ifdef HAVE_BPF_SKEL struct augmented_raw_syscalls_bpf *skel; #endif +#ifdef HAVE_LIBBPF_SUPPORT + struct btf *btf; +#endif struct record_opts opts; struct evlist *evlist; struct machine *host; @@ -195,6 +212,7 @@ struct trace { bool show_string_prefix; bool force; bool vfs_getname; + bool force_btf; int trace_pgfaults; char *perfconfig_events; struct { @@ -203,6 +221,20 @@ struct trace { } oe; }; +static void trace__load_vmlinux_btf(struct trace *trace __maybe_unused) +{ +#ifdef HAVE_LIBBPF_SUPPORT + if (trace->btf != NULL) + return; + + trace->btf = btf__load_vmlinux_btf(); + if (verbose > 0) { + fprintf(trace->output, trace->btf ? 
"vmlinux BTF loaded\n" : + "Failed to load vmlinux BTF\n"); + } +#endif +} + struct tp_field { int offset; union { @@ -357,7 +389,12 @@ static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel) } if (et->fmt == NULL) { - et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt)); + const struct tep_event *tp_format = evsel__tp_format(evsel); + + if (tp_format == NULL) + goto out_delete; + + et->fmt = calloc(tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt)); if (et->fmt == NULL) goto out_delete; } @@ -764,7 +801,7 @@ static const char *fcntl_cmds[] = { static DEFINE_STRARRAY(fcntl_cmds, "F_"); static const char *fcntl_linux_specific_cmds[] = { - "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC", + "SETLEASE", "GETLEASE", "NOTIFY", "DUPFD_QUERY", [5] = "CANCELLK", "DUPFD_CLOEXEC", "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS", "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT", }; @@ -829,6 +866,15 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, #define SCA_FILENAME syscall_arg__scnprintf_filename +// 'argname' is just documentational at this point, to remove the previous comment with that info +#define SCA_FILENAME_FROM_USER(argname) \ + { .scnprintf = SCA_FILENAME, \ + .from_user = true, } + +static size_t syscall_arg__scnprintf_buf(char *bf, size_t size, struct syscall_arg *arg); + +#define SCA_BUF syscall_arg__scnprintf_buf + static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -886,6 +932,177 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags +#ifdef HAVE_LIBBPF_SUPPORT +static void syscall_arg_fmt__cache_btf_enum(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type) +{ + int id; + + type = strstr(type, "enum "); + if (type == NULL) + return; + + type += 5; // skip "enum " to get the enumeration 
name + + id = btf__find_by_name(btf, type); + if (id < 0) + return; + + arg_fmt->type = btf__type_by_id(btf, id); +} + +static bool syscall_arg__strtoul_btf_enum(char *bf, size_t size, struct syscall_arg *arg, u64 *val) +{ + const struct btf_type *bt = arg->fmt->type; + struct btf *btf = arg->trace->btf; + struct btf_enum *be = btf_enum(bt); + + for (int i = 0; i < btf_vlen(bt); ++i, ++be) { + const char *name = btf__name_by_offset(btf, be->name_off); + int max_len = max(size, strlen(name)); + + if (strncmp(name, bf, max_len) == 0) { + *val = be->val; + return true; + } + } + + return false; +} + +static bool syscall_arg__strtoul_btf_type(char *bf, size_t size, struct syscall_arg *arg, u64 *val) +{ + const struct btf_type *bt; + char *type = arg->type_name; + struct btf *btf; + + trace__load_vmlinux_btf(arg->trace); + + btf = arg->trace->btf; + if (btf == NULL) + return false; + + if (arg->fmt->type == NULL) { + // See if this is an enum + syscall_arg_fmt__cache_btf_enum(arg->fmt, btf, type); + } + + // Now let's see if we have a BTF type resolved + bt = arg->fmt->type; + if (bt == NULL) + return false; + + // If it is an enum: + if (btf_is_enum(arg->fmt->type)) + return syscall_arg__strtoul_btf_enum(bf, size, arg, val); + + return false; +} + +static size_t btf_enum_scnprintf(const struct btf_type *type, struct btf *btf, char *bf, size_t size, int val) +{ + struct btf_enum *be = btf_enum(type); + const int nr_entries = btf_vlen(type); + + for (int i = 0; i < nr_entries; ++i, ++be) { + if (be->val == val) { + return scnprintf(bf, size, "%s", + btf__name_by_offset(btf, be->name_off)); + } + } + + return 0; +} + +struct trace_btf_dump_snprintf_ctx { + char *bf; + size_t printed, size; +}; + +static void trace__btf_dump_snprintf(void *vctx, const char *fmt, va_list args) +{ + struct trace_btf_dump_snprintf_ctx *ctx = vctx; + + ctx->printed += vscnprintf(ctx->bf + ctx->printed, ctx->size - ctx->printed, fmt, args); +} + +static size_t btf_struct_scnprintf(const struct 
btf_type *type, struct btf *btf, char *bf, size_t size, struct syscall_arg *arg) +{ + struct trace_btf_dump_snprintf_ctx ctx = { + .bf = bf, + .size = size, + }; + struct augmented_arg *augmented_arg = arg->augmented.args; + int type_id = arg->fmt->type_id, consumed; + struct btf_dump *btf_dump; + + LIBBPF_OPTS(btf_dump_opts, dump_opts); + LIBBPF_OPTS(btf_dump_type_data_opts, dump_data_opts); + + if (arg == NULL || arg->augmented.args == NULL) + return 0; + + dump_data_opts.compact = true; + dump_data_opts.skip_names = !arg->trace->show_arg_names; + + btf_dump = btf_dump__new(btf, trace__btf_dump_snprintf, &ctx, &dump_opts); + if (btf_dump == NULL) + return 0; + + /* pretty print the struct data here */ + if (btf_dump__dump_type_data(btf_dump, type_id, arg->augmented.args->value, type->size, &dump_data_opts) == 0) + return 0; + + consumed = sizeof(*augmented_arg) + augmented_arg->size; + arg->augmented.args = ((void *)arg->augmented.args) + consumed; + arg->augmented.size -= consumed; + + btf_dump__free(btf_dump); + + return ctx.printed; +} + +static size_t trace__btf_scnprintf(struct trace *trace, struct syscall_arg *arg, char *bf, + size_t size, int val, char *type) +{ + struct syscall_arg_fmt *arg_fmt = arg->fmt; + + if (trace->btf == NULL) + return 0; + + if (arg_fmt->type == NULL) { + // Check if this is an enum and if we have the BTF type for it. + syscall_arg_fmt__cache_btf_enum(arg_fmt, trace->btf, type); + } + + // Did we manage to find a BTF type for the syscall/tracepoint argument? 
+ if (arg_fmt->type == NULL) + return 0; + + if (btf_is_enum(arg_fmt->type)) + return btf_enum_scnprintf(arg_fmt->type, trace->btf, bf, size, val); + else if (btf_is_struct(arg_fmt->type) || btf_is_union(arg_fmt->type)) + return btf_struct_scnprintf(arg_fmt->type, trace->btf, bf, size, arg); + + return 0; +} + +#else // HAVE_LIBBPF_SUPPORT +static size_t trace__btf_scnprintf(struct trace *trace __maybe_unused, struct syscall_arg *arg __maybe_unused, + char *bf __maybe_unused, size_t size __maybe_unused, int val __maybe_unused, + char *type __maybe_unused) +{ + return 0; +} + +static bool syscall_arg__strtoul_btf_type(char *bf __maybe_unused, size_t size __maybe_unused, + struct syscall_arg *arg __maybe_unused, u64 *val __maybe_unused) +{ + return false; +} +#endif // HAVE_LIBBPF_SUPPORT + +#define STUL_BTF_TYPE syscall_arg__strtoul_btf_type + #define STRARRAY(name, array) \ { .scnprintf = SCA_STRARRAY, \ .strtoul = STUL_STRARRAY, \ @@ -896,7 +1113,6 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, .strtoul = STUL_STRARRAY_FLAGS, \ .parm = &strarray__##array, } -#include "trace/beauty/arch_errno_names.c" #include "trace/beauty/eventfd.c" #include "trace/beauty/futex_op.c" #include "trace/beauty/futex_val3.c" @@ -920,16 +1136,17 @@ static const struct syscall_fmt syscall_fmts[] = { [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, }, { .name = "bind", .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ }, - [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, + [1] = SCA_SOCKADDR_FROM_USER(umyaddr), [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, }, { .name = "bpf", - .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, + .arg = { [0] = STRARRAY(cmd, bpf_cmd), + [1] = { .from_user = true /* attr */, }, } }, { .name = "brk", .hexret = true, .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, }, { .name = "clock_gettime", .arg = { [0] = STRARRAY(clk_id, clockid), }, }, { .name = "clock_nanosleep", - .arg = { [2] = { .scnprintf = SCA_TIMESPEC, /* rqtp 
*/ }, }, }, + .arg = { [2] = SCA_TIMESPEC_FROM_USER(req), }, }, { .name = "clone", .errpid = true, .nr_args = 5, .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, }, [1] = { .name = "child_stack", .scnprintf = SCA_HEX, }, @@ -940,12 +1157,21 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, { .name = "connect", .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ }, - [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, + [1] = SCA_SOCKADDR_FROM_USER(servaddr), [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, }, { .name = "epoll_ctl", .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, }, { .name = "eventfd2", .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, }, + { .name = "faccessat", + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, + [1] = SCA_FILENAME_FROM_USER(pathname), + [2] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, + { .name = "faccessat2", + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, + [1] = SCA_FILENAME_FROM_USER(pathname), + [2] = { .scnprintf = SCA_ACCMODE, /* mode */ }, + [3] = { .scnprintf = SCA_FACCESSAT2_FLAGS, /* flags */ }, }, }, { .name = "fchmodat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "fchownat", @@ -965,10 +1191,9 @@ static const struct syscall_fmt syscall_fmts[] = { [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, }, { .name = "fspick", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, - [1] = { .scnprintf = SCA_FILENAME, /* path */ }, + [1] = SCA_FILENAME_FROM_USER(path), [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, }, { .name = "fstat", .alias = "newfstat", }, - { .name = "fstatat", .alias = "newfstatat", }, { .name = "futex", .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, }, @@ -1024,32 +1249,36 @@ static const struct syscall_fmt syscall_fmts[] = { #if defined(__s390x__) .alias = "old_mmap", #endif - 
.arg = { [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, + .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ }, [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ .strtoul = STUL_STRARRAY_FLAGS, .parm = &strarray__mmap_flags, }, [5] = { .scnprintf = SCA_HEX, /* offset */ }, }, }, { .name = "mount", - .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ }, + .arg = { [0] = SCA_FILENAME_FROM_USER(devname), [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, }, { .name = "move_mount", .arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ }, - [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ }, + [1] = SCA_FILENAME_FROM_USER(pathname), [2] = { .scnprintf = SCA_FDAT, /* to_dfd */ }, - [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ }, + [3] = SCA_FILENAME_FROM_USER(pathname), [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, }, { .name = "mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, - [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, + [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ }, }, }, { .name = "mq_unlink", - .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, }, + .arg = { [0] = SCA_FILENAME_FROM_USER(u_name), }, }, { .name = "mremap", .hexret = true, .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, }, { .name = "name_to_handle_at", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, - { .name = "newfstatat", - .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, + { .name = "nanosleep", + .arg = { [0] = SCA_TIMESPEC_FROM_USER(req), }, }, + { .name = "newfstatat", .alias = "fstatat", + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, + [1] = SCA_FILENAME_FROM_USER(pathname), + [3] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ }, }, }, { .name = "open", .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, { .name = "open_by_handle_at", @@ -1059,7 +1288,7 @@ 
static const struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, { .name = "perf_event_open", - .arg = { [0] = { .scnprintf = SCA_PERF_ATTR, /* attr */ }, + .arg = { [0] = SCA_PERF_ATTR_FROM_USER(attr), [2] = { .scnprintf = SCA_INT, /* cpu */ }, [3] = { .scnprintf = SCA_FD, /* group_fd */ }, [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, @@ -1071,7 +1300,7 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, }, { .name = "pkey_mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, - [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, + [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ }, [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, { .name = "poll", .timeout = true, }, { .name = "ppoll", .timeout = true, }, @@ -1084,7 +1313,8 @@ static const struct syscall_fmt syscall_fmts[] = { { .name = "pread", .alias = "pread64", }, { .name = "preadv", .alias = "pread", }, { .name = "prlimit64", - .arg = { [1] = STRARRAY(resource, rlimit_resources), }, }, + .arg = { [1] = STRARRAY(resource, rlimit_resources), + [2] = { .from_user = true /* new_rlim */, }, }, }, { .name = "pwrite", .alias = "pwrite64", }, { .name = "readlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, @@ -1101,6 +1331,8 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ }, [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, }, + { .name = "rseq", .errpid = true, + .arg = { [0] = { .from_user = true /* rseq */, }, }, }, { .name = "rt_sigaction", .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "rt_sigprocmask", @@ -1122,12 +1354,15 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "sendto", 
.arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, - [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, }, + [4] = SCA_SOCKADDR_FROM_USER(addr), }, }, + { .name = "set_robust_list", .errpid = true, + .arg = { [0] = { .from_user = true /* head */, }, }, }, { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", .arg = { [0] = STRARRAY(which, itimers), }, }, { .name = "setrlimit", - .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, + .arg = { [0] = STRARRAY(resource, rlimit_resources), + [1] = { .from_user = true /* rlim */, }, }, }, { .name = "setsockopt", .arg = { [1] = STRARRAY(level, socket_level), }, }, { .name = "socket", @@ -1141,12 +1376,12 @@ static const struct syscall_fmt syscall_fmts[] = { { .name = "stat", .alias = "newstat", }, { .name = "statx", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ }, - [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } , + [2] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ } , [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, }, { .name = "swapoff", - .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, + .arg = { [0] = SCA_FILENAME_FROM_USER(specialfile), }, }, { .name = "swapon", - .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, + .arg = { [0] = SCA_FILENAME_FROM_USER(specialfile), }, }, { .name = "symlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, { .name = "sync_file_range", @@ -1156,16 +1391,20 @@ static const struct syscall_fmt syscall_fmts[] = { { .name = "tkill", .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "umount2", .alias = "umount", - .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, }, + .arg = { [0] = SCA_FILENAME_FROM_USER(name), }, }, { .name = "uname", .alias = "newuname", }, { .name = "unlinkat", - .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, + [1] = SCA_FILENAME_FROM_USER(pathname), + [2] = { 
.scnprintf = SCA_FS_AT_FLAGS, /* flags */ }, }, }, { .name = "utimensat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, }, { .name = "wait4", .errpid = true, .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, { .name = "waitid", .errpid = true, .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, + { .name = "write", + .arg = { [1] = { .scnprintf = SCA_BUF /* buf */, .from_user = true, }, }, }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -1223,6 +1462,7 @@ struct syscall { bool is_exit; bool is_open; bool nonexistent; + bool use_btf; struct tep_format_field *args; const char *name; const struct syscall_fmt *fmt; @@ -1536,6 +1776,32 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, return 0; } +#define MAX_CONTROL_CHAR 31 +#define MAX_ASCII 127 + +static size_t syscall_arg__scnprintf_buf(char *bf, size_t size, struct syscall_arg *arg) +{ + struct augmented_arg *augmented_arg = arg->augmented.args; + unsigned char *orig = (unsigned char *)augmented_arg->value; + size_t printed = 0; + int consumed; + + if (augmented_arg == NULL) + return 0; + + for (int j = 0; j < augmented_arg->size; ++j) { + bool control_char = orig[j] <= MAX_CONTROL_CHAR || orig[j] >= MAX_ASCII; + /* print control characters (0~31 and 127), and non-ascii characters in \(digits) */ + printed += scnprintf(bf + printed, size - printed, control_char ? 
"\\%d" : "%c", (int)orig[j]); + } + + consumed = sizeof(*augmented_arg) + augmented_arg->size; + arg->augmented.args = ((void *)arg->augmented.args) + consumed; + arg->augmented.size -= consumed; + + return printed; +} + static bool trace__filter_duration(struct trace *trace, double t) { return t < (trace->duration_filter * NSEC_PER_MSEC); @@ -1611,7 +1877,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine, switch (event->header.type) { case PERF_RECORD_LOST: color_fprintf(trace->output, PERF_COLOR_RED, - "LOST %" PRIu64 " events!\n", event->lost.lost); + "LOST %" PRIu64 " events!\n", (u64)event->lost.lost); ret = machine__process_lost_event(machine, event, sample); break; default: @@ -1622,7 +1888,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine, return ret; } -static int trace__tool_process(struct perf_tool *tool, +static int trace__tool_process(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -1729,7 +1995,8 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n } static struct tep_format_field * -syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field) +syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field, + bool *use_btf) { struct tep_format_field *last_field = NULL; int len; @@ -1742,11 +2009,15 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field len = strlen(field->name); + // As far as heuristics (or intention) goes this seems to hold true, and makes sense! 
+ if ((field->flags & TEP_FIELD_IS_POINTER) && strstarts(field->type, "const ")) + arg->from_user = true; + if (strcmp(field->type, "const char *") == 0 && ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) || - strstr(field->name, "path") != NULL)) + strstr(field->name, "path") != NULL)) { arg->scnprintf = SCA_FILENAME; - else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr")) + } else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr")) arg->scnprintf = SCA_PTR; else if (strcmp(field->type, "pid_t") == 0) arg->scnprintf = SCA_PID; @@ -1767,6 +2038,9 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field * 7 unsigned long */ arg->scnprintf = SCA_FD; + } else if (strstr(field->type, "enum") && use_btf != NULL) { + *use_btf = true; + arg->strtoul = STUL_BTF_TYPE; } else { const struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name); @@ -1783,7 +2057,8 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field static int syscall__set_arg_fmts(struct syscall *sc) { - struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args); + struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args, + &sc->use_btf); if (last_field) sc->args_size = last_field->offset + last_field->size; @@ -1796,31 +2071,13 @@ static int trace__read_syscall_info(struct trace *trace, int id) char tp_name[128]; struct syscall *sc; const char *name = syscalltbl__name(trace->sctbl, id); + int err; -#ifdef HAVE_SYSCALL_TABLE_SUPPORT if (trace->syscalls.table == NULL) { trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc)); if (trace->syscalls.table == NULL) return -ENOMEM; } -#else - if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) { - // When using libaudit we don't know beforehand what is the max syscall id - struct syscall *table = 
realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); - - if (table == NULL) - return -ENOMEM; - - // Need to memset from offset 0 and +1 members if brand new - if (trace->syscalls.table == NULL) - memset(table, 0, (id + 1) * sizeof(*sc)); - else - memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc)); - - trace->syscalls.table = table; - trace->sctbl->syscalls.max_id = id; - } -#endif sc = trace->syscalls.table + id; if (sc->nonexistent) return -EEXIST; @@ -1850,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) return PTR_ERR(sc->tp_format); } + /* + * The tracepoint format contains __syscall_nr field, so it's one more + * than the actual number of syscall arguments. + */ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) + RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1)) return -ENOMEM; sc->args = sc->tp_format->format.fields; @@ -1868,16 +2129,26 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat"); - return syscall__set_arg_fmts(sc); + err = syscall__set_arg_fmts(sc); + + /* after calling syscall__set_arg_fmts() we'll know whether use_btf is true */ + if (sc->use_btf) + trace__load_vmlinux_btf(trace); + + return err; } -static int evsel__init_tp_arg_scnprintf(struct evsel *evsel) +static int evsel__init_tp_arg_scnprintf(struct evsel *evsel, bool *use_btf) { struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel); if (fmt != NULL) { - syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields); - return 0; + const struct tep_event *tp_format = evsel__tp_format(evsel); + + if (tp_format) { + syscall_arg_fmt__init_array(fmt, tp_format->format.fields, use_btf); + return 0; + } } return -ENOMEM; @@ -2035,7 +2306,7 @@ static size_t 
syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, unsigned char *args, void *augmented_args, int augmented_args_size, struct trace *trace, struct thread *thread) { - size_t printed = 0; + size_t printed = 0, btf_printed; unsigned long val; u8 bit = 1; struct syscall_arg arg = { @@ -2051,6 +2322,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, .show_string_prefix = trace->show_string_prefix, }; struct thread_trace *ttrace = thread__priv(thread); + void *default_scnprintf; /* * Things like fcntl will set this in its 'cmd' formatter to pick the @@ -2076,17 +2348,15 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val); /* - * Suppress this argument if its value is zero and - * and we don't have a string associated in an - * strarray for it. - */ - if (val == 0 && - !trace->show_zeros && - !(sc->arg_fmt && - (sc->arg_fmt[arg.idx].show_zero || - sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY || - sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) && - sc->arg_fmt[arg.idx].parm)) + * Suppress this argument if its value is zero and show_zero + * property isn't set. + * + * If it has a BTF type, then override the zero suppression knob + * as the common case is for zero in an enum to have an associated entry. + */ + if (val == 0 && !trace->show_zeros && + !(sc->arg_fmt && sc->arg_fmt[arg.idx].show_zero) && + !(sc->arg_fmt && sc->arg_fmt[arg.idx].strtoul == STUL_BTF_TYPE)) continue; printed += scnprintf(bf + printed, size - printed, "%s", printed ? 
", " : ""); @@ -2094,6 +2364,17 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (trace->show_arg_names) printed += scnprintf(bf + printed, size - printed, "%s: ", field->name); + default_scnprintf = sc->arg_fmt[arg.idx].scnprintf; + + if (trace->force_btf || default_scnprintf == NULL || default_scnprintf == SCA_PTR) { + btf_printed = trace__btf_scnprintf(trace, &arg, bf + printed, + size - printed, val, field->type); + if (btf_printed) { + printed += btf_printed; + continue; + } + } + printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val); } @@ -2151,18 +2432,7 @@ static struct syscall *trace__syscall_info(struct trace *trace, err = -EINVAL; -#ifdef HAVE_SYSCALL_TABLE_SUPPORT if (id > trace->sctbl->syscalls.max_id) { -#else - if (id >= trace->sctbl->syscalls.max_id) { - /* - * With libaudit we don't know beforehand what is the max_id, - * so we let trace__read_syscall_info() figure that out as we - * go on reading syscalls. 
- */ - err = trace__read_syscall_info(trace, id); - if (err) -#endif goto out_cant_read; } @@ -2293,7 +2563,6 @@ static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel, static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size) { - void *augmented_args = NULL; /* * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter * and there we get all 6 syscall args plus the tracepoint common fields @@ -2311,10 +2580,24 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam int args_size = raw_augmented_args_size ?: sc->args_size; *augmented_args_size = sample->raw_size - args_size; - if (*augmented_args_size > 0) - augmented_args = sample->raw_data + args_size; + if (*augmented_args_size > 0) { + static uintptr_t argbuf[1024]; /* assuming single-threaded */ + + if ((size_t)(*augmented_args_size) > sizeof(argbuf)) + return NULL; - return augmented_args; + /* + * The perf ring-buffer is 8-byte aligned but sample->raw_data + * is not because it's preceded by u32 size. Later, beautifier + * will use the augmented args with stricter alignments like in + * some struct. To make sure it's aligned, let's copy the args + * into a static buffer as it's single-threaded for now. 
+ */ + memcpy(argbuf, sample->raw_data + args_size, *augmented_args_size); + + return argbuf; + } + return NULL; } static void syscall__exit(struct syscall *sc) @@ -2414,6 +2697,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, char msg[1024]; void *args, *augmented_args = NULL; int augmented_args_size; + size_t printed = 0; if (sc == NULL) return -1; @@ -2429,8 +2713,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, args = perf_evsel__sc_tp_ptr(evsel, args, sample); augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size); - syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); - fprintf(trace->output, "%s", msg); + printed += syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); + fprintf(trace->output, "%.*s", (int)printed, msg); err = 0; out_put: thread__put(thread); @@ -2738,9 +3022,10 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, { char bf[2048]; size_t size = sizeof(bf); - struct tep_format_field *field = evsel->tp_format->format.fields; + const struct tep_event *tp_format = evsel__tp_format(evsel); + struct tep_format_field *field = tp_format ? tp_format->format.fields : NULL; struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel); - size_t printed = 0; + size_t printed = 0, btf_printed; unsigned long val; u8 bit = 1; struct syscall_arg syscall_arg = { @@ -2781,17 +3066,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, */ val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val); - /* - * Suppress this argument if its value is zero and - * we don't have a string associated in an - * strarray for it. 
- */ - if (val == 0 && - !trace->show_zeros && - !((arg->show_zero || - arg->scnprintf == SCA_STRARRAY || - arg->scnprintf == SCA_STRARRAYS) && - arg->parm)) + /* Suppress this argument if its value is zero and show_zero property isn't set. */ + if (val == 0 && !trace->show_zeros && !arg->show_zero && arg->strtoul != STUL_BTF_TYPE) continue; printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : ""); @@ -2799,10 +3075,16 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, if (trace->show_arg_names) printed += scnprintf(bf + printed, size - printed, "%s: ", field->name); + btf_printed = trace__btf_scnprintf(trace, &syscall_arg, bf + printed, size - printed, val, field->type); + if (btf_printed) { + printed += btf_printed; + continue; + } + printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val); } - return printed + fprintf(trace->output, "%s", bf); + return printed + fprintf(trace->output, "%.*s", (int)printed, bf); } static int trace__event_handler(struct trace *trace, struct evsel *evsel, @@ -2811,13 +3093,8 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, { struct thread *thread; int callchain_ret = 0; - /* - * Check if we called perf_evsel__disable(evsel) due to, for instance, - * this event's max_events having been hit and this is an entry coming - * from the ring buffer that we should discard, since the max events - * have already been considered/printed. 
- */ - if (evsel->disabled) + + if (evsel->nr_events_printed >= evsel->max_events) return 0; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); @@ -2864,11 +3141,13 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, if (evsel__is_bpf_output(evsel)) { bpf_output__fprintf(trace, sample); - } else if (evsel->tp_format) { - if (strncmp(evsel->tp_format->name, "sys_enter_", 10) || - trace__fprintf_sys_enter(trace, evsel, sample)) { + } else { + const struct tep_event *tp_format = evsel__tp_format(evsel); + + if (tp_format && (strncmp(tp_format->name, "sys_enter_", 10) || + trace__fprintf_sys_enter(trace, evsel, sample))) { if (trace->libtraceevent_print) { - event_format__fprintf(evsel->tp_format, sample->cpu, + event_format__fprintf(tp_format, sample->cpu, sample->raw_data, sample->raw_size, trace->output); } else { @@ -2902,7 +3181,7 @@ static void print_location(FILE *f, struct perf_sample *sample, { if ((verbose > 0 || print_dso) && al->map) - fprintf(f, "%s@", map__dso(al->map)->long_name); + fprintf(f, "%s@", dso__long_name(map__dso(al->map))); if ((verbose > 0 || print_sym) && al->sym) fprintf(f, "%s+0x%" PRIx64, al->sym->name, @@ -3009,7 +3288,7 @@ static void trace__set_base_time(struct trace *trace, trace->base_time = sample->time; } -static int trace__process_sample(struct perf_tool *tool, +static int trace__process_sample(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, @@ -3276,6 +3555,23 @@ out_enomem: } #ifdef HAVE_BPF_SKEL +static int syscall_arg_fmt__cache_btf_struct(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type) +{ + int id; + + if (arg_fmt->type != NULL) + return -1; + + id = btf__find_by_name(btf, type); + if (id < 0) + return -1; + + arg_fmt->type = btf__type_by_id(btf, id); + arg_fmt->type_id = id; + + return 0; +} + static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) { struct 
bpf_program *pos, *prog = NULL; @@ -3351,11 +3647,94 @@ static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); } +static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array) +{ + struct tep_format_field *field; + struct syscall *sc = trace__syscall_info(trace, NULL, key); + const struct btf_type *bt; + char *struct_offset, *tmp, name[32]; + bool can_augment = false; + int i, cnt; + + if (sc == NULL) + return -1; + + trace__load_vmlinux_btf(trace); + if (trace->btf == NULL) + return -1; + + for (i = 0, field = sc->args; field; ++i, field = field->next) { + // XXX We're only collecting pointer payloads _from_ user space + if (!sc->arg_fmt[i].from_user) + continue; + + struct_offset = strstr(field->type, "struct "); + if (struct_offset == NULL) + struct_offset = strstr(field->type, "union "); + else + struct_offset++; // "union" is shorter + + if (field->flags & TEP_FIELD_IS_POINTER && struct_offset) { /* struct or union (think BPF's attr arg) */ + struct_offset += 6; + + /* for 'struct foo *', we only want 'foo' */ + for (tmp = struct_offset, cnt = 0; *tmp != ' ' && *tmp != '\0'; ++tmp, ++cnt) { + } + + strncpy(name, struct_offset, cnt); + name[cnt] = '\0'; + + /* cache struct's btf_type and type_id */ + if (syscall_arg_fmt__cache_btf_struct(&sc->arg_fmt[i], trace->btf, name)) + continue; + + bt = sc->arg_fmt[i].type; + beauty_array[i] = bt->size; + can_augment = true; + } else if (field->flags & TEP_FIELD_IS_POINTER && /* string */ + strcmp(field->type, "const char *") == 0 && + (strstr(field->name, "name") || + strstr(field->name, "path") || + strstr(field->name, "file") || + strstr(field->name, "root") || + strstr(field->name, "key") || + strstr(field->name, "special") || + strstr(field->name, "type") || + strstr(field->name, "description"))) { + beauty_array[i] = 1; + can_augment = true; + } else if 
(field->flags & TEP_FIELD_IS_POINTER && /* buffer */ + strstr(field->type, "char *") && + (strstr(field->name, "buf") || + strstr(field->name, "val") || + strstr(field->name, "msg"))) { + int j; + struct tep_format_field *field_tmp; + + /* find the size of the buffer that appears in pairs with buf */ + for (j = 0, field_tmp = sc->args; field_tmp; ++j, field_tmp = field_tmp->next) { + if (!(field_tmp->flags & TEP_FIELD_IS_POINTER) && /* only integers */ + (strstr(field_tmp->name, "count") || + strstr(field_tmp->name, "siz") || /* size, bufsiz */ + (strstr(field_tmp->name, "len") && strcmp(field_tmp->name, "filename")))) { + /* filename's got 'len' in it, we don't want that */ + beauty_array[i] = -(j + 1); + can_augment = true; + break; + } + } + } + } + + if (can_augment) + return 0; + + return -1; +} + static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) { struct tep_format_field *field, *candidate_field; - int id; - /* * We're only interested in syscalls that have a pointer: */ @@ -3367,7 +3746,8 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace return NULL; try_to_find_pair: - for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) { + for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { + int id = syscalltbl__id_at_idx(trace->sctbl, i); struct syscall *pair = trace__syscall_info(trace, NULL, id); struct bpf_program *pair_prog; bool is_candidate = false; @@ -3456,10 +3836,12 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) { int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter); int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit); - int err = 0, key; + int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter); + int err = 0; + unsigned int beauty_array[6]; - for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { - int prog_fd; + for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { + int 
prog_fd, key = syscalltbl__id_at_idx(trace->sctbl, i); if (!trace__syscall_enabled(trace, key)) continue; @@ -3475,6 +3857,15 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY); if (err) break; + + /* use beauty_map to tell BPF how many bytes to collect, set beauty_map's value here */ + memset(beauty_array, 0, sizeof(beauty_array)); + err = trace__bpf_sys_enter_beauty_map(trace, key, (unsigned int *)beauty_array); + if (err) + continue; + err = bpf_map_update_elem(beauty_map_fd, &key, beauty_array, BPF_ANY); + if (err) + break; } /* @@ -3505,7 +3896,8 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) * first and second arg (this one on the raw_syscalls:sys_exit prog * array tail call, then that one will be used. */ - for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { + for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { + int key = syscalltbl__id_at_idx(trace->sctbl, i); struct syscall *sc = trace__syscall_info(trace, NULL, key); struct bpf_program *pair_prog; int prog_fd; @@ -3680,22 +4072,31 @@ static int ordered_events__deliver_event(struct ordered_events *oe, return __trace__deliver_event(trace, event->event); } -static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg) +static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg, + char **type) { - struct tep_format_field *field; struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel); + const struct tep_event *tp_format; + + if (!fmt) + return NULL; - if (evsel->tp_format == NULL || fmt == NULL) + tp_format = evsel__tp_format(evsel); + if (!tp_format) return NULL; - for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt) - if (strcmp(field->name, arg) == 0) + for (const struct tep_format_field *field = tp_format->format.fields; field; + field = field->next, ++fmt) { 
+ if (strcmp(field->name, arg) == 0) { + *type = field->type; return fmt; + } + } return NULL; } -static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel) +static int trace__expand_filter(struct trace *trace, struct evsel *evsel) { char *tok, *left = evsel->filter, *new_filter = evsel->filter; @@ -3728,14 +4129,14 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel struct syscall_arg_fmt *fmt; int left_size = tok - left, right_size = right_end - right; - char arg[128]; + char arg[128], *type; while (isspace(left[left_size - 1])) --left_size; scnprintf(arg, sizeof(arg), "%.*s", left_size, left); - fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg); + fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg, &type); if (fmt == NULL) { pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n", arg, evsel->name, evsel->filter); @@ -3748,6 +4149,9 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel if (fmt->strtoul) { u64 val; struct syscall_arg syscall_arg = { + .trace = trace, + .fmt = fmt, + .type_name = type, .parm = fmt->parm, }; @@ -3922,6 +4326,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) sizeof(__u32), BPF_ANY); } } + + if (trace->skel) + trace->filter_pids.map = trace->skel->maps.pids_filtered; #endif err = trace__set_filter_pids(trace); if (err < 0) @@ -3959,7 +4366,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) err = trace__expand_filters(trace, &evsel); if (err) goto out_delete_evlist; - err = evlist__apply_filters(evlist, &evsel); + err = evlist__apply_filters(evlist, &evsel, &trace->opts.target); if (err < 0) goto out_error_apply_filters; @@ -4312,34 +4719,38 @@ static unsigned long thread__nr_events(struct thread_trace *ttrace) return ttrace ? 
ttrace->nr_events : 0; } -DEFINE_RESORT_RB(threads, - (thread__nr_events(thread__priv(a->thread)) < - thread__nr_events(thread__priv(b->thread))), - struct thread *thread; -) +static int trace_nr_events_cmp(void *priv __maybe_unused, + const struct list_head *la, + const struct list_head *lb) { - entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread; + struct thread_list *a = list_entry(la, struct thread_list, list); + struct thread_list *b = list_entry(lb, struct thread_list, list); + unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread)); + unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread)); + + if (a_nr_events != b_nr_events) + return a_nr_events < b_nr_events ? -1 : 1; + + /* Identical number of threads, place smaller tids first. */ + return thread__tid(a->thread) < thread__tid(b->thread) + ? -1 + : (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0); } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { size_t printed = trace__fprintf_threads_header(fp); - struct rb_node *nd; - int i; - - for (i = 0; i < THREADS__TABLE_SIZE; i++) { - DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); + LIST_HEAD(threads); - if (threads == NULL) { - fprintf(fp, "%s", "Error sorting output by nr_events!\n"); - return 0; - } + if (machine__thread_list(trace->host, &threads) == 0) { + struct thread_list *pos; - resort_rb__for_each_entry(nd, threads) - printed += trace__fprintf_thread(fp, threads_entry->thread, trace); + list_sort(NULL, &threads, trace_nr_events_cmp); - resort_rb__delete(threads); + list_for_each_entry(pos, &threads, list) + printed += trace__fprintf_thread(fp, pos->thread, trace); } + thread_list__delete(&threads); return printed; } @@ -4436,47 +4847,62 @@ static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name) const struct syscall_fmt *scfmt = syscall_fmt__find(name); if (scfmt) { - int skip = 0; + const struct tep_event *tp_format = 
evsel__tp_format(evsel); - if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 || - strcmp(evsel->tp_format->format.fields->name, "nr") == 0) - ++skip; + if (tp_format) { + int skip = 0; - memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt)); + if (strcmp(tp_format->format.fields->name, "__syscall_nr") == 0 || + strcmp(tp_format->format.fields->name, "nr") == 0) + ++skip; + + memcpy(fmt + skip, scfmt->arg, + (tp_format->format.nr_fields - skip) * sizeof(*fmt)); + } } } } -static int evlist__set_syscall_tp_fields(struct evlist *evlist) +static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf) { struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->priv || !evsel->tp_format) + const struct tep_event *tp_format; + + if (evsel->priv) continue; - if (strcmp(evsel->tp_format->system, "syscalls")) { - evsel__init_tp_arg_scnprintf(evsel); + tp_format = evsel__tp_format(evsel); + if (!tp_format) + continue; + + if (strcmp(tp_format->system, "syscalls")) { + evsel__init_tp_arg_scnprintf(evsel, use_btf); continue; } if (evsel__init_syscall_tp(evsel)) return -1; - if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) { + if (!strncmp(tp_format->name, "sys_enter_", 10)) { struct syscall_tp *sc = __evsel__syscall_tp(evsel); if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64))) return -1; - evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1); - } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) { + evsel__set_syscall_arg_fmt(evsel, + tp_format->name + sizeof("sys_enter_") - 1); + } else if (!strncmp(tp_format->name, "sys_exit_", 9)) { struct syscall_tp *sc = __evsel__syscall_tp(evsel); - if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap)) + if (__tp_field__init_uint(&sc->ret, sizeof(u64), + sc->id.offset + sizeof(u64), + evsel->needs_swap)) return -1; - 
evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1); + evsel__set_syscall_arg_fmt(evsel, + tp_format->name + sizeof("sys_exit_") - 1); } } @@ -4777,6 +5203,8 @@ int cmd_trace(int argc, const char **argv) OPT_INTEGER('D', "delay", &trace.opts.target.initial_delay, "ms to wait before starting measurement after program " "start"), + OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer" + "to customized ones"), OPTS_EVSWITCH(&trace.evswitch), OPT_END() }; @@ -4864,6 +5292,11 @@ int cmd_trace(int argc, const char **argv) if (!trace.trace_syscalls) goto skip_augmentation; + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) { + pr_debug("Syscall augmentation fails with record, disabling augmentation"); + goto skip_augmentation; + } + trace.skel = augmented_raw_syscalls_bpf__open(); if (!trace.skel) { pr_debug("Failed to open augmented syscalls BPF skeleton"); @@ -4897,7 +5330,7 @@ int cmd_trace(int argc, const char **argv) goto out; } trace.syscalls.events.bpf_output = evlist__last(trace.evlist); - assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__")); + assert(evsel__name_is(trace.syscalls.events.bpf_output, "__augmented_syscalls__")); skip_augmentation: #endif err = -1; @@ -4929,11 +5362,16 @@ skip_augmentation: } if (trace.evlist->core.nr_entries > 0) { + bool use_btf = false; + evlist__set_default_evsel_handler(trace.evlist, trace__event_handler); - if (evlist__set_syscall_tp_fields(trace.evlist)) { + if (evlist__set_syscall_tp_fields(trace.evlist, &use_btf)) { perror("failed to set syscalls:* tracepoint fields"); goto out; } + + if (use_btf) + trace__load_vmlinux_btf(&trace); } if (trace.sort_events) { @@ -4954,7 +5392,7 @@ skip_augmentation: */ if (trace.syscalls.events.bpf_output) { evlist__for_each_entry(trace.evlist, evsel) { - bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0; + bool raw_syscalls_sys_exit = 
evsel__name_is(evsel, "raw_syscalls:sys_exit"); if (raw_syscalls_sys_exit) { trace.raw_augmented_syscalls = true; @@ -5029,6 +5467,10 @@ init_augmented_syscall_tp: if (trace.summary_only) trace.summary = trace.summary_only; + /* Keep exited threads, otherwise information might be lost for summary */ + if (trace.summary) + symbol_conf.keep_exited_threads = true; + if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { |