summaryrefslogtreecommitdiff
path: root/tools/perf/builtin-trace.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/builtin-trace.c')
-rw-r--r--tools/perf/builtin-trace.c772
1 files changed, 607 insertions, 165 deletions
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 109b8e64fe69..d466447ae928 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -19,6 +19,7 @@
#ifdef HAVE_LIBBPF_SUPPORT
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include <bpf/btf.h>
#ifdef HAVE_BPF_SKEL
#include "bpf_skel/augmented_raw_syscalls.skel.h"
#endif
@@ -64,6 +65,7 @@
#include "syscalltbl.h"
#include "rb_resort.h"
#include "../perf.h"
+#include "trace_augment.h"
#include <errno.h>
#include <inttypes.h>
@@ -74,6 +76,7 @@
#include <linux/err.h>
#include <linux/filter.h>
#include <linux/kernel.h>
+#include <linux/list_sort.h>
#include <linux/random.h>
#include <linux/stringify.h>
#include <linux/time64.h>
@@ -85,7 +88,7 @@
#include <perf/mmap.h>
#ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
#endif
#ifndef O_CLOEXEC
@@ -100,6 +103,12 @@
/*
* strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
+ *
+ * We have to explicitely mark the direction of the flow of data, if from the
+ * kernel to user space or the other way around, since the BPF collector we
+ * have so far copies only from user to kernel space, mark the arguments that
+ * go that direction, so that we don“t end up collecting the previous contents
+ * for syscall args that goes from kernel to user space.
*/
struct syscall_arg_fmt {
size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
@@ -108,7 +117,12 @@ struct syscall_arg_fmt {
void *parm;
const char *name;
u16 nr_entries; // for arrays
+ bool from_user;
bool show_zero;
+#ifdef HAVE_LIBBPF_SUPPORT
+ const struct btf_type *type;
+ int type_id; /* used in btf_dump */
+#endif
};
struct syscall_fmt {
@@ -139,6 +153,9 @@ struct trace {
#ifdef HAVE_BPF_SKEL
struct augmented_raw_syscalls_bpf *skel;
#endif
+#ifdef HAVE_LIBBPF_SUPPORT
+ struct btf *btf;
+#endif
struct record_opts opts;
struct evlist *evlist;
struct machine *host;
@@ -195,6 +212,7 @@ struct trace {
bool show_string_prefix;
bool force;
bool vfs_getname;
+ bool force_btf;
int trace_pgfaults;
char *perfconfig_events;
struct {
@@ -203,6 +221,20 @@ struct trace {
} oe;
};
+static void trace__load_vmlinux_btf(struct trace *trace __maybe_unused)
+{
+#ifdef HAVE_LIBBPF_SUPPORT
+ if (trace->btf != NULL)
+ return;
+
+ trace->btf = btf__load_vmlinux_btf();
+ if (verbose > 0) {
+ fprintf(trace->output, trace->btf ? "vmlinux BTF loaded\n" :
+ "Failed to load vmlinux BTF\n");
+ }
+#endif
+}
+
struct tp_field {
int offset;
union {
@@ -357,7 +389,12 @@ static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
}
if (et->fmt == NULL) {
- et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
+ const struct tep_event *tp_format = evsel__tp_format(evsel);
+
+ if (tp_format == NULL)
+ goto out_delete;
+
+ et->fmt = calloc(tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
if (et->fmt == NULL)
goto out_delete;
}
@@ -764,7 +801,7 @@ static const char *fcntl_cmds[] = {
static DEFINE_STRARRAY(fcntl_cmds, "F_");
static const char *fcntl_linux_specific_cmds[] = {
- "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
+ "SETLEASE", "GETLEASE", "NOTIFY", "DUPFD_QUERY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};
@@ -829,6 +866,15 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
#define SCA_FILENAME syscall_arg__scnprintf_filename
+// 'argname' is just documentational at this point, to remove the previous comment with that info
+#define SCA_FILENAME_FROM_USER(argname) \
+ { .scnprintf = SCA_FILENAME, \
+ .from_user = true, }
+
+static size_t syscall_arg__scnprintf_buf(char *bf, size_t size, struct syscall_arg *arg);
+
+#define SCA_BUF syscall_arg__scnprintf_buf
+
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -886,6 +932,177 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
+#ifdef HAVE_LIBBPF_SUPPORT
+static void syscall_arg_fmt__cache_btf_enum(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type)
+{
+ int id;
+
+ type = strstr(type, "enum ");
+ if (type == NULL)
+ return;
+
+ type += 5; // skip "enum " to get the enumeration name
+
+ id = btf__find_by_name(btf, type);
+ if (id < 0)
+ return;
+
+ arg_fmt->type = btf__type_by_id(btf, id);
+}
+
+static bool syscall_arg__strtoul_btf_enum(char *bf, size_t size, struct syscall_arg *arg, u64 *val)
+{
+ const struct btf_type *bt = arg->fmt->type;
+ struct btf *btf = arg->trace->btf;
+ struct btf_enum *be = btf_enum(bt);
+
+ for (int i = 0; i < btf_vlen(bt); ++i, ++be) {
+ const char *name = btf__name_by_offset(btf, be->name_off);
+ int max_len = max(size, strlen(name));
+
+ if (strncmp(name, bf, max_len) == 0) {
+ *val = be->val;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool syscall_arg__strtoul_btf_type(char *bf, size_t size, struct syscall_arg *arg, u64 *val)
+{
+ const struct btf_type *bt;
+ char *type = arg->type_name;
+ struct btf *btf;
+
+ trace__load_vmlinux_btf(arg->trace);
+
+ btf = arg->trace->btf;
+ if (btf == NULL)
+ return false;
+
+ if (arg->fmt->type == NULL) {
+ // See if this is an enum
+ syscall_arg_fmt__cache_btf_enum(arg->fmt, btf, type);
+ }
+
+ // Now let's see if we have a BTF type resolved
+ bt = arg->fmt->type;
+ if (bt == NULL)
+ return false;
+
+ // If it is an enum:
+ if (btf_is_enum(arg->fmt->type))
+ return syscall_arg__strtoul_btf_enum(bf, size, arg, val);
+
+ return false;
+}
+
+static size_t btf_enum_scnprintf(const struct btf_type *type, struct btf *btf, char *bf, size_t size, int val)
+{
+ struct btf_enum *be = btf_enum(type);
+ const int nr_entries = btf_vlen(type);
+
+ for (int i = 0; i < nr_entries; ++i, ++be) {
+ if (be->val == val) {
+ return scnprintf(bf, size, "%s",
+ btf__name_by_offset(btf, be->name_off));
+ }
+ }
+
+ return 0;
+}
+
+struct trace_btf_dump_snprintf_ctx {
+ char *bf;
+ size_t printed, size;
+};
+
+static void trace__btf_dump_snprintf(void *vctx, const char *fmt, va_list args)
+{
+ struct trace_btf_dump_snprintf_ctx *ctx = vctx;
+
+ ctx->printed += vscnprintf(ctx->bf + ctx->printed, ctx->size - ctx->printed, fmt, args);
+}
+
+static size_t btf_struct_scnprintf(const struct btf_type *type, struct btf *btf, char *bf, size_t size, struct syscall_arg *arg)
+{
+ struct trace_btf_dump_snprintf_ctx ctx = {
+ .bf = bf,
+ .size = size,
+ };
+ struct augmented_arg *augmented_arg = arg->augmented.args;
+ int type_id = arg->fmt->type_id, consumed;
+ struct btf_dump *btf_dump;
+
+ LIBBPF_OPTS(btf_dump_opts, dump_opts);
+ LIBBPF_OPTS(btf_dump_type_data_opts, dump_data_opts);
+
+ if (arg == NULL || arg->augmented.args == NULL)
+ return 0;
+
+ dump_data_opts.compact = true;
+ dump_data_opts.skip_names = !arg->trace->show_arg_names;
+
+ btf_dump = btf_dump__new(btf, trace__btf_dump_snprintf, &ctx, &dump_opts);
+ if (btf_dump == NULL)
+ return 0;
+
+ /* pretty print the struct data here */
+ if (btf_dump__dump_type_data(btf_dump, type_id, arg->augmented.args->value, type->size, &dump_data_opts) == 0)
+ return 0;
+
+ consumed = sizeof(*augmented_arg) + augmented_arg->size;
+ arg->augmented.args = ((void *)arg->augmented.args) + consumed;
+ arg->augmented.size -= consumed;
+
+ btf_dump__free(btf_dump);
+
+ return ctx.printed;
+}
+
+static size_t trace__btf_scnprintf(struct trace *trace, struct syscall_arg *arg, char *bf,
+ size_t size, int val, char *type)
+{
+ struct syscall_arg_fmt *arg_fmt = arg->fmt;
+
+ if (trace->btf == NULL)
+ return 0;
+
+ if (arg_fmt->type == NULL) {
+ // Check if this is an enum and if we have the BTF type for it.
+ syscall_arg_fmt__cache_btf_enum(arg_fmt, trace->btf, type);
+ }
+
+ // Did we manage to find a BTF type for the syscall/tracepoint argument?
+ if (arg_fmt->type == NULL)
+ return 0;
+
+ if (btf_is_enum(arg_fmt->type))
+ return btf_enum_scnprintf(arg_fmt->type, trace->btf, bf, size, val);
+ else if (btf_is_struct(arg_fmt->type) || btf_is_union(arg_fmt->type))
+ return btf_struct_scnprintf(arg_fmt->type, trace->btf, bf, size, arg);
+
+ return 0;
+}
+
+#else // HAVE_LIBBPF_SUPPORT
+static size_t trace__btf_scnprintf(struct trace *trace __maybe_unused, struct syscall_arg *arg __maybe_unused,
+ char *bf __maybe_unused, size_t size __maybe_unused, int val __maybe_unused,
+ char *type __maybe_unused)
+{
+ return 0;
+}
+
+static bool syscall_arg__strtoul_btf_type(char *bf __maybe_unused, size_t size __maybe_unused,
+ struct syscall_arg *arg __maybe_unused, u64 *val __maybe_unused)
+{
+ return false;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+
+#define STUL_BTF_TYPE syscall_arg__strtoul_btf_type
+
#define STRARRAY(name, array) \
{ .scnprintf = SCA_STRARRAY, \
.strtoul = STUL_STRARRAY, \
@@ -896,7 +1113,6 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
.strtoul = STUL_STRARRAY_FLAGS, \
.parm = &strarray__##array, }
-#include "trace/beauty/arch_errno_names.c"
#include "trace/beauty/eventfd.c"
#include "trace/beauty/futex_op.c"
#include "trace/beauty/futex_val3.c"
@@ -920,16 +1136,17 @@ static const struct syscall_fmt syscall_fmts[] = {
[1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, },
{ .name = "bind",
.arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
- [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ },
+ [1] = SCA_SOCKADDR_FROM_USER(umyaddr),
[2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
{ .name = "bpf",
- .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
+ .arg = { [0] = STRARRAY(cmd, bpf_cmd),
+ [1] = { .from_user = true /* attr */, }, } },
{ .name = "brk", .hexret = true,
.arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
{ .name = "clock_gettime",
.arg = { [0] = STRARRAY(clk_id, clockid), }, },
{ .name = "clock_nanosleep",
- .arg = { [2] = { .scnprintf = SCA_TIMESPEC, /* rqtp */ }, }, },
+ .arg = { [2] = SCA_TIMESPEC_FROM_USER(req), }, },
{ .name = "clone", .errpid = true, .nr_args = 5,
.arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
[1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
@@ -940,12 +1157,21 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
{ .name = "connect",
.arg = { [0] = { .scnprintf = SCA_INT, /* fd */ },
- [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ },
+ [1] = SCA_SOCKADDR_FROM_USER(servaddr),
[2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, },
{ .name = "epoll_ctl",
.arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
{ .name = "eventfd2",
.arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
+ { .name = "faccessat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
+ [2] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
+ { .name = "faccessat2",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
+ [2] = { .scnprintf = SCA_ACCMODE, /* mode */ },
+ [3] = { .scnprintf = SCA_FACCESSAT2_FLAGS, /* flags */ }, }, },
{ .name = "fchmodat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
{ .name = "fchownat",
@@ -965,10 +1191,9 @@ static const struct syscall_fmt syscall_fmts[] = {
[2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
{ .name = "fspick",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
- [1] = { .scnprintf = SCA_FILENAME, /* path */ },
+ [1] = SCA_FILENAME_FROM_USER(path),
[2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
{ .name = "fstat", .alias = "newfstat", },
- { .name = "fstatat", .alias = "newfstatat", },
{ .name = "futex",
.arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
[5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
@@ -1024,32 +1249,36 @@ static const struct syscall_fmt syscall_fmts[] = {
#if defined(__s390x__)
.alias = "old_mmap",
#endif
- .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
+ .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ },
[3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */
.strtoul = STUL_STRARRAY_FLAGS,
.parm = &strarray__mmap_flags, },
[5] = { .scnprintf = SCA_HEX, /* offset */ }, }, },
{ .name = "mount",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(devname),
[3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
.mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
{ .name = "move_mount",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ },
- [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
[2] = { .scnprintf = SCA_FDAT, /* to_dfd */ },
- [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ },
+ [3] = SCA_FILENAME_FROM_USER(pathname),
[4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
{ .name = "mprotect",
.arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
- [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
+ [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ }, }, },
{ .name = "mq_unlink",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(u_name), }, },
{ .name = "mremap", .hexret = true,
.arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
{ .name = "name_to_handle_at",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
- { .name = "newfstatat",
- .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ { .name = "nanosleep",
+ .arg = { [0] = SCA_TIMESPEC_FROM_USER(req), }, },
+ { .name = "newfstatat", .alias = "fstatat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
+ [3] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ }, }, },
{ .name = "open",
.arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
{ .name = "open_by_handle_at",
@@ -1059,7 +1288,7 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
[2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
{ .name = "perf_event_open",
- .arg = { [0] = { .scnprintf = SCA_PERF_ATTR, /* attr */ },
+ .arg = { [0] = SCA_PERF_ATTR_FROM_USER(attr),
[2] = { .scnprintf = SCA_INT, /* cpu */ },
[3] = { .scnprintf = SCA_FD, /* group_fd */ },
[4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
@@ -1071,7 +1300,7 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
{ .name = "pkey_mprotect",
.arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
- [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
+ [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ },
[3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
{ .name = "poll", .timeout = true, },
{ .name = "ppoll", .timeout = true, },
@@ -1084,7 +1313,8 @@ static const struct syscall_fmt syscall_fmts[] = {
{ .name = "pread", .alias = "pread64", },
{ .name = "preadv", .alias = "pread", },
{ .name = "prlimit64",
- .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
+ .arg = { [1] = STRARRAY(resource, rlimit_resources),
+ [2] = { .from_user = true /* new_rlim */, }, }, },
{ .name = "pwrite", .alias = "pwrite64", },
{ .name = "readlinkat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
@@ -1101,6 +1331,8 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ },
[2] = { .scnprintf = SCA_FDAT, /* newdirfd */ },
[4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, },
+ { .name = "rseq", .errpid = true,
+ .arg = { [0] = { .from_user = true /* rseq */, }, }, },
{ .name = "rt_sigaction",
.arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
{ .name = "rt_sigprocmask",
@@ -1122,12 +1354,15 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
{ .name = "sendto",
.arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
- [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
+ [4] = SCA_SOCKADDR_FROM_USER(addr), }, },
+ { .name = "set_robust_list", .errpid = true,
+ .arg = { [0] = { .from_user = true /* head */, }, }, },
{ .name = "set_tid_address", .errpid = true, },
{ .name = "setitimer",
.arg = { [0] = STRARRAY(which, itimers), }, },
{ .name = "setrlimit",
- .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+ .arg = { [0] = STRARRAY(resource, rlimit_resources),
+ [1] = { .from_user = true /* rlim */, }, }, },
{ .name = "setsockopt",
.arg = { [1] = STRARRAY(level, socket_level), }, },
{ .name = "socket",
@@ -1141,12 +1376,12 @@ static const struct syscall_fmt syscall_fmts[] = {
{ .name = "stat", .alias = "newstat", },
{ .name = "statx",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
- [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
+ [2] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ } ,
[3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
{ .name = "swapoff",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(specialfile), }, },
{ .name = "swapon",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(specialfile), }, },
{ .name = "symlinkat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
{ .name = "sync_file_range",
@@ -1156,16 +1391,20 @@ static const struct syscall_fmt syscall_fmts[] = {
{ .name = "tkill",
.arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
{ .name = "umount2", .alias = "umount",
- .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
+ .arg = { [0] = SCA_FILENAME_FROM_USER(name), }, },
{ .name = "uname", .alias = "newuname", },
{ .name = "unlinkat",
- .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
+ [1] = SCA_FILENAME_FROM_USER(pathname),
+ [2] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ }, }, },
{ .name = "utimensat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
{ .name = "wait4", .errpid = true,
.arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
{ .name = "waitid", .errpid = true,
.arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
+ { .name = "write",
+ .arg = { [1] = { .scnprintf = SCA_BUF /* buf */, .from_user = true, }, }, },
};
static int syscall_fmt__cmp(const void *name, const void *fmtp)
@@ -1223,6 +1462,7 @@ struct syscall {
bool is_exit;
bool is_open;
bool nonexistent;
+ bool use_btf;
struct tep_format_field *args;
const char *name;
const struct syscall_fmt *fmt;
@@ -1536,6 +1776,32 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
return 0;
}
+#define MAX_CONTROL_CHAR 31
+#define MAX_ASCII 127
+
+static size_t syscall_arg__scnprintf_buf(char *bf, size_t size, struct syscall_arg *arg)
+{
+ struct augmented_arg *augmented_arg = arg->augmented.args;
+ unsigned char *orig = (unsigned char *)augmented_arg->value;
+ size_t printed = 0;
+ int consumed;
+
+ if (augmented_arg == NULL)
+ return 0;
+
+ for (int j = 0; j < augmented_arg->size; ++j) {
+ bool control_char = orig[j] <= MAX_CONTROL_CHAR || orig[j] >= MAX_ASCII;
+ /* print control characters (0~31 and 127), and non-ascii characters in \(digits) */
+ printed += scnprintf(bf + printed, size - printed, control_char ? "\\%d" : "%c", (int)orig[j]);
+ }
+
+ consumed = sizeof(*augmented_arg) + augmented_arg->size;
+ arg->augmented.args = ((void *)arg->augmented.args) + consumed;
+ arg->augmented.size -= consumed;
+
+ return printed;
+}
+
static bool trace__filter_duration(struct trace *trace, double t)
{
return t < (trace->duration_filter * NSEC_PER_MSEC);
@@ -1611,7 +1877,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
switch (event->header.type) {
case PERF_RECORD_LOST:
color_fprintf(trace->output, PERF_COLOR_RED,
- "LOST %" PRIu64 " events!\n", event->lost.lost);
+ "LOST %" PRIu64 " events!\n", (u64)event->lost.lost);
ret = machine__process_lost_event(machine, event, sample);
break;
default:
@@ -1622,7 +1888,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
return ret;
}
-static int trace__tool_process(struct perf_tool *tool,
+static int trace__tool_process(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
@@ -1729,7 +1995,8 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n
}
static struct tep_format_field *
-syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
+syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
+ bool *use_btf)
{
struct tep_format_field *last_field = NULL;
int len;
@@ -1742,11 +2009,15 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
len = strlen(field->name);
+ // As far as heuristics (or intention) goes this seems to hold true, and makes sense!
+ if ((field->flags & TEP_FIELD_IS_POINTER) && strstarts(field->type, "const "))
+ arg->from_user = true;
+
if (strcmp(field->type, "const char *") == 0 &&
((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
- strstr(field->name, "path") != NULL))
+ strstr(field->name, "path") != NULL)) {
arg->scnprintf = SCA_FILENAME;
- else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
+ } else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
arg->scnprintf = SCA_PTR;
else if (strcmp(field->type, "pid_t") == 0)
arg->scnprintf = SCA_PID;
@@ -1767,6 +2038,9 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
* 7 unsigned long
*/
arg->scnprintf = SCA_FD;
+ } else if (strstr(field->type, "enum") && use_btf != NULL) {
+ *use_btf = true;
+ arg->strtoul = STUL_BTF_TYPE;
} else {
const struct syscall_arg_fmt *fmt =
syscall_arg_fmt__find_by_name(field->name);
@@ -1783,7 +2057,8 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
static int syscall__set_arg_fmts(struct syscall *sc)
{
- struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
+ struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args,
+ &sc->use_btf);
if (last_field)
sc->args_size = last_field->offset + last_field->size;
@@ -1796,31 +2071,13 @@ static int trace__read_syscall_info(struct trace *trace, int id)
char tp_name[128];
struct syscall *sc;
const char *name = syscalltbl__name(trace->sctbl, id);
+ int err;
-#ifdef HAVE_SYSCALL_TABLE_SUPPORT
if (trace->syscalls.table == NULL) {
trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
if (trace->syscalls.table == NULL)
return -ENOMEM;
}
-#else
- if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
- // When using libaudit we don't know beforehand what is the max syscall id
- struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
-
- if (table == NULL)
- return -ENOMEM;
-
- // Need to memset from offset 0 and +1 members if brand new
- if (trace->syscalls.table == NULL)
- memset(table, 0, (id + 1) * sizeof(*sc));
- else
- memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
-
- trace->syscalls.table = table;
- trace->sctbl->syscalls.max_id = id;
- }
-#endif
sc = trace->syscalls.table + id;
if (sc->nonexistent)
return -EEXIST;
@@ -1850,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
return PTR_ERR(sc->tp_format);
}
+ /*
+ * The tracepoint format contains __syscall_nr field, so it's one more
+ * than the actual number of syscall arguments.
+ */
if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
- RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
+ RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1))
return -ENOMEM;
sc->args = sc->tp_format->format.fields;
@@ -1868,16 +2129,26 @@ static int trace__read_syscall_info(struct trace *trace, int id)
sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
- return syscall__set_arg_fmts(sc);
+ err = syscall__set_arg_fmts(sc);
+
+ /* after calling syscall__set_arg_fmts() we'll know whether use_btf is true */
+ if (sc->use_btf)
+ trace__load_vmlinux_btf(trace);
+
+ return err;
}
-static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
+static int evsel__init_tp_arg_scnprintf(struct evsel *evsel, bool *use_btf)
{
struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
if (fmt != NULL) {
- syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields);
- return 0;
+ const struct tep_event *tp_format = evsel__tp_format(evsel);
+
+ if (tp_format) {
+ syscall_arg_fmt__init_array(fmt, tp_format->format.fields, use_btf);
+ return 0;
+ }
}
return -ENOMEM;
@@ -2035,7 +2306,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
unsigned char *args, void *augmented_args, int augmented_args_size,
struct trace *trace, struct thread *thread)
{
- size_t printed = 0;
+ size_t printed = 0, btf_printed;
unsigned long val;
u8 bit = 1;
struct syscall_arg arg = {
@@ -2051,6 +2322,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
.show_string_prefix = trace->show_string_prefix,
};
struct thread_trace *ttrace = thread__priv(thread);
+ void *default_scnprintf;
/*
* Things like fcntl will set this in its 'cmd' formatter to pick the
@@ -2076,17 +2348,15 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);
/*
- * Suppress this argument if its value is zero and
- * and we don't have a string associated in an
- * strarray for it.
- */
- if (val == 0 &&
- !trace->show_zeros &&
- !(sc->arg_fmt &&
- (sc->arg_fmt[arg.idx].show_zero ||
- sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
- sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
- sc->arg_fmt[arg.idx].parm))
+ * Suppress this argument if its value is zero and show_zero
+ * property isn't set.
+ *
+ * If it has a BTF type, then override the zero suppression knob
+ * as the common case is for zero in an enum to have an associated entry.
+ */
+ if (val == 0 && !trace->show_zeros &&
+ !(sc->arg_fmt && sc->arg_fmt[arg.idx].show_zero) &&
+ !(sc->arg_fmt && sc->arg_fmt[arg.idx].strtoul == STUL_BTF_TYPE))
continue;
printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
@@ -2094,6 +2364,17 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
if (trace->show_arg_names)
printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
+ default_scnprintf = sc->arg_fmt[arg.idx].scnprintf;
+
+ if (trace->force_btf || default_scnprintf == NULL || default_scnprintf == SCA_PTR) {
+ btf_printed = trace__btf_scnprintf(trace, &arg, bf + printed,
+ size - printed, val, field->type);
+ if (btf_printed) {
+ printed += btf_printed;
+ continue;
+ }
+ }
+
printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
bf + printed, size - printed, &arg, val);
}
@@ -2151,18 +2432,7 @@ static struct syscall *trace__syscall_info(struct trace *trace,
err = -EINVAL;
-#ifdef HAVE_SYSCALL_TABLE_SUPPORT
if (id > trace->sctbl->syscalls.max_id) {
-#else
- if (id >= trace->sctbl->syscalls.max_id) {
- /*
- * With libaudit we don't know beforehand what is the max_id,
- * so we let trace__read_syscall_info() figure that out as we
- * go on reading syscalls.
- */
- err = trace__read_syscall_info(trace, id);
- if (err)
-#endif
goto out_cant_read;
}
@@ -2293,7 +2563,6 @@ static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
{
- void *augmented_args = NULL;
/*
* For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
* and there we get all 6 syscall args plus the tracepoint common fields
@@ -2311,10 +2580,24 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
int args_size = raw_augmented_args_size ?: sc->args_size;
*augmented_args_size = sample->raw_size - args_size;
- if (*augmented_args_size > 0)
- augmented_args = sample->raw_data + args_size;
+ if (*augmented_args_size > 0) {
+ static uintptr_t argbuf[1024]; /* assuming single-threaded */
+
+ if ((size_t)(*augmented_args_size) > sizeof(argbuf))
+ return NULL;
- return augmented_args;
+ /*
+ * The perf ring-buffer is 8-byte aligned but sample->raw_data
+ * is not because it's preceded by u32 size. Later, beautifier
+ * will use the augmented args with stricter alignments like in
+ * some struct. To make sure it's aligned, let's copy the args
+ * into a static buffer as it's single-threaded for now.
+ */
+ memcpy(argbuf, sample->raw_data + args_size, *augmented_args_size);
+
+ return argbuf;
+ }
+ return NULL;
}
static void syscall__exit(struct syscall *sc)
@@ -2414,6 +2697,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
char msg[1024];
void *args, *augmented_args = NULL;
int augmented_args_size;
+ size_t printed = 0;
if (sc == NULL)
return -1;
@@ -2429,8 +2713,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
args = perf_evsel__sc_tp_ptr(evsel, args, sample);
augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
- syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
- fprintf(trace->output, "%s", msg);
+ printed += syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
+ fprintf(trace->output, "%.*s", (int)printed, msg);
err = 0;
out_put:
thread__put(thread);
@@ -2738,9 +3022,10 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
{
char bf[2048];
size_t size = sizeof(bf);
- struct tep_format_field *field = evsel->tp_format->format.fields;
+ const struct tep_event *tp_format = evsel__tp_format(evsel);
+ struct tep_format_field *field = tp_format ? tp_format->format.fields : NULL;
struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
- size_t printed = 0;
+ size_t printed = 0, btf_printed;
unsigned long val;
u8 bit = 1;
struct syscall_arg syscall_arg = {
@@ -2781,17 +3066,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
*/
val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
- /*
- * Suppress this argument if its value is zero and
- * we don't have a string associated in an
- * strarray for it.
- */
- if (val == 0 &&
- !trace->show_zeros &&
- !((arg->show_zero ||
- arg->scnprintf == SCA_STRARRAY ||
- arg->scnprintf == SCA_STRARRAYS) &&
- arg->parm))
+ /* Suppress this argument if its value is zero and show_zero property isn't set. */
+ if (val == 0 && !trace->show_zeros && !arg->show_zero && arg->strtoul != STUL_BTF_TYPE)
continue;
printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
@@ -2799,10 +3075,16 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
if (trace->show_arg_names)
printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
+ btf_printed = trace__btf_scnprintf(trace, &syscall_arg, bf + printed, size - printed, val, field->type);
+ if (btf_printed) {
+ printed += btf_printed;
+ continue;
+ }
+
printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
}
- return printed + fprintf(trace->output, "%s", bf);
+ return printed + fprintf(trace->output, "%.*s", (int)printed, bf);
}
static int trace__event_handler(struct trace *trace, struct evsel *evsel,
@@ -2811,13 +3093,8 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
{
struct thread *thread;
int callchain_ret = 0;
- /*
- * Check if we called perf_evsel__disable(evsel) due to, for instance,
- * this event's max_events having been hit and this is an entry coming
- * from the ring buffer that we should discard, since the max events
- * have already been considered/printed.
- */
- if (evsel->disabled)
+
+ if (evsel->nr_events_printed >= evsel->max_events)
return 0;
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
@@ -2864,11 +3141,13 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
if (evsel__is_bpf_output(evsel)) {
bpf_output__fprintf(trace, sample);
- } else if (evsel->tp_format) {
- if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
- trace__fprintf_sys_enter(trace, evsel, sample)) {
+ } else {
+ const struct tep_event *tp_format = evsel__tp_format(evsel);
+
+ if (tp_format && (strncmp(tp_format->name, "sys_enter_", 10) ||
+ trace__fprintf_sys_enter(trace, evsel, sample))) {
if (trace->libtraceevent_print) {
- event_format__fprintf(evsel->tp_format, sample->cpu,
+ event_format__fprintf(tp_format, sample->cpu,
sample->raw_data, sample->raw_size,
trace->output);
} else {
@@ -2902,7 +3181,7 @@ static void print_location(FILE *f, struct perf_sample *sample,
{
if ((verbose > 0 || print_dso) && al->map)
- fprintf(f, "%s@", map__dso(al->map)->long_name);
+ fprintf(f, "%s@", dso__long_name(map__dso(al->map)));
if ((verbose > 0 || print_sym) && al->sym)
fprintf(f, "%s+0x%" PRIx64, al->sym->name,
@@ -3009,7 +3288,7 @@ static void trace__set_base_time(struct trace *trace,
trace->base_time = sample->time;
}
-static int trace__process_sample(struct perf_tool *tool,
+static int trace__process_sample(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct evsel *evsel,
@@ -3276,6 +3555,23 @@ out_enomem:
}
#ifdef HAVE_BPF_SKEL
+static int syscall_arg_fmt__cache_btf_struct(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type)
+{
+ int id;
+
+ if (arg_fmt->type != NULL)
+ return -1;
+
+ id = btf__find_by_name(btf, type);
+ if (id < 0)
+ return -1;
+
+ arg_fmt->type = btf__type_by_id(btf, id);
+ arg_fmt->type_id = id;
+
+ return 0;
+}
+
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
{
struct bpf_program *pos, *prog = NULL;
@@ -3351,11 +3647,94 @@ static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
}
+static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array)
+{
+ struct tep_format_field *field;
+ struct syscall *sc = trace__syscall_info(trace, NULL, key);
+ const struct btf_type *bt;
+ char *struct_offset, *tmp, name[32];
+ bool can_augment = false;
+ int i, cnt;
+
+ if (sc == NULL)
+ return -1;
+
+ trace__load_vmlinux_btf(trace);
+ if (trace->btf == NULL)
+ return -1;
+
+ for (i = 0, field = sc->args; field; ++i, field = field->next) {
+ // XXX We're only collecting pointer payloads _from_ user space
+ if (!sc->arg_fmt[i].from_user)
+ continue;
+
+ struct_offset = strstr(field->type, "struct ");
+ if (struct_offset == NULL)
+ struct_offset = strstr(field->type, "union ");
+ else
+ struct_offset++; // "union" is shorter
+
+ if (field->flags & TEP_FIELD_IS_POINTER && struct_offset) { /* struct or union (think BPF's attr arg) */
+ struct_offset += 6;
+
+ /* for 'struct foo *', we only want 'foo' */
+ for (tmp = struct_offset, cnt = 0; *tmp != ' ' && *tmp != '\0'; ++tmp, ++cnt) {
+ }
+
+ strncpy(name, struct_offset, cnt);
+ name[cnt] = '\0';
+
+ /* cache struct's btf_type and type_id */
+ if (syscall_arg_fmt__cache_btf_struct(&sc->arg_fmt[i], trace->btf, name))
+ continue;
+
+ bt = sc->arg_fmt[i].type;
+ beauty_array[i] = bt->size;
+ can_augment = true;
+ } else if (field->flags & TEP_FIELD_IS_POINTER && /* string */
+ strcmp(field->type, "const char *") == 0 &&
+ (strstr(field->name, "name") ||
+ strstr(field->name, "path") ||
+ strstr(field->name, "file") ||
+ strstr(field->name, "root") ||
+ strstr(field->name, "key") ||
+ strstr(field->name, "special") ||
+ strstr(field->name, "type") ||
+ strstr(field->name, "description"))) {
+ beauty_array[i] = 1;
+ can_augment = true;
+ } else if (field->flags & TEP_FIELD_IS_POINTER && /* buffer */
+ strstr(field->type, "char *") &&
+ (strstr(field->name, "buf") ||
+ strstr(field->name, "val") ||
+ strstr(field->name, "msg"))) {
+ int j;
+ struct tep_format_field *field_tmp;
+
+ /* find the size of the buffer that appears in pairs with buf */
+ for (j = 0, field_tmp = sc->args; field_tmp; ++j, field_tmp = field_tmp->next) {
+ if (!(field_tmp->flags & TEP_FIELD_IS_POINTER) && /* only integers */
+ (strstr(field_tmp->name, "count") ||
+ strstr(field_tmp->name, "siz") || /* size, bufsiz */
+ (strstr(field_tmp->name, "len") && strcmp(field_tmp->name, "filename")))) {
+ /* filename's got 'len' in it, we don't want that */
+ beauty_array[i] = -(j + 1);
+ can_augment = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (can_augment)
+ return 0;
+
+ return -1;
+}
+
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
struct tep_format_field *field, *candidate_field;
- int id;
-
/*
* We're only interested in syscalls that have a pointer:
*/
@@ -3367,7 +3746,8 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
return NULL;
try_to_find_pair:
- for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
+ for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
+ int id = syscalltbl__id_at_idx(trace->sctbl, i);
struct syscall *pair = trace__syscall_info(trace, NULL, id);
struct bpf_program *pair_prog;
bool is_candidate = false;
@@ -3456,10 +3836,12 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
{
int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter);
int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit);
- int err = 0, key;
+ int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter);
+ int err = 0;
+ unsigned int beauty_array[6];
- for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
- int prog_fd;
+ for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
+ int prog_fd, key = syscalltbl__id_at_idx(trace->sctbl, i);
if (!trace__syscall_enabled(trace, key))
continue;
@@ -3475,6 +3857,15 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
if (err)
break;
+
+ /* use beauty_map to tell BPF how many bytes to collect, set beauty_map's value here */
+ memset(beauty_array, 0, sizeof(beauty_array));
+ err = trace__bpf_sys_enter_beauty_map(trace, key, (unsigned int *)beauty_array);
+ if (err)
+ continue;
+ err = bpf_map_update_elem(beauty_map_fd, &key, beauty_array, BPF_ANY);
+ if (err)
+ break;
}
/*
@@ -3505,7 +3896,8 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
* first and second arg (this one on the raw_syscalls:sys_exit prog
* array tail call, then that one will be used.
*/
- for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
+ for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
+ int key = syscalltbl__id_at_idx(trace->sctbl, i);
struct syscall *sc = trace__syscall_info(trace, NULL, key);
struct bpf_program *pair_prog;
int prog_fd;
@@ -3680,22 +4072,31 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
return __trace__deliver_event(trace, event->event);
}
-static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
+static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg,
+ char **type)
{
- struct tep_format_field *field;
struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
+ const struct tep_event *tp_format;
+
+ if (!fmt)
+ return NULL;
- if (evsel->tp_format == NULL || fmt == NULL)
+ tp_format = evsel__tp_format(evsel);
+ if (!tp_format)
return NULL;
- for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
- if (strcmp(field->name, arg) == 0)
+ for (const struct tep_format_field *field = tp_format->format.fields; field;
+ field = field->next, ++fmt) {
+ if (strcmp(field->name, arg) == 0) {
+ *type = field->type;
return fmt;
+ }
+ }
return NULL;
}
-static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
+static int trace__expand_filter(struct trace *trace, struct evsel *evsel)
{
char *tok, *left = evsel->filter, *new_filter = evsel->filter;
@@ -3728,14 +4129,14 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
struct syscall_arg_fmt *fmt;
int left_size = tok - left,
right_size = right_end - right;
- char arg[128];
+ char arg[128], *type;
while (isspace(left[left_size - 1]))
--left_size;
scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
- fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
+ fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg, &type);
if (fmt == NULL) {
pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
arg, evsel->name, evsel->filter);
@@ -3748,6 +4149,9 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
if (fmt->strtoul) {
u64 val;
struct syscall_arg syscall_arg = {
+ .trace = trace,
+ .fmt = fmt,
+ .type_name = type,
.parm = fmt->parm,
};
@@ -3922,6 +4326,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
sizeof(__u32), BPF_ANY);
}
}
+
+ if (trace->skel)
+ trace->filter_pids.map = trace->skel->maps.pids_filtered;
#endif
err = trace__set_filter_pids(trace);
if (err < 0)
@@ -3959,7 +4366,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
err = trace__expand_filters(trace, &evsel);
if (err)
goto out_delete_evlist;
- err = evlist__apply_filters(evlist, &evsel);
+ err = evlist__apply_filters(evlist, &evsel, &trace->opts.target);
if (err < 0)
goto out_error_apply_filters;
@@ -4312,34 +4719,38 @@ static unsigned long thread__nr_events(struct thread_trace *ttrace)
return ttrace ? ttrace->nr_events : 0;
}
-DEFINE_RESORT_RB(threads,
- (thread__nr_events(thread__priv(a->thread)) <
- thread__nr_events(thread__priv(b->thread))),
- struct thread *thread;
-)
+static int trace_nr_events_cmp(void *priv __maybe_unused,
+ const struct list_head *la,
+ const struct list_head *lb)
{
- entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
+ struct thread_list *a = list_entry(la, struct thread_list, list);
+ struct thread_list *b = list_entry(lb, struct thread_list, list);
+ unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
+ unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));
+
+ if (a_nr_events != b_nr_events)
+ return a_nr_events < b_nr_events ? -1 : 1;
+
+ /* Identical number of threads, place smaller tids first. */
+ return thread__tid(a->thread) < thread__tid(b->thread)
+ ? -1
+ : (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);
}
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
size_t printed = trace__fprintf_threads_header(fp);
- struct rb_node *nd;
- int i;
-
- for (i = 0; i < THREADS__TABLE_SIZE; i++) {
- DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
+ LIST_HEAD(threads);
- if (threads == NULL) {
- fprintf(fp, "%s", "Error sorting output by nr_events!\n");
- return 0;
- }
+ if (machine__thread_list(trace->host, &threads) == 0) {
+ struct thread_list *pos;
- resort_rb__for_each_entry(nd, threads)
- printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+ list_sort(NULL, &threads, trace_nr_events_cmp);
- resort_rb__delete(threads);
+ list_for_each_entry(pos, &threads, list)
+ printed += trace__fprintf_thread(fp, pos->thread, trace);
}
+ thread_list__delete(&threads);
return printed;
}
@@ -4436,47 +4847,62 @@ static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
const struct syscall_fmt *scfmt = syscall_fmt__find(name);
if (scfmt) {
- int skip = 0;
+ const struct tep_event *tp_format = evsel__tp_format(evsel);
- if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
- strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
- ++skip;
+ if (tp_format) {
+ int skip = 0;
- memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
+ if (strcmp(tp_format->format.fields->name, "__syscall_nr") == 0 ||
+ strcmp(tp_format->format.fields->name, "nr") == 0)
+ ++skip;
+
+ memcpy(fmt + skip, scfmt->arg,
+ (tp_format->format.nr_fields - skip) * sizeof(*fmt));
+ }
}
}
}
-static int evlist__set_syscall_tp_fields(struct evlist *evlist)
+static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf)
{
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
- if (evsel->priv || !evsel->tp_format)
+ const struct tep_event *tp_format;
+
+ if (evsel->priv)
continue;
- if (strcmp(evsel->tp_format->system, "syscalls")) {
- evsel__init_tp_arg_scnprintf(evsel);
+ tp_format = evsel__tp_format(evsel);
+ if (!tp_format)
+ continue;
+
+ if (strcmp(tp_format->system, "syscalls")) {
+ evsel__init_tp_arg_scnprintf(evsel, use_btf);
continue;
}
if (evsel__init_syscall_tp(evsel))
return -1;
- if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
+ if (!strncmp(tp_format->name, "sys_enter_", 10)) {
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
return -1;
- evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
- } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
+ evsel__set_syscall_arg_fmt(evsel,
+ tp_format->name + sizeof("sys_enter_") - 1);
+ } else if (!strncmp(tp_format->name, "sys_exit_", 9)) {
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
- if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
+ if (__tp_field__init_uint(&sc->ret, sizeof(u64),
+ sc->id.offset + sizeof(u64),
+ evsel->needs_swap))
return -1;
- evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
+ evsel__set_syscall_arg_fmt(evsel,
+ tp_format->name + sizeof("sys_exit_") - 1);
}
}
@@ -4777,6 +5203,8 @@ int cmd_trace(int argc, const char **argv)
OPT_INTEGER('D', "delay", &trace.opts.target.initial_delay,
"ms to wait before starting measurement after program "
"start"),
+ OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer"
+ "to customized ones"),
OPTS_EVSWITCH(&trace.evswitch),
OPT_END()
};
@@ -4864,6 +5292,11 @@ int cmd_trace(int argc, const char **argv)
if (!trace.trace_syscalls)
goto skip_augmentation;
+ if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) {
+ pr_debug("Syscall augmentation fails with record, disabling augmentation");
+ goto skip_augmentation;
+ }
+
trace.skel = augmented_raw_syscalls_bpf__open();
if (!trace.skel) {
pr_debug("Failed to open augmented syscalls BPF skeleton");
@@ -4897,7 +5330,7 @@ int cmd_trace(int argc, const char **argv)
goto out;
}
trace.syscalls.events.bpf_output = evlist__last(trace.evlist);
- assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__"));
+ assert(evsel__name_is(trace.syscalls.events.bpf_output, "__augmented_syscalls__"));
skip_augmentation:
#endif
err = -1;
@@ -4929,11 +5362,16 @@ skip_augmentation:
}
if (trace.evlist->core.nr_entries > 0) {
+ bool use_btf = false;
+
evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
- if (evlist__set_syscall_tp_fields(trace.evlist)) {
+ if (evlist__set_syscall_tp_fields(trace.evlist, &use_btf)) {
perror("failed to set syscalls:* tracepoint fields");
goto out;
}
+
+ if (use_btf)
+ trace__load_vmlinux_btf(&trace);
}
if (trace.sort_events) {
@@ -4954,7 +5392,7 @@ skip_augmentation:
*/
if (trace.syscalls.events.bpf_output) {
evlist__for_each_entry(trace.evlist, evsel) {
- bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+ bool raw_syscalls_sys_exit = evsel__name_is(evsel, "raw_syscalls:sys_exit");
if (raw_syscalls_sys_exit) {
trace.raw_augmented_syscalls = true;
@@ -5029,6 +5467,10 @@ init_augmented_syscall_tp:
if (trace.summary_only)
trace.summary = trace.summary_only;
+ /* Keep exited threads, otherwise information might be lost for summary */
+ if (trace.summary)
+ symbol_conf.keep_exited_threads = true;
+
if (output_name != NULL) {
err = trace__open_output(&trace, output_name);
if (err < 0) {