diff options
Diffstat (limited to 'tools/perf')
295 files changed, 18016 insertions, 4565 deletions
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 3d1bb802dbf4..3db3db9278be 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -30,3 +30,4 @@ config.mak.autogen *.pyo .config-detected util/intel-pt-decoder/inat-tables.c +arch/*/include/generated/ diff --git a/tools/perf/Documentation/android.txt b/tools/perf/Documentation/android.txt index 8484c3a04a6a..24a59998fc91 100644 --- a/tools/perf/Documentation/android.txt +++ b/tools/perf/Documentation/android.txt @@ -12,14 +12,14 @@ Set the NDK variable to point to the path where you installed the NDK: 2. Set cross-compiling environment variables for NDK toolchain and sysroot. For arm: - export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.6/prebuilt/linux-x86/bin/arm-linux-androideabi- - export NDK_SYSROOT=${NDK}/platforms/android-9/arch-arm + export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi- + export NDK_SYSROOT=${NDK}/platforms/android-24/arch-arm For x86: - export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.6/prebuilt/linux-x86/bin/i686-linux-android- - export NDK_SYSROOT=${NDK}/platforms/android-9/arch-x86 + export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.9/prebuilt/linux-x86_64/bin/i686-linux-android- + export NDK_SYSROOT=${NDK}/platforms/android-24/arch-x86 -This method is not working for Android NDK versions up to Revision 8b. -perf uses some bionic enhancements that are not included in these NDK versions. +This method is only tested for Android NDK versions Revision 11b and later. +perf uses some bionic enhancements that are not included in prior NDK versions. You can use method (b) described below instead. (b). Use the Android source tree @@ -49,9 +49,9 @@ II. Compile perf for Android ------------------------------------------------ You need to run make with the NDK toolchain and sysroot defined above: For arm: - make ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}" + make WERROR=0 ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}" For x86: - make ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}" + make WERROR=0 ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}" III. Install perf ----------------------------------------------- diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index be764f9ec769..c6c8318e38a2 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -672,6 +672,7 @@ The letters are: d create a debug log g synthesize a call chain (use with i or x) l synthesize last branch entries (use with i or x) + s skip initial number of events "Instructions" events look like they were recorded by "perf record -e instructions". @@ -730,6 +731,12 @@ from one sample to the next. To disable trace decoding entirely, use the option --no-itrace. +It is also possible to skip events generated (instructions, branches, transactions) +at the beginning. This is useful to ignore initialization code. + + --itrace=i0nss1000000 + +skips the first million instructions. dump option ----------- diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 65453f4c7006..e2a4c5e0dbe5 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -7,6 +7,7 @@ d create a debug log g synthesize a call chain (use with i or x) l synthesize last branch entries (use with i or x) + s skip initial number of events The default is all events i.e. the same as --itrace=ibxe @@ -24,3 +25,10 @@ Also the number of last branch entries (default 64, max. 1024) for instructions or transactions events can be specified. + + It is also possible to skip events generated (instructions, branches, transactions) + at the beginning. This is useful to ignore initialization code. + + --itrace=i0nss1000000 + + skips the first million instructions. diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index e9cd39a92dc2..8ffbd272952d 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -33,7 +33,7 @@ OPTIONS -f:: --force:: - Don't complain, do it. + Don't do ownership validation. -v:: --verbose:: @@ -61,6 +61,13 @@ OPTIONS --stdio:: Use the stdio interface. +--stdio-color:: + 'always', 'never' or 'auto', allowing configuring color output + via the command line, in addition to via "color.ui" .perfconfig. + Use '--stdio-color always' to generate color even when redirecting + to a pipe or file. Using just '--stdio-color' is equivalent to + using 'always'. + --tui:: Use the TUI interface. Use of --tui requires a tty, if one is not present, as when piping to other commands, the stdio interface is used. This interfaces starts by centering on the line with more diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index dd07b55f58d8..058064db39d2 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -15,6 +15,9 @@ DESCRIPTION This command manages the build-id cache. It can add, remove, update and purge files to/from the cache. In the future it should as well set upper limits for the space used by the cache, etc. +This also scans the target binary for SDT (Statically Defined Tracing) and +record it along with the buildid-cache, which will be used by perf-probe. +For more details, see linkperf:perf-probe[1]. OPTIONS ------- diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt index be8fa1a0a97e..f0796a47dfa3 100644 --- a/tools/perf/Documentation/perf-data.txt +++ b/tools/perf/Documentation/perf-data.txt @@ -34,6 +34,10 @@ OPTIONS for 'convert' --verbose:: Be more verbose (show counter open errors, etc). +--all:: + Convert all events, including non-sample events (comm, fork, ...), to output. + Default is off, only convert samples. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index d1deb573877f..3e9490b9c533 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -75,7 +75,7 @@ OPTIONS -f:: --force:: - Don't complain, do it. + Don't do ownership validation. --symfs=<directory>:: Look for files with symbols relative to this directory. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index ec723d0a5bb3..a126e97a8114 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -93,6 +93,67 @@ raw encoding of 0x1A8 can be used: You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. +ARBITRARY PMUS +-------------- + +perf also supports an extended syntax for specifying raw parameters +to PMUs. Using this typically requires looking up the specific event +in the CPU vendor specific documentation. + +The available PMUs and their raw parameters can be listed with + + ls /sys/devices/*/format + +For example the raw event "LSD.UOPS" core pmu event above could +be specified as + + perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ... + +PER SOCKET PMUS +--------------- + +Some PMUs are not associated with a core, but with a whole CPU socket. +Events on these PMUs generally cannot be sampled, but only counted globally +with perf stat -a. They can be bound to one logical CPU, but will measure +all the CPUs in the same socket. + +This example measures memory bandwidth every second +on the first memory controller on socket 0 of a Intel Xeon system + + perf stat -C 0 -a uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ... + +Each memory controller has its own PMU. Measuring the complete system +bandwidth would require specifying all imc PMUs (see perf list output), +and adding the values together. + +This example measures the combined core power every second + + perf stat -I 1000 -e power/energy-cores/ -a + +ACCESS RESTRICTIONS +------------------- + +For non root users generally only context switched PMU events are available. +This is normally only the events in the cpu PMU, the predefined events +like cycles and instructions and some software events. + +Other PMUs and global measurements are normally root only. +Some event qualifiers, such as "any", are also root only. + +This can be overriden by setting the kernel.perf_event_paranoid +sysctl to -1, which allows non root to use these events. + +For accessing trace point events perf needs to have read access to +/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed +setting. + +TRACING +------- + +Some PMUs control advanced hardware tracing capabilities, such as Intel PT, +that allows low overhead execution tracing. These are described in a separate +intel-pt.txt document. + PARAMETERIZED EVENTS -------------------- @@ -106,6 +167,50 @@ also be supplied. For example: perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... +EVENT GROUPS +------------ + +Perf supports time based multiplexing of events, when the number of events +active exceeds the number of hardware performance counters. Multiplexing +can cause measurement errors when the workload changes its execution +profile. + +When metrics are computed using formulas from event counts, it is useful to +ensure some events are always measured together as a group to minimize multiplexing +errors. Event groups can be specified using { }. + + perf stat -e '{instructions,cycles}' ... + +The number of available performance counters depend on the CPU. A group +cannot contain more events than available counters. +For example Intel Core CPUs typically have four generic performance counters +for the core, plus three fixed counters for instructions, cycles and +ref-cycles. Some special events have restrictions on which counter they +can schedule, and may not support multiple instances in a single group. +When too many events are specified in the group none of them will not +be measured. + +Globally pinned events can limit the number of counters available for +other groups. On x86 systems, the NMI watchdog pins a counter by default. +The nmi watchdog can be disabled as root with + + echo 0 > /proc/sys/kernel/nmi_watchdog + +Events from multiple different PMUs cannot be mixed in a group, with +some exceptions for software events. + +LEADER SAMPLING +--------------- + +perf also supports group leader sampling using the :S specifier. + + perf record -e '{cycles,instructions}:S' ... + perf report --group + +Normally all events in a event group sample, but with :S only +the first event (the leader) samples, and it only reads the values of the +other events in the group. + OPTIONS ------- @@ -143,5 +248,5 @@ SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], -http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], +http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 43310d8661fe..73496320fca3 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -48,6 +48,17 @@ OPTIONS option can be passed in record mode. It will be interpreted the same way as perf record. +-K:: +--all-kernel:: + Configure all used events to run in kernel space. + +-U:: +--all-user:: + Configure all used events to run in user space. + +--ldload:: + Specify desired latency for loads event. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 3a8a9ba2b041..b303bcdd8ed1 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -67,7 +67,10 @@ OPTIONS -l:: --list[=[GROUP:]EVENT]:: - List up current probe events. This can also accept filtering patterns of event names. + List up current probe events. This can also accept filtering patterns of + event names. + When this is used with --cache, perf shows all cached probes instead of + the live probes. -L:: --line=:: @@ -109,6 +112,12 @@ OPTIONS Dry run. With this option, --add and --del doesn't execute actual adding and removal operations. +--cache:: + (With --add) Cache the probes. Any events which successfully added + are also stored in the cache file. + (With --list) Show cached probes. + (With --del) Remove cached probes. + --max-probes=NUM:: Set the maximum number of probe points for an event. Default is 128. @@ -134,19 +143,30 @@ PROBE SYNTAX Probe points are defined by following syntax. 1) Define event based on function name - [EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...] + [[GROUP:]EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...] 2) Define event based on source file with line number - [EVENT=]SRC:ALN [ARG ...] + [[GROUP:]EVENT=]SRC:ALN [ARG ...] 3) Define event based on source file with lazy pattern - [EVENT=]SRC;PTN [ARG ...] + [[GROUP:]EVENT=]SRC;PTN [ARG ...] + 4) Pre-defined SDT events or cached event with name + %[sdt_PROVIDER:]SDTEVENT + or, + sdt_PROVIDER:SDTEVENT -'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. Currently, event group name is set as 'probe'. +'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_<bin>' is used for uprobe. +Note that using existing group name can conflict with other events. Especially, using the group name reserved for kernel modules can hide embedded events in the +modules. 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern. 'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT). +'SDTEVENT' and 'PROVIDER' is the pre-defined event name which is defined by user SDT (Statically Defined Tracing) or the pre-cached probes with event name. +Note that before using the SDT event, the target binary (on which SDT events are defined) must be scanned by linkperf:perf-buildid-cache[1] to make SDT events as cached events. + +For details of the SDT, see below. +https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html PROBE ARGUMENT -------------- @@ -156,10 +176,18 @@ Each probe argument follows below syntax. 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters. -'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. +'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail) On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. +TYPES +----- +Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. +String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. +Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is; + + b<bit-width>@<bit-offset>/<container-size> + LINE SYNTAX ----------- Line range is described by following syntax. @@ -226,4 +254,4 @@ Add probes at malloc() function on libc SEE ALSO -------- -linkperf:perf-trace[1], linkperf:perf-record[1] +linkperf:perf-trace[1], linkperf:perf-record[1], linkperf:perf-buildid-cache[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 19aa17532a16..379a2bed07c0 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -192,6 +192,9 @@ OPTIONS --period:: Record the sample period. +--sample-cpu:: + Record the sample cpu. + -n:: --no-samples:: Don't sample. @@ -347,6 +350,48 @@ Configure all used events to run in kernel space. --all-user:: Configure all used events to run in user space. +--timestamp-filename +Append timestamp to output file name. + +--switch-output:: +Generate multiple perf.data files, timestamp prefixed, switching to a new one +when receiving a SIGUSR2. + +A possible use case is to, given an external event, slice the perf.data file +that gets then processed, possibly via a perf script, to decide if that +particular perf.data snapshot should be kept or not. + +Implies --timestamp-filename, --no-buildid and --no-buildid-cache. + +--dry-run:: +Parse options then exit. --dry-run can be used to detect errors in cmdline +options. + +'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj +in config file is set to true. + +--tail-synthesize:: +Instead of collecting non-sample events (for example, fork, comm, mmap) at +the beginning of record, collect them during finalizing an output file. +The collected non-sample events reflects the status of the system when +record is finished. + +--overwrite:: +Makes all events use an overwritable ring buffer. An overwritable ring +buffer works like a flight recorder: when it gets full, the kernel will +overwrite the oldest records, that thus will never make it to the +perf.data file. + +When '--overwrite' and '--switch-output' are used perf records and drops +events until it receives a signal, meaning that something unusual was +detected that warrants taking a snapshot of the most current events, +those fitting in the ring buffer at that moment. + +'overwrite' attribute can also be set or canceled for an event using +config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'. + +Implies --tail-synthesize. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 12113992ac9d..2d1746295abf 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -103,12 +103,13 @@ OPTIONS If --branch-stack option is used, following sort keys are also available: - dso_from, dso_to, symbol_from, symbol_to, mispredict. - dso_from: name of library or module branched from - dso_to: name of library or module branched to - symbol_from: name of function branched from - symbol_to: name of function branched to + - srcline_from: source file and line branched from + - srcline_to: source file and line branched to - mispredict: "N" for predicted branch, "Y" for mispredicted branch - in_tx: branch in TSX transaction - abort: TSX transaction abort. @@ -264,6 +265,13 @@ OPTIONS --stdio:: Use the stdio interface. +--stdio-color:: + 'always', 'never' or 'auto', allowing configuring color output + via the command line, in addition to via "color.ui" .perfconfig. + Use '--stdio-color always' to generate color even when redirecting + to a pipe or file. Using just '--stdio-color' is equivalent to + using 'always'. + --tui:: Use the TUI interface, that is integrated with annotate and allows zooming into DSOs or threads, among other features. Use of --tui requires a tty, if one is not present, as when piping to other @@ -285,7 +293,7 @@ OPTIONS -f:: --force:: - Don't complain, do it. + Don't do ownership validation. --symfs=<directory>:: Look for files with symbols relative to this directory. diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 8ff4df956951..1cc08cc47ac5 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -50,6 +50,22 @@ OPTIONS --dump-raw-trace=:: Display verbose dump of the sched data. +OPTIONS for 'perf sched map' +---------------------------- + +--compact:: + Show only CPUs with activity. Helps visualizing on high core + count systems. + +--cpus:: + Show just entries with activities for the given CPUs. + +--color-cpus:: + Highlight the given cpus. + +--color-pids:: + Highlight the given pids. + SEE ALSO -------- linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 382ddfb45d1d..053bbbd84ece 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,16 +116,16 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags. - Field list can be prepended with the type, trace, sw or hw, + srcline, period, iregs, brstack, brstacksym, flags, bpf-output, + callindent. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. - e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace + e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace - perf script -f <fields> + perf script -F <fields> is equivalent to: - perf script -f trace:<fields> -f sw:<fields> -f hw:<fields> + perf script -F trace:<fields> -F sw:<fields> -F hw:<fields> i.e., the specified fields apply to all event types if the type string is not given. @@ -133,9 +133,9 @@ OPTIONS The arguments are processed in the order received. A later usage can reset a prior request. e.g.: - -f trace: -f comm,tid,time,ip,sym + -F trace: -F comm,tid,time,ip,sym - The first -f suppresses trace events (field list is ""), but then the + The first -F suppresses trace events (field list is ""), but then the second invocation sets the fields to comm,tid,time,ip,sym. In this case a warning is given to the user: @@ -143,9 +143,9 @@ OPTIONS Alternatively, consider the order: - -f comm,tid,time,ip,sym -f trace: + -F comm,tid,time,ip,sym -F trace: - The first -f sets the fields for all events and the second -f + The first -F sets the fields for all events and the second -F suppresses trace events. The user is given a warning message about the override, and the result of the above is that only S/W and H/W events are displayed with the given fields. @@ -154,14 +154,14 @@ OPTIONS event type, a message is displayed to the user that the option is ignored for that type. For example: - $ perf script -f comm,tid,trace + $ perf script -F comm,tid,trace 'trace' not valid for hardware events. Ignoring. 'trace' not valid for software events. Ignoring. Alternatively, if the type is given an invalid field is specified it is an error. For example: - perf script -v -f sw:comm,tid,trace + perf script -v -F sw:comm,tid,trace 'trace' not valid for software events. At this point usage is displayed, and perf-script exits. @@ -170,10 +170,19 @@ OPTIONS Trace decoding. The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, system, asynchronous, interrupt, transaction abort, trace begin, trace end, and in transaction, - respectively. + respectively. Known combinations of flags are printed more nicely e.g. + "call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b", + "int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs", + "async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB", + "tr end" for "bE". However the "x" flag will be display separately in those + cases e.g. "jcc (x)" for a condition branch within a transaction. + + The callindent field is synthesized and may have a value when + Instruction Trace decoding. For calls and returns, it will display the + name of the symbol indented with spaces to reflect the stack depth. Finally, a user may not set fields to none for all event types. - i.e., -f "" is not allowed. + i.e., -F "" is not allowed. The brstack output includes branch related information with raw addresses using the /v/v/v/v/ syntax in the following order: @@ -259,9 +268,23 @@ include::itrace.txt[] --full-source-path:: Show the full path for source files for srcline output. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + Note that when using the --itrace option the synthesized callchain size + will override this value if the synthesized callchain size is bigger. + + Default: 127 + --ns:: Use 9 decimal places when displaying time (i.e. show the nanoseconds) +-f:: +--force:: + Don't do ownership validation. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04f23b404bbc..d96ccd4844df 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements. --no-aggr:: Do not aggregate counts across all monitored CPUs. +--topdown:: +Print top down level 1 metrics if supported by the CPU. This allows to +determine bottle necks in the CPU pipeline for CPU bound workloads, +by breaking the cycles consumed down into frontend bound, backend bound, +bad speculation and retiring. + +Frontend bound means that the CPU cannot fetch and decode instructions fast +enough. Backend bound means that computation or memory access is the bottle +neck. Bad Speculation means that the CPU wasted cycles due to branch +mispredictions and similar issues. Retiring means that the CPU computed without +an apparently bottleneck. The bottleneck is only the real bottleneck +if the workload is actually bound by the CPU and not by something else. + +For best results it is usually a good idea to use it with interval +mode like -I 1000, as the bottleneck of workloads can change often. + +The top down metrics are collected per core instead of per +CPU thread. Per core mode is automatically enabled +and -a (global monitoring) is needed, requiring root rights or +perf.perf_event_paranoid=-1. + +Topdown uses the full Performance Monitoring Unit, and needs +disabling of the NMI watchdog (as root): +echo 0 > /proc/sys/kernel/nmi_watchdog +for best results. Otherwise the bottlenecks may be inconsistent +on workload with changing phases. + +This enables --metric-only, unless overriden with --no-metric-only. + +To interpret the results it is usually needed to know on which +CPUs the workload runs on. If needed the CPUs can be forced using +taskset. EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index 31a5c3ea7f74..b329c65d7f40 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt @@ -30,3 +30,7 @@ OPTIONS -v:: --verbose:: Be more verbose. + +-F:: +--dont-fork:: + Do not fork child for each test, run all tests within single process. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 19f046f027cd..91d638df3a6b 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -177,7 +177,7 @@ Default is to monitor all CPUS. between information loss and faster processing especially for workloads that can have a very long callchain stack. - Default: 127 + Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. --ignore-callees=<regex>:: Ignore callees of the function(s) matching the given regex. diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 13293de8869f..1ab0782369b1 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -117,9 +117,42 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --syscalls:: Trace system calls. This options is enabled by default. +--call-graph [mode,type,min[,limit],order[,key][,branch]]:: + Setup and enable call-graph (stack chain/backtrace) recording. + See `--call-graph` section in perf-record and perf-report + man pages for details. The ones that are most useful in 'perf trace' + are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'. + + Using this will, for the root user, bump the value of --mmap-pages to 4 + times the maximum for non-root users, based on the kernel.perf_event_mlock_kb + sysctl. This is done only if the user doesn't specify a --mmap-pages value. + +--kernel-syscall-graph:: + Show the kernel callchains on the syscall exit path. + --event:: Trace other events, see 'perf list' for a complete list. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. Note that at this point + this is just about the presentation part, i.e. the kernel is still + not limiting, the overhead of callchains needs to be set via the + knobs in --call-graph dwarf. + + Implies '--call-graph dwarf' when --call-graph not present on the + command line, on systems where DWARF unwinding was built in. + + Default: /proc/sys/kernel/perf_event_max_stack when present for + live sessions (without --input/-i), 127 otherwise. + +--min-stack:: + Set the stack depth limit when parsing the callchain, anything + below the specified depth will be ignored. Disabled by default. + + Implies '--call-graph dwarf' when --call-graph not present on the + command line, on systems where DWARF unwinding was built in. + --proc-map-timeout:: When processing pre-existing threads /proc/XXX/mmap, it may take a long time, because the file may be huge. A time out is needed in such cases. diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt new file mode 100644 index 000000000000..fdc99fe6bbc3 --- /dev/null +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -0,0 +1,442 @@ +perf.data format + +Uptodate as of v4.7 + +This document describes the on-disk perf.data format, generated by perf record +or perf inject and consumed by the other perf tools. + +On a high level perf.data contains the events generated by the PMUs, plus metadata. + +All fields are in native-endian of the machine that generated the perf.data. + +When perf is writing to a pipe it uses a special version of the file +format that does not rely on seeking to adjust data offsets. This +format is not described here. The pipe version can be converted to +normal perf.data with perf inject. + +The file starts with a perf_header: + +struct perf_header { + char magic[8]; /* PERFILE2 */ + uint64_t size; /* size of the header */ + uint64_t attr_size; /* size of an attribute in attrs */ + struct perf_file_section attrs; + struct perf_file_section data; + struct perf_file_section event_types; + uint64_t flags; + uint64_t flags1[3]; +}; + +The magic number identifies the perf file and the version. Current perf versions +use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1 +is not described here. The magic number also identifies the endian. When the +magic value is 64bit byte swapped compared the file is in non-native +endian. + +A perf_file_section contains a pointer to another section of the perf file. +The header contains three such pointers: for attributes, data and event types. + +struct perf_file_section { + uint64_t offset; /* offset from start of file */ + uint64_t size; /* size of the section */ +}; + +Flags section: + +The header is followed by different optional headers, described by the bits set +in flags. Only headers for which the bit is set are included. Each header +consists of a perf_file_section located after the initial header. +The respective perf_file_section points to the data of the additional +header and defines its size. + +Some headers consist of strings, which are defined like this: + +struct perf_header_string { + uint32_t len; + char string[len]; /* zero terminated */ +}; + +Some headers consist of a sequence of strings, which start with a + +struct perf_header_string_list { + uint32_t nr; + struct perf_header_string strings[nr]; /* variable length records */ +}; + +The bits are the flags bits in a 256 bit bitmap starting with +flags. These define the valid bits: + + HEADER_RESERVED = 0, /* always cleared */ + HEADER_FIRST_FEATURE = 1, + HEADER_TRACING_DATA = 1, + +Describe me. + + HEADER_BUILD_ID = 2, + +The header consists of an sequence of build_id_event. The size of each record +is defined by header.size (see perf_event.h). Each event defines a ELF build id +for a executable file name for a pid. An ELF build id is a unique identifier +assigned by the linker to an executable. + +struct build_id_event { + struct perf_event_header header; + pid_t pid; + uint8_t build_id[24]; + char filename[header.size - offsetof(struct build_id_event, filename)]; +}; + + HEADER_HOSTNAME = 3, + +A perf_header_string with the hostname where the data was collected +(uname -n) + + HEADER_OSRELEASE = 4, + +A perf_header_string with the os release where the data was collected +(uname -r) + + HEADER_VERSION = 5, + +A perf_header_string with the perf user tool version where the +data was collected. This is the same as the version of the source tree +the perf tool was built from. + + HEADER_ARCH = 6, + +A perf_header_string with the CPU architecture (uname -m) + + HEADER_NRCPUS = 7, + +A structure defining the number of CPUs. + +struct nr_cpus { + uint32_t nr_cpus_online; + uint32_t nr_cpus_available; /* CPUs not yet onlined */ +}; + + HEADER_CPUDESC = 8, + +A perf_header_string with description of the CPU. On x86 this is the model name +in /proc/cpuinfo + + HEADER_CPUID = 9, + +A perf_header_string with the exact CPU type. On x86 this is +vendor,family,model,stepping. For example: GenuineIntel,6,69,1 + + HEADER_TOTAL_MEM = 10, + +An uint64_t with the total memory in bytes. + + HEADER_CMDLINE = 11, + +A perf_header_string with the perf command line used to collect the data. + + HEADER_EVENT_DESC = 12, + +Another description of the perf_event_attrs, more detailed than header.attrs +including IDs and names. See perf_event.h or the man page for a description +of a struct perf_event_attr. + +struct { + uint32_t nr; /* number of events */ + uint32_t attr_size; /* size of each perf_event_attr */ + struct { + struct perf_event_attr attr; /* size of attr_size */ + uint32_t nr_ids; + struct perf_header_string event_string; + uint64_t ids[nr_ids]; + } events[nr]; /* Variable length records */ +}; + + HEADER_CPU_TOPOLOGY = 13, + +String lists defining the core and CPU threads topology. + +struct { + struct perf_header_string_list cores; /* Variable length */ + struct perf_header_string_list threads; /* Variable length */ +}; + +Example: + sibling cores : 0-3 + sibling threads : 0-1 + sibling threads : 2-3 + + HEADER_NUMA_TOPOLOGY = 14, + + A list of NUMA node descriptions + +struct { + uint32_t nr; + struct { + uint32_t nodenr; + uint64_t mem_total; + uint64_t mem_free; + struct perf_header_string cpus; + } nodes[nr]; /* Variable length records */ +}; + + HEADER_BRANCH_STACK = 15, + +Not implemented in perf. + + HEADER_PMU_MAPPINGS = 16, + + A list of PMU structures, defining the different PMUs supported by perf. + +struct { + uint32_t nr; + struct pmu { + uint32_t pmu_type; + struct perf_header_string pmu_name; + } [nr]; /* Variable length records */ +}; + + HEADER_GROUP_DESC = 17, + + Description of counter groups ({...} in perf syntax) + +struct { + uint32_t nr; + struct { + struct perf_header_string string; + uint32_t leader_idx; + uint32_t nr_members; + } [nr]; /* Variable length records */ +}; + + HEADER_AUXTRACE = 18, + +Define additional auxtrace areas in the perf.data. auxtrace is used to store +undecoded hardware tracing information, such as Intel Processor Trace data. + +/** + * struct auxtrace_index_entry - indexes a AUX area tracing event within a + * perf.data file. + * @file_offset: offset within the perf.data file + * @sz: size of the event + */ +struct auxtrace_index_entry { + u64 file_offset; + u64 sz; +}; + +#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256 + +/** + * struct auxtrace_index - index of AUX area tracing events within a perf.data + * file. + * @list: linking a number of arrays of entries + * @nr: number of entries + * @entries: array of entries + */ +struct auxtrace_index { + struct list_head list; + size_t nr; + struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT]; +}; + + other bits are reserved and should ignored for now + HEADER_FEAT_BITS = 256, + +Attributes + +This is an array of perf_event_attrs, each attr_size bytes long, which defines +each event collected. See perf_event.h or the man page for a detailed +description. + +Data + +This section is the bulk of the file. It consist of a stream of perf_events +describing events. This matches the format generated by the kernel. +See perf_event.h or the manpage for a detailed description. + +Some notes on parsing: + +Ordering + +The events are not necessarily in time stamp order, as they can be +collected in parallel on different CPUs. If the events should be +processed in time order they need to be sorted first. It is possible +to only do a partial sort using the FINISHED_ROUND event header (see +below). perf record guarantees that there is no reordering over a +FINISHED_ROUND. + +ID vs IDENTIFIER + +When the event stream contains multiple events each event is identified +by an ID. This can be either through the PERF_SAMPLE_ID or the +PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is +at a fixed offset from the event header, which allows reliable +parsing of the header. Relying on ID may be ambigious. +IDENTIFIER is only supported by newer Linux kernels. + +Perf record specific events: + +In addition to the kernel generated event types perf record adds its +own event types (in addition it also synthesizes some kernel events, +for example MMAP events) + + PERF_RECORD_USER_TYPE_START = 64, + PERF_RECORD_HEADER_ATTR = 64, + +struct attr_event { + struct perf_event_header header; + struct perf_event_attr attr; + uint64_t id[]; +}; + + PERF_RECORD_HEADER_EVENT_TYPE = 65, /* depreceated */ + +#define MAX_EVENT_NAME 64 + +struct perf_trace_event_type { + uint64_t event_id; + char name[MAX_EVENT_NAME]; +}; + +struct event_type_event { + struct perf_event_header header; + struct perf_trace_event_type event_type; +}; + + + PERF_RECORD_HEADER_TRACING_DATA = 66, + +Describe me + +struct tracing_data_event { + struct perf_event_header header; + uint32_t size; +}; + + PERF_RECORD_HEADER_BUILD_ID = 67, + +Define a ELF build ID for a referenced executable. + + struct build_id_event; /* See above */ + + PERF_RECORD_FINISHED_ROUND = 68, + +No event reordering over this header. No payload. + + PERF_RECORD_ID_INDEX = 69, + +Map event ids to CPUs and TIDs. + +struct id_index_entry { + uint64_t id; + uint64_t idx; + uint64_t cpu; + uint64_t tid; +}; + +struct id_index_event { + struct perf_event_header header; + uint64_t nr; + struct id_index_entry entries[nr]; +}; + + PERF_RECORD_AUXTRACE_INFO = 70, + +Auxtrace type specific information. Describe me + +struct auxtrace_info_event { + struct perf_event_header header; + uint32_t type; + uint32_t reserved__; /* For alignment */ + uint64_t priv[]; +}; + + PERF_RECORD_AUXTRACE = 71, + +Defines auxtrace data. Followed by the actual data. The contents of +the auxtrace data is dependent on the event and the CPU. For example +for Intel Processor Trace it contains Processor Trace data generated +by the CPU. + +struct auxtrace_event { + struct perf_event_header header; + uint64_t size; + uint64_t offset; + uint64_t reference; + uint32_t idx; + uint32_t tid; + uint32_t cpu; + uint32_t reserved__; /* For alignment */ +}; + +struct aux_event { + struct perf_event_header header; + uint64_t aux_offset; + uint64_t aux_size; + uint64_t flags; +}; + + PERF_RECORD_AUXTRACE_ERROR = 72, + +Describes an error in hardware tracing + +enum auxtrace_error_type { + PERF_AUXTRACE_ERROR_ITRACE = 1, + PERF_AUXTRACE_ERROR_MAX +}; + +#define MAX_AUXTRACE_ERROR_MSG 64 + +struct auxtrace_error_event { + struct perf_event_header header; + uint32_t type; + uint32_t code; + uint32_t cpu; + uint32_t pid; + uint32_t tid; + uint32_t reserved__; /* For alignment */ + uint64_t ip; + char msg[MAX_AUXTRACE_ERROR_MSG]; +}; + +Event types + +Define the event attributes with their IDs. + +An array bound by the perf_file_section size. + + struct { + struct perf_event_attr attr; /* Size defined by header.attr_size */ + struct perf_file_section ids; + } + +ids points to a array of uint64_t defining the ids for event attr attr. + +References: + +include/uapi/linux/perf_event.h + +This is the canonical description of the kernel generated perf_events +and the perf_event_attrs. + +perf_events manpage + +A manpage describing perf_event and perf_event_attr is here: +http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html +This tends to be slightly behind the kernel include, but has better +descriptions. An (typically older) version of the man page may be +included with the standard Linux man pages, available with "man +perf_events" + +pmu-tools + +https://github.com/andikleen/pmu-tools/tree/master/parser + +A definition of the perf.data format in python "construct" format is available +in pmu-tools parser. This allows to read perf.data from python and dump it. + +quipper + +The quipper C++ parser is available at +https://chromium.googlesource.com/chromiumos/platform/chromiumos-wide-profiling/ +Unfortunately this parser tends to be many versions behind and may not be able +to parse data files generated by recent perf. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 8c8c6b9ce915..ad2534df4ba6 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -12,13 +12,23 @@ tools/arch/sparc/include/asm/barrier_32.h tools/arch/sparc/include/asm/barrier_64.h tools/arch/tile/include/asm/barrier.h tools/arch/x86/include/asm/barrier.h +tools/arch/x86/include/asm/cpufeatures.h +tools/arch/x86/include/asm/disabled-features.h +tools/arch/x86/include/asm/required-features.h +tools/arch/x86/include/uapi/asm/svm.h +tools/arch/x86/include/uapi/asm/vmx.h +tools/arch/x86/include/uapi/asm/kvm.h +tools/arch/x86/include/uapi/asm/kvm_perf.h +tools/arch/x86/lib/memcpy_64.S +tools/arch/x86/lib/memset_64.S +tools/arch/s390/include/uapi/asm/kvm_perf.h +tools/arch/s390/include/uapi/asm/sie.h tools/arch/xtensa/include/asm/barrier.h tools/scripts tools/build tools/arch/x86/include/asm/atomic.h tools/arch/x86/include/asm/rmwcc.h tools/lib/traceevent -tools/lib/bpf tools/lib/api tools/lib/bpf tools/lib/subcmd @@ -29,6 +39,9 @@ tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/find_bit.c tools/lib/bitmap.c +tools/lib/str_error_r.c +tools/lib/vsprintf.c +tools/include/asm/alternative-asm.h tools/include/asm/atomic.h tools/include/asm/barrier.h tools/include/asm/bug.h @@ -52,43 +65,16 @@ tools/include/linux/hash.h tools/include/linux/kernel.h tools/include/linux/list.h tools/include/linux/log2.h +tools/include/uapi/linux/bpf.h +tools/include/uapi/linux/bpf_common.h +tools/include/uapi/linux/hw_breakpoint.h +tools/include/uapi/linux/perf_event.h tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h tools/include/linux/string.h +tools/include/linux/stringify.h tools/include/linux/types.h tools/include/linux/err.h tools/include/linux/bitmap.h -include/asm-generic/bitops/arch_hweight.h -include/asm-generic/bitops/const_hweight.h -include/asm-generic/bitops/fls64.h -include/asm-generic/bitops/__fls.h -include/asm-generic/bitops/fls.h -include/linux/perf_event.h -include/linux/list.h -include/linux/hash.h -include/linux/stringify.h -include/linux/swab.h -arch/*/include/asm/unistd*.h -arch/*/include/uapi/asm/unistd*.h -arch/*/include/uapi/asm/perf_regs.h -arch/*/lib/memcpy*.S -arch/*/lib/memset*.S -arch/*/include/asm/*features.h -include/linux/poison.h -include/linux/hw_breakpoint.h -include/uapi/linux/perf_event.h -include/uapi/linux/bpf.h -include/uapi/linux/bpf_common.h -include/uapi/linux/const.h -include/uapi/linux/swab.h -include/uapi/linux/hw_breakpoint.h -arch/x86/include/asm/svm.h -arch/x86/include/asm/vmx.h -arch/x86/include/asm/kvm_host.h -arch/x86/include/uapi/asm/svm.h -arch/x86/include/uapi/asm/vmx.h -arch/x86/include/uapi/asm/kvm.h -arch/x86/include/uapi/asm/kvm_perf.h -arch/s390/include/uapi/asm/sie.h -arch/s390/include/uapi/asm/kvm_perf.h +tools/arch/*/include/uapi/asm/perf_regs.h diff --git a/tools/perf/config/Makefile b/tools/perf/Makefile.config index 6f8f6430f2bf..24803c58049a 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/Makefile.config @@ -23,11 +23,17 @@ $(call detected_var,ARCH) NO_PERF_REGS := 1 +# Additional ARCH settings for ppc +ifeq ($(ARCH),powerpc) + NO_PERF_REGS := 0 + LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 +endif + # Additional ARCH settings for x86 ifeq ($(ARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) - CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT + CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 $(call detected,CONFIG_X86_64) @@ -67,17 +73,25 @@ endif # # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ # + +libunwind_arch_set_flags = $(eval $(libunwind_arch_set_flags_code)) +define libunwind_arch_set_flags_code + FEATURE_CHECK_CFLAGS-libunwind-$(1) = -I$(LIBUNWIND_DIR)/include + FEATURE_CHECK_LDFLAGS-libunwind-$(1) = -L$(LIBUNWIND_DIR)/lib +endef + ifdef LIBUNWIND_DIR LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib + LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64 + $(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch))) endif -LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) # Set per-feature check compilation flags FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) -FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) -FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT @@ -101,7 +115,7 @@ endif FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf -FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi +FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile @@ -192,11 +206,11 @@ endif CFLAGS += -I$(src-perf)/util/include CFLAGS += -I$(src-perf)/arch/$(ARCH)/include +CFLAGS += -I$(srctree)/tools/include/uapi CFLAGS += -I$(srctree)/tools/include/ -CFLAGS += -I$(srctree)/arch/$(ARCH)/include/uapi -CFLAGS += -I$(srctree)/arch/$(ARCH)/include -CFLAGS += -I$(srctree)/include/uapi -CFLAGS += -I$(srctree)/include +CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi +CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/ +CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/ # $(obj-perf) for generated common-cmds.h # $(obj-perf)/util for generated bison/flex headers @@ -243,7 +257,7 @@ else LIBC_SUPPORT := 1 endif ifeq ($(LIBC_SUPPORT),1) - msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install elfutils-libelf-devel/libelf-dev); + msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install libelf-dev, libelf-devel or elfutils-libelf-devel); NO_LIBELF := 1 NO_DWARF := 1 @@ -295,8 +309,15 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT endif - # include ARCH specific config - -include $(src-perf)/arch/$(ARCH)/Makefile + ifeq ($(feature-libelf-gelf_getnote), 1) + CFLAGS += -DHAVE_GELF_GETNOTE_SUPPORT + else + msg := $(warning gelf_getnote() not found on libelf, SDT support disabled); + endif + + ifeq ($(feature-libelf-getshdrstrndx), 1) + CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT + endif ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) @@ -334,6 +355,16 @@ ifndef NO_LIBELF endif # NO_LIBBPF endif # NO_LIBELF +ifndef NO_SDT + ifneq ($(feature-sdt), 1) + msg := $(warning No sys/sdt.h found, no SDT events are defined, please install systemtap-sdt-devel or systemtap-sdt-dev); + NO_SDT := 1; + else + CFLAGS += -DHAVE_SDT_EVENT + $(call detected,CONFIG_SDT_EVENT) + endif +endif + ifdef PERF_HAVE_JITDUMP ifndef NO_DWARF $(call detected,CONFIG_JITDUMP) @@ -348,10 +379,42 @@ ifeq ($(ARCH),powerpc) endif ifndef NO_LIBUNWIND + have_libunwind := + + ifeq ($(feature-libunwind-x86), 1) + $(call detected,CONFIG_LIBUNWIND_X86) + CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT + LDFLAGS += -lunwind-x86 + EXTLIBS_LIBUNWIND += -lunwind-x86 + have_libunwind = 1 + endif + + ifeq ($(feature-libunwind-aarch64), 1) + $(call detected,CONFIG_LIBUNWIND_AARCH64) + CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT + LDFLAGS += -lunwind-aarch64 + EXTLIBS_LIBUNWIND += -lunwind-aarch64 + have_libunwind = 1 + $(call feature_check,libunwind-debug-frame-aarch64) + ifneq ($(feature-libunwind-debug-frame-aarch64), 1) + msg := $(warning No debug_frame support found in libunwind-aarch64); + CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME_AARCH64 + endif + endif + ifneq ($(feature-libunwind), 1) msg := $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR); + NO_LOCAL_LIBUNWIND := 1 + else + have_libunwind := 1 + $(call detected,CONFIG_LOCAL_LIBUNWIND) + endif + + ifneq ($(have_libunwind), 1) NO_LIBUNWIND := 1 endif +else + NO_LOCAL_LIBUNWIND := 1 endif ifndef NO_LIBBPF @@ -389,7 +452,7 @@ else NO_DWARF_UNWIND := 1 endif -ifndef NO_LIBUNWIND +ifndef NO_LOCAL_LIBUNWIND ifeq ($(ARCH),$(filter $(ARCH),arm arm64)) $(call feature_check,libunwind-debug-frame) ifneq ($(feature-libunwind-debug-frame), 1) @@ -400,10 +463,15 @@ ifndef NO_LIBUNWIND # non-ARM has no dwarf_find_debug_frame() function: CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME endif - CFLAGS += -DHAVE_LIBUNWIND_SUPPORT EXTLIBS += $(LIBUNWIND_LIBS) + LDFLAGS += $(LIBUNWIND_LIBS) +endif + +ifndef NO_LIBUNWIND + CFLAGS += -DHAVE_LIBUNWIND_SUPPORT CFLAGS += $(LIBUNWIND_CFLAGS) LDFLAGS += $(LIBUNWIND_LDFLAGS) + EXTLIBS += $(EXTLIBS_LIBUNWIND) endif ifndef NO_LIBAUDIT @@ -434,7 +502,7 @@ endif ifndef NO_SLANG ifneq ($(feature-libslang), 1) - msg := $(warning slang not found, disables TUI support. Please install slang-devel or libslang-dev); + msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev); NO_SLANG := 1 else # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 000ea210389d..2d9087501633 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -81,6 +81,9 @@ include ../scripts/utilities.mak # # Define NO_LIBBPF if you do not want BPF support # +# Define NO_SDT if you do not want to define SDT event in perf tools, +# note that it doesn't disable SDT scanning support. +# # Define FEATURES_DUMP to provide features detection dump file # and bypass the feature detection @@ -158,7 +161,7 @@ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ BPF_DIR = $(srctree)/tools/lib/bpf/ SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ -# include config/Makefile by default and rule out +# include Makefile.config by default and rule out # non-config cases config := 1 @@ -180,7 +183,12 @@ ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump) FEATURE_TESTS := all endif endif -include config/Makefile +include Makefile.config +endif + +ifeq ($(config),0) +include $(srctree)/tools/scripts/Makefile.arch +-include arch/$(ARCH)/Makefile endif # The FEATURE_DUMP_EXPORT holds location of the actual @@ -249,7 +257,8 @@ PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) - $(QUIET_GEN)CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \ + $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \ + CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \ $(PYTHON_WORD) util/setup.py \ --quiet build_ext; \ mkdir -p $(OUTPUT)python && \ @@ -297,8 +306,6 @@ endif # because maintaining the nesting to match is a pain. If # we had "elif" things would have been much nicer... --include arch/$(ARCH)/Makefile - ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif @@ -341,6 +348,87 @@ export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK include $(srctree)/tools/build/Makefile.include $(PERF_IN): prepare FORCE + @(test -f ../../include/uapi/linux/perf_event.h && ( \ + (diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \ + || echo "Warning: tools/include/uapi/linux/perf_event.h differs from kernel" >&2 )) || true + @(test -f ../../include/linux/hash.h && ( \ + (diff -B ../include/linux/hash.h ../../include/linux/hash.h >/dev/null) \ + || echo "Warning: tools/include/linux/hash.h differs from kernel" >&2 )) || true + @(test -f ../../include/uapi/linux/hw_breakpoint.h && ( \ + (diff -B ../include/uapi/linux/hw_breakpoint.h ../../include/uapi/linux/hw_breakpoint.h >/dev/null) \ + || echo "Warning: tools/include/uapi/linux/hw_breakpoint.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/asm/disabled-features.h && ( \ + (diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/asm/required-features.h && ( \ + (diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \ + (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/lib/memcpy_64.S && ( \ + (diff -B ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ + || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/lib/memset_64.S && ( \ + (diff -B ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ + || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true + @(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \ + (diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \ + || echo "Warning: tools/arch/arm/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true + @(test -f ../../arch/arm64/include/uapi/asm/perf_regs.h && ( \ + (diff -B ../arch/arm64/include/uapi/asm/perf_regs.h ../../arch/arm64/include/uapi/asm/perf_regs.h >/dev/null) \ + || echo "Warning: tools/arch/arm64/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true + @(test -f ../../arch/powerpc/include/uapi/asm/perf_regs.h && ( \ + (diff -B ../arch/powerpc/include/uapi/asm/perf_regs.h ../../arch/powerpc/include/uapi/asm/perf_regs.h >/dev/null) \ + || echo "Warning: tools/arch/powerpc/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/uapi/asm/perf_regs.h && ( \ + (diff -B ../arch/x86/include/uapi/asm/perf_regs.h ../../arch/x86/include/uapi/asm/perf_regs.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/uapi/asm/kvm.h && ( \ + (diff -B ../arch/x86/include/uapi/asm/kvm.h ../../arch/x86/include/uapi/asm/kvm.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/uapi/asm/kvm_perf.h && ( \ + (diff -B ../arch/x86/include/uapi/asm/kvm_perf.h ../../arch/x86/include/uapi/asm/kvm_perf.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/uapi/asm/svm.h && ( \ + (diff -B ../arch/x86/include/uapi/asm/svm.h ../../arch/x86/include/uapi/asm/svm.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/uapi/asm/svm.h differs from kernel" >&2 )) || true + @(test -f ../../arch/x86/include/uapi/asm/vmx.h && ( \ + (diff -B ../arch/x86/include/uapi/asm/vmx.h ../../arch/x86/include/uapi/asm/vmx.h >/dev/null) \ + || echo "Warning: tools/arch/x86/include/uapi/asm/vmx.h differs from kernel" >&2 )) || true + @(test -f ../../arch/powerpc/include/uapi/asm/kvm.h && ( \ + (diff -B ../arch/powerpc/include/uapi/asm/kvm.h ../../arch/powerpc/include/uapi/asm/kvm.h >/dev/null) \ + || echo "Warning: tools/arch/powerpc/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true + @(test -f ../../arch/s390/include/uapi/asm/kvm.h && ( \ + (diff -B ../arch/s390/include/uapi/asm/kvm.h ../../arch/s390/include/uapi/asm/kvm.h >/dev/null) \ + || echo "Warning: tools/arch/s390/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true + @(test -f ../../arch/s390/include/uapi/asm/kvm_perf.h && ( \ + (diff -B ../arch/s390/include/uapi/asm/kvm_perf.h ../../arch/s390/include/uapi/asm/kvm_perf.h >/dev/null) \ + || echo "Warning: tools/arch/s390/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true + @(test -f ../../arch/s390/include/uapi/asm/sie.h && ( \ + (diff -B ../arch/s390/include/uapi/asm/sie.h ../../arch/s390/include/uapi/asm/sie.h >/dev/null) \ + || echo "Warning: tools/arch/s390/include/uapi/asm/sie.h differs from kernel" >&2 )) || true + @(test -f ../../arch/arm/include/uapi/asm/kvm.h && ( \ + (diff -B ../arch/arm/include/uapi/asm/kvm.h ../../arch/arm/include/uapi/asm/kvm.h >/dev/null) \ + || echo "Warning: tools/arch/arm/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true + @(test -f ../../arch/arm64/include/uapi/asm/kvm.h && ( \ + (diff -B ../arch/arm64/include/uapi/asm/kvm.h ../../arch/arm64/include/uapi/asm/kvm.h >/dev/null) \ + || echo "Warning: tools/arch/arm64/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true + @(test -f ../../include/asm-generic/bitops/arch_hweight.h && ( \ + (diff -B ../include/asm-generic/bitops/arch_hweight.h ../../include/asm-generic/bitops/arch_hweight.h >/dev/null) \ + || echo "Warning: tools/include/asm-generic/bitops/arch_hweight.h differs from kernel" >&2 )) || true + @(test -f ../../include/asm-generic/bitops/const_hweight.h && ( \ + (diff -B ../include/asm-generic/bitops/const_hweight.h ../../include/asm-generic/bitops/const_hweight.h >/dev/null) \ + || echo "Warning: tools/include/asm-generic/bitops/const_hweight.h differs from kernel" >&2 )) || true + @(test -f ../../include/asm-generic/bitops/__fls.h && ( \ + (diff -B ../include/asm-generic/bitops/__fls.h ../../include/asm-generic/bitops/__fls.h >/dev/null) \ + || echo "Warning: tools/include/asm-generic/bitops/__fls.h differs from kernel" >&2 )) || true + @(test -f ../../include/asm-generic/bitops/fls.h && ( \ + (diff -B ../include/asm-generic/bitops/fls.h ../../include/asm-generic/bitops/fls.h >/dev/null) \ + || echo "Warning: tools/include/asm-generic/bitops/fls.h differs from kernel" >&2 )) || true + @(test -f ../../include/asm-generic/bitops/fls64.h && ( \ + (diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \ + || echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true $(Q)$(MAKE) $(build)=perf $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -390,7 +478,7 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) -prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -430,7 +518,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o -$(LIBPERF_IN): fixdep FORCE +$(LIBPERF_IN): prepare fixdep FORCE $(Q)$(MAKE) $(build)=libperf $(LIB_FILE): $(LIBPERF_IN) @@ -618,14 +706,14 @@ $(INSTALL_DOC_TARGETS): ### Cleaning rules # -# This is here, not in config/Makefile, because config/Makefile does +# This is here, not in Makefile.config, because Makefile.config does # not get included for the clean target: # config-clean: $(call QUIET_CLEAN, config) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null -clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected @@ -662,5 +750,5 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare -.PHONY: libtraceevent_plugins +.PHONY: libtraceevent_plugins archheaders diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index d22e3d07de3d..f98da17357c0 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,4 +1,4 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index e58123a8912b..02f41dba4f4f 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,2 +1,2 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index a87afa91a99e..c116b713f7f7 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -1,11 +1,13 @@ +#ifndef REMOTE_UNWIND_LIBUNWIND #include <errno.h> #include <libunwind.h> #include "perf_regs.h" #include "../../util/unwind.h" #include "../../util/debug.h" +#endif -int libunwind__arch_reg_id(int regnum) +int LIBUNWIND__ARCH_REG_ID(int regnum) { switch (regnum) { case UNW_AARCH64_X0: diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index e83c8ce24303..886dd2aaff0d 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -1,6 +1,7 @@ #include <stdio.h> #include <sys/utsname.h> #include "common.h" +#include "../util/util.h" #include "../util/debug.h" const char *const arm_triplets[] = { @@ -9,34 +10,44 @@ const char *const arm_triplets[] = { "arm-unknown-linux-", "arm-unknown-linux-gnu-", "arm-unknown-linux-gnueabi-", + "arm-linux-gnu-", + "arm-linux-gnueabihf-", + "arm-none-eabi-", NULL }; const char *const arm64_triplets[] = { "aarch64-linux-android-", + "aarch64-linux-gnu-", NULL }; const char *const powerpc_triplets[] = { "powerpc-unknown-linux-gnu-", "powerpc64-unknown-linux-gnu-", + "powerpc64-linux-gnu-", + "powerpc64le-linux-gnu-", NULL }; const char *const s390_triplets[] = { "s390-ibm-linux-", + "s390x-linux-gnu-", NULL }; const char *const sh_triplets[] = { "sh-unknown-linux-gnu-", "sh64-unknown-linux-gnu-", + "sh-linux-gnu-", + "sh64-linux-gnu-", NULL }; const char *const sparc_triplets[] = { "sparc-unknown-linux-gnu-", "sparc64-unknown-linux-gnu-", + "sparc64-linux-gnu-", NULL }; @@ -49,12 +60,19 @@ const char *const x86_triplets[] = { "i386-pc-linux-gnu-", "i686-linux-android-", "i686-android-linux-", + "x86_64-linux-gnu-", + "i586-linux-gnu-", NULL }; const char *const mips_triplets[] = { "mips-unknown-linux-gnu-", "mipsel-linux-android-", + "mips-linux-gnu-", + "mips64-linux-gnu-", + "mips64el-linux-gnuabi64-", + "mips64-linux-gnuabi64-", + "mipsel-linux-gnu-", NULL }; @@ -102,7 +120,7 @@ static int lookup_triplets(const char *const *triplets, const char *name) * Return architecture name in a normalized form. * The conversion logic comes from the Makefile. */ -static const char *normalize_arch(char *arch) +const char *normalize_arch(char *arch) { if (!strcmp(arch, "x86_64")) return "x86"; diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index 7529cfb143ce..6b01c736b7d9 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -6,5 +6,6 @@ extern const char *objdump_path; int perf_env__lookup_objdump(struct perf_env *env); +const char *normalize_arch(char *arch); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 56e05f126ad8..cc3930904d68 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -3,4 +3,5 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h new file mode 100644 index 000000000000..75de0e92e71e --- /dev/null +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -0,0 +1,69 @@ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include <stdlib.h> +#include <linux/types.h> +#include <asm/perf_regs.h> + +#define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) +#define PERF_REGS_MAX PERF_REG_POWERPC_MAX +#ifdef __powerpc64__ + #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 +#else + #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 +#endif + +#define PERF_REG_IP PERF_REG_POWERPC_NIP +#define PERF_REG_SP PERF_REG_POWERPC_R1 + +static const char *reg_names[] = { + [PERF_REG_POWERPC_R0] = "r0", + [PERF_REG_POWERPC_R1] = "r1", + [PERF_REG_POWERPC_R2] = "r2", + [PERF_REG_POWERPC_R3] = "r3", + [PERF_REG_POWERPC_R4] = "r4", + [PERF_REG_POWERPC_R5] = "r5", + [PERF_REG_POWERPC_R6] = "r6", + [PERF_REG_POWERPC_R7] = "r7", + [PERF_REG_POWERPC_R8] = "r8", + [PERF_REG_POWERPC_R9] = "r9", + [PERF_REG_POWERPC_R10] = "r10", + [PERF_REG_POWERPC_R11] = "r11", + [PERF_REG_POWERPC_R12] = "r12", + [PERF_REG_POWERPC_R13] = "r13", + [PERF_REG_POWERPC_R14] = "r14", + [PERF_REG_POWERPC_R15] = "r15", + [PERF_REG_POWERPC_R16] = "r16", + [PERF_REG_POWERPC_R17] = "r17", + [PERF_REG_POWERPC_R18] = "r18", + [PERF_REG_POWERPC_R19] = "r19", + [PERF_REG_POWERPC_R20] = "r20", + [PERF_REG_POWERPC_R21] = "r21", + [PERF_REG_POWERPC_R22] = "r22", + [PERF_REG_POWERPC_R23] = "r23", + [PERF_REG_POWERPC_R24] = "r24", + [PERF_REG_POWERPC_R25] = "r25", + [PERF_REG_POWERPC_R26] = "r26", + [PERF_REG_POWERPC_R27] = "r27", + [PERF_REG_POWERPC_R28] = "r28", + [PERF_REG_POWERPC_R29] = "r29", + [PERF_REG_POWERPC_R30] = "r30", + [PERF_REG_POWERPC_R31] = "r31", + [PERF_REG_POWERPC_NIP] = "nip", + [PERF_REG_POWERPC_MSR] = "msr", + [PERF_REG_POWERPC_ORIG_R3] = "orig_r3", + [PERF_REG_POWERPC_CTR] = "ctr", + [PERF_REG_POWERPC_LINK] = "link", + [PERF_REG_POWERPC_XER] = "xer", + [PERF_REG_POWERPC_CCR] = "ccr", + [PERF_REG_POWERPC_SOFTE] = "softe", + [PERF_REG_POWERPC_TRAP] = "trap", + [PERF_REG_POWERPC_DAR] = "dar", + [PERF_REG_POWERPC_DSISR] = "dsisr" +}; + +static inline const char *perf_reg_name(int id) +{ + return reg_names[id]; +} +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index c8fe2074d217..90ad64b231cd 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,6 +1,8 @@ libperf-y += header.o libperf-y += sym-handling.o libperf-y += kvm-stat.o +libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 733151cdf46e..41bdf9530d82 100644 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -10,19 +10,26 @@ */ #include <stddef.h> +#include <errno.h> +#include <string.h> #include <dwarf-regs.h> - +#include <linux/ptrace.h> +#include <linux/kernel.h> +#include "util.h" struct pt_regs_dwarfnum { const char *name; unsigned int dwarfnum; + unsigned int ptregs_offset; }; -#define STR(s) #s -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define GPR_DWARFNUM_NAME(num) \ - {.name = STR(%gpr##num), .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} +#define REG_DWARFNUM_NAME(r, num) \ + {.name = STR(%)STR(r), .dwarfnum = num, \ + .ptregs_offset = offsetof(struct pt_regs, r)} +#define GPR_DWARFNUM_NAME(num) \ + {.name = STR(%gpr##num), .dwarfnum = num, \ + .ptregs_offset = offsetof(struct pt_regs, gpr[num])} +#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0} /* * Reference: @@ -61,12 +68,12 @@ static const struct pt_regs_dwarfnum regdwarfnum_table[] = { GPR_DWARFNUM_NAME(29), GPR_DWARFNUM_NAME(30), GPR_DWARFNUM_NAME(31), - REG_DWARFNUM_NAME("%msr", 66), - REG_DWARFNUM_NAME("%ctr", 109), - REG_DWARFNUM_NAME("%link", 108), - REG_DWARFNUM_NAME("%xer", 101), - REG_DWARFNUM_NAME("%dar", 119), - REG_DWARFNUM_NAME("%dsisr", 118), + REG_DWARFNUM_NAME(msr, 66), + REG_DWARFNUM_NAME(ctr, 109), + REG_DWARFNUM_NAME(link, 108), + REG_DWARFNUM_NAME(xer, 101), + REG_DWARFNUM_NAME(dar, 119), + REG_DWARFNUM_NAME(dsisr, 118), REG_DWARFNUM_END, }; @@ -86,3 +93,12 @@ const char *get_arch_regstr(unsigned int n) return roff->name; return NULL; } + +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_dwarfnum *roff; + for (roff = regdwarfnum_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->ptregs_offset; + return -EINVAL; +} diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c new file mode 100644 index 000000000000..a3c3e1ce6807 --- /dev/null +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -0,0 +1,49 @@ +#include "../../perf.h" +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG(r0, PERF_REG_POWERPC_R0), + SMPL_REG(r1, PERF_REG_POWERPC_R1), + SMPL_REG(r2, PERF_REG_POWERPC_R2), + SMPL_REG(r3, PERF_REG_POWERPC_R3), + SMPL_REG(r4, PERF_REG_POWERPC_R4), + SMPL_REG(r5, PERF_REG_POWERPC_R5), + SMPL_REG(r6, PERF_REG_POWERPC_R6), + SMPL_REG(r7, PERF_REG_POWERPC_R7), + SMPL_REG(r8, PERF_REG_POWERPC_R8), + SMPL_REG(r9, PERF_REG_POWERPC_R9), + SMPL_REG(r10, PERF_REG_POWERPC_R10), + SMPL_REG(r11, PERF_REG_POWERPC_R11), + SMPL_REG(r12, PERF_REG_POWERPC_R12), + SMPL_REG(r13, PERF_REG_POWERPC_R13), + SMPL_REG(r14, PERF_REG_POWERPC_R14), + SMPL_REG(r15, PERF_REG_POWERPC_R15), + SMPL_REG(r16, PERF_REG_POWERPC_R16), + SMPL_REG(r17, PERF_REG_POWERPC_R17), + SMPL_REG(r18, PERF_REG_POWERPC_R18), + SMPL_REG(r19, PERF_REG_POWERPC_R19), + SMPL_REG(r20, PERF_REG_POWERPC_R20), + SMPL_REG(r21, PERF_REG_POWERPC_R21), + SMPL_REG(r22, PERF_REG_POWERPC_R22), + SMPL_REG(r23, PERF_REG_POWERPC_R23), + SMPL_REG(r24, PERF_REG_POWERPC_R24), + SMPL_REG(r25, PERF_REG_POWERPC_R25), + SMPL_REG(r26, PERF_REG_POWERPC_R26), + SMPL_REG(r27, PERF_REG_POWERPC_R27), + SMPL_REG(r28, PERF_REG_POWERPC_R28), + SMPL_REG(r29, PERF_REG_POWERPC_R29), + SMPL_REG(r30, PERF_REG_POWERPC_R30), + SMPL_REG(r31, PERF_REG_POWERPC_R31), + SMPL_REG(nip, PERF_REG_POWERPC_NIP), + SMPL_REG(msr, PERF_REG_POWERPC_MSR), + SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3), + SMPL_REG(ctr, PERF_REG_POWERPC_CTR), + SMPL_REG(link, PERF_REG_POWERPC_LINK), + SMPL_REG(xer, PERF_REG_POWERPC_XER), + SMPL_REG(ccr, PERF_REG_POWERPC_CCR), + SMPL_REG(softe, PERF_REG_POWERPC_SOFTE), + SMPL_REG(trap, PERF_REG_POWERPC_TRAP), + SMPL_REG(dar, PERF_REG_POWERPC_DAR), + SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR), + SMPL_REG_END +}; diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index bbc1a50768dd..35745a733100 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -19,12 +19,6 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) ehdr.e_type == ET_DYN; } -#if defined(_CALL_ELF) && _CALL_ELF == 2 -void arch__elf_sym_adjust(GElf_Sym *sym) -{ - sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); -} -#endif #endif #if !defined(_CALL_ELF) || _CALL_ELF != 2 @@ -60,23 +54,71 @@ int arch__compare_symbol_names(const char *namea, const char *nameb) #endif #if defined(_CALL_ELF) && _CALL_ELF == 2 -bool arch__prefers_symtab(void) + +#ifdef HAVE_LIBELF_SUPPORT +void arch__sym_update(struct symbol *s, GElf_Sym *sym) { - return true; + s->arch_sym = sym->st_other; } +#endif #define PPC64LE_LEP_OFFSET 8 void arch__fix_tev_from_maps(struct perf_probe_event *pev, - struct probe_trace_event *tev, struct map *map) + struct probe_trace_event *tev, struct map *map, + struct symbol *sym) { + int lep_offset; + /* - * ppc64 ABIv2 local entry point is currently always 2 instructions - * (8 bytes) after the global entry point. + * When probing at a function entry point, we normally always want the + * LEP since that catches calls to the function through both the GEP and + * the LEP. Hence, we would like to probe at an offset of 8 bytes if + * the user only specified the function entry. + * + * However, if the user specifies an offset, we fall back to using the + * GEP since all userspace applications (objdump/readelf) show function + * disassembly with offsets from the GEP. + * + * In addition, we shouldn't specify an offset for kretprobes. */ - if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { - tev->point.address += PPC64LE_LEP_OFFSET; + if (pev->point.offset || pev->point.retprobe || !map || !sym) + return; + + lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym); + + if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) tev->point.offset += PPC64LE_LEP_OFFSET; + else if (lep_offset) { + if (pev->uprobes) + tev->point.address += lep_offset; + else + tev->point.offset += lep_offset; } } + +#ifdef HAVE_LIBELF_SUPPORT +void arch__post_process_probe_trace_events(struct perf_probe_event *pev, + int ntevs) +{ + struct probe_trace_event *tev; + struct map *map; + struct symbol *sym = NULL; + struct rb_node *tmp; + int i = 0; + + map = get_target_map(pev->target, pev->uprobes); + if (!map || map__load(map, NULL) < 0) + return; + + for (i = 0; i < ntevs; i++) { + tev = &pev->tevs[i]; + map__for_each_symbol(map, sym, tmp) { + if (map->unmap_ip(map, sym->start) == tev->point.address) + arch__fix_tev_from_maps(pev, tev, map, sym); + } + } +} +#endif /* HAVE_LIBELF_SUPPORT */ + #endif diff --git a/tools/perf/arch/powerpc/util/unwind-libunwind.c b/tools/perf/arch/powerpc/util/unwind-libunwind.c new file mode 100644 index 000000000000..9e15f92ae49f --- /dev/null +++ b/tools/perf/arch/powerpc/util/unwind-libunwind.c @@ -0,0 +1,96 @@ +/* + * Copyright 2016 Chandan Kumar, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <errno.h> +#include <libunwind.h> +#include <asm/perf_regs.h> +#include "../../util/unwind.h" +#include "../../util/debug.h" + +int libunwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_PPC64_R0: + return PERF_REG_POWERPC_R0; + case UNW_PPC64_R1: + return PERF_REG_POWERPC_R1; + case UNW_PPC64_R2: + return PERF_REG_POWERPC_R2; + case UNW_PPC64_R3: + return PERF_REG_POWERPC_R3; + case UNW_PPC64_R4: + return PERF_REG_POWERPC_R4; + case UNW_PPC64_R5: + return PERF_REG_POWERPC_R5; + case UNW_PPC64_R6: + return PERF_REG_POWERPC_R6; + case UNW_PPC64_R7: + return PERF_REG_POWERPC_R7; + case UNW_PPC64_R8: + return PERF_REG_POWERPC_R8; + case UNW_PPC64_R9: + return PERF_REG_POWERPC_R9; + case UNW_PPC64_R10: + return PERF_REG_POWERPC_R10; + case UNW_PPC64_R11: + return PERF_REG_POWERPC_R11; + case UNW_PPC64_R12: + return PERF_REG_POWERPC_R12; + case UNW_PPC64_R13: + return PERF_REG_POWERPC_R13; + case UNW_PPC64_R14: + return PERF_REG_POWERPC_R14; + case UNW_PPC64_R15: + return PERF_REG_POWERPC_R15; + case UNW_PPC64_R16: + return PERF_REG_POWERPC_R16; + case UNW_PPC64_R17: + return PERF_REG_POWERPC_R17; + case UNW_PPC64_R18: + return PERF_REG_POWERPC_R18; + case UNW_PPC64_R19: + return PERF_REG_POWERPC_R19; + case UNW_PPC64_R20: + return PERF_REG_POWERPC_R20; + case UNW_PPC64_R21: + return PERF_REG_POWERPC_R21; + case UNW_PPC64_R22: + return PERF_REG_POWERPC_R22; + case UNW_PPC64_R23: + return PERF_REG_POWERPC_R23; + case UNW_PPC64_R24: + return PERF_REG_POWERPC_R24; + case UNW_PPC64_R25: + return PERF_REG_POWERPC_R25; + case UNW_PPC64_R26: + return PERF_REG_POWERPC_R26; + case UNW_PPC64_R27: + return PERF_REG_POWERPC_R27; + case UNW_PPC64_R28: + return PERF_REG_POWERPC_R28; + case UNW_PPC64_R29: + return PERF_REG_POWERPC_R29; + case UNW_PPC64_R30: + return PERF_REG_POWERPC_R30; + case UNW_PPC64_R31: + return PERF_REG_POWERPC_R31; + case UNW_PPC64_LR: + return PERF_REG_POWERPC_LINK; + case UNW_PPC64_CTR: + return PERF_REG_POWERPC_CTR; + case UNW_PPC64_XER: + return PERF_REG_POWERPC_XER; + case UNW_PPC64_NIP: + return PERF_REG_POWERPC_NIP; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + return -EINVAL; +} diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 8a61372bb47a..5bd7b9260cc0 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -2,3 +2,5 @@ libperf-y += header.o libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-y += machine.o diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c new file mode 100644 index 000000000000..b9a95a1a8e69 --- /dev/null +++ b/tools/perf/arch/s390/util/machine.c @@ -0,0 +1,19 @@ +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include "util.h" +#include "machine.h" +#include "api/fs/fs.h" + +int arch__fix_module_text_start(u64 *start, const char *name) +{ + char path[PATH_MAX]; + + snprintf(path, PATH_MAX, "module/%.*s/sections/.text", + (int)strlen(name) - 2, name + 1); + + if (sysfs__read_ull(path, (unsigned long long *)start) < 0) + return -1; + + return 0; +} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 269af2143735..6c9211b18ec0 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -4,3 +4,26 @@ endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 + +### +# Syscall table generation +# + +out := $(OUTPUT)arch/x86/include/generated/asm +header := $(out)/syscalls_64.c +sys := $(srctree)/tools/perf/arch/x86/entry/syscalls +systbl := $(sys)/syscalltbl.sh + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sys)/syscall_64.tbl $(systbl) + @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ + (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \ + || echo "Warning: x86_64's syscall_64.tbl differs from kernel" >&2 )) || true + $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ + +clean:: + $(call QUIET_CLEAN, x86) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl new file mode 100644 index 000000000000..555263e385c9 --- /dev/null +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -0,0 +1,378 @@ +# +# 64-bit system call numbers and entry vectors +# +# The format is: +# <number> <abi> <name> <entry point> +# +# The abi is "common", "64" or "x32" for this file. +# +0 common read sys_read +1 common write sys_write +2 common open sys_open +3 common close sys_close +4 common stat sys_newstat +5 common fstat sys_newfstat +6 common lstat sys_newlstat +7 common poll sys_poll +8 common lseek sys_lseek +9 common mmap sys_mmap +10 common mprotect sys_mprotect +11 common munmap sys_munmap +12 common brk sys_brk +13 64 rt_sigaction sys_rt_sigaction +14 common rt_sigprocmask sys_rt_sigprocmask +15 64 rt_sigreturn sys_rt_sigreturn/ptregs +16 64 ioctl sys_ioctl +17 common pread64 sys_pread64 +18 common pwrite64 sys_pwrite64 +19 64 readv sys_readv +20 64 writev sys_writev +21 common access sys_access +22 common pipe sys_pipe +23 common select sys_select +24 common sched_yield sys_sched_yield +25 common mremap sys_mremap +26 common msync sys_msync +27 common mincore sys_mincore +28 common madvise sys_madvise +29 common shmget sys_shmget +30 common shmat sys_shmat +31 common shmctl sys_shmctl +32 common dup sys_dup +33 common dup2 sys_dup2 +34 common pause sys_pause +35 common nanosleep sys_nanosleep +36 common getitimer sys_getitimer +37 common alarm sys_alarm +38 common setitimer sys_setitimer +39 common getpid sys_getpid +40 common sendfile sys_sendfile64 +41 common socket sys_socket +42 common connect sys_connect +43 common accept sys_accept +44 common sendto sys_sendto +45 64 recvfrom sys_recvfrom +46 64 sendmsg sys_sendmsg +47 64 recvmsg sys_recvmsg +48 common shutdown sys_shutdown +49 common bind sys_bind +50 common listen sys_listen +51 common getsockname sys_getsockname +52 common getpeername sys_getpeername +53 common socketpair sys_socketpair +54 64 setsockopt sys_setsockopt +55 64 getsockopt sys_getsockopt +56 common clone sys_clone/ptregs +57 common fork sys_fork/ptregs +58 common vfork sys_vfork/ptregs +59 64 execve sys_execve/ptregs +60 common exit sys_exit +61 common wait4 sys_wait4 +62 common kill sys_kill +63 common uname sys_newuname +64 common semget sys_semget +65 common semop sys_semop +66 common semctl sys_semctl +67 common shmdt sys_shmdt +68 common msgget sys_msgget +69 common msgsnd sys_msgsnd +70 common msgrcv sys_msgrcv +71 common msgctl sys_msgctl +72 common fcntl sys_fcntl +73 common flock sys_flock +74 common fsync sys_fsync +75 common fdatasync sys_fdatasync +76 common truncate sys_truncate +77 common ftruncate sys_ftruncate +78 common getdents sys_getdents +79 common getcwd sys_getcwd +80 common chdir sys_chdir +81 common fchdir sys_fchdir +82 common rename sys_rename +83 common mkdir sys_mkdir +84 common rmdir sys_rmdir +85 common creat sys_creat +86 common link sys_link +87 common unlink sys_unlink +88 common symlink sys_symlink +89 common readlink sys_readlink +90 common chmod sys_chmod +91 common fchmod sys_fchmod +92 common chown sys_chown +93 common fchown sys_fchown +94 common lchown sys_lchown +95 common umask sys_umask +96 common gettimeofday sys_gettimeofday +97 common getrlimit sys_getrlimit +98 common getrusage sys_getrusage +99 common sysinfo sys_sysinfo +100 common times sys_times +101 64 ptrace sys_ptrace +102 common getuid sys_getuid +103 common syslog sys_syslog +104 common getgid sys_getgid +105 common setuid sys_setuid +106 common setgid sys_setgid +107 common geteuid sys_geteuid +108 common getegid sys_getegid +109 common setpgid sys_setpgid +110 common getppid sys_getppid +111 common getpgrp sys_getpgrp +112 common setsid sys_setsid +113 common setreuid sys_setreuid +114 common setregid sys_setregid +115 common getgroups sys_getgroups +116 common setgroups sys_setgroups +117 common setresuid sys_setresuid +118 common getresuid sys_getresuid +119 common setresgid sys_setresgid +120 common getresgid sys_getresgid +121 common getpgid sys_getpgid +122 common setfsuid sys_setfsuid +123 common setfsgid sys_setfsgid +124 common getsid sys_getsid +125 common capget sys_capget +126 common capset sys_capset +127 64 rt_sigpending sys_rt_sigpending +128 64 rt_sigtimedwait sys_rt_sigtimedwait +129 64 rt_sigqueueinfo sys_rt_sigqueueinfo +130 common rt_sigsuspend sys_rt_sigsuspend +131 64 sigaltstack sys_sigaltstack +132 common utime sys_utime +133 common mknod sys_mknod +134 64 uselib +135 common personality sys_personality +136 common ustat sys_ustat +137 common statfs sys_statfs +138 common fstatfs sys_fstatfs +139 common sysfs sys_sysfs +140 common getpriority sys_getpriority +141 common setpriority sys_setpriority +142 common sched_setparam sys_sched_setparam +143 common sched_getparam sys_sched_getparam +144 common sched_setscheduler sys_sched_setscheduler +145 common sched_getscheduler sys_sched_getscheduler +146 common sched_get_priority_max sys_sched_get_priority_max +147 common sched_get_priority_min sys_sched_get_priority_min +148 common sched_rr_get_interval sys_sched_rr_get_interval +149 common mlock sys_mlock +150 common munlock sys_munlock +151 common mlockall sys_mlockall +152 common munlockall sys_munlockall +153 common vhangup sys_vhangup +154 common modify_ldt sys_modify_ldt +155 common pivot_root sys_pivot_root +156 64 _sysctl sys_sysctl +157 common prctl sys_prctl +158 common arch_prctl sys_arch_prctl +159 common adjtimex sys_adjtimex +160 common setrlimit sys_setrlimit +161 common chroot sys_chroot +162 common sync sys_sync +163 common acct sys_acct +164 common settimeofday sys_settimeofday +165 common mount sys_mount +166 common umount2 sys_umount +167 common swapon sys_swapon +168 common swapoff sys_swapoff +169 common reboot sys_reboot +170 common sethostname sys_sethostname +171 common setdomainname sys_setdomainname +172 common iopl sys_iopl/ptregs +173 common ioperm sys_ioperm +174 64 create_module +175 common init_module sys_init_module +176 common delete_module sys_delete_module +177 64 get_kernel_syms +178 64 query_module +179 common quotactl sys_quotactl +180 64 nfsservctl +181 common getpmsg +182 common putpmsg +183 common afs_syscall +184 common tuxcall +185 common security +186 common gettid sys_gettid +187 common readahead sys_readahead +188 common setxattr sys_setxattr +189 common lsetxattr sys_lsetxattr +190 common fsetxattr sys_fsetxattr +191 common getxattr sys_getxattr +192 common lgetxattr sys_lgetxattr +193 common fgetxattr sys_fgetxattr +194 common listxattr sys_listxattr +195 common llistxattr sys_llistxattr +196 common flistxattr sys_flistxattr +197 common removexattr sys_removexattr +198 common lremovexattr sys_lremovexattr +199 common fremovexattr sys_fremovexattr +200 common tkill sys_tkill +201 common time sys_time +202 common futex sys_futex +203 common sched_setaffinity sys_sched_setaffinity +204 common sched_getaffinity sys_sched_getaffinity +205 64 set_thread_area +206 64 io_setup sys_io_setup +207 common io_destroy sys_io_destroy +208 common io_getevents sys_io_getevents +209 64 io_submit sys_io_submit +210 common io_cancel sys_io_cancel +211 64 get_thread_area +212 common lookup_dcookie sys_lookup_dcookie +213 common epoll_create sys_epoll_create +214 64 epoll_ctl_old +215 64 epoll_wait_old +216 common remap_file_pages sys_remap_file_pages +217 common getdents64 sys_getdents64 +218 common set_tid_address sys_set_tid_address +219 common restart_syscall sys_restart_syscall +220 common semtimedop sys_semtimedop +221 common fadvise64 sys_fadvise64 +222 64 timer_create sys_timer_create +223 common timer_settime sys_timer_settime +224 common timer_gettime sys_timer_gettime +225 common timer_getoverrun sys_timer_getoverrun +226 common timer_delete sys_timer_delete +227 common clock_settime sys_clock_settime +228 common clock_gettime sys_clock_gettime +229 common clock_getres sys_clock_getres +230 common clock_nanosleep sys_clock_nanosleep +231 common exit_group sys_exit_group +232 common epoll_wait sys_epoll_wait +233 common epoll_ctl sys_epoll_ctl +234 common tgkill sys_tgkill +235 common utimes sys_utimes +236 64 vserver +237 common mbind sys_mbind +238 common set_mempolicy sys_set_mempolicy +239 common get_mempolicy sys_get_mempolicy +240 common mq_open sys_mq_open +241 common mq_unlink sys_mq_unlink +242 common mq_timedsend sys_mq_timedsend +243 common mq_timedreceive sys_mq_timedreceive +244 64 mq_notify sys_mq_notify +245 common mq_getsetattr sys_mq_getsetattr +246 64 kexec_load sys_kexec_load +247 64 waitid sys_waitid +248 common add_key sys_add_key +249 common request_key sys_request_key +250 common keyctl sys_keyctl +251 common ioprio_set sys_ioprio_set +252 common ioprio_get sys_ioprio_get +253 common inotify_init sys_inotify_init +254 common inotify_add_watch sys_inotify_add_watch +255 common inotify_rm_watch sys_inotify_rm_watch +256 common migrate_pages sys_migrate_pages +257 common openat sys_openat +258 common mkdirat sys_mkdirat +259 common mknodat sys_mknodat +260 common fchownat sys_fchownat +261 common futimesat sys_futimesat +262 common newfstatat sys_newfstatat +263 common unlinkat sys_unlinkat +264 common renameat sys_renameat +265 common linkat sys_linkat +266 common symlinkat sys_symlinkat +267 common readlinkat sys_readlinkat +268 common fchmodat sys_fchmodat +269 common faccessat sys_faccessat +270 common pselect6 sys_pselect6 +271 common ppoll sys_ppoll +272 common unshare sys_unshare +273 64 set_robust_list sys_set_robust_list +274 64 get_robust_list sys_get_robust_list +275 common splice sys_splice +276 common tee sys_tee +277 common sync_file_range sys_sync_file_range +278 64 vmsplice sys_vmsplice +279 64 move_pages sys_move_pages +280 common utimensat sys_utimensat +281 common epoll_pwait sys_epoll_pwait +282 common signalfd sys_signalfd +283 common timerfd_create sys_timerfd_create +284 common eventfd sys_eventfd +285 common fallocate sys_fallocate +286 common timerfd_settime sys_timerfd_settime +287 common timerfd_gettime sys_timerfd_gettime +288 common accept4 sys_accept4 +289 common signalfd4 sys_signalfd4 +290 common eventfd2 sys_eventfd2 +291 common epoll_create1 sys_epoll_create1 +292 common dup3 sys_dup3 +293 common pipe2 sys_pipe2 +294 common inotify_init1 sys_inotify_init1 +295 64 preadv sys_preadv +296 64 pwritev sys_pwritev +297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +298 common perf_event_open sys_perf_event_open +299 64 recvmmsg sys_recvmmsg +300 common fanotify_init sys_fanotify_init +301 common fanotify_mark sys_fanotify_mark +302 common prlimit64 sys_prlimit64 +303 common name_to_handle_at sys_name_to_handle_at +304 common open_by_handle_at sys_open_by_handle_at +305 common clock_adjtime sys_clock_adjtime +306 common syncfs sys_syncfs +307 64 sendmmsg sys_sendmmsg +308 common setns sys_setns +309 common getcpu sys_getcpu +310 64 process_vm_readv sys_process_vm_readv +311 64 process_vm_writev sys_process_vm_writev +312 common kcmp sys_kcmp +313 common finit_module sys_finit_module +314 common sched_setattr sys_sched_setattr +315 common sched_getattr sys_sched_getattr +316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp +318 common getrandom sys_getrandom +319 common memfd_create sys_memfd_create +320 common kexec_file_load sys_kexec_file_load +321 common bpf sys_bpf +322 64 execveat sys_execveat/ptregs +323 common userfaultfd sys_userfaultfd +324 common membarrier sys_membarrier +325 common mlock2 sys_mlock2 +326 common copy_file_range sys_copy_file_range +327 64 preadv2 sys_preadv2 +328 64 pwritev2 sys_pwritev2 + +# +# x32-specific system call numbers start at 512 to avoid cache impact +# for native 64-bit operation. +# +512 x32 rt_sigaction compat_sys_rt_sigaction +513 x32 rt_sigreturn sys32_x32_rt_sigreturn +514 x32 ioctl compat_sys_ioctl +515 x32 readv compat_sys_readv +516 x32 writev compat_sys_writev +517 x32 recvfrom compat_sys_recvfrom +518 x32 sendmsg compat_sys_sendmsg +519 x32 recvmsg compat_sys_recvmsg +520 x32 execve compat_sys_execve/ptregs +521 x32 ptrace compat_sys_ptrace +522 x32 rt_sigpending compat_sys_rt_sigpending +523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait +524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo +525 x32 sigaltstack compat_sys_sigaltstack +526 x32 timer_create compat_sys_timer_create +527 x32 mq_notify compat_sys_mq_notify +528 x32 kexec_load compat_sys_kexec_load +529 x32 waitid compat_sys_waitid +530 x32 set_robust_list compat_sys_set_robust_list +531 x32 get_robust_list compat_sys_get_robust_list +532 x32 vmsplice compat_sys_vmsplice +533 x32 move_pages compat_sys_move_pages +534 x32 preadv compat_sys_preadv64 +535 x32 pwritev compat_sys_pwritev64 +536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +537 x32 recvmmsg compat_sys_recvmmsg +538 x32 sendmmsg compat_sys_sendmmsg +539 x32 process_vm_readv compat_sys_process_vm_readv +540 x32 process_vm_writev compat_sys_process_vm_writev +541 x32 setsockopt compat_sys_setsockopt +542 x32 getsockopt compat_sys_getsockopt +543 x32 io_setup compat_sys_io_setup +544 x32 io_submit compat_sys_io_submit +545 x32 execveat compat_sys_execveat/ptregs +534 x32 preadv2 compat_sys_preadv2 +535 x32 pwritev2 compat_sys_pwritev2 diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh new file mode 100755 index 000000000000..49a18b9ad9cf --- /dev/null +++ b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +in="$1" +arch="$2" + +syscall_macro() { + nr="$1" + name="$2" + + echo " [$nr] = \"$name\"," +} + +emit() { + nr="$1" + entry="$2" + + syscall_macro "$nr" "$entry" +} + +echo "static const char *syscalltbl_${arch}[] = {" + +sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX) +grep '^[0-9]' "$in" | sort -n > $sorted_table + +max_nr=0 +while read nr abi name entry compat; do + if [ $nr -ge 512 ] ; then # discard compat sycalls + break + fi + + emit "$nr" "$name" + max_nr=$nr +done < $sorted_table + +rm -f $sorted_table + +echo "};" + +echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}" diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 3b491cfe204e..0f196eec9f48 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -6,6 +6,1016 @@ {{0x0f, 0x31, }, 2, 0, "", "", "0f 31 \trdtsc ",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "", +"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",}, +{{0x62, 0x81, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 81 78 56 34 12 \tbound %eax,0x12345678(%ecx)",}, +{{0x62, 0x88, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 88 78 56 34 12 \tbound %ecx,0x12345678(%eax)",}, +{{0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 90 78 56 34 12 \tbound %edx,0x12345678(%eax)",}, +{{0x62, 0x98, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 98 78 56 34 12 \tbound %ebx,0x12345678(%eax)",}, +{{0x62, 0xa0, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 a0 78 56 34 12 \tbound %esp,0x12345678(%eax)",}, +{{0x62, 0xa8, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 a8 78 56 34 12 \tbound %ebp,0x12345678(%eax)",}, +{{0x62, 0xb0, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 b0 78 56 34 12 \tbound %esi,0x12345678(%eax)",}, +{{0x62, 0xb8, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 b8 78 56 34 12 \tbound %edi,0x12345678(%eax)",}, +{{0x62, 0x08, }, 2, 0, "", "", +"62 08 \tbound %ecx,(%eax)",}, +{{0x62, 0x05, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 05 78 56 34 12 \tbound %eax,0x12345678",}, +{{0x62, 0x14, 0x01, }, 3, 0, "", "", +"62 14 01 \tbound %edx,(%ecx,%eax,1)",}, +{{0x62, 0x14, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"62 14 05 78 56 34 12 \tbound %edx,0x12345678(,%eax,1)",}, +{{0x62, 0x14, 0x08, }, 3, 0, "", "", +"62 14 08 \tbound %edx,(%eax,%ecx,1)",}, +{{0x62, 0x14, 0xc8, }, 3, 0, "", "", +"62 14 c8 \tbound %edx,(%eax,%ecx,8)",}, +{{0x62, 0x50, 0x12, }, 3, 0, "", "", +"62 50 12 \tbound %edx,0x12(%eax)",}, +{{0x62, 0x55, 0x12, }, 3, 0, "", "", +"62 55 12 \tbound %edx,0x12(%ebp)",}, +{{0x62, 0x54, 0x01, 0x12, }, 4, 0, "", "", +"62 54 01 12 \tbound %edx,0x12(%ecx,%eax,1)",}, +{{0x62, 0x54, 0x05, 0x12, }, 4, 0, "", "", +"62 54 05 12 \tbound %edx,0x12(%ebp,%eax,1)",}, +{{0x62, 0x54, 0x08, 0x12, }, 4, 0, "", "", +"62 54 08 12 \tbound %edx,0x12(%eax,%ecx,1)",}, +{{0x62, 0x54, 0xc8, 0x12, }, 4, 0, "", "", +"62 54 c8 12 \tbound %edx,0x12(%eax,%ecx,8)",}, +{{0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 90 78 56 34 12 \tbound %edx,0x12345678(%eax)",}, +{{0x62, 0x95, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "", +"62 95 78 56 34 12 \tbound %edx,0x12345678(%ebp)",}, +{{0x62, 0x94, 0x01, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"62 94 01 78 56 34 12 \tbound %edx,0x12345678(%ecx,%eax,1)",}, +{{0x62, 0x94, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"62 94 05 78 56 34 12 \tbound %edx,0x12345678(%ebp,%eax,1)",}, +{{0x62, 0x94, 0x08, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"62 94 08 78 56 34 12 \tbound %edx,0x12345678(%eax,%ecx,1)",}, +{{0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"62 94 c8 78 56 34 12 \tbound %edx,0x12345678(%eax,%ecx,8)",}, +{{0x66, 0x62, 0x81, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 81 78 56 34 12 \tbound %ax,0x12345678(%ecx)",}, +{{0x66, 0x62, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 88 78 56 34 12 \tbound %cx,0x12345678(%eax)",}, +{{0x66, 0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 90 78 56 34 12 \tbound %dx,0x12345678(%eax)",}, +{{0x66, 0x62, 0x98, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 98 78 56 34 12 \tbound %bx,0x12345678(%eax)",}, +{{0x66, 0x62, 0xa0, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 a0 78 56 34 12 \tbound %sp,0x12345678(%eax)",}, +{{0x66, 0x62, 0xa8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 a8 78 56 34 12 \tbound %bp,0x12345678(%eax)",}, +{{0x66, 0x62, 0xb0, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 b0 78 56 34 12 \tbound %si,0x12345678(%eax)",}, +{{0x66, 0x62, 0xb8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 b8 78 56 34 12 \tbound %di,0x12345678(%eax)",}, +{{0x66, 0x62, 0x08, }, 3, 0, "", "", +"66 62 08 \tbound %cx,(%eax)",}, +{{0x66, 0x62, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 05 78 56 34 12 \tbound %ax,0x12345678",}, +{{0x66, 0x62, 0x14, 0x01, }, 4, 0, "", "", +"66 62 14 01 \tbound %dx,(%ecx,%eax,1)",}, +{{0x66, 0x62, 0x14, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 62 14 05 78 56 34 12 \tbound %dx,0x12345678(,%eax,1)",}, +{{0x66, 0x62, 0x14, 0x08, }, 4, 0, "", "", +"66 62 14 08 \tbound %dx,(%eax,%ecx,1)",}, +{{0x66, 0x62, 0x14, 0xc8, }, 4, 0, "", "", +"66 62 14 c8 \tbound %dx,(%eax,%ecx,8)",}, +{{0x66, 0x62, 0x50, 0x12, }, 4, 0, "", "", +"66 62 50 12 \tbound %dx,0x12(%eax)",}, +{{0x66, 0x62, 0x55, 0x12, }, 4, 0, "", "", +"66 62 55 12 \tbound %dx,0x12(%ebp)",}, +{{0x66, 0x62, 0x54, 0x01, 0x12, }, 5, 0, "", "", +"66 62 54 01 12 \tbound %dx,0x12(%ecx,%eax,1)",}, +{{0x66, 0x62, 0x54, 0x05, 0x12, }, 5, 0, "", "", +"66 62 54 05 12 \tbound %dx,0x12(%ebp,%eax,1)",}, +{{0x66, 0x62, 0x54, 0x08, 0x12, }, 5, 0, "", "", +"66 62 54 08 12 \tbound %dx,0x12(%eax,%ecx,1)",}, +{{0x66, 0x62, 0x54, 0xc8, 0x12, }, 5, 0, "", "", +"66 62 54 c8 12 \tbound %dx,0x12(%eax,%ecx,8)",}, +{{0x66, 0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 90 78 56 34 12 \tbound %dx,0x12345678(%eax)",}, +{{0x66, 0x62, 0x95, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"66 62 95 78 56 34 12 \tbound %dx,0x12345678(%ebp)",}, +{{0x66, 0x62, 0x94, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 62 94 01 78 56 34 12 \tbound %dx,0x12345678(%ecx,%eax,1)",}, +{{0x66, 0x62, 0x94, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 62 94 05 78 56 34 12 \tbound %dx,0x12345678(%ebp,%eax,1)",}, +{{0x66, 0x62, 0x94, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 62 94 08 78 56 34 12 \tbound %dx,0x12345678(%eax,%ecx,1)",}, +{{0x66, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 62 94 c8 78 56 34 12 \tbound %dx,0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0x41, 0xd8, }, 3, 0, "", "", +"0f 41 d8 \tcmovno %eax,%ebx",}, +{{0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 41 88 78 56 34 12 \tcmovno 0x12345678(%eax),%ecx",}, +{{0x66, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%eax),%cx",}, +{{0x0f, 0x44, 0xd8, }, 3, 0, "", "", +"0f 44 d8 \tcmove %eax,%ebx",}, +{{0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 44 88 78 56 34 12 \tcmove 0x12345678(%eax),%ecx",}, +{{0x66, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 44 88 78 56 34 12 \tcmove 0x12345678(%eax),%cx",}, +{{0x0f, 0x90, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 90 80 78 56 34 12 \tseto 0x12345678(%eax)",}, +{{0x0f, 0x91, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 91 80 78 56 34 12 \tsetno 0x12345678(%eax)",}, +{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 92 80 78 56 34 12 \tsetb 0x12345678(%eax)",}, +{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 92 80 78 56 34 12 \tsetb 0x12345678(%eax)",}, +{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 92 80 78 56 34 12 \tsetb 0x12345678(%eax)",}, +{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 93 80 78 56 34 12 \tsetae 0x12345678(%eax)",}, +{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 93 80 78 56 34 12 \tsetae 0x12345678(%eax)",}, +{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 93 80 78 56 34 12 \tsetae 0x12345678(%eax)",}, +{{0x0f, 0x98, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 98 80 78 56 34 12 \tsets 0x12345678(%eax)",}, +{{0x0f, 0x99, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 99 80 78 56 34 12 \tsetns 0x12345678(%eax)",}, +{{0xc5, 0xcc, 0x41, 0xef, }, 4, 0, "", "", +"c5 cc 41 ef \tkandw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x41, 0xef, }, 5, 0, "", "", +"c4 e1 cc 41 ef \tkandq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x41, 0xef, }, 4, 0, "", "", +"c5 cd 41 ef \tkandb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x41, 0xef, }, 5, 0, "", "", +"c4 e1 cd 41 ef \tkandd %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x42, 0xef, }, 4, 0, "", "", +"c5 cc 42 ef \tkandnw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x42, 0xef, }, 5, 0, "", "", +"c4 e1 cc 42 ef \tkandnq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x42, 0xef, }, 4, 0, "", "", +"c5 cd 42 ef \tkandnb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x42, 0xef, }, 5, 0, "", "", +"c4 e1 cd 42 ef \tkandnd %k7,%k6,%k5",}, +{{0xc5, 0xf8, 0x44, 0xf7, }, 4, 0, "", "", +"c5 f8 44 f7 \tknotw %k7,%k6",}, +{{0xc4, 0xe1, 0xf8, 0x44, 0xf7, }, 5, 0, "", "", +"c4 e1 f8 44 f7 \tknotq %k7,%k6",}, +{{0xc5, 0xf9, 0x44, 0xf7, }, 4, 0, "", "", +"c5 f9 44 f7 \tknotb %k7,%k6",}, +{{0xc4, 0xe1, 0xf9, 0x44, 0xf7, }, 5, 0, "", "", +"c4 e1 f9 44 f7 \tknotd %k7,%k6",}, +{{0xc5, 0xcc, 0x45, 0xef, }, 4, 0, "", "", +"c5 cc 45 ef \tkorw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x45, 0xef, }, 5, 0, "", "", +"c4 e1 cc 45 ef \tkorq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x45, 0xef, }, 4, 0, "", "", +"c5 cd 45 ef \tkorb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x45, 0xef, }, 5, 0, "", "", +"c4 e1 cd 45 ef \tkord %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x46, 0xef, }, 4, 0, "", "", +"c5 cc 46 ef \tkxnorw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x46, 0xef, }, 5, 0, "", "", +"c4 e1 cc 46 ef \tkxnorq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x46, 0xef, }, 4, 0, "", "", +"c5 cd 46 ef \tkxnorb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x46, 0xef, }, 5, 0, "", "", +"c4 e1 cd 46 ef \tkxnord %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x47, 0xef, }, 4, 0, "", "", +"c5 cc 47 ef \tkxorw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x47, 0xef, }, 5, 0, "", "", +"c4 e1 cc 47 ef \tkxorq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x47, 0xef, }, 4, 0, "", "", +"c5 cd 47 ef \tkxorb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x47, 0xef, }, 5, 0, "", "", +"c4 e1 cd 47 ef \tkxord %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x4a, 0xef, }, 4, 0, "", "", +"c5 cc 4a ef \tkaddw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x4a, 0xef, }, 5, 0, "", "", +"c4 e1 cc 4a ef \tkaddq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x4a, 0xef, }, 4, 0, "", "", +"c5 cd 4a ef \tkaddb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x4a, 0xef, }, 5, 0, "", "", +"c4 e1 cd 4a ef \tkaddd %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x4b, 0xef, }, 4, 0, "", "", +"c5 cd 4b ef \tkunpckbw %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x4b, 0xef, }, 4, 0, "", "", +"c5 cc 4b ef \tkunpckwd %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x4b, 0xef, }, 5, 0, "", "", +"c4 e1 cc 4b ef \tkunpckdq %k7,%k6,%k5",}, +{{0xc5, 0xf8, 0x90, 0xee, }, 4, 0, "", "", +"c5 f8 90 ee \tkmovw %k6,%k5",}, +{{0xc5, 0xf8, 0x90, 0x29, }, 4, 0, "", "", +"c5 f8 90 29 \tkmovw (%ecx),%k5",}, +{{0xc5, 0xf8, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "", +"c5 f8 90 ac c8 23 01 00 00 \tkmovw 0x123(%eax,%ecx,8),%k5",}, +{{0xc5, 0xf8, 0x91, 0x29, }, 4, 0, "", "", +"c5 f8 91 29 \tkmovw %k5,(%ecx)",}, +{{0xc5, 0xf8, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "", +"c5 f8 91 ac c8 23 01 00 00 \tkmovw %k5,0x123(%eax,%ecx,8)",}, +{{0xc5, 0xf8, 0x92, 0xe8, }, 4, 0, "", "", +"c5 f8 92 e8 \tkmovw %eax,%k5",}, +{{0xc5, 0xf8, 0x92, 0xed, }, 4, 0, "", "", +"c5 f8 92 ed \tkmovw %ebp,%k5",}, +{{0xc5, 0xf8, 0x93, 0xc5, }, 4, 0, "", "", +"c5 f8 93 c5 \tkmovw %k5,%eax",}, +{{0xc5, 0xf8, 0x93, 0xed, }, 4, 0, "", "", +"c5 f8 93 ed \tkmovw %k5,%ebp",}, +{{0xc4, 0xe1, 0xf8, 0x90, 0xee, }, 5, 0, "", "", +"c4 e1 f8 90 ee \tkmovq %k6,%k5",}, +{{0xc4, 0xe1, 0xf8, 0x90, 0x29, }, 5, 0, "", "", +"c4 e1 f8 90 29 \tkmovq (%ecx),%k5",}, +{{0xc4, 0xe1, 0xf8, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 e1 f8 90 ac c8 23 01 00 00 \tkmovq 0x123(%eax,%ecx,8),%k5",}, +{{0xc4, 0xe1, 0xf8, 0x91, 0x29, }, 5, 0, "", "", +"c4 e1 f8 91 29 \tkmovq %k5,(%ecx)",}, +{{0xc4, 0xe1, 0xf8, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 e1 f8 91 ac c8 23 01 00 00 \tkmovq %k5,0x123(%eax,%ecx,8)",}, +{{0xc5, 0xf9, 0x90, 0xee, }, 4, 0, "", "", +"c5 f9 90 ee \tkmovb %k6,%k5",}, +{{0xc5, 0xf9, 0x90, 0x29, }, 4, 0, "", "", +"c5 f9 90 29 \tkmovb (%ecx),%k5",}, +{{0xc5, 0xf9, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "", +"c5 f9 90 ac c8 23 01 00 00 \tkmovb 0x123(%eax,%ecx,8),%k5",}, +{{0xc5, 0xf9, 0x91, 0x29, }, 4, 0, "", "", +"c5 f9 91 29 \tkmovb %k5,(%ecx)",}, +{{0xc5, 0xf9, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "", +"c5 f9 91 ac c8 23 01 00 00 \tkmovb %k5,0x123(%eax,%ecx,8)",}, +{{0xc5, 0xf9, 0x92, 0xe8, }, 4, 0, "", "", +"c5 f9 92 e8 \tkmovb %eax,%k5",}, +{{0xc5, 0xf9, 0x92, 0xed, }, 4, 0, "", "", +"c5 f9 92 ed \tkmovb %ebp,%k5",}, +{{0xc5, 0xf9, 0x93, 0xc5, }, 4, 0, "", "", +"c5 f9 93 c5 \tkmovb %k5,%eax",}, +{{0xc5, 0xf9, 0x93, 0xed, }, 4, 0, "", "", +"c5 f9 93 ed \tkmovb %k5,%ebp",}, +{{0xc4, 0xe1, 0xf9, 0x90, 0xee, }, 5, 0, "", "", +"c4 e1 f9 90 ee \tkmovd %k6,%k5",}, +{{0xc4, 0xe1, 0xf9, 0x90, 0x29, }, 5, 0, "", "", +"c4 e1 f9 90 29 \tkmovd (%ecx),%k5",}, +{{0xc4, 0xe1, 0xf9, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 e1 f9 90 ac c8 23 01 00 00 \tkmovd 0x123(%eax,%ecx,8),%k5",}, +{{0xc4, 0xe1, 0xf9, 0x91, 0x29, }, 5, 0, "", "", +"c4 e1 f9 91 29 \tkmovd %k5,(%ecx)",}, +{{0xc4, 0xe1, 0xf9, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 e1 f9 91 ac c8 23 01 00 00 \tkmovd %k5,0x123(%eax,%ecx,8)",}, +{{0xc5, 0xfb, 0x92, 0xe8, }, 4, 0, "", "", +"c5 fb 92 e8 \tkmovd %eax,%k5",}, +{{0xc5, 0xfb, 0x92, 0xed, }, 4, 0, "", "", +"c5 fb 92 ed \tkmovd %ebp,%k5",}, +{{0xc5, 0xfb, 0x93, 0xc5, }, 4, 0, "", "", +"c5 fb 93 c5 \tkmovd %k5,%eax",}, +{{0xc5, 0xfb, 0x93, 0xed, }, 4, 0, "", "", +"c5 fb 93 ed \tkmovd %k5,%ebp",}, +{{0xc5, 0xf8, 0x98, 0xee, }, 4, 0, "", "", +"c5 f8 98 ee \tkortestw %k6,%k5",}, +{{0xc4, 0xe1, 0xf8, 0x98, 0xee, }, 5, 0, "", "", +"c4 e1 f8 98 ee \tkortestq %k6,%k5",}, +{{0xc5, 0xf9, 0x98, 0xee, }, 4, 0, "", "", +"c5 f9 98 ee \tkortestb %k6,%k5",}, +{{0xc4, 0xe1, 0xf9, 0x98, 0xee, }, 5, 0, "", "", +"c4 e1 f9 98 ee \tkortestd %k6,%k5",}, +{{0xc5, 0xf8, 0x99, 0xee, }, 4, 0, "", "", +"c5 f8 99 ee \tktestw %k6,%k5",}, +{{0xc4, 0xe1, 0xf8, 0x99, 0xee, }, 5, 0, "", "", +"c4 e1 f8 99 ee \tktestq %k6,%k5",}, +{{0xc5, 0xf9, 0x99, 0xee, }, 4, 0, "", "", +"c5 f9 99 ee \tktestb %k6,%k5",}, +{{0xc4, 0xe1, 0xf9, 0x99, 0xee, }, 5, 0, "", "", +"c4 e1 f9 99 ee \tktestd %k6,%k5",}, +{{0xc4, 0xe3, 0xf9, 0x30, 0xee, 0x12, }, 6, 0, "", "", +"c4 e3 f9 30 ee 12 \tkshiftrw $0x12,%k6,%k5",}, +{{0xc4, 0xe3, 0xf9, 0x31, 0xee, 0x5b, }, 6, 0, "", "", +"c4 e3 f9 31 ee 5b \tkshiftrq $0x5b,%k6,%k5",}, +{{0xc4, 0xe3, 0xf9, 0x32, 0xee, 0x12, }, 6, 0, "", "", +"c4 e3 f9 32 ee 12 \tkshiftlw $0x12,%k6,%k5",}, +{{0xc4, 0xe3, 0xf9, 0x33, 0xee, 0x5b, }, 6, 0, "", "", +"c4 e3 f9 33 ee 5b \tkshiftlq $0x5b,%k6,%k5",}, +{{0xc5, 0xf8, 0x5b, 0xf5, }, 4, 0, "", "", +"c5 f8 5b f5 \tvcvtdq2ps %xmm5,%xmm6",}, +{{0x62, 0xf1, 0xfc, 0x4f, 0x5b, 0xf5, }, 6, 0, "", "", +"62 f1 fc 4f 5b f5 \tvcvtqq2ps %zmm5,%ymm6{%k7}",}, +{{0xc5, 0xf9, 0x5b, 0xf5, }, 4, 0, "", "", +"c5 f9 5b f5 \tvcvtps2dq %xmm5,%xmm6",}, +{{0xc5, 0xfa, 0x5b, 0xf5, }, 4, 0, "", "", +"c5 fa 5b f5 \tvcvttps2dq %xmm5,%xmm6",}, +{{0x0f, 0x6f, 0xe0, }, 3, 0, "", "", +"0f 6f e0 \tmovq %mm0,%mm4",}, +{{0xc5, 0xfd, 0x6f, 0xf4, }, 4, 0, "", "", +"c5 fd 6f f4 \tvmovdqa %ymm4,%ymm6",}, +{{0x62, 0xf1, 0x7d, 0x48, 0x6f, 0xf5, }, 6, 0, "", "", +"62 f1 7d 48 6f f5 \tvmovdqa32 %zmm5,%zmm6",}, +{{0x62, 0xf1, 0xfd, 0x48, 0x6f, 0xf5, }, 6, 0, "", "", +"62 f1 fd 48 6f f5 \tvmovdqa64 %zmm5,%zmm6",}, +{{0xc5, 0xfe, 0x6f, 0xf4, }, 4, 0, "", "", +"c5 fe 6f f4 \tvmovdqu %ymm4,%ymm6",}, +{{0x62, 0xf1, 0x7e, 0x48, 0x6f, 0xf5, }, 6, 0, "", "", +"62 f1 7e 48 6f f5 \tvmovdqu32 %zmm5,%zmm6",}, +{{0x62, 0xf1, 0xfe, 0x48, 0x6f, 0xf5, }, 6, 0, "", "", +"62 f1 fe 48 6f f5 \tvmovdqu64 %zmm5,%zmm6",}, +{{0x62, 0xf1, 0x7f, 0x48, 0x6f, 0xf5, }, 6, 0, "", "", +"62 f1 7f 48 6f f5 \tvmovdqu8 %zmm5,%zmm6",}, +{{0x62, 0xf1, 0xff, 0x48, 0x6f, 0xf5, }, 6, 0, "", "", +"62 f1 ff 48 6f f5 \tvmovdqu16 %zmm5,%zmm6",}, +{{0x0f, 0x78, 0xc3, }, 3, 0, "", "", +"0f 78 c3 \tvmread %eax,%ebx",}, +{{0x62, 0xf1, 0x7c, 0x48, 0x78, 0xf5, }, 6, 0, "", "", +"62 f1 7c 48 78 f5 \tvcvttps2udq %zmm5,%zmm6",}, +{{0x62, 0xf1, 0xfc, 0x4f, 0x78, 0xf5, }, 6, 0, "", "", +"62 f1 fc 4f 78 f5 \tvcvttpd2udq %zmm5,%ymm6{%k7}",}, +{{0x62, 0xf1, 0x7f, 0x08, 0x78, 0xc6, }, 6, 0, "", "", +"62 f1 7f 08 78 c6 \tvcvttsd2usi %xmm6,%eax",}, +{{0x62, 0xf1, 0x7e, 0x08, 0x78, 0xc6, }, 6, 0, "", "", +"62 f1 7e 08 78 c6 \tvcvttss2usi %xmm6,%eax",}, +{{0x62, 0xf1, 0x7d, 0x4f, 0x78, 0xf5, }, 6, 0, "", "", +"62 f1 7d 4f 78 f5 \tvcvttps2uqq %ymm5,%zmm6{%k7}",}, +{{0x62, 0xf1, 0xfd, 0x48, 0x78, 0xf5, }, 6, 0, "", "", +"62 f1 fd 48 78 f5 \tvcvttpd2uqq %zmm5,%zmm6",}, +{{0x0f, 0x79, 0xd8, }, 3, 0, "", "", +"0f 79 d8 \tvmwrite %eax,%ebx",}, +{{0x62, 0xf1, 0x7c, 0x48, 0x79, 0xf5, }, 6, 0, "", "", +"62 f1 7c 48 79 f5 \tvcvtps2udq %zmm5,%zmm6",}, +{{0x62, 0xf1, 0xfc, 0x4f, 0x79, 0xf5, }, 6, 0, "", "", +"62 f1 fc 4f 79 f5 \tvcvtpd2udq %zmm5,%ymm6{%k7}",}, +{{0x62, 0xf1, 0x7f, 0x08, 0x79, 0xc6, }, 6, 0, "", "", +"62 f1 7f 08 79 c6 \tvcvtsd2usi %xmm6,%eax",}, +{{0x62, 0xf1, 0x7e, 0x08, 0x79, 0xc6, }, 6, 0, "", "", +"62 f1 7e 08 79 c6 \tvcvtss2usi %xmm6,%eax",}, +{{0x62, 0xf1, 0x7d, 0x4f, 0x79, 0xf5, }, 6, 0, "", "", +"62 f1 7d 4f 79 f5 \tvcvtps2uqq %ymm5,%zmm6{%k7}",}, +{{0x62, 0xf1, 0xfd, 0x48, 0x79, 0xf5, }, 6, 0, "", "", +"62 f1 fd 48 79 f5 \tvcvtpd2uqq %zmm5,%zmm6",}, +{{0x62, 0xf1, 0x7e, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "", +"62 f1 7e 4f 7a f5 \tvcvtudq2pd %ymm5,%zmm6{%k7}",}, +{{0x62, 0xf1, 0xfe, 0x48, 0x7a, 0xf5, }, 6, 0, "", "", +"62 f1 fe 48 7a f5 \tvcvtuqq2pd %zmm5,%zmm6",}, +{{0x62, 0xf1, 0x7f, 0x48, 0x7a, 0xf5, }, 6, 0, "", "", +"62 f1 7f 48 7a f5 \tvcvtudq2ps %zmm5,%zmm6",}, +{{0x62, 0xf1, 0xff, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "", +"62 f1 ff 4f 7a f5 \tvcvtuqq2ps %zmm5,%ymm6{%k7}",}, +{{0x62, 0xf1, 0x7d, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "", +"62 f1 7d 4f 7a f5 \tvcvttps2qq %ymm5,%zmm6{%k7}",}, +{{0x62, 0xf1, 0xfd, 0x48, 0x7a, 0xf5, }, 6, 0, "", "", +"62 f1 fd 48 7a f5 \tvcvttpd2qq %zmm5,%zmm6",}, +{{0x62, 0xf1, 0x57, 0x08, 0x7b, 0xf0, }, 6, 0, "", "", +"62 f1 57 08 7b f0 \tvcvtusi2sd %eax,%xmm5,%xmm6",}, +{{0x62, 0xf1, 0x56, 0x08, 0x7b, 0xf0, }, 6, 0, "", "", +"62 f1 56 08 7b f0 \tvcvtusi2ss %eax,%xmm5,%xmm6",}, +{{0x62, 0xf1, 0x7d, 0x4f, 0x7b, 0xf5, }, 6, 0, "", "", +"62 f1 7d 4f 7b f5 \tvcvtps2qq %ymm5,%zmm6{%k7}",}, +{{0x62, 0xf1, 0xfd, 0x48, 0x7b, 0xf5, }, 6, 0, "", "", +"62 f1 fd 48 7b f5 \tvcvtpd2qq %zmm5,%zmm6",}, +{{0x0f, 0x7f, 0xc4, }, 3, 0, "", "", +"0f 7f c4 \tmovq %mm0,%mm4",}, +{{0xc5, 0xfd, 0x7f, 0xee, }, 4, 0, "", "", +"c5 fd 7f ee \tvmovdqa %ymm5,%ymm6",}, +{{0x62, 0xf1, 0x7d, 0x48, 0x7f, 0xee, }, 6, 0, "", "", +"62 f1 7d 48 7f ee \tvmovdqa32 %zmm5,%zmm6",}, +{{0x62, 0xf1, 0xfd, 0x48, 0x7f, 0xee, }, 6, 0, "", "", +"62 f1 fd 48 7f ee \tvmovdqa64 %zmm5,%zmm6",}, +{{0xc5, 0xfe, 0x7f, 0xee, }, 4, 0, "", "", +"c5 fe 7f ee \tvmovdqu %ymm5,%ymm6",}, +{{0x62, 0xf1, 0x7e, 0x48, 0x7f, 0xee, }, 6, 0, "", "", +"62 f1 7e 48 7f ee \tvmovdqu32 %zmm5,%zmm6",}, +{{0x62, 0xf1, 0xfe, 0x48, 0x7f, 0xee, }, 6, 0, "", "", +"62 f1 fe 48 7f ee \tvmovdqu64 %zmm5,%zmm6",}, +{{0x62, 0xf1, 0x7f, 0x48, 0x7f, 0xee, }, 6, 0, "", "", +"62 f1 7f 48 7f ee \tvmovdqu8 %zmm5,%zmm6",}, +{{0x62, 0xf1, 0xff, 0x48, 0x7f, 0xee, }, 6, 0, "", "", +"62 f1 ff 48 7f ee \tvmovdqu16 %zmm5,%zmm6",}, +{{0x0f, 0xdb, 0xd1, }, 3, 0, "", "", +"0f db d1 \tpand %mm1,%mm2",}, +{{0x66, 0x0f, 0xdb, 0xd1, }, 4, 0, "", "", +"66 0f db d1 \tpand %xmm1,%xmm2",}, +{{0xc5, 0xcd, 0xdb, 0xd4, }, 4, 0, "", "", +"c5 cd db d4 \tvpand %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf1, 0x55, 0x48, 0xdb, 0xf4, }, 6, 0, "", "", +"62 f1 55 48 db f4 \tvpandd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x48, 0xdb, 0xf4, }, 6, 0, "", "", +"62 f1 d5 48 db f4 \tvpandq %zmm4,%zmm5,%zmm6",}, +{{0x0f, 0xdf, 0xd1, }, 3, 0, "", "", +"0f df d1 \tpandn %mm1,%mm2",}, +{{0x66, 0x0f, 0xdf, 0xd1, }, 4, 0, "", "", +"66 0f df d1 \tpandn %xmm1,%xmm2",}, +{{0xc5, 0xcd, 0xdf, 0xd4, }, 4, 0, "", "", +"c5 cd df d4 \tvpandn %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf1, 0x55, 0x48, 0xdf, 0xf4, }, 6, 0, "", "", +"62 f1 55 48 df f4 \tvpandnd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x48, 0xdf, 0xf4, }, 6, 0, "", "", +"62 f1 d5 48 df f4 \tvpandnq %zmm4,%zmm5,%zmm6",}, +{{0xc5, 0xf9, 0xe6, 0xd1, }, 4, 0, "", "", +"c5 f9 e6 d1 \tvcvttpd2dq %xmm1,%xmm2",}, +{{0xc5, 0xfa, 0xe6, 0xf5, }, 4, 0, "", "", +"c5 fa e6 f5 \tvcvtdq2pd %xmm5,%xmm6",}, +{{0x62, 0xf1, 0x7e, 0x4f, 0xe6, 0xf5, }, 6, 0, "", "", +"62 f1 7e 4f e6 f5 \tvcvtdq2pd %ymm5,%zmm6{%k7}",}, +{{0x62, 0xf1, 0xfe, 0x48, 0xe6, 0xf5, }, 6, 0, "", "", +"62 f1 fe 48 e6 f5 \tvcvtqq2pd %zmm5,%zmm6",}, +{{0xc5, 0xfb, 0xe6, 0xd1, }, 4, 0, "", "", +"c5 fb e6 d1 \tvcvtpd2dq %xmm1,%xmm2",}, +{{0x0f, 0xeb, 0xf4, }, 3, 0, "", "", +"0f eb f4 \tpor %mm4,%mm6",}, +{{0xc5, 0xcd, 0xeb, 0xd4, }, 4, 0, "", "", +"c5 cd eb d4 \tvpor %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf1, 0x55, 0x48, 0xeb, 0xf4, }, 6, 0, "", "", +"62 f1 55 48 eb f4 \tvpord %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x48, 0xeb, 0xf4, }, 6, 0, "", "", +"62 f1 d5 48 eb f4 \tvporq %zmm4,%zmm5,%zmm6",}, +{{0x0f, 0xef, 0xf4, }, 3, 0, "", "", +"0f ef f4 \tpxor %mm4,%mm6",}, +{{0xc5, 0xcd, 0xef, 0xd4, }, 4, 0, "", "", +"c5 cd ef d4 \tvpxor %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf1, 0x55, 0x48, 0xef, 0xf4, }, 6, 0, "", "", +"62 f1 55 48 ef f4 \tvpxord %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x48, 0xef, 0xf4, }, 6, 0, "", "", +"62 f1 d5 48 ef f4 \tvpxorq %zmm4,%zmm5,%zmm6",}, +{{0x66, 0x0f, 0x38, 0x10, 0xc1, }, 5, 0, "", "", +"66 0f 38 10 c1 \tpblendvb %xmm0,%xmm1,%xmm0",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x10, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 10 f4 \tvpsrlvw %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x10, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 10 ee \tvpmovuswb %zmm5,%ymm6{%k7}",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x11, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 11 ee \tvpmovusdb %zmm5,%xmm6{%k7}",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x11, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 11 f4 \tvpsravw %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x12, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 12 ee \tvpmovusqb %zmm5,%xmm6{%k7}",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x12, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 12 f4 \tvpsllvw %zmm4,%zmm5,%zmm6",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "", +"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",}, +{{0x62, 0xf2, 0x7d, 0x4f, 0x13, 0xf5, }, 6, 0, "", "", +"62 f2 7d 4f 13 f5 \tvcvtph2ps %ymm5,%zmm6{%k7}",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x13, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 13 ee \tvpmovusdw %zmm5,%ymm6{%k7}",}, +{{0x66, 0x0f, 0x38, 0x14, 0xc1, }, 5, 0, "", "", +"66 0f 38 14 c1 \tblendvps %xmm0,%xmm1,%xmm0",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x14, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 14 ee \tvpmovusqw %zmm5,%xmm6{%k7}",}, +{{0x62, 0xf2, 0x55, 0x48, 0x14, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 14 f4 \tvprorvd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x14, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 14 f4 \tvprorvq %zmm4,%zmm5,%zmm6",}, +{{0x66, 0x0f, 0x38, 0x15, 0xc1, }, 5, 0, "", "", +"66 0f 38 15 c1 \tblendvpd %xmm0,%xmm1,%xmm0",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x15, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 15 ee \tvpmovusqd %zmm5,%ymm6{%k7}",}, +{{0x62, 0xf2, 0x55, 0x48, 0x15, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 15 f4 \tvprolvd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x15, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 15 f4 \tvprolvq %zmm4,%zmm5,%zmm6",}, +{{0xc4, 0xe2, 0x4d, 0x16, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 16 d4 \tvpermps %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x4d, 0x2f, 0x16, 0xd4, }, 6, 0, "", "", +"62 f2 4d 2f 16 d4 \tvpermps %ymm4,%ymm6,%ymm2{%k7}",}, +{{0x62, 0xf2, 0xcd, 0x2f, 0x16, 0xd4, }, 6, 0, "", "", +"62 f2 cd 2f 16 d4 \tvpermpd %ymm4,%ymm6,%ymm2{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x19, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 19 f4 \tvbroadcastsd %xmm4,%ymm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x19, 0xf7, }, 6, 0, "", "", +"62 f2 7d 48 19 f7 \tvbroadcastf32x2 %xmm7,%zmm6",}, +{{0xc4, 0xe2, 0x7d, 0x1a, 0x21, }, 5, 0, "", "", +"c4 e2 7d 1a 21 \tvbroadcastf128 (%ecx),%ymm4",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x1a, 0x31, }, 6, 0, "", "", +"62 f2 7d 48 1a 31 \tvbroadcastf32x4 (%ecx),%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x1a, 0x31, }, 6, 0, "", "", +"62 f2 fd 48 1a 31 \tvbroadcastf64x2 (%ecx),%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x1b, 0x31, }, 6, 0, "", "", +"62 f2 7d 48 1b 31 \tvbroadcastf32x8 (%ecx),%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x1b, 0x31, }, 6, 0, "", "", +"62 f2 fd 48 1b 31 \tvbroadcastf64x4 (%ecx),%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x1f, 0xf4, }, 6, 0, "", "", +"62 f2 fd 48 1f f4 \tvpabsq %zmm4,%zmm6",}, +{{0xc4, 0xe2, 0x79, 0x20, 0xec, }, 5, 0, "", "", +"c4 e2 79 20 ec \tvpmovsxbw %xmm4,%xmm5",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x20, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 20 ee \tvpmovswb %zmm5,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x21, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 21 f4 \tvpmovsxbd %xmm4,%ymm6",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x21, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 21 ee \tvpmovsdb %zmm5,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x22, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 22 e4 \tvpmovsxbq %xmm4,%ymm4",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x22, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 22 ee \tvpmovsqb %zmm5,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x23, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 23 e4 \tvpmovsxwd %xmm4,%ymm4",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x23, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 23 ee \tvpmovsdw %zmm5,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x24, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 24 f4 \tvpmovsxwq %xmm4,%ymm6",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x24, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 24 ee \tvpmovsqw %zmm5,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x25, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 25 e4 \tvpmovsxdq %xmm4,%ymm4",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x25, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 25 ee \tvpmovsqd %zmm5,%ymm6{%k7}",}, +{{0x62, 0xf2, 0x4d, 0x48, 0x26, 0xed, }, 6, 0, "", "", +"62 f2 4d 48 26 ed \tvptestmb %zmm5,%zmm6,%k5",}, +{{0x62, 0xf2, 0xcd, 0x48, 0x26, 0xed, }, 6, 0, "", "", +"62 f2 cd 48 26 ed \tvptestmw %zmm5,%zmm6,%k5",}, +{{0x62, 0xf2, 0x56, 0x48, 0x26, 0xec, }, 6, 0, "", "", +"62 f2 56 48 26 ec \tvptestnmb %zmm4,%zmm5,%k5",}, +{{0x62, 0xf2, 0xd6, 0x48, 0x26, 0xec, }, 6, 0, "", "", +"62 f2 d6 48 26 ec \tvptestnmw %zmm4,%zmm5,%k5",}, +{{0x62, 0xf2, 0x4d, 0x48, 0x27, 0xed, }, 6, 0, "", "", +"62 f2 4d 48 27 ed \tvptestmd %zmm5,%zmm6,%k5",}, +{{0x62, 0xf2, 0xcd, 0x48, 0x27, 0xed, }, 6, 0, "", "", +"62 f2 cd 48 27 ed \tvptestmq %zmm5,%zmm6,%k5",}, +{{0x62, 0xf2, 0x56, 0x48, 0x27, 0xec, }, 6, 0, "", "", +"62 f2 56 48 27 ec \tvptestnmd %zmm4,%zmm5,%k5",}, +{{0x62, 0xf2, 0xd6, 0x48, 0x27, 0xec, }, 6, 0, "", "", +"62 f2 d6 48 27 ec \tvptestnmq %zmm4,%zmm5,%k5",}, +{{0xc4, 0xe2, 0x4d, 0x28, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 28 d4 \tvpmuldq %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x7e, 0x48, 0x28, 0xf5, }, 6, 0, "", "", +"62 f2 7e 48 28 f5 \tvpmovm2b %k5,%zmm6",}, +{{0x62, 0xf2, 0xfe, 0x48, 0x28, 0xf5, }, 6, 0, "", "", +"62 f2 fe 48 28 f5 \tvpmovm2w %k5,%zmm6",}, +{{0xc4, 0xe2, 0x4d, 0x29, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 29 d4 \tvpcmpeqq %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x7e, 0x48, 0x29, 0xee, }, 6, 0, "", "", +"62 f2 7e 48 29 ee \tvpmovb2m %zmm6,%k5",}, +{{0x62, 0xf2, 0xfe, 0x48, 0x29, 0xee, }, 6, 0, "", "", +"62 f2 fe 48 29 ee \tvpmovw2m %zmm6,%k5",}, +{{0xc4, 0xe2, 0x7d, 0x2a, 0x21, }, 5, 0, "", "", +"c4 e2 7d 2a 21 \tvmovntdqa (%ecx),%ymm4",}, +{{0x62, 0xf2, 0xfe, 0x48, 0x2a, 0xce, }, 6, 0, "", "", +"62 f2 fe 48 2a ce \tvpbroadcastmb2q %k6,%zmm1",}, +{{0xc4, 0xe2, 0x5d, 0x2c, 0x31, }, 5, 0, "", "", +"c4 e2 5d 2c 31 \tvmaskmovps (%ecx),%ymm4,%ymm6",}, +{{0x62, 0xf2, 0x55, 0x48, 0x2c, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 2c f4 \tvscalefps %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x2c, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 2c f4 \tvscalefpd %zmm4,%zmm5,%zmm6",}, +{{0xc4, 0xe2, 0x5d, 0x2d, 0x31, }, 5, 0, "", "", +"c4 e2 5d 2d 31 \tvmaskmovpd (%ecx),%ymm4,%ymm6",}, +{{0x62, 0xf2, 0x55, 0x0f, 0x2d, 0xf4, }, 6, 0, "", "", +"62 f2 55 0f 2d f4 \tvscalefss %xmm4,%xmm5,%xmm6{%k7}",}, +{{0x62, 0xf2, 0xd5, 0x0f, 0x2d, 0xf4, }, 6, 0, "", "", +"62 f2 d5 0f 2d f4 \tvscalefsd %xmm4,%xmm5,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x30, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 30 e4 \tvpmovzxbw %xmm4,%ymm4",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x30, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 30 ee \tvpmovwb %zmm5,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x31, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 31 f4 \tvpmovzxbd %xmm4,%ymm6",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x31, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 31 ee \tvpmovdb %zmm5,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x32, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 32 e4 \tvpmovzxbq %xmm4,%ymm4",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x32, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 32 ee \tvpmovqb %zmm5,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x33, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 33 e4 \tvpmovzxwd %xmm4,%ymm4",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x33, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 33 ee \tvpmovdw %zmm5,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x34, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 34 f4 \tvpmovzxwq %xmm4,%ymm6",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x34, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 34 ee \tvpmovqw %zmm5,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x35, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 35 e4 \tvpmovzxdq %xmm4,%ymm4",}, +{{0x62, 0xf2, 0x7e, 0x4f, 0x35, 0xee, }, 6, 0, "", "", +"62 f2 7e 4f 35 ee \tvpmovqd %zmm5,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x4d, 0x36, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 36 d4 \tvpermd %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x4d, 0x2f, 0x36, 0xd4, }, 6, 0, "", "", +"62 f2 4d 2f 36 d4 \tvpermd %ymm4,%ymm6,%ymm2{%k7}",}, +{{0x62, 0xf2, 0xcd, 0x2f, 0x36, 0xd4, }, 6, 0, "", "", +"62 f2 cd 2f 36 d4 \tvpermq %ymm4,%ymm6,%ymm2{%k7}",}, +{{0xc4, 0xe2, 0x4d, 0x38, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 38 d4 \tvpminsb %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x7e, 0x48, 0x38, 0xf5, }, 6, 0, "", "", +"62 f2 7e 48 38 f5 \tvpmovm2d %k5,%zmm6",}, +{{0x62, 0xf2, 0xfe, 0x48, 0x38, 0xf5, }, 6, 0, "", "", +"62 f2 fe 48 38 f5 \tvpmovm2q %k5,%zmm6",}, +{{0xc4, 0xe2, 0x69, 0x39, 0xd9, }, 5, 0, "", "", +"c4 e2 69 39 d9 \tvpminsd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0xf2, 0x55, 0x48, 0x39, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 39 f4 \tvpminsd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x39, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 39 f4 \tvpminsq %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x7e, 0x48, 0x39, 0xee, }, 6, 0, "", "", +"62 f2 7e 48 39 ee \tvpmovd2m %zmm6,%k5",}, +{{0x62, 0xf2, 0xfe, 0x48, 0x39, 0xee, }, 6, 0, "", "", +"62 f2 fe 48 39 ee \tvpmovq2m %zmm6,%k5",}, +{{0xc4, 0xe2, 0x4d, 0x3a, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 3a d4 \tvpminuw %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x7e, 0x48, 0x3a, 0xf6, }, 6, 0, "", "", +"62 f2 7e 48 3a f6 \tvpbroadcastmw2d %k6,%zmm6",}, +{{0xc4, 0xe2, 0x4d, 0x3b, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 3b d4 \tvpminud %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x55, 0x48, 0x3b, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 3b f4 \tvpminud %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x3b, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 3b f4 \tvpminuq %zmm4,%zmm5,%zmm6",}, +{{0xc4, 0xe2, 0x4d, 0x3d, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 3d d4 \tvpmaxsd %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x55, 0x48, 0x3d, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 3d f4 \tvpmaxsd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x3d, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 3d f4 \tvpmaxsq %zmm4,%zmm5,%zmm6",}, +{{0xc4, 0xe2, 0x4d, 0x3f, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 3f d4 \tvpmaxud %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x55, 0x48, 0x3f, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 3f f4 \tvpmaxud %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x3f, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 3f f4 \tvpmaxuq %zmm4,%zmm5,%zmm6",}, +{{0xc4, 0xe2, 0x4d, 0x40, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 40 d4 \tvpmulld %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x55, 0x48, 0x40, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 40 f4 \tvpmulld %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x40, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 40 f4 \tvpmullq %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x42, 0xf5, }, 6, 0, "", "", +"62 f2 7d 48 42 f5 \tvgetexpps %zmm5,%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x42, 0xf5, }, 6, 0, "", "", +"62 f2 fd 48 42 f5 \tvgetexppd %zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x0f, 0x43, 0xf4, }, 6, 0, "", "", +"62 f2 55 0f 43 f4 \tvgetexpss %xmm4,%xmm5,%xmm6{%k7}",}, +{{0x62, 0xf2, 0xe5, 0x0f, 0x43, 0xe2, }, 6, 0, "", "", +"62 f2 e5 0f 43 e2 \tvgetexpsd %xmm2,%xmm3,%xmm4{%k7}",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x44, 0xf5, }, 6, 0, "", "", +"62 f2 7d 48 44 f5 \tvplzcntd %zmm5,%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x44, 0xf5, }, 6, 0, "", "", +"62 f2 fd 48 44 f5 \tvplzcntq %zmm5,%zmm6",}, +{{0xc4, 0xe2, 0x4d, 0x46, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 46 d4 \tvpsravd %ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf2, 0x55, 0x48, 0x46, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 46 f4 \tvpsravd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x46, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 46 f4 \tvpsravq %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x4c, 0xf5, }, 6, 0, "", "", +"62 f2 7d 48 4c f5 \tvrcp14ps %zmm5,%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x4c, 0xf5, }, 6, 0, "", "", +"62 f2 fd 48 4c f5 \tvrcp14pd %zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x0f, 0x4d, 0xf4, }, 6, 0, "", "", +"62 f2 55 0f 4d f4 \tvrcp14ss %xmm4,%xmm5,%xmm6{%k7}",}, +{{0x62, 0xf2, 0xd5, 0x0f, 0x4d, 0xf4, }, 6, 0, "", "", +"62 f2 d5 0f 4d f4 \tvrcp14sd %xmm4,%xmm5,%xmm6{%k7}",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x4e, 0xf5, }, 6, 0, "", "", +"62 f2 7d 48 4e f5 \tvrsqrt14ps %zmm5,%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x4e, 0xf5, }, 6, 0, "", "", +"62 f2 fd 48 4e f5 \tvrsqrt14pd %zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x0f, 0x4f, 0xf4, }, 6, 0, "", "", +"62 f2 55 0f 4f f4 \tvrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}",}, +{{0x62, 0xf2, 0xd5, 0x0f, 0x4f, 0xf4, }, 6, 0, "", "", +"62 f2 d5 0f 4f f4 \tvrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "", +"c4 e2 79 59 f4 \tvpbroadcastq %xmm4,%xmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x59, 0xf7, }, 6, 0, "", "", +"62 f2 7d 48 59 f7 \tvbroadcasti32x2 %xmm7,%zmm6",}, +{{0xc4, 0xe2, 0x7d, 0x5a, 0x21, }, 5, 0, "", "", +"c4 e2 7d 5a 21 \tvbroadcasti128 (%ecx),%ymm4",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x5a, 0x31, }, 6, 0, "", "", +"62 f2 7d 48 5a 31 \tvbroadcasti32x4 (%ecx),%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x5a, 0x31, }, 6, 0, "", "", +"62 f2 fd 48 5a 31 \tvbroadcasti64x2 (%ecx),%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x5b, 0x31, }, 6, 0, "", "", +"62 f2 7d 48 5b 31 \tvbroadcasti32x8 (%ecx),%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x5b, 0x31, }, 6, 0, "", "", +"62 f2 fd 48 5b 31 \tvbroadcasti64x4 (%ecx),%zmm6",}, +{{0x62, 0xf2, 0x55, 0x48, 0x64, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 64 f4 \tvpblendmd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x64, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 64 f4 \tvpblendmq %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x48, 0x65, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 65 f4 \tvblendmps %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x65, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 65 f4 \tvblendmpd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x48, 0x66, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 66 f4 \tvpblendmb %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x66, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 66 f4 \tvpblendmw %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x48, 0x75, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 75 f4 \tvpermi2b %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x75, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 75 f4 \tvpermi2w %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x48, 0x76, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 76 f4 \tvpermi2d %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x76, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 76 f4 \tvpermi2q %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x48, 0x77, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 77 f4 \tvpermi2ps %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x77, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 77 f4 \tvpermi2pd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x7a, 0xd8, }, 6, 0, "", "", +"62 f2 7d 08 7a d8 \tvpbroadcastb %eax,%xmm3",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x7b, 0xd8, }, 6, 0, "", "", +"62 f2 7d 08 7b d8 \tvpbroadcastw %eax,%xmm3",}, +{{0x62, 0xf2, 0x7d, 0x08, 0x7c, 0xd8, }, 6, 0, "", "", +"62 f2 7d 08 7c d8 \tvpbroadcastd %eax,%xmm3",}, +{{0x62, 0xf2, 0x55, 0x48, 0x7d, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 7d f4 \tvpermt2b %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x7d, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 7d f4 \tvpermt2w %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x48, 0x7e, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 7e f4 \tvpermt2d %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x7e, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 7e f4 \tvpermt2q %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x55, 0x48, 0x7f, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 7f f4 \tvpermt2ps %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x7f, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 7f f4 \tvpermt2pd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x83, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 83 f4 \tvpmultishiftqb %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x88, 0x31, }, 6, 0, "", "", +"62 f2 7d 48 88 31 \tvexpandps (%ecx),%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x88, 0x31, }, 6, 0, "", "", +"62 f2 fd 48 88 31 \tvexpandpd (%ecx),%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x89, 0x31, }, 6, 0, "", "", +"62 f2 7d 48 89 31 \tvpexpandd (%ecx),%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x89, 0x31, }, 6, 0, "", "", +"62 f2 fd 48 89 31 \tvpexpandq (%ecx),%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x8a, 0x31, }, 6, 0, "", "", +"62 f2 7d 48 8a 31 \tvcompressps %zmm6,(%ecx)",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x8a, 0x31, }, 6, 0, "", "", +"62 f2 fd 48 8a 31 \tvcompresspd %zmm6,(%ecx)",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x8b, 0x31, }, 6, 0, "", "", +"62 f2 7d 48 8b 31 \tvpcompressd %zmm6,(%ecx)",}, +{{0x62, 0xf2, 0xfd, 0x48, 0x8b, 0x31, }, 6, 0, "", "", +"62 f2 fd 48 8b 31 \tvpcompressq %zmm6,(%ecx)",}, +{{0x62, 0xf2, 0x55, 0x48, 0x8d, 0xf4, }, 6, 0, "", "", +"62 f2 55 48 8d f4 \tvpermb %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0x8d, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 8d f4 \tvpermw %zmm4,%zmm5,%zmm6",}, +{{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "", +"c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%ebp,%xmm7,2),%xmm1",}, +{{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "", +"c4 e2 e9 90 4c 7d 04 \tvpgatherdq %xmm2,0x4(%ebp,%xmm7,2),%xmm1",}, +{{0x62, 0xf2, 0x7d, 0x49, 0x90, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 90 b4 fd 7b 00 00 00 \tvpgatherdd 0x7b(%ebp,%zmm7,8),%zmm6{%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0x90, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 90 b4 fd 7b 00 00 00 \tvpgatherdq 0x7b(%ebp,%ymm7,8),%zmm6{%k1}",}, +{{0xc4, 0xe2, 0x69, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "", +"c4 e2 69 91 4c 7d 02 \tvpgatherqd %xmm2,0x2(%ebp,%xmm7,2),%xmm1",}, +{{0xc4, 0xe2, 0xe9, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "", +"c4 e2 e9 91 4c 7d 02 \tvpgatherqq %xmm2,0x2(%ebp,%xmm7,2),%xmm1",}, +{{0x62, 0xf2, 0x7d, 0x49, 0x91, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 91 b4 fd 7b 00 00 00 \tvpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0x91, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 91 b4 fd 7b 00 00 00 \tvpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 a0 b4 fd 7b 00 00 00 \tvpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 a0 b4 fd 7b 00 00 00 \tvpscatterdq %zmm6,0x7b(%ebp,%ymm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xa1, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 a1 b4 fd 7b 00 00 00 \tvpscatterqd %ymm6,0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x29, 0xa1, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 29 a1 b4 fd 7b 00 00 00 \tvpscatterqq %ymm6,0x7b(%ebp,%ymm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xa2, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 a2 b4 fd 7b 00 00 00 \tvscatterdps %zmm6,0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xa2, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 a2 b4 fd 7b 00 00 00 \tvscatterdpd %zmm6,0x7b(%ebp,%ymm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xa3, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 a3 b4 fd 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xa3, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 a3 b4 fd 7b 00 00 00 \tvscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xd5, 0x48, 0xb4, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 b4 f4 \tvpmadd52luq %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0xd5, 0x48, 0xb5, 0xf4, }, 6, 0, "", "", +"62 f2 d5 48 b5 f4 \tvpmadd52huq %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0xc4, 0xf5, }, 6, 0, "", "", +"62 f2 7d 48 c4 f5 \tvpconflictd %zmm5,%zmm6",}, +{{0x62, 0xf2, 0xfd, 0x48, 0xc4, 0xf5, }, 6, 0, "", "", +"62 f2 fd 48 c4 f5 \tvpconflictq %zmm5,%zmm6",}, +{{0x62, 0xf2, 0x7d, 0x48, 0xc8, 0xfe, }, 6, 0, "", "", +"62 f2 7d 48 c8 fe \tvexp2ps %zmm6,%zmm7",}, +{{0x62, 0xf2, 0xfd, 0x48, 0xc8, 0xfe, }, 6, 0, "", "", +"62 f2 fd 48 c8 fe \tvexp2pd %zmm6,%zmm7",}, +{{0x62, 0xf2, 0x7d, 0x48, 0xca, 0xfe, }, 6, 0, "", "", +"62 f2 7d 48 ca fe \tvrcp28ps %zmm6,%zmm7",}, +{{0x62, 0xf2, 0xfd, 0x48, 0xca, 0xfe, }, 6, 0, "", "", +"62 f2 fd 48 ca fe \tvrcp28pd %zmm6,%zmm7",}, +{{0x62, 0xf2, 0x4d, 0x0f, 0xcb, 0xfd, }, 6, 0, "", "", +"62 f2 4d 0f cb fd \tvrcp28ss %xmm5,%xmm6,%xmm7{%k7}",}, +{{0x62, 0xf2, 0xcd, 0x0f, 0xcb, 0xfd, }, 6, 0, "", "", +"62 f2 cd 0f cb fd \tvrcp28sd %xmm5,%xmm6,%xmm7{%k7}",}, +{{0x62, 0xf2, 0x7d, 0x48, 0xcc, 0xfe, }, 6, 0, "", "", +"62 f2 7d 48 cc fe \tvrsqrt28ps %zmm6,%zmm7",}, +{{0x62, 0xf2, 0xfd, 0x48, 0xcc, 0xfe, }, 6, 0, "", "", +"62 f2 fd 48 cc fe \tvrsqrt28pd %zmm6,%zmm7",}, +{{0x62, 0xf2, 0x4d, 0x0f, 0xcd, 0xfd, }, 6, 0, "", "", +"62 f2 4d 0f cd fd \tvrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}",}, +{{0x62, 0xf2, 0xcd, 0x0f, 0xcd, 0xfd, }, 6, 0, "", "", +"62 f2 cd 0f cd fd \tvrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}",}, +{{0x62, 0xf3, 0x4d, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 48 03 fd 12 \tvalignd $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0xcd, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 48 03 fd 12 \tvalignq $0x12,%zmm5,%zmm6,%zmm7",}, +{{0xc4, 0xe3, 0x7d, 0x08, 0xd6, 0x05, }, 6, 0, "", "", +"c4 e3 7d 08 d6 05 \tvroundps $0x5,%ymm6,%ymm2",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x08, 0xf5, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 08 f5 12 \tvrndscaleps $0x12,%zmm5,%zmm6",}, +{{0xc4, 0xe3, 0x7d, 0x09, 0xd6, 0x05, }, 6, 0, "", "", +"c4 e3 7d 09 d6 05 \tvroundpd $0x5,%ymm6,%ymm2",}, +{{0x62, 0xf3, 0xfd, 0x48, 0x09, 0xf5, 0x12, }, 7, 0, "", "", +"62 f3 fd 48 09 f5 12 \tvrndscalepd $0x12,%zmm5,%zmm6",}, +{{0xc4, 0xe3, 0x49, 0x0a, 0xd4, 0x05, }, 6, 0, "", "", +"c4 e3 49 0a d4 05 \tvroundss $0x5,%xmm4,%xmm6,%xmm2",}, +{{0x62, 0xf3, 0x55, 0x0f, 0x0a, 0xf4, 0x12, }, 7, 0, "", "", +"62 f3 55 0f 0a f4 12 \tvrndscaless $0x12,%xmm4,%xmm5,%xmm6{%k7}",}, +{{0xc4, 0xe3, 0x49, 0x0b, 0xd4, 0x05, }, 6, 0, "", "", +"c4 e3 49 0b d4 05 \tvroundsd $0x5,%xmm4,%xmm6,%xmm2",}, +{{0x62, 0xf3, 0xd5, 0x0f, 0x0b, 0xf4, 0x12, }, 7, 0, "", "", +"62 f3 d5 0f 0b f4 12 \tvrndscalesd $0x12,%xmm4,%xmm5,%xmm6{%k7}",}, +{{0xc4, 0xe3, 0x5d, 0x18, 0xf4, 0x05, }, 6, 0, "", "", +"c4 e3 5d 18 f4 05 \tvinsertf128 $0x5,%xmm4,%ymm4,%ymm6",}, +{{0x62, 0xf3, 0x55, 0x4f, 0x18, 0xf4, 0x12, }, 7, 0, "", "", +"62 f3 55 4f 18 f4 12 \tvinsertf32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}",}, +{{0x62, 0xf3, 0xd5, 0x4f, 0x18, 0xf4, 0x12, }, 7, 0, "", "", +"62 f3 d5 4f 18 f4 12 \tvinsertf64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}",}, +{{0xc4, 0xe3, 0x7d, 0x19, 0xe4, 0x05, }, 6, 0, "", "", +"c4 e3 7d 19 e4 05 \tvextractf128 $0x5,%ymm4,%xmm4",}, +{{0x62, 0xf3, 0x7d, 0x4f, 0x19, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 7d 4f 19 ee 12 \tvextractf32x4 $0x12,%zmm5,%xmm6{%k7}",}, +{{0x62, 0xf3, 0xfd, 0x4f, 0x19, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 fd 4f 19 ee 12 \tvextractf64x2 $0x12,%zmm5,%xmm6{%k7}",}, +{{0x62, 0xf3, 0x4d, 0x4f, 0x1a, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 4f 1a fd 12 \tvinsertf32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}",}, +{{0x62, 0xf3, 0xcd, 0x4f, 0x1a, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 4f 1a fd 12 \tvinsertf64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}",}, +{{0x62, 0xf3, 0x7d, 0x4f, 0x1b, 0xf7, 0x12, }, 7, 0, "", "", +"62 f3 7d 4f 1b f7 12 \tvextractf32x8 $0x12,%zmm6,%ymm7{%k7}",}, +{{0x62, 0xf3, 0xfd, 0x4f, 0x1b, 0xf7, 0x12, }, 7, 0, "", "", +"62 f3 fd 4f 1b f7 12 \tvextractf64x4 $0x12,%zmm6,%ymm7{%k7}",}, +{{0x62, 0xf3, 0x45, 0x48, 0x1e, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 45 48 1e ee 12 \tvpcmpud $0x12,%zmm6,%zmm7,%k5",}, +{{0x62, 0xf3, 0xc5, 0x48, 0x1e, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 c5 48 1e ee 12 \tvpcmpuq $0x12,%zmm6,%zmm7,%k5",}, +{{0x62, 0xf3, 0x45, 0x48, 0x1f, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 45 48 1f ee 12 \tvpcmpd $0x12,%zmm6,%zmm7,%k5",}, +{{0x62, 0xf3, 0xc5, 0x48, 0x1f, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 c5 48 1f ee 12 \tvpcmpq $0x12,%zmm6,%zmm7,%k5",}, +{{0x62, 0xf3, 0x4d, 0x48, 0x23, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 48 23 fd 12 \tvshuff32x4 $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0xcd, 0x48, 0x23, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 48 23 fd 12 \tvshuff64x2 $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0x4d, 0x48, 0x25, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 48 25 fd 12 \tvpternlogd $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0xcd, 0x48, 0x25, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 48 25 fd 12 \tvpternlogq $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x26, 0xfe, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 26 fe 12 \tvgetmantps $0x12,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0xfd, 0x48, 0x26, 0xfe, 0x12, }, 7, 0, "", "", +"62 f3 fd 48 26 fe 12 \tvgetmantpd $0x12,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0x4d, 0x0f, 0x27, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 0f 27 fd 12 \tvgetmantss $0x12,%xmm5,%xmm6,%xmm7{%k7}",}, +{{0x62, 0xf3, 0xcd, 0x0f, 0x27, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 0f 27 fd 12 \tvgetmantsd $0x12,%xmm5,%xmm6,%xmm7{%k7}",}, +{{0xc4, 0xe3, 0x5d, 0x38, 0xf4, 0x05, }, 6, 0, "", "", +"c4 e3 5d 38 f4 05 \tvinserti128 $0x5,%xmm4,%ymm4,%ymm6",}, +{{0x62, 0xf3, 0x55, 0x4f, 0x38, 0xf4, 0x12, }, 7, 0, "", "", +"62 f3 55 4f 38 f4 12 \tvinserti32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}",}, +{{0x62, 0xf3, 0xd5, 0x4f, 0x38, 0xf4, 0x12, }, 7, 0, "", "", +"62 f3 d5 4f 38 f4 12 \tvinserti64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}",}, +{{0xc4, 0xe3, 0x7d, 0x39, 0xe6, 0x05, }, 6, 0, "", "", +"c4 e3 7d 39 e6 05 \tvextracti128 $0x5,%ymm4,%xmm6",}, +{{0x62, 0xf3, 0x7d, 0x4f, 0x39, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 7d 4f 39 ee 12 \tvextracti32x4 $0x12,%zmm5,%xmm6{%k7}",}, +{{0x62, 0xf3, 0xfd, 0x4f, 0x39, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 fd 4f 39 ee 12 \tvextracti64x2 $0x12,%zmm5,%xmm6{%k7}",}, +{{0x62, 0xf3, 0x4d, 0x4f, 0x3a, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 4f 3a fd 12 \tvinserti32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}",}, +{{0x62, 0xf3, 0xcd, 0x4f, 0x3a, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 4f 3a fd 12 \tvinserti64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}",}, +{{0x62, 0xf3, 0x7d, 0x4f, 0x3b, 0xf7, 0x12, }, 7, 0, "", "", +"62 f3 7d 4f 3b f7 12 \tvextracti32x8 $0x12,%zmm6,%ymm7{%k7}",}, +{{0x62, 0xf3, 0xfd, 0x4f, 0x3b, 0xf7, 0x12, }, 7, 0, "", "", +"62 f3 fd 4f 3b f7 12 \tvextracti64x4 $0x12,%zmm6,%ymm7{%k7}",}, +{{0x62, 0xf3, 0x45, 0x48, 0x3e, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 45 48 3e ee 12 \tvpcmpub $0x12,%zmm6,%zmm7,%k5",}, +{{0x62, 0xf3, 0xc5, 0x48, 0x3e, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 c5 48 3e ee 12 \tvpcmpuw $0x12,%zmm6,%zmm7,%k5",}, +{{0x62, 0xf3, 0x45, 0x48, 0x3f, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 45 48 3f ee 12 \tvpcmpb $0x12,%zmm6,%zmm7,%k5",}, +{{0x62, 0xf3, 0xc5, 0x48, 0x3f, 0xee, 0x12, }, 7, 0, "", "", +"62 f3 c5 48 3f ee 12 \tvpcmpw $0x12,%zmm6,%zmm7,%k5",}, +{{0xc4, 0xe3, 0x4d, 0x42, 0xd4, 0x05, }, 6, 0, "", "", +"c4 e3 4d 42 d4 05 \tvmpsadbw $0x5,%ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf3, 0x55, 0x48, 0x42, 0xf4, 0x12, }, 7, 0, "", "", +"62 f3 55 48 42 f4 12 \tvdbpsadbw $0x12,%zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf3, 0x4d, 0x48, 0x43, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 48 43 fd 12 \tvshufi32x4 $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0xcd, 0x48, 0x43, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 48 43 fd 12 \tvshufi64x2 $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0x4d, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 48 50 fd 12 \tvrangeps $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0xcd, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 48 50 fd 12 \tvrangepd $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0x4d, 0x08, 0x51, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 08 51 fd 12 \tvrangess $0x12,%xmm5,%xmm6,%xmm7",}, +{{0x62, 0xf3, 0xcd, 0x08, 0x51, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 08 51 fd 12 \tvrangesd $0x12,%xmm5,%xmm6,%xmm7",}, +{{0x62, 0xf3, 0x4d, 0x48, 0x54, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 48 54 fd 12 \tvfixupimmps $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0xcd, 0x48, 0x54, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 48 54 fd 12 \tvfixupimmpd $0x12,%zmm5,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0x4d, 0x0f, 0x55, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 0f 55 fd 12 \tvfixupimmss $0x12,%xmm5,%xmm6,%xmm7{%k7}",}, +{{0x62, 0xf3, 0xcd, 0x0f, 0x55, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 0f 55 fd 12 \tvfixupimmsd $0x12,%xmm5,%xmm6,%xmm7{%k7}",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x56, 0xfe, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 56 fe 12 \tvreduceps $0x12,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0xfd, 0x48, 0x56, 0xfe, 0x12, }, 7, 0, "", "", +"62 f3 fd 48 56 fe 12 \tvreducepd $0x12,%zmm6,%zmm7",}, +{{0x62, 0xf3, 0x4d, 0x08, 0x57, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 4d 08 57 fd 12 \tvreducess $0x12,%xmm5,%xmm6,%xmm7",}, +{{0x62, 0xf3, 0xcd, 0x08, 0x57, 0xfd, 0x12, }, 7, 0, "", "", +"62 f3 cd 08 57 fd 12 \tvreducesd $0x12,%xmm5,%xmm6,%xmm7",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x66, 0xef, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 66 ef 12 \tvfpclassps $0x12,%zmm7,%k5",}, +{{0x62, 0xf3, 0xfd, 0x48, 0x66, 0xef, 0x12, }, 7, 0, "", "", +"62 f3 fd 48 66 ef 12 \tvfpclasspd $0x12,%zmm7,%k5",}, +{{0x62, 0xf3, 0x7d, 0x08, 0x67, 0xef, 0x12, }, 7, 0, "", "", +"62 f3 7d 08 67 ef 12 \tvfpclassss $0x12,%xmm7,%k5",}, +{{0x62, 0xf3, 0xfd, 0x08, 0x67, 0xef, 0x12, }, 7, 0, "", "", +"62 f3 fd 08 67 ef 12 \tvfpclasssd $0x12,%xmm7,%k5",}, +{{0x62, 0xf1, 0x4d, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "", +"62 f1 4d 48 72 c5 12 \tvprord $0x12,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xcd, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "", +"62 f1 cd 48 72 c5 12 \tvprorq $0x12,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0x4d, 0x48, 0x72, 0xcd, 0x12, }, 7, 0, "", "", +"62 f1 4d 48 72 cd 12 \tvprold $0x12,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xcd, 0x48, 0x72, 0xcd, 0x12, }, 7, 0, "", "", +"62 f1 cd 48 72 cd 12 \tvprolq $0x12,%zmm5,%zmm6",}, +{{0x0f, 0x72, 0xe6, 0x02, }, 4, 0, "", "", +"0f 72 e6 02 \tpsrad $0x2,%mm6",}, +{{0xc5, 0xed, 0x72, 0xe6, 0x05, }, 5, 0, "", "", +"c5 ed 72 e6 05 \tvpsrad $0x5,%ymm6,%ymm2",}, +{{0x62, 0xf1, 0x6d, 0x48, 0x72, 0xe6, 0x05, }, 7, 0, "", "", +"62 f1 6d 48 72 e6 05 \tvpsrad $0x5,%zmm6,%zmm2",}, +{{0x62, 0xf1, 0xed, 0x48, 0x72, 0xe6, 0x05, }, 7, 0, "", "", +"62 f1 ed 48 72 e6 05 \tvpsraq $0x5,%zmm6,%zmm2",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 c6 8c fd 7b 00 00 00 \tvgatherpf0dps 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 c6 8c fd 7b 00 00 00 \tvgatherpf0dpd 0x7b(%ebp,%ymm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 c6 94 fd 7b 00 00 00 \tvgatherpf1dps 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 c6 94 fd 7b 00 00 00 \tvgatherpf1dpd 0x7b(%ebp,%ymm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 c6 ac fd 7b 00 00 00 \tvscatterpf0dps 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 c6 ac fd 7b 00 00 00 \tvscatterpf0dpd 0x7b(%ebp,%ymm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 c6 b4 fd 7b 00 00 00 \tvscatterpf1dps 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 c6 b4 fd 7b 00 00 00 \tvscatterpf1dpd 0x7b(%ebp,%ymm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 c7 8c fd 7b 00 00 00 \tvgatherpf0qps 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 c7 8c fd 7b 00 00 00 \tvgatherpf0qpd 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 c7 94 fd 7b 00 00 00 \tvgatherpf1qps 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 c7 94 fd 7b 00 00 00 \tvgatherpf1qpd 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 c7 ac fd 7b 00 00 00 \tvscatterpf0qps 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 c7 ac fd 7b 00 00 00 \tvscatterpf0qpd 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 7d 49 c7 b4 fd 7b 00 00 00 \tvscatterpf1qps 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 f2 fd 49 c7 b4 fd 7b 00 00 00 \tvscatterpf1qpd 0x7b(%ebp,%zmm7,8){%k1}",}, +{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0xf4, }, 6, 0, "", "", +"62 f1 d5 48 58 f4 \tvaddpd %zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x4f, 0x58, 0xf4, }, 6, 0, "", "", +"62 f1 d5 4f 58 f4 \tvaddpd %zmm4,%zmm5,%zmm6{%k7}",}, +{{0x62, 0xf1, 0xd5, 0xcf, 0x58, 0xf4, }, 6, 0, "", "", +"62 f1 d5 cf 58 f4 \tvaddpd %zmm4,%zmm5,%zmm6{%k7}{z}",}, +{{0x62, 0xf1, 0xd5, 0x18, 0x58, 0xf4, }, 6, 0, "", "", +"62 f1 d5 18 58 f4 \tvaddpd {rn-sae},%zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0xf4, }, 6, 0, "", "", +"62 f1 d5 58 58 f4 \tvaddpd {ru-sae},%zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x38, 0x58, 0xf4, }, 6, 0, "", "", +"62 f1 d5 38 58 f4 \tvaddpd {rd-sae},%zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x78, 0x58, 0xf4, }, 6, 0, "", "", +"62 f1 d5 78 58 f4 \tvaddpd {rz-sae},%zmm4,%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0x31, }, 6, 0, "", "", +"62 f1 d5 48 58 31 \tvaddpd (%ecx),%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0xb4, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 11, 0, "", "", +"62 f1 d5 48 58 b4 c8 23 01 00 00 \tvaddpd 0x123(%eax,%ecx,8),%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0x31, }, 6, 0, "", "", +"62 f1 d5 58 58 31 \tvaddpd (%ecx){1to8},%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0x72, 0x7f, }, 7, 0, "", "", +"62 f1 d5 48 58 72 7f \tvaddpd 0x1fc0(%edx),%zmm5,%zmm6",}, +{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0x72, 0x7f, }, 7, 0, "", "", +"62 f1 d5 58 58 72 7f \tvaddpd 0x3f8(%edx){1to8},%zmm5,%zmm6",}, +{{0x62, 0xf1, 0x4c, 0x58, 0xc2, 0x6a, 0x7f, 0x08, }, 8, 0, "", "", +"62 f1 4c 58 c2 6a 7f 08 \tvcmpeq_uqps 0x1fc(%edx){1to16},%zmm6,%k5",}, +{{0x62, 0xf1, 0xe7, 0x0f, 0xc2, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, 0x01, }, 12, 0, "", "", +"62 f1 e7 0f c2 ac c8 23 01 00 00 01 \tvcmpltsd 0x123(%eax,%ecx,8),%xmm3,%k5{%k7}",}, +{{0x62, 0xf1, 0xd7, 0x1f, 0xc2, 0xec, 0x02, }, 7, 0, "", "", +"62 f1 d7 1f c2 ec 02 \tvcmplesd {sae},%xmm4,%xmm5,%k5{%k7}",}, +{{0x62, 0xf3, 0x5d, 0x0f, 0x27, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, 0x5b, }, 12, 0, "", "", +"62 f3 5d 0f 27 ac c8 23 01 00 00 5b \tvgetmantss $0x5b,0x123(%eax,%ecx,8),%xmm4,%xmm5{%k7}",}, {{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", "f3 0f 1b 00 \tbndmk (%eax),%bnd0",}, {{0xf3, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", @@ -309,19 +1319,19 @@ {{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", "0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax,%ecx,1)",}, {{0xf2, 0xe8, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "call", "unconditional", -"f2 e8 fc ff ff ff \tbnd call 3c3 <main+0x3c3>",}, +"f2 e8 fc ff ff ff \tbnd call fce <main+0xfce>",}, {{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", "f2 ff 10 \tbnd call *(%eax)",}, {{0xf2, 0xc3, }, 2, 0, "ret", "indirect", "f2 c3 \tbnd ret ",}, {{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", -"f2 e9 fc ff ff ff \tbnd jmp 3ce <main+0x3ce>",}, +"f2 e9 fc ff ff ff \tbnd jmp fd9 <main+0xfd9>",}, {{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", -"f2 e9 fc ff ff ff \tbnd jmp 3d4 <main+0x3d4>",}, +"f2 e9 fc ff ff ff \tbnd jmp fdf <main+0xfdf>",}, {{0xf2, 0xff, 0x21, }, 3, 0, "jmp", "indirect", "f2 ff 21 \tbnd jmp *(%ecx)",}, {{0xf2, 0x0f, 0x85, 0xfc, 0xff, 0xff, 0xff, }, 7, 0xfffffffc, "jcc", "conditional", -"f2 0f 85 fc ff ff ff \tbnd jne 3de <main+0x3de>",}, +"f2 0f 85 fc ff ff ff \tbnd jne fe9 <main+0xfe9>",}, {{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "", "0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, {{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", @@ -654,5 +1664,3 @@ "0f c7 1d 78 56 34 12 \txrstors 0x12345678",}, {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",}, -{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", -"66 0f ae f8 \tpcommit ",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 4fe7cce179c4..af25bc8240d0 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -6,6 +6,938 @@ {{0x0f, 0x31, }, 2, 0, "", "", "0f 31 \trdtsc ",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "", +"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",}, +{{0x48, 0x0f, 0x41, 0xd8, }, 4, 0, "", "", +"48 0f 41 d8 \tcmovno %rax,%rbx",}, +{{0x48, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"48 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%rax),%rcx",}, +{{0x66, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%rax),%cx",}, +{{0x48, 0x0f, 0x44, 0xd8, }, 4, 0, "", "", +"48 0f 44 d8 \tcmove %rax,%rbx",}, +{{0x48, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"48 0f 44 88 78 56 34 12 \tcmove 0x12345678(%rax),%rcx",}, +{{0x66, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 44 88 78 56 34 12 \tcmove 0x12345678(%rax),%cx",}, +{{0x0f, 0x90, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 90 80 78 56 34 12 \tseto 0x12345678(%rax)",}, +{{0x0f, 0x91, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 91 80 78 56 34 12 \tsetno 0x12345678(%rax)",}, +{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 92 80 78 56 34 12 \tsetb 0x12345678(%rax)",}, +{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 92 80 78 56 34 12 \tsetb 0x12345678(%rax)",}, +{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 92 80 78 56 34 12 \tsetb 0x12345678(%rax)",}, +{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 93 80 78 56 34 12 \tsetae 0x12345678(%rax)",}, +{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 93 80 78 56 34 12 \tsetae 0x12345678(%rax)",}, +{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 93 80 78 56 34 12 \tsetae 0x12345678(%rax)",}, +{{0x0f, 0x98, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 98 80 78 56 34 12 \tsets 0x12345678(%rax)",}, +{{0x0f, 0x99, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 99 80 78 56 34 12 \tsetns 0x12345678(%rax)",}, +{{0xc5, 0xcc, 0x41, 0xef, }, 4, 0, "", "", +"c5 cc 41 ef \tkandw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x41, 0xef, }, 5, 0, "", "", +"c4 e1 cc 41 ef \tkandq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x41, 0xef, }, 4, 0, "", "", +"c5 cd 41 ef \tkandb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x41, 0xef, }, 5, 0, "", "", +"c4 e1 cd 41 ef \tkandd %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x42, 0xef, }, 4, 0, "", "", +"c5 cc 42 ef \tkandnw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x42, 0xef, }, 5, 0, "", "", +"c4 e1 cc 42 ef \tkandnq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x42, 0xef, }, 4, 0, "", "", +"c5 cd 42 ef \tkandnb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x42, 0xef, }, 5, 0, "", "", +"c4 e1 cd 42 ef \tkandnd %k7,%k6,%k5",}, +{{0xc5, 0xf8, 0x44, 0xf7, }, 4, 0, "", "", +"c5 f8 44 f7 \tknotw %k7,%k6",}, +{{0xc4, 0xe1, 0xf8, 0x44, 0xf7, }, 5, 0, "", "", +"c4 e1 f8 44 f7 \tknotq %k7,%k6",}, +{{0xc5, 0xf9, 0x44, 0xf7, }, 4, 0, "", "", +"c5 f9 44 f7 \tknotb %k7,%k6",}, +{{0xc4, 0xe1, 0xf9, 0x44, 0xf7, }, 5, 0, "", "", +"c4 e1 f9 44 f7 \tknotd %k7,%k6",}, +{{0xc5, 0xcc, 0x45, 0xef, }, 4, 0, "", "", +"c5 cc 45 ef \tkorw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x45, 0xef, }, 5, 0, "", "", +"c4 e1 cc 45 ef \tkorq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x45, 0xef, }, 4, 0, "", "", +"c5 cd 45 ef \tkorb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x45, 0xef, }, 5, 0, "", "", +"c4 e1 cd 45 ef \tkord %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x46, 0xef, }, 4, 0, "", "", +"c5 cc 46 ef \tkxnorw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x46, 0xef, }, 5, 0, "", "", +"c4 e1 cc 46 ef \tkxnorq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x46, 0xef, }, 4, 0, "", "", +"c5 cd 46 ef \tkxnorb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x46, 0xef, }, 5, 0, "", "", +"c4 e1 cd 46 ef \tkxnord %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x47, 0xef, }, 4, 0, "", "", +"c5 cc 47 ef \tkxorw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x47, 0xef, }, 5, 0, "", "", +"c4 e1 cc 47 ef \tkxorq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x47, 0xef, }, 4, 0, "", "", +"c5 cd 47 ef \tkxorb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x47, 0xef, }, 5, 0, "", "", +"c4 e1 cd 47 ef \tkxord %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x4a, 0xef, }, 4, 0, "", "", +"c5 cc 4a ef \tkaddw %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x4a, 0xef, }, 5, 0, "", "", +"c4 e1 cc 4a ef \tkaddq %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x4a, 0xef, }, 4, 0, "", "", +"c5 cd 4a ef \tkaddb %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcd, 0x4a, 0xef, }, 5, 0, "", "", +"c4 e1 cd 4a ef \tkaddd %k7,%k6,%k5",}, +{{0xc5, 0xcd, 0x4b, 0xef, }, 4, 0, "", "", +"c5 cd 4b ef \tkunpckbw %k7,%k6,%k5",}, +{{0xc5, 0xcc, 0x4b, 0xef, }, 4, 0, "", "", +"c5 cc 4b ef \tkunpckwd %k7,%k6,%k5",}, +{{0xc4, 0xe1, 0xcc, 0x4b, 0xef, }, 5, 0, "", "", +"c4 e1 cc 4b ef \tkunpckdq %k7,%k6,%k5",}, +{{0xc5, 0xf8, 0x90, 0xee, }, 4, 0, "", "", +"c5 f8 90 ee \tkmovw %k6,%k5",}, +{{0xc5, 0xf8, 0x90, 0x29, }, 4, 0, "", "", +"c5 f8 90 29 \tkmovw (%rcx),%k5",}, +{{0xc4, 0xa1, 0x78, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 a1 78 90 ac f0 23 01 00 00 \tkmovw 0x123(%rax,%r14,8),%k5",}, +{{0xc5, 0xf8, 0x91, 0x29, }, 4, 0, "", "", +"c5 f8 91 29 \tkmovw %k5,(%rcx)",}, +{{0xc4, 0xa1, 0x78, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 a1 78 91 ac f0 23 01 00 00 \tkmovw %k5,0x123(%rax,%r14,8)",}, +{{0xc5, 0xf8, 0x92, 0xe8, }, 4, 0, "", "", +"c5 f8 92 e8 \tkmovw %eax,%k5",}, +{{0xc5, 0xf8, 0x92, 0xed, }, 4, 0, "", "", +"c5 f8 92 ed \tkmovw %ebp,%k5",}, +{{0xc4, 0xc1, 0x78, 0x92, 0xed, }, 5, 0, "", "", +"c4 c1 78 92 ed \tkmovw %r13d,%k5",}, +{{0xc5, 0xf8, 0x93, 0xc5, }, 4, 0, "", "", +"c5 f8 93 c5 \tkmovw %k5,%eax",}, +{{0xc5, 0xf8, 0x93, 0xed, }, 4, 0, "", "", +"c5 f8 93 ed \tkmovw %k5,%ebp",}, +{{0xc5, 0x78, 0x93, 0xed, }, 4, 0, "", "", +"c5 78 93 ed \tkmovw %k5,%r13d",}, +{{0xc4, 0xe1, 0xf8, 0x90, 0xee, }, 5, 0, "", "", +"c4 e1 f8 90 ee \tkmovq %k6,%k5",}, +{{0xc4, 0xe1, 0xf8, 0x90, 0x29, }, 5, 0, "", "", +"c4 e1 f8 90 29 \tkmovq (%rcx),%k5",}, +{{0xc4, 0xa1, 0xf8, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 a1 f8 90 ac f0 23 01 00 00 \tkmovq 0x123(%rax,%r14,8),%k5",}, +{{0xc4, 0xe1, 0xf8, 0x91, 0x29, }, 5, 0, "", "", +"c4 e1 f8 91 29 \tkmovq %k5,(%rcx)",}, +{{0xc4, 0xa1, 0xf8, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 a1 f8 91 ac f0 23 01 00 00 \tkmovq %k5,0x123(%rax,%r14,8)",}, +{{0xc4, 0xe1, 0xfb, 0x92, 0xe8, }, 5, 0, "", "", +"c4 e1 fb 92 e8 \tkmovq %rax,%k5",}, +{{0xc4, 0xe1, 0xfb, 0x92, 0xed, }, 5, 0, "", "", +"c4 e1 fb 92 ed \tkmovq %rbp,%k5",}, +{{0xc4, 0xc1, 0xfb, 0x92, 0xed, }, 5, 0, "", "", +"c4 c1 fb 92 ed \tkmovq %r13,%k5",}, +{{0xc4, 0xe1, 0xfb, 0x93, 0xc5, }, 5, 0, "", "", +"c4 e1 fb 93 c5 \tkmovq %k5,%rax",}, +{{0xc4, 0xe1, 0xfb, 0x93, 0xed, }, 5, 0, "", "", +"c4 e1 fb 93 ed \tkmovq %k5,%rbp",}, +{{0xc4, 0x61, 0xfb, 0x93, 0xed, }, 5, 0, "", "", +"c4 61 fb 93 ed \tkmovq %k5,%r13",}, +{{0xc5, 0xf9, 0x90, 0xee, }, 4, 0, "", "", +"c5 f9 90 ee \tkmovb %k6,%k5",}, +{{0xc5, 0xf9, 0x90, 0x29, }, 4, 0, "", "", +"c5 f9 90 29 \tkmovb (%rcx),%k5",}, +{{0xc4, 0xa1, 0x79, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 a1 79 90 ac f0 23 01 00 00 \tkmovb 0x123(%rax,%r14,8),%k5",}, +{{0xc5, 0xf9, 0x91, 0x29, }, 4, 0, "", "", +"c5 f9 91 29 \tkmovb %k5,(%rcx)",}, +{{0xc4, 0xa1, 0x79, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 a1 79 91 ac f0 23 01 00 00 \tkmovb %k5,0x123(%rax,%r14,8)",}, +{{0xc5, 0xf9, 0x92, 0xe8, }, 4, 0, "", "", +"c5 f9 92 e8 \tkmovb %eax,%k5",}, +{{0xc5, 0xf9, 0x92, 0xed, }, 4, 0, "", "", +"c5 f9 92 ed \tkmovb %ebp,%k5",}, +{{0xc4, 0xc1, 0x79, 0x92, 0xed, }, 5, 0, "", "", +"c4 c1 79 92 ed \tkmovb %r13d,%k5",}, +{{0xc5, 0xf9, 0x93, 0xc5, }, 4, 0, "", "", +"c5 f9 93 c5 \tkmovb %k5,%eax",}, +{{0xc5, 0xf9, 0x93, 0xed, }, 4, 0, "", "", +"c5 f9 93 ed \tkmovb %k5,%ebp",}, +{{0xc5, 0x79, 0x93, 0xed, }, 4, 0, "", "", +"c5 79 93 ed \tkmovb %k5,%r13d",}, +{{0xc4, 0xe1, 0xf9, 0x90, 0xee, }, 5, 0, "", "", +"c4 e1 f9 90 ee \tkmovd %k6,%k5",}, +{{0xc4, 0xe1, 0xf9, 0x90, 0x29, }, 5, 0, "", "", +"c4 e1 f9 90 29 \tkmovd (%rcx),%k5",}, +{{0xc4, 0xa1, 0xf9, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 a1 f9 90 ac f0 23 01 00 00 \tkmovd 0x123(%rax,%r14,8),%k5",}, +{{0xc4, 0xe1, 0xf9, 0x91, 0x29, }, 5, 0, "", "", +"c4 e1 f9 91 29 \tkmovd %k5,(%rcx)",}, +{{0xc4, 0xa1, 0xf9, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "", +"c4 a1 f9 91 ac f0 23 01 00 00 \tkmovd %k5,0x123(%rax,%r14,8)",}, +{{0xc5, 0xfb, 0x92, 0xe8, }, 4, 0, "", "", +"c5 fb 92 e8 \tkmovd %eax,%k5",}, +{{0xc5, 0xfb, 0x92, 0xed, }, 4, 0, "", "", +"c5 fb 92 ed \tkmovd %ebp,%k5",}, +{{0xc4, 0xc1, 0x7b, 0x92, 0xed, }, 5, 0, "", "", +"c4 c1 7b 92 ed \tkmovd %r13d,%k5",}, +{{0xc5, 0xfb, 0x93, 0xc5, }, 4, 0, "", "", +"c5 fb 93 c5 \tkmovd %k5,%eax",}, +{{0xc5, 0xfb, 0x93, 0xed, }, 4, 0, "", "", +"c5 fb 93 ed \tkmovd %k5,%ebp",}, +{{0xc5, 0x7b, 0x93, 0xed, }, 4, 0, "", "", +"c5 7b 93 ed \tkmovd %k5,%r13d",}, +{{0xc5, 0xf8, 0x98, 0xee, }, 4, 0, "", "", +"c5 f8 98 ee \tkortestw %k6,%k5",}, +{{0xc4, 0xe1, 0xf8, 0x98, 0xee, }, 5, 0, "", "", +"c4 e1 f8 98 ee \tkortestq %k6,%k5",}, +{{0xc5, 0xf9, 0x98, 0xee, }, 4, 0, "", "", +"c5 f9 98 ee \tkortestb %k6,%k5",}, +{{0xc4, 0xe1, 0xf9, 0x98, 0xee, }, 5, 0, "", "", +"c4 e1 f9 98 ee \tkortestd %k6,%k5",}, +{{0xc5, 0xf8, 0x99, 0xee, }, 4, 0, "", "", +"c5 f8 99 ee \tktestw %k6,%k5",}, +{{0xc4, 0xe1, 0xf8, 0x99, 0xee, }, 5, 0, "", "", +"c4 e1 f8 99 ee \tktestq %k6,%k5",}, +{{0xc5, 0xf9, 0x99, 0xee, }, 4, 0, "", "", +"c5 f9 99 ee \tktestb %k6,%k5",}, +{{0xc4, 0xe1, 0xf9, 0x99, 0xee, }, 5, 0, "", "", +"c4 e1 f9 99 ee \tktestd %k6,%k5",}, +{{0xc4, 0xe3, 0xf9, 0x30, 0xee, 0x12, }, 6, 0, "", "", +"c4 e3 f9 30 ee 12 \tkshiftrw $0x12,%k6,%k5",}, +{{0xc4, 0xe3, 0xf9, 0x31, 0xee, 0x5b, }, 6, 0, "", "", +"c4 e3 f9 31 ee 5b \tkshiftrq $0x5b,%k6,%k5",}, +{{0xc4, 0xe3, 0xf9, 0x32, 0xee, 0x12, }, 6, 0, "", "", +"c4 e3 f9 32 ee 12 \tkshiftlw $0x12,%k6,%k5",}, +{{0xc4, 0xe3, 0xf9, 0x33, 0xee, 0x5b, }, 6, 0, "", "", +"c4 e3 f9 33 ee 5b \tkshiftlq $0x5b,%k6,%k5",}, +{{0xc5, 0xf8, 0x5b, 0xf5, }, 4, 0, "", "", +"c5 f8 5b f5 \tvcvtdq2ps %xmm5,%xmm6",}, +{{0x62, 0x91, 0xfc, 0x4f, 0x5b, 0xf5, }, 6, 0, "", "", +"62 91 fc 4f 5b f5 \tvcvtqq2ps %zmm29,%ymm6{%k7}",}, +{{0xc5, 0xf9, 0x5b, 0xf5, }, 4, 0, "", "", +"c5 f9 5b f5 \tvcvtps2dq %xmm5,%xmm6",}, +{{0xc5, 0xfa, 0x5b, 0xf5, }, 4, 0, "", "", +"c5 fa 5b f5 \tvcvttps2dq %xmm5,%xmm6",}, +{{0x0f, 0x6f, 0xe0, }, 3, 0, "", "", +"0f 6f e0 \tmovq %mm0,%mm4",}, +{{0xc5, 0xfd, 0x6f, 0xf4, }, 4, 0, "", "", +"c5 fd 6f f4 \tvmovdqa %ymm4,%ymm6",}, +{{0x62, 0x01, 0x7d, 0x48, 0x6f, 0xd1, }, 6, 0, "", "", +"62 01 7d 48 6f d1 \tvmovdqa32 %zmm25,%zmm26",}, +{{0x62, 0x01, 0xfd, 0x48, 0x6f, 0xd1, }, 6, 0, "", "", +"62 01 fd 48 6f d1 \tvmovdqa64 %zmm25,%zmm26",}, +{{0xc5, 0xfe, 0x6f, 0xf4, }, 4, 0, "", "", +"c5 fe 6f f4 \tvmovdqu %ymm4,%ymm6",}, +{{0x62, 0x01, 0x7e, 0x48, 0x6f, 0xf5, }, 6, 0, "", "", +"62 01 7e 48 6f f5 \tvmovdqu32 %zmm29,%zmm30",}, +{{0x62, 0x01, 0xfe, 0x48, 0x6f, 0xd1, }, 6, 0, "", "", +"62 01 fe 48 6f d1 \tvmovdqu64 %zmm25,%zmm26",}, +{{0x62, 0x01, 0x7f, 0x48, 0x6f, 0xf5, }, 6, 0, "", "", +"62 01 7f 48 6f f5 \tvmovdqu8 %zmm29,%zmm30",}, +{{0x62, 0x01, 0xff, 0x48, 0x6f, 0xd1, }, 6, 0, "", "", +"62 01 ff 48 6f d1 \tvmovdqu16 %zmm25,%zmm26",}, +{{0x0f, 0x78, 0xc3, }, 3, 0, "", "", +"0f 78 c3 \tvmread %rax,%rbx",}, +{{0x62, 0x01, 0x7c, 0x48, 0x78, 0xd1, }, 6, 0, "", "", +"62 01 7c 48 78 d1 \tvcvttps2udq %zmm25,%zmm26",}, +{{0x62, 0x91, 0xfc, 0x4f, 0x78, 0xf5, }, 6, 0, "", "", +"62 91 fc 4f 78 f5 \tvcvttpd2udq %zmm29,%ymm6{%k7}",}, +{{0x62, 0xf1, 0xff, 0x08, 0x78, 0xc6, }, 6, 0, "", "", +"62 f1 ff 08 78 c6 \tvcvttsd2usi %xmm6,%rax",}, +{{0x62, 0xf1, 0xfe, 0x08, 0x78, 0xc6, }, 6, 0, "", "", +"62 f1 fe 08 78 c6 \tvcvttss2usi %xmm6,%rax",}, +{{0x62, 0x61, 0x7d, 0x4f, 0x78, 0xd5, }, 6, 0, "", "", +"62 61 7d 4f 78 d5 \tvcvttps2uqq %ymm5,%zmm26{%k7}",}, +{{0x62, 0x01, 0xfd, 0x48, 0x78, 0xf5, }, 6, 0, "", "", +"62 01 fd 48 78 f5 \tvcvttpd2uqq %zmm29,%zmm30",}, +{{0x0f, 0x79, 0xd8, }, 3, 0, "", "", +"0f 79 d8 \tvmwrite %rax,%rbx",}, +{{0x62, 0x01, 0x7c, 0x48, 0x79, 0xd1, }, 6, 0, "", "", +"62 01 7c 48 79 d1 \tvcvtps2udq %zmm25,%zmm26",}, +{{0x62, 0x91, 0xfc, 0x4f, 0x79, 0xf5, }, 6, 0, "", "", +"62 91 fc 4f 79 f5 \tvcvtpd2udq %zmm29,%ymm6{%k7}",}, +{{0x62, 0xf1, 0xff, 0x08, 0x79, 0xc6, }, 6, 0, "", "", +"62 f1 ff 08 79 c6 \tvcvtsd2usi %xmm6,%rax",}, +{{0x62, 0xf1, 0xfe, 0x08, 0x79, 0xc6, }, 6, 0, "", "", +"62 f1 fe 08 79 c6 \tvcvtss2usi %xmm6,%rax",}, +{{0x62, 0x61, 0x7d, 0x4f, 0x79, 0xd5, }, 6, 0, "", "", +"62 61 7d 4f 79 d5 \tvcvtps2uqq %ymm5,%zmm26{%k7}",}, +{{0x62, 0x01, 0xfd, 0x48, 0x79, 0xf5, }, 6, 0, "", "", +"62 01 fd 48 79 f5 \tvcvtpd2uqq %zmm29,%zmm30",}, +{{0x62, 0x61, 0x7e, 0x4f, 0x7a, 0xed, }, 6, 0, "", "", +"62 61 7e 4f 7a ed \tvcvtudq2pd %ymm5,%zmm29{%k7}",}, +{{0x62, 0x01, 0xfe, 0x48, 0x7a, 0xd1, }, 6, 0, "", "", +"62 01 fe 48 7a d1 \tvcvtuqq2pd %zmm25,%zmm26",}, +{{0x62, 0x01, 0x7f, 0x48, 0x7a, 0xf5, }, 6, 0, "", "", +"62 01 7f 48 7a f5 \tvcvtudq2ps %zmm29,%zmm30",}, +{{0x62, 0x01, 0xff, 0x4f, 0x7a, 0xd1, }, 6, 0, "", "", +"62 01 ff 4f 7a d1 \tvcvtuqq2ps %zmm25,%ymm26{%k7}",}, +{{0x62, 0x01, 0x7d, 0x4f, 0x7a, 0xd1, }, 6, 0, "", "", +"62 01 7d 4f 7a d1 \tvcvttps2qq %ymm25,%zmm26{%k7}",}, +{{0x62, 0x01, 0xfd, 0x48, 0x7a, 0xf5, }, 6, 0, "", "", +"62 01 fd 48 7a f5 \tvcvttpd2qq %zmm29,%zmm30",}, +{{0x62, 0xf1, 0x57, 0x08, 0x7b, 0xf0, }, 6, 0, "", "", +"62 f1 57 08 7b f0 \tvcvtusi2sd %eax,%xmm5,%xmm6",}, +{{0x62, 0xf1, 0x56, 0x08, 0x7b, 0xf0, }, 6, 0, "", "", +"62 f1 56 08 7b f0 \tvcvtusi2ss %eax,%xmm5,%xmm6",}, +{{0x62, 0x61, 0x7d, 0x4f, 0x7b, 0xd5, }, 6, 0, "", "", +"62 61 7d 4f 7b d5 \tvcvtps2qq %ymm5,%zmm26{%k7}",}, +{{0x62, 0x01, 0xfd, 0x48, 0x7b, 0xf5, }, 6, 0, "", "", +"62 01 fd 48 7b f5 \tvcvtpd2qq %zmm29,%zmm30",}, +{{0x0f, 0x7f, 0xc4, }, 3, 0, "", "", +"0f 7f c4 \tmovq %mm0,%mm4",}, +{{0xc5, 0x7d, 0x7f, 0xc6, }, 4, 0, "", "", +"c5 7d 7f c6 \tvmovdqa %ymm8,%ymm6",}, +{{0x62, 0x01, 0x7d, 0x48, 0x7f, 0xca, }, 6, 0, "", "", +"62 01 7d 48 7f ca \tvmovdqa32 %zmm25,%zmm26",}, +{{0x62, 0x01, 0xfd, 0x48, 0x7f, 0xca, }, 6, 0, "", "", +"62 01 fd 48 7f ca \tvmovdqa64 %zmm25,%zmm26",}, +{{0xc5, 0x7e, 0x7f, 0xc6, }, 4, 0, "", "", +"c5 7e 7f c6 \tvmovdqu %ymm8,%ymm6",}, +{{0x62, 0x01, 0x7e, 0x48, 0x7f, 0xca, }, 6, 0, "", "", +"62 01 7e 48 7f ca \tvmovdqu32 %zmm25,%zmm26",}, +{{0x62, 0x01, 0xfe, 0x48, 0x7f, 0xca, }, 6, 0, "", "", +"62 01 fe 48 7f ca \tvmovdqu64 %zmm25,%zmm26",}, +{{0x62, 0x61, 0x7f, 0x48, 0x7f, 0x31, }, 6, 0, "", "", +"62 61 7f 48 7f 31 \tvmovdqu8 %zmm30,(%rcx)",}, +{{0x62, 0x01, 0xff, 0x48, 0x7f, 0xca, }, 6, 0, "", "", +"62 01 ff 48 7f ca \tvmovdqu16 %zmm25,%zmm26",}, +{{0x0f, 0xdb, 0xd1, }, 3, 0, "", "", +"0f db d1 \tpand %mm1,%mm2",}, +{{0x66, 0x0f, 0xdb, 0xd1, }, 4, 0, "", "", +"66 0f db d1 \tpand %xmm1,%xmm2",}, +{{0xc5, 0xcd, 0xdb, 0xd4, }, 4, 0, "", "", +"c5 cd db d4 \tvpand %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x01, 0x35, 0x40, 0xdb, 0xd0, }, 6, 0, "", "", +"62 01 35 40 db d0 \tvpandd %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x01, 0xb5, 0x40, 0xdb, 0xd0, }, 6, 0, "", "", +"62 01 b5 40 db d0 \tvpandq %zmm24,%zmm25,%zmm26",}, +{{0x0f, 0xdf, 0xd1, }, 3, 0, "", "", +"0f df d1 \tpandn %mm1,%mm2",}, +{{0x66, 0x0f, 0xdf, 0xd1, }, 4, 0, "", "", +"66 0f df d1 \tpandn %xmm1,%xmm2",}, +{{0xc5, 0xcd, 0xdf, 0xd4, }, 4, 0, "", "", +"c5 cd df d4 \tvpandn %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x01, 0x35, 0x40, 0xdf, 0xd0, }, 6, 0, "", "", +"62 01 35 40 df d0 \tvpandnd %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x01, 0xb5, 0x40, 0xdf, 0xd0, }, 6, 0, "", "", +"62 01 b5 40 df d0 \tvpandnq %zmm24,%zmm25,%zmm26",}, +{{0xc5, 0xf9, 0xe6, 0xd1, }, 4, 0, "", "", +"c5 f9 e6 d1 \tvcvttpd2dq %xmm1,%xmm2",}, +{{0xc5, 0xfa, 0xe6, 0xf5, }, 4, 0, "", "", +"c5 fa e6 f5 \tvcvtdq2pd %xmm5,%xmm6",}, +{{0x62, 0x61, 0x7e, 0x4f, 0xe6, 0xd5, }, 6, 0, "", "", +"62 61 7e 4f e6 d5 \tvcvtdq2pd %ymm5,%zmm26{%k7}",}, +{{0x62, 0x01, 0xfe, 0x48, 0xe6, 0xd1, }, 6, 0, "", "", +"62 01 fe 48 e6 d1 \tvcvtqq2pd %zmm25,%zmm26",}, +{{0xc5, 0xfb, 0xe6, 0xd1, }, 4, 0, "", "", +"c5 fb e6 d1 \tvcvtpd2dq %xmm1,%xmm2",}, +{{0x0f, 0xeb, 0xf4, }, 3, 0, "", "", +"0f eb f4 \tpor %mm4,%mm6",}, +{{0xc5, 0xcd, 0xeb, 0xd4, }, 4, 0, "", "", +"c5 cd eb d4 \tvpor %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x01, 0x35, 0x40, 0xeb, 0xd0, }, 6, 0, "", "", +"62 01 35 40 eb d0 \tvpord %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x01, 0xb5, 0x40, 0xeb, 0xd0, }, 6, 0, "", "", +"62 01 b5 40 eb d0 \tvporq %zmm24,%zmm25,%zmm26",}, +{{0x0f, 0xef, 0xf4, }, 3, 0, "", "", +"0f ef f4 \tpxor %mm4,%mm6",}, +{{0xc5, 0xcd, 0xef, 0xd4, }, 4, 0, "", "", +"c5 cd ef d4 \tvpxor %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x01, 0x35, 0x40, 0xef, 0xd0, }, 6, 0, "", "", +"62 01 35 40 ef d0 \tvpxord %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x01, 0xb5, 0x40, 0xef, 0xd0, }, 6, 0, "", "", +"62 01 b5 40 ef d0 \tvpxorq %zmm24,%zmm25,%zmm26",}, +{{0x66, 0x0f, 0x38, 0x10, 0xc1, }, 5, 0, "", "", +"66 0f 38 10 c1 \tpblendvb %xmm0,%xmm1,%xmm0",}, +{{0x62, 0x02, 0x9d, 0x40, 0x10, 0xeb, }, 6, 0, "", "", +"62 02 9d 40 10 eb \tvpsrlvw %zmm27,%zmm28,%zmm29",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x10, 0xe6, }, 6, 0, "", "", +"62 62 7e 4f 10 e6 \tvpmovuswb %zmm28,%ymm6{%k7}",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x11, 0xe6, }, 6, 0, "", "", +"62 62 7e 4f 11 e6 \tvpmovusdb %zmm28,%xmm6{%k7}",}, +{{0x62, 0x02, 0x9d, 0x40, 0x11, 0xeb, }, 6, 0, "", "", +"62 02 9d 40 11 eb \tvpsravw %zmm27,%zmm28,%zmm29",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x12, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 12 de \tvpmovusqb %zmm27,%xmm6{%k7}",}, +{{0x62, 0x02, 0x9d, 0x40, 0x12, 0xeb, }, 6, 0, "", "", +"62 02 9d 40 12 eb \tvpsllvw %zmm27,%zmm28,%zmm29",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "", +"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",}, +{{0x62, 0x62, 0x7d, 0x4f, 0x13, 0xdd, }, 6, 0, "", "", +"62 62 7d 4f 13 dd \tvcvtph2ps %ymm5,%zmm27{%k7}",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x13, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 13 de \tvpmovusdw %zmm27,%ymm6{%k7}",}, +{{0x66, 0x0f, 0x38, 0x14, 0xc1, }, 5, 0, "", "", +"66 0f 38 14 c1 \tblendvps %xmm0,%xmm1,%xmm0",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x14, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 14 de \tvpmovusqw %zmm27,%xmm6{%k7}",}, +{{0x62, 0x02, 0x1d, 0x40, 0x14, 0xeb, }, 6, 0, "", "", +"62 02 1d 40 14 eb \tvprorvd %zmm27,%zmm28,%zmm29",}, +{{0x62, 0x02, 0x9d, 0x40, 0x14, 0xeb, }, 6, 0, "", "", +"62 02 9d 40 14 eb \tvprorvq %zmm27,%zmm28,%zmm29",}, +{{0x66, 0x0f, 0x38, 0x15, 0xc1, }, 5, 0, "", "", +"66 0f 38 15 c1 \tblendvpd %xmm0,%xmm1,%xmm0",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x15, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 15 de \tvpmovusqd %zmm27,%ymm6{%k7}",}, +{{0x62, 0x02, 0x1d, 0x40, 0x15, 0xeb, }, 6, 0, "", "", +"62 02 1d 40 15 eb \tvprolvd %zmm27,%zmm28,%zmm29",}, +{{0x62, 0x02, 0x9d, 0x40, 0x15, 0xeb, }, 6, 0, "", "", +"62 02 9d 40 15 eb \tvprolvq %zmm27,%zmm28,%zmm29",}, +{{0xc4, 0xe2, 0x4d, 0x16, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 16 d4 \tvpermps %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x82, 0x2d, 0x27, 0x16, 0xf0, }, 6, 0, "", "", +"62 82 2d 27 16 f0 \tvpermps %ymm24,%ymm26,%ymm22{%k7}",}, +{{0x62, 0x82, 0xad, 0x27, 0x16, 0xf0, }, 6, 0, "", "", +"62 82 ad 27 16 f0 \tvpermpd %ymm24,%ymm26,%ymm22{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x19, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 19 f4 \tvbroadcastsd %xmm4,%ymm6",}, +{{0x62, 0x02, 0x7d, 0x48, 0x19, 0xd3, }, 6, 0, "", "", +"62 02 7d 48 19 d3 \tvbroadcastf32x2 %xmm27,%zmm26",}, +{{0xc4, 0xe2, 0x7d, 0x1a, 0x21, }, 5, 0, "", "", +"c4 e2 7d 1a 21 \tvbroadcastf128 (%rcx),%ymm4",}, +{{0x62, 0x62, 0x7d, 0x48, 0x1a, 0x11, }, 6, 0, "", "", +"62 62 7d 48 1a 11 \tvbroadcastf32x4 (%rcx),%zmm26",}, +{{0x62, 0x62, 0xfd, 0x48, 0x1a, 0x11, }, 6, 0, "", "", +"62 62 fd 48 1a 11 \tvbroadcastf64x2 (%rcx),%zmm26",}, +{{0x62, 0x62, 0x7d, 0x48, 0x1b, 0x19, }, 6, 0, "", "", +"62 62 7d 48 1b 19 \tvbroadcastf32x8 (%rcx),%zmm27",}, +{{0x62, 0x62, 0xfd, 0x48, 0x1b, 0x11, }, 6, 0, "", "", +"62 62 fd 48 1b 11 \tvbroadcastf64x4 (%rcx),%zmm26",}, +{{0x62, 0x02, 0xfd, 0x48, 0x1f, 0xe3, }, 6, 0, "", "", +"62 02 fd 48 1f e3 \tvpabsq %zmm27,%zmm28",}, +{{0xc4, 0xe2, 0x79, 0x20, 0xec, }, 5, 0, "", "", +"c4 e2 79 20 ec \tvpmovsxbw %xmm4,%xmm5",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x20, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 20 de \tvpmovswb %zmm27,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x21, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 21 f4 \tvpmovsxbd %xmm4,%ymm6",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x21, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 21 de \tvpmovsdb %zmm27,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x22, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 22 e4 \tvpmovsxbq %xmm4,%ymm4",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x22, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 22 de \tvpmovsqb %zmm27,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x23, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 23 e4 \tvpmovsxwd %xmm4,%ymm4",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x23, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 23 de \tvpmovsdw %zmm27,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x24, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 24 f4 \tvpmovsxwq %xmm4,%ymm6",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x24, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 24 de \tvpmovsqw %zmm27,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x25, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 25 e4 \tvpmovsxdq %xmm4,%ymm4",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x25, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 25 de \tvpmovsqd %zmm27,%ymm6{%k7}",}, +{{0x62, 0x92, 0x1d, 0x40, 0x26, 0xeb, }, 6, 0, "", "", +"62 92 1d 40 26 eb \tvptestmb %zmm27,%zmm28,%k5",}, +{{0x62, 0x92, 0x9d, 0x40, 0x26, 0xeb, }, 6, 0, "", "", +"62 92 9d 40 26 eb \tvptestmw %zmm27,%zmm28,%k5",}, +{{0x62, 0x92, 0x26, 0x40, 0x26, 0xea, }, 6, 0, "", "", +"62 92 26 40 26 ea \tvptestnmb %zmm26,%zmm27,%k5",}, +{{0x62, 0x92, 0xa6, 0x40, 0x26, 0xea, }, 6, 0, "", "", +"62 92 a6 40 26 ea \tvptestnmw %zmm26,%zmm27,%k5",}, +{{0x62, 0x92, 0x1d, 0x40, 0x27, 0xeb, }, 6, 0, "", "", +"62 92 1d 40 27 eb \tvptestmd %zmm27,%zmm28,%k5",}, +{{0x62, 0x92, 0x9d, 0x40, 0x27, 0xeb, }, 6, 0, "", "", +"62 92 9d 40 27 eb \tvptestmq %zmm27,%zmm28,%k5",}, +{{0x62, 0x92, 0x26, 0x40, 0x27, 0xea, }, 6, 0, "", "", +"62 92 26 40 27 ea \tvptestnmd %zmm26,%zmm27,%k5",}, +{{0x62, 0x92, 0xa6, 0x40, 0x27, 0xea, }, 6, 0, "", "", +"62 92 a6 40 27 ea \tvptestnmq %zmm26,%zmm27,%k5",}, +{{0xc4, 0xe2, 0x4d, 0x28, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 28 d4 \tvpmuldq %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x62, 0x7e, 0x48, 0x28, 0xe5, }, 6, 0, "", "", +"62 62 7e 48 28 e5 \tvpmovm2b %k5,%zmm28",}, +{{0x62, 0x62, 0xfe, 0x48, 0x28, 0xe5, }, 6, 0, "", "", +"62 62 fe 48 28 e5 \tvpmovm2w %k5,%zmm28",}, +{{0xc4, 0xe2, 0x4d, 0x29, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 29 d4 \tvpcmpeqq %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x92, 0x7e, 0x48, 0x29, 0xec, }, 6, 0, "", "", +"62 92 7e 48 29 ec \tvpmovb2m %zmm28,%k5",}, +{{0x62, 0x92, 0xfe, 0x48, 0x29, 0xec, }, 6, 0, "", "", +"62 92 fe 48 29 ec \tvpmovw2m %zmm28,%k5",}, +{{0xc4, 0xe2, 0x7d, 0x2a, 0x21, }, 5, 0, "", "", +"c4 e2 7d 2a 21 \tvmovntdqa (%rcx),%ymm4",}, +{{0x62, 0x62, 0xfe, 0x48, 0x2a, 0xf6, }, 6, 0, "", "", +"62 62 fe 48 2a f6 \tvpbroadcastmb2q %k6,%zmm30",}, +{{0xc4, 0xe2, 0x5d, 0x2c, 0x31, }, 5, 0, "", "", +"c4 e2 5d 2c 31 \tvmaskmovps (%rcx),%ymm4,%ymm6",}, +{{0x62, 0x02, 0x35, 0x40, 0x2c, 0xd0, }, 6, 0, "", "", +"62 02 35 40 2c d0 \tvscalefps %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0xb5, 0x40, 0x2c, 0xd0, }, 6, 0, "", "", +"62 02 b5 40 2c d0 \tvscalefpd %zmm24,%zmm25,%zmm26",}, +{{0xc4, 0xe2, 0x5d, 0x2d, 0x31, }, 5, 0, "", "", +"c4 e2 5d 2d 31 \tvmaskmovpd (%rcx),%ymm4,%ymm6",}, +{{0x62, 0x02, 0x35, 0x07, 0x2d, 0xd0, }, 6, 0, "", "", +"62 02 35 07 2d d0 \tvscalefss %xmm24,%xmm25,%xmm26{%k7}",}, +{{0x62, 0x02, 0xb5, 0x07, 0x2d, 0xd0, }, 6, 0, "", "", +"62 02 b5 07 2d d0 \tvscalefsd %xmm24,%xmm25,%xmm26{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x30, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 30 e4 \tvpmovzxbw %xmm4,%ymm4",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x30, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 30 de \tvpmovwb %zmm27,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x31, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 31 f4 \tvpmovzxbd %xmm4,%ymm6",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x31, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 31 de \tvpmovdb %zmm27,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x32, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 32 e4 \tvpmovzxbq %xmm4,%ymm4",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x32, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 32 de \tvpmovqb %zmm27,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x33, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 33 e4 \tvpmovzxwd %xmm4,%ymm4",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x33, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 33 de \tvpmovdw %zmm27,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x34, 0xf4, }, 5, 0, "", "", +"c4 e2 7d 34 f4 \tvpmovzxwq %xmm4,%ymm6",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x34, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 34 de \tvpmovqw %zmm27,%xmm6{%k7}",}, +{{0xc4, 0xe2, 0x7d, 0x35, 0xe4, }, 5, 0, "", "", +"c4 e2 7d 35 e4 \tvpmovzxdq %xmm4,%ymm4",}, +{{0x62, 0x62, 0x7e, 0x4f, 0x35, 0xde, }, 6, 0, "", "", +"62 62 7e 4f 35 de \tvpmovqd %zmm27,%ymm6{%k7}",}, +{{0xc4, 0xe2, 0x4d, 0x36, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 36 d4 \tvpermd %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x82, 0x2d, 0x27, 0x36, 0xf0, }, 6, 0, "", "", +"62 82 2d 27 36 f0 \tvpermd %ymm24,%ymm26,%ymm22{%k7}",}, +{{0x62, 0x82, 0xad, 0x27, 0x36, 0xf0, }, 6, 0, "", "", +"62 82 ad 27 36 f0 \tvpermq %ymm24,%ymm26,%ymm22{%k7}",}, +{{0xc4, 0xe2, 0x4d, 0x38, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 38 d4 \tvpminsb %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x62, 0x7e, 0x48, 0x38, 0xe5, }, 6, 0, "", "", +"62 62 7e 48 38 e5 \tvpmovm2d %k5,%zmm28",}, +{{0x62, 0x62, 0xfe, 0x48, 0x38, 0xe5, }, 6, 0, "", "", +"62 62 fe 48 38 e5 \tvpmovm2q %k5,%zmm28",}, +{{0xc4, 0xe2, 0x69, 0x39, 0xd9, }, 5, 0, "", "", +"c4 e2 69 39 d9 \tvpminsd %xmm1,%xmm2,%xmm3",}, +{{0x62, 0x02, 0x35, 0x40, 0x39, 0xd0, }, 6, 0, "", "", +"62 02 35 40 39 d0 \tvpminsd %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0xb5, 0x40, 0x39, 0xd0, }, 6, 0, "", "", +"62 02 b5 40 39 d0 \tvpminsq %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x92, 0x7e, 0x48, 0x39, 0xec, }, 6, 0, "", "", +"62 92 7e 48 39 ec \tvpmovd2m %zmm28,%k5",}, +{{0x62, 0x92, 0xfe, 0x48, 0x39, 0xec, }, 6, 0, "", "", +"62 92 fe 48 39 ec \tvpmovq2m %zmm28,%k5",}, +{{0xc4, 0xe2, 0x4d, 0x3a, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 3a d4 \tvpminuw %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x62, 0x7e, 0x48, 0x3a, 0xe6, }, 6, 0, "", "", +"62 62 7e 48 3a e6 \tvpbroadcastmw2d %k6,%zmm28",}, +{{0xc4, 0xe2, 0x4d, 0x3b, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 3b d4 \tvpminud %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x02, 0x35, 0x40, 0x3b, 0xd0, }, 6, 0, "", "", +"62 02 35 40 3b d0 \tvpminud %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0xb5, 0x40, 0x3b, 0xd0, }, 6, 0, "", "", +"62 02 b5 40 3b d0 \tvpminuq %zmm24,%zmm25,%zmm26",}, +{{0xc4, 0xe2, 0x4d, 0x3d, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 3d d4 \tvpmaxsd %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x02, 0x35, 0x40, 0x3d, 0xd0, }, 6, 0, "", "", +"62 02 35 40 3d d0 \tvpmaxsd %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0xb5, 0x40, 0x3d, 0xd0, }, 6, 0, "", "", +"62 02 b5 40 3d d0 \tvpmaxsq %zmm24,%zmm25,%zmm26",}, +{{0xc4, 0xe2, 0x4d, 0x3f, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 3f d4 \tvpmaxud %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x02, 0x35, 0x40, 0x3f, 0xd0, }, 6, 0, "", "", +"62 02 35 40 3f d0 \tvpmaxud %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0xb5, 0x40, 0x3f, 0xd0, }, 6, 0, "", "", +"62 02 b5 40 3f d0 \tvpmaxuq %zmm24,%zmm25,%zmm26",}, +{{0xc4, 0xe2, 0x4d, 0x40, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 40 d4 \tvpmulld %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x02, 0x35, 0x40, 0x40, 0xd0, }, 6, 0, "", "", +"62 02 35 40 40 d0 \tvpmulld %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0xb5, 0x40, 0x40, 0xd0, }, 6, 0, "", "", +"62 02 b5 40 40 d0 \tvpmullq %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0x7d, 0x48, 0x42, 0xd1, }, 6, 0, "", "", +"62 02 7d 48 42 d1 \tvgetexpps %zmm25,%zmm26",}, +{{0x62, 0x02, 0xfd, 0x48, 0x42, 0xe3, }, 6, 0, "", "", +"62 02 fd 48 42 e3 \tvgetexppd %zmm27,%zmm28",}, +{{0x62, 0x02, 0x35, 0x07, 0x43, 0xd0, }, 6, 0, "", "", +"62 02 35 07 43 d0 \tvgetexpss %xmm24,%xmm25,%xmm26{%k7}",}, +{{0x62, 0x02, 0x95, 0x07, 0x43, 0xf4, }, 6, 0, "", "", +"62 02 95 07 43 f4 \tvgetexpsd %xmm28,%xmm29,%xmm30{%k7}",}, +{{0x62, 0x02, 0x7d, 0x48, 0x44, 0xe3, }, 6, 0, "", "", +"62 02 7d 48 44 e3 \tvplzcntd %zmm27,%zmm28",}, +{{0x62, 0x02, 0xfd, 0x48, 0x44, 0xe3, }, 6, 0, "", "", +"62 02 fd 48 44 e3 \tvplzcntq %zmm27,%zmm28",}, +{{0xc4, 0xe2, 0x4d, 0x46, 0xd4, }, 5, 0, "", "", +"c4 e2 4d 46 d4 \tvpsravd %ymm4,%ymm6,%ymm2",}, +{{0x62, 0x02, 0x35, 0x40, 0x46, 0xd0, }, 6, 0, "", "", +"62 02 35 40 46 d0 \tvpsravd %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0xb5, 0x40, 0x46, 0xd0, }, 6, 0, "", "", +"62 02 b5 40 46 d0 \tvpsravq %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0x7d, 0x48, 0x4c, 0xd1, }, 6, 0, "", "", +"62 02 7d 48 4c d1 \tvrcp14ps %zmm25,%zmm26",}, +{{0x62, 0x02, 0xfd, 0x48, 0x4c, 0xe3, }, 6, 0, "", "", +"62 02 fd 48 4c e3 \tvrcp14pd %zmm27,%zmm28",}, +{{0x62, 0x02, 0x35, 0x07, 0x4d, 0xd0, }, 6, 0, "", "", +"62 02 35 07 4d d0 \tvrcp14ss %xmm24,%xmm25,%xmm26{%k7}",}, +{{0x62, 0x02, 0xb5, 0x07, 0x4d, 0xd0, }, 6, 0, "", "", +"62 02 b5 07 4d d0 \tvrcp14sd %xmm24,%xmm25,%xmm26{%k7}",}, +{{0x62, 0x02, 0x7d, 0x48, 0x4e, 0xd1, }, 6, 0, "", "", +"62 02 7d 48 4e d1 \tvrsqrt14ps %zmm25,%zmm26",}, +{{0x62, 0x02, 0xfd, 0x48, 0x4e, 0xe3, }, 6, 0, "", "", +"62 02 fd 48 4e e3 \tvrsqrt14pd %zmm27,%zmm28",}, +{{0x62, 0x02, 0x35, 0x07, 0x4f, 0xd0, }, 6, 0, "", "", +"62 02 35 07 4f d0 \tvrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}",}, +{{0x62, 0x02, 0xb5, 0x07, 0x4f, 0xd0, }, 6, 0, "", "", +"62 02 b5 07 4f d0 \tvrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}",}, +{{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "", +"c4 e2 79 59 f4 \tvpbroadcastq %xmm4,%xmm6",}, +{{0x62, 0x02, 0x7d, 0x48, 0x59, 0xd3, }, 6, 0, "", "", +"62 02 7d 48 59 d3 \tvbroadcasti32x2 %xmm27,%zmm26",}, +{{0xc4, 0xe2, 0x7d, 0x5a, 0x21, }, 5, 0, "", "", +"c4 e2 7d 5a 21 \tvbroadcasti128 (%rcx),%ymm4",}, +{{0x62, 0x62, 0x7d, 0x48, 0x5a, 0x11, }, 6, 0, "", "", +"62 62 7d 48 5a 11 \tvbroadcasti32x4 (%rcx),%zmm26",}, +{{0x62, 0x62, 0xfd, 0x48, 0x5a, 0x11, }, 6, 0, "", "", +"62 62 fd 48 5a 11 \tvbroadcasti64x2 (%rcx),%zmm26",}, +{{0x62, 0x62, 0x7d, 0x48, 0x5b, 0x21, }, 6, 0, "", "", +"62 62 7d 48 5b 21 \tvbroadcasti32x8 (%rcx),%zmm28",}, +{{0x62, 0x62, 0xfd, 0x48, 0x5b, 0x11, }, 6, 0, "", "", +"62 62 fd 48 5b 11 \tvbroadcasti64x4 (%rcx),%zmm26",}, +{{0x62, 0x02, 0x25, 0x40, 0x64, 0xe2, }, 6, 0, "", "", +"62 02 25 40 64 e2 \tvpblendmd %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0x64, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 64 e2 \tvpblendmq %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0x35, 0x40, 0x65, 0xd0, }, 6, 0, "", "", +"62 02 35 40 65 d0 \tvblendmps %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0xa5, 0x40, 0x65, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 65 e2 \tvblendmpd %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0x25, 0x40, 0x66, 0xe2, }, 6, 0, "", "", +"62 02 25 40 66 e2 \tvpblendmb %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0x66, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 66 e2 \tvpblendmw %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0x35, 0x40, 0x75, 0xd0, }, 6, 0, "", "", +"62 02 35 40 75 d0 \tvpermi2b %zmm24,%zmm25,%zmm26",}, +{{0x62, 0x02, 0xa5, 0x40, 0x75, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 75 e2 \tvpermi2w %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0x25, 0x40, 0x76, 0xe2, }, 6, 0, "", "", +"62 02 25 40 76 e2 \tvpermi2d %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0x76, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 76 e2 \tvpermi2q %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0x25, 0x40, 0x77, 0xe2, }, 6, 0, "", "", +"62 02 25 40 77 e2 \tvpermi2ps %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0x77, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 77 e2 \tvpermi2pd %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x62, 0x7d, 0x08, 0x7a, 0xf0, }, 6, 0, "", "", +"62 62 7d 08 7a f0 \tvpbroadcastb %eax,%xmm30",}, +{{0x62, 0x62, 0x7d, 0x08, 0x7b, 0xf0, }, 6, 0, "", "", +"62 62 7d 08 7b f0 \tvpbroadcastw %eax,%xmm30",}, +{{0x62, 0x62, 0x7d, 0x08, 0x7c, 0xf0, }, 6, 0, "", "", +"62 62 7d 08 7c f0 \tvpbroadcastd %eax,%xmm30",}, +{{0x62, 0x62, 0xfd, 0x48, 0x7c, 0xf0, }, 6, 0, "", "", +"62 62 fd 48 7c f0 \tvpbroadcastq %rax,%zmm30",}, +{{0x62, 0x02, 0x25, 0x40, 0x7d, 0xe2, }, 6, 0, "", "", +"62 02 25 40 7d e2 \tvpermt2b %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0x7d, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 7d e2 \tvpermt2w %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0x25, 0x40, 0x7e, 0xe2, }, 6, 0, "", "", +"62 02 25 40 7e e2 \tvpermt2d %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0x7e, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 7e e2 \tvpermt2q %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0x25, 0x40, 0x7f, 0xe2, }, 6, 0, "", "", +"62 02 25 40 7f e2 \tvpermt2ps %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0x7f, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 7f e2 \tvpermt2pd %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0x83, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 83 e2 \tvpmultishiftqb %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x62, 0x7d, 0x48, 0x88, 0x11, }, 6, 0, "", "", +"62 62 7d 48 88 11 \tvexpandps (%rcx),%zmm26",}, +{{0x62, 0x62, 0xfd, 0x48, 0x88, 0x21, }, 6, 0, "", "", +"62 62 fd 48 88 21 \tvexpandpd (%rcx),%zmm28",}, +{{0x62, 0x62, 0x7d, 0x48, 0x89, 0x21, }, 6, 0, "", "", +"62 62 7d 48 89 21 \tvpexpandd (%rcx),%zmm28",}, +{{0x62, 0x62, 0xfd, 0x48, 0x89, 0x11, }, 6, 0, "", "", +"62 62 fd 48 89 11 \tvpexpandq (%rcx),%zmm26",}, +{{0x62, 0x62, 0x7d, 0x48, 0x8a, 0x21, }, 6, 0, "", "", +"62 62 7d 48 8a 21 \tvcompressps %zmm28,(%rcx)",}, +{{0x62, 0x62, 0xfd, 0x48, 0x8a, 0x21, }, 6, 0, "", "", +"62 62 fd 48 8a 21 \tvcompresspd %zmm28,(%rcx)",}, +{{0x62, 0x62, 0x7d, 0x48, 0x8b, 0x21, }, 6, 0, "", "", +"62 62 7d 48 8b 21 \tvpcompressd %zmm28,(%rcx)",}, +{{0x62, 0x62, 0xfd, 0x48, 0x8b, 0x11, }, 6, 0, "", "", +"62 62 fd 48 8b 11 \tvpcompressq %zmm26,(%rcx)",}, +{{0x62, 0x02, 0x25, 0x40, 0x8d, 0xe2, }, 6, 0, "", "", +"62 02 25 40 8d e2 \tvpermb %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0x8d, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 8d e2 \tvpermw %zmm26,%zmm27,%zmm28",}, +{{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "", +"c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%rbp,%xmm7,2),%xmm1",}, +{{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "", +"c4 e2 e9 90 4c 7d 04 \tvpgatherdq %xmm2,0x4(%rbp,%xmm7,2),%xmm1",}, +{{0x62, 0x22, 0x7d, 0x41, 0x90, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 22 7d 41 90 94 dd 7b 00 00 00 \tvpgatherdd 0x7b(%rbp,%zmm27,8),%zmm26{%k1}",}, +{{0x62, 0x22, 0xfd, 0x41, 0x90, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 22 fd 41 90 94 dd 7b 00 00 00 \tvpgatherdq 0x7b(%rbp,%ymm27,8),%zmm26{%k1}",}, +{{0xc4, 0xe2, 0x69, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "", +"c4 e2 69 91 4c 7d 02 \tvpgatherqd %xmm2,0x2(%rbp,%xmm7,2),%xmm1",}, +{{0xc4, 0xe2, 0xe9, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "", +"c4 e2 e9 91 4c 7d 02 \tvpgatherqq %xmm2,0x2(%rbp,%xmm7,2),%xmm1",}, +{{0x62, 0x22, 0x7d, 0x41, 0x91, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 22 7d 41 91 94 dd 7b 00 00 00 \tvpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}",}, +{{0x62, 0x22, 0xfd, 0x41, 0x91, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 22 fd 41 91 94 dd 7b 00 00 00 \tvpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}",}, +{{0x62, 0x22, 0x7d, 0x41, 0xa0, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 22 7d 41 a0 a4 ed 7b 00 00 00 \tvpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",}, +{{0x62, 0x22, 0xfd, 0x41, 0xa0, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 22 fd 41 a0 94 dd 7b 00 00 00 \tvpscatterdq %zmm26,0x7b(%rbp,%ymm27,8){%k1}",}, +{{0x62, 0xb2, 0x7d, 0x41, 0xa1, 0xb4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 b2 7d 41 a1 b4 ed 7b 00 00 00 \tvpscatterqd %ymm6,0x7b(%rbp,%zmm29,8){%k1}",}, +{{0x62, 0xb2, 0xfd, 0x21, 0xa1, 0xb4, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 b2 fd 21 a1 b4 dd 7b 00 00 00 \tvpscatterqq %ymm6,0x7b(%rbp,%ymm27,8){%k1}",}, +{{0x62, 0x22, 0x7d, 0x41, 0xa2, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 22 7d 41 a2 a4 ed 7b 00 00 00 \tvscatterdps %zmm28,0x7b(%rbp,%zmm29,8){%k1}",}, +{{0x62, 0x22, 0xfd, 0x41, 0xa2, 0xa4, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 22 fd 41 a2 a4 dd 7b 00 00 00 \tvscatterdpd %zmm28,0x7b(%rbp,%ymm27,8){%k1}",}, +{{0x62, 0xb2, 0x7d, 0x41, 0xa3, 0xb4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 b2 7d 41 a3 b4 ed 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}",}, +{{0x62, 0x22, 0xfd, 0x41, 0xa3, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 22 fd 41 a3 a4 ed 7b 00 00 00 \tvscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",}, +{{0x62, 0x02, 0xa5, 0x40, 0xb4, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 b4 e2 \tvpmadd52luq %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0xa5, 0x40, 0xb5, 0xe2, }, 6, 0, "", "", +"62 02 a5 40 b5 e2 \tvpmadd52huq %zmm26,%zmm27,%zmm28",}, +{{0x62, 0x02, 0x7d, 0x48, 0xc4, 0xda, }, 6, 0, "", "", +"62 02 7d 48 c4 da \tvpconflictd %zmm26,%zmm27",}, +{{0x62, 0x02, 0xfd, 0x48, 0xc4, 0xda, }, 6, 0, "", "", +"62 02 fd 48 c4 da \tvpconflictq %zmm26,%zmm27",}, +{{0x62, 0x02, 0x7d, 0x48, 0xc8, 0xf5, }, 6, 0, "", "", +"62 02 7d 48 c8 f5 \tvexp2ps %zmm29,%zmm30",}, +{{0x62, 0x02, 0xfd, 0x48, 0xc8, 0xda, }, 6, 0, "", "", +"62 02 fd 48 c8 da \tvexp2pd %zmm26,%zmm27",}, +{{0x62, 0x02, 0x7d, 0x48, 0xca, 0xf5, }, 6, 0, "", "", +"62 02 7d 48 ca f5 \tvrcp28ps %zmm29,%zmm30",}, +{{0x62, 0x02, 0xfd, 0x48, 0xca, 0xda, }, 6, 0, "", "", +"62 02 fd 48 ca da \tvrcp28pd %zmm26,%zmm27",}, +{{0x62, 0x02, 0x15, 0x07, 0xcb, 0xf4, }, 6, 0, "", "", +"62 02 15 07 cb f4 \tvrcp28ss %xmm28,%xmm29,%xmm30{%k7}",}, +{{0x62, 0x02, 0xad, 0x07, 0xcb, 0xd9, }, 6, 0, "", "", +"62 02 ad 07 cb d9 \tvrcp28sd %xmm25,%xmm26,%xmm27{%k7}",}, +{{0x62, 0x02, 0x7d, 0x48, 0xcc, 0xf5, }, 6, 0, "", "", +"62 02 7d 48 cc f5 \tvrsqrt28ps %zmm29,%zmm30",}, +{{0x62, 0x02, 0xfd, 0x48, 0xcc, 0xda, }, 6, 0, "", "", +"62 02 fd 48 cc da \tvrsqrt28pd %zmm26,%zmm27",}, +{{0x62, 0x02, 0x15, 0x07, 0xcd, 0xf4, }, 6, 0, "", "", +"62 02 15 07 cd f4 \tvrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}",}, +{{0x62, 0x02, 0xad, 0x07, 0xcd, 0xd9, }, 6, 0, "", "", +"62 02 ad 07 cd d9 \tvrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}",}, +{{0x62, 0x03, 0x15, 0x40, 0x03, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 15 40 03 f4 12 \tvalignd $0x12,%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x03, 0xad, 0x40, 0x03, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 40 03 d9 12 \tvalignq $0x12,%zmm25,%zmm26,%zmm27",}, +{{0xc4, 0xe3, 0x7d, 0x08, 0xd6, 0x05, }, 6, 0, "", "", +"c4 e3 7d 08 d6 05 \tvroundps $0x5,%ymm6,%ymm2",}, +{{0x62, 0x03, 0x7d, 0x48, 0x08, 0xd1, 0x12, }, 7, 0, "", "", +"62 03 7d 48 08 d1 12 \tvrndscaleps $0x12,%zmm25,%zmm26",}, +{{0xc4, 0xe3, 0x7d, 0x09, 0xd6, 0x05, }, 6, 0, "", "", +"c4 e3 7d 09 d6 05 \tvroundpd $0x5,%ymm6,%ymm2",}, +{{0x62, 0x03, 0xfd, 0x48, 0x09, 0xd1, 0x12, }, 7, 0, "", "", +"62 03 fd 48 09 d1 12 \tvrndscalepd $0x12,%zmm25,%zmm26",}, +{{0xc4, 0xe3, 0x49, 0x0a, 0xd4, 0x05, }, 6, 0, "", "", +"c4 e3 49 0a d4 05 \tvroundss $0x5,%xmm4,%xmm6,%xmm2",}, +{{0x62, 0x03, 0x35, 0x07, 0x0a, 0xd0, 0x12, }, 7, 0, "", "", +"62 03 35 07 0a d0 12 \tvrndscaless $0x12,%xmm24,%xmm25,%xmm26{%k7}",}, +{{0xc4, 0xe3, 0x49, 0x0b, 0xd4, 0x05, }, 6, 0, "", "", +"c4 e3 49 0b d4 05 \tvroundsd $0x5,%xmm4,%xmm6,%xmm2",}, +{{0x62, 0x03, 0xb5, 0x07, 0x0b, 0xd0, 0x12, }, 7, 0, "", "", +"62 03 b5 07 0b d0 12 \tvrndscalesd $0x12,%xmm24,%xmm25,%xmm26{%k7}",}, +{{0xc4, 0xe3, 0x5d, 0x18, 0xf4, 0x05, }, 6, 0, "", "", +"c4 e3 5d 18 f4 05 \tvinsertf128 $0x5,%xmm4,%ymm4,%ymm6",}, +{{0x62, 0x03, 0x35, 0x47, 0x18, 0xd0, 0x12, }, 7, 0, "", "", +"62 03 35 47 18 d0 12 \tvinsertf32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}",}, +{{0x62, 0x03, 0xb5, 0x47, 0x18, 0xd0, 0x12, }, 7, 0, "", "", +"62 03 b5 47 18 d0 12 \tvinsertf64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}",}, +{{0xc4, 0xe3, 0x7d, 0x19, 0xe4, 0x05, }, 6, 0, "", "", +"c4 e3 7d 19 e4 05 \tvextractf128 $0x5,%ymm4,%xmm4",}, +{{0x62, 0x03, 0x7d, 0x4f, 0x19, 0xca, 0x12, }, 7, 0, "", "", +"62 03 7d 4f 19 ca 12 \tvextractf32x4 $0x12,%zmm25,%xmm26{%k7}",}, +{{0x62, 0x03, 0xfd, 0x4f, 0x19, 0xca, 0x12, }, 7, 0, "", "", +"62 03 fd 4f 19 ca 12 \tvextractf64x2 $0x12,%zmm25,%xmm26{%k7}",}, +{{0x62, 0x03, 0x2d, 0x47, 0x1a, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 2d 47 1a d9 12 \tvinsertf32x8 $0x12,%ymm25,%zmm26,%zmm27{%k7}",}, +{{0x62, 0x03, 0x95, 0x47, 0x1a, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 95 47 1a f4 12 \tvinsertf64x4 $0x12,%ymm28,%zmm29,%zmm30{%k7}",}, +{{0x62, 0x03, 0x7d, 0x4f, 0x1b, 0xee, 0x12, }, 7, 0, "", "", +"62 03 7d 4f 1b ee 12 \tvextractf32x8 $0x12,%zmm29,%ymm30{%k7}",}, +{{0x62, 0x03, 0xfd, 0x4f, 0x1b, 0xd3, 0x12, }, 7, 0, "", "", +"62 03 fd 4f 1b d3 12 \tvextractf64x4 $0x12,%zmm26,%ymm27{%k7}",}, +{{0x62, 0x93, 0x0d, 0x40, 0x1e, 0xed, 0x12, }, 7, 0, "", "", +"62 93 0d 40 1e ed 12 \tvpcmpud $0x12,%zmm29,%zmm30,%k5",}, +{{0x62, 0x93, 0xa5, 0x40, 0x1e, 0xea, 0x12, }, 7, 0, "", "", +"62 93 a5 40 1e ea 12 \tvpcmpuq $0x12,%zmm26,%zmm27,%k5",}, +{{0x62, 0x93, 0x0d, 0x40, 0x1f, 0xed, 0x12, }, 7, 0, "", "", +"62 93 0d 40 1f ed 12 \tvpcmpd $0x12,%zmm29,%zmm30,%k5",}, +{{0x62, 0x93, 0xa5, 0x40, 0x1f, 0xea, 0x12, }, 7, 0, "", "", +"62 93 a5 40 1f ea 12 \tvpcmpq $0x12,%zmm26,%zmm27,%k5",}, +{{0x62, 0x03, 0x15, 0x40, 0x23, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 15 40 23 f4 12 \tvshuff32x4 $0x12,%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x03, 0xad, 0x40, 0x23, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 40 23 d9 12 \tvshuff64x2 $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x62, 0x03, 0x15, 0x40, 0x25, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 15 40 25 f4 12 \tvpternlogd $0x12,%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x03, 0x95, 0x40, 0x25, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 95 40 25 f4 12 \tvpternlogq $0x12,%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x03, 0x7d, 0x48, 0x26, 0xda, 0x12, }, 7, 0, "", "", +"62 03 7d 48 26 da 12 \tvgetmantps $0x12,%zmm26,%zmm27",}, +{{0x62, 0x03, 0xfd, 0x48, 0x26, 0xf5, 0x12, }, 7, 0, "", "", +"62 03 fd 48 26 f5 12 \tvgetmantpd $0x12,%zmm29,%zmm30",}, +{{0x62, 0x03, 0x2d, 0x07, 0x27, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 2d 07 27 d9 12 \tvgetmantss $0x12,%xmm25,%xmm26,%xmm27{%k7}",}, +{{0x62, 0x03, 0x95, 0x07, 0x27, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 95 07 27 f4 12 \tvgetmantsd $0x12,%xmm28,%xmm29,%xmm30{%k7}",}, +{{0xc4, 0xe3, 0x5d, 0x38, 0xf4, 0x05, }, 6, 0, "", "", +"c4 e3 5d 38 f4 05 \tvinserti128 $0x5,%xmm4,%ymm4,%ymm6",}, +{{0x62, 0x03, 0x35, 0x47, 0x38, 0xd0, 0x12, }, 7, 0, "", "", +"62 03 35 47 38 d0 12 \tvinserti32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}",}, +{{0x62, 0x03, 0xb5, 0x47, 0x38, 0xd0, 0x12, }, 7, 0, "", "", +"62 03 b5 47 38 d0 12 \tvinserti64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}",}, +{{0xc4, 0xe3, 0x7d, 0x39, 0xe6, 0x05, }, 6, 0, "", "", +"c4 e3 7d 39 e6 05 \tvextracti128 $0x5,%ymm4,%xmm6",}, +{{0x62, 0x03, 0x7d, 0x4f, 0x39, 0xca, 0x12, }, 7, 0, "", "", +"62 03 7d 4f 39 ca 12 \tvextracti32x4 $0x12,%zmm25,%xmm26{%k7}",}, +{{0x62, 0x03, 0xfd, 0x4f, 0x39, 0xca, 0x12, }, 7, 0, "", "", +"62 03 fd 4f 39 ca 12 \tvextracti64x2 $0x12,%zmm25,%xmm26{%k7}",}, +{{0x62, 0x03, 0x15, 0x47, 0x3a, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 15 47 3a f4 12 \tvinserti32x8 $0x12,%ymm28,%zmm29,%zmm30{%k7}",}, +{{0x62, 0x03, 0xad, 0x47, 0x3a, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 47 3a d9 12 \tvinserti64x4 $0x12,%ymm25,%zmm26,%zmm27{%k7}",}, +{{0x62, 0x03, 0x7d, 0x4f, 0x3b, 0xee, 0x12, }, 7, 0, "", "", +"62 03 7d 4f 3b ee 12 \tvextracti32x8 $0x12,%zmm29,%ymm30{%k7}",}, +{{0x62, 0x03, 0xfd, 0x4f, 0x3b, 0xd3, 0x12, }, 7, 0, "", "", +"62 03 fd 4f 3b d3 12 \tvextracti64x4 $0x12,%zmm26,%ymm27{%k7}",}, +{{0x62, 0x93, 0x0d, 0x40, 0x3e, 0xed, 0x12, }, 7, 0, "", "", +"62 93 0d 40 3e ed 12 \tvpcmpub $0x12,%zmm29,%zmm30,%k5",}, +{{0x62, 0x93, 0xa5, 0x40, 0x3e, 0xea, 0x12, }, 7, 0, "", "", +"62 93 a5 40 3e ea 12 \tvpcmpuw $0x12,%zmm26,%zmm27,%k5",}, +{{0x62, 0x93, 0x0d, 0x40, 0x3f, 0xed, 0x12, }, 7, 0, "", "", +"62 93 0d 40 3f ed 12 \tvpcmpb $0x12,%zmm29,%zmm30,%k5",}, +{{0x62, 0x93, 0xa5, 0x40, 0x3f, 0xea, 0x12, }, 7, 0, "", "", +"62 93 a5 40 3f ea 12 \tvpcmpw $0x12,%zmm26,%zmm27,%k5",}, +{{0xc4, 0xe3, 0x4d, 0x42, 0xd4, 0x05, }, 6, 0, "", "", +"c4 e3 4d 42 d4 05 \tvmpsadbw $0x5,%ymm4,%ymm6,%ymm2",}, +{{0x62, 0xf3, 0x55, 0x48, 0x42, 0xf4, 0x12, }, 7, 0, "", "", +"62 f3 55 48 42 f4 12 \tvdbpsadbw $0x12,%zmm4,%zmm5,%zmm6",}, +{{0x62, 0x03, 0x2d, 0x40, 0x43, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 2d 40 43 d9 12 \tvshufi32x4 $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x62, 0x03, 0x95, 0x40, 0x43, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 95 40 43 f4 12 \tvshufi64x2 $0x12,%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x03, 0x2d, 0x40, 0x50, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 2d 40 50 d9 12 \tvrangeps $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x62, 0x03, 0x95, 0x40, 0x50, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 95 40 50 f4 12 \tvrangepd $0x12,%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x03, 0x2d, 0x00, 0x51, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 2d 00 51 d9 12 \tvrangess $0x12,%xmm25,%xmm26,%xmm27",}, +{{0x62, 0x03, 0x95, 0x00, 0x51, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 95 00 51 f4 12 \tvrangesd $0x12,%xmm28,%xmm29,%xmm30",}, +{{0x62, 0x03, 0x15, 0x40, 0x54, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 15 40 54 f4 12 \tvfixupimmps $0x12,%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x03, 0xad, 0x40, 0x54, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 40 54 d9 12 \tvfixupimmpd $0x12,%zmm25,%zmm26,%zmm27",}, +{{0x62, 0x03, 0x15, 0x07, 0x55, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 15 07 55 f4 12 \tvfixupimmss $0x12,%xmm28,%xmm29,%xmm30{%k7}",}, +{{0x62, 0x03, 0xad, 0x07, 0x55, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 ad 07 55 d9 12 \tvfixupimmsd $0x12,%xmm25,%xmm26,%xmm27{%k7}",}, +{{0x62, 0x03, 0x7d, 0x48, 0x56, 0xda, 0x12, }, 7, 0, "", "", +"62 03 7d 48 56 da 12 \tvreduceps $0x12,%zmm26,%zmm27",}, +{{0x62, 0x03, 0xfd, 0x48, 0x56, 0xf5, 0x12, }, 7, 0, "", "", +"62 03 fd 48 56 f5 12 \tvreducepd $0x12,%zmm29,%zmm30",}, +{{0x62, 0x03, 0x2d, 0x00, 0x57, 0xd9, 0x12, }, 7, 0, "", "", +"62 03 2d 00 57 d9 12 \tvreducess $0x12,%xmm25,%xmm26,%xmm27",}, +{{0x62, 0x03, 0x95, 0x00, 0x57, 0xf4, 0x12, }, 7, 0, "", "", +"62 03 95 00 57 f4 12 \tvreducesd $0x12,%xmm28,%xmm29,%xmm30",}, +{{0x62, 0x93, 0x7d, 0x48, 0x66, 0xeb, 0x12, }, 7, 0, "", "", +"62 93 7d 48 66 eb 12 \tvfpclassps $0x12,%zmm27,%k5",}, +{{0x62, 0x93, 0xfd, 0x48, 0x66, 0xee, 0x12, }, 7, 0, "", "", +"62 93 fd 48 66 ee 12 \tvfpclasspd $0x12,%zmm30,%k5",}, +{{0x62, 0x93, 0x7d, 0x08, 0x67, 0xeb, 0x12, }, 7, 0, "", "", +"62 93 7d 08 67 eb 12 \tvfpclassss $0x12,%xmm27,%k5",}, +{{0x62, 0x93, 0xfd, 0x08, 0x67, 0xee, 0x12, }, 7, 0, "", "", +"62 93 fd 08 67 ee 12 \tvfpclasssd $0x12,%xmm30,%k5",}, +{{0x62, 0x91, 0x2d, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "", +"62 91 2d 40 72 c1 12 \tvprord $0x12,%zmm25,%zmm26",}, +{{0x62, 0x91, 0xad, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "", +"62 91 ad 40 72 c1 12 \tvprorq $0x12,%zmm25,%zmm26",}, +{{0x62, 0x91, 0x0d, 0x40, 0x72, 0xcd, 0x12, }, 7, 0, "", "", +"62 91 0d 40 72 cd 12 \tvprold $0x12,%zmm29,%zmm30",}, +{{0x62, 0x91, 0x8d, 0x40, 0x72, 0xcd, 0x12, }, 7, 0, "", "", +"62 91 8d 40 72 cd 12 \tvprolq $0x12,%zmm29,%zmm30",}, +{{0x0f, 0x72, 0xe6, 0x02, }, 4, 0, "", "", +"0f 72 e6 02 \tpsrad $0x2,%mm6",}, +{{0xc5, 0xed, 0x72, 0xe6, 0x05, }, 5, 0, "", "", +"c5 ed 72 e6 05 \tvpsrad $0x5,%ymm6,%ymm2",}, +{{0x62, 0x91, 0x4d, 0x40, 0x72, 0xe2, 0x05, }, 7, 0, "", "", +"62 91 4d 40 72 e2 05 \tvpsrad $0x5,%zmm26,%zmm22",}, +{{0x62, 0x91, 0xcd, 0x40, 0x72, 0xe2, 0x05, }, 7, 0, "", "", +"62 91 cd 40 72 e2 05 \tvpsraq $0x5,%zmm26,%zmm22",}, +{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 7d 41 c6 8c fe 7b 00 00 00 \tvgatherpf0dps 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 fd 41 c6 8c fe 7b 00 00 00 \tvgatherpf0dpd 0x7b(%r14,%ymm31,8){%k1}",}, +{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 7d 41 c6 94 fe 7b 00 00 00 \tvgatherpf1dps 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 fd 41 c6 94 fe 7b 00 00 00 \tvgatherpf1dpd 0x7b(%r14,%ymm31,8){%k1}",}, +{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 7d 41 c6 ac fe 7b 00 00 00 \tvscatterpf0dps 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 fd 41 c6 ac fe 7b 00 00 00 \tvscatterpf0dpd 0x7b(%r14,%ymm31,8){%k1}",}, +{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 7d 41 c6 b4 fe 7b 00 00 00 \tvscatterpf1dps 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 fd 41 c6 b4 fe 7b 00 00 00 \tvscatterpf1dpd 0x7b(%r14,%ymm31,8){%k1}",}, +{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 7d 41 c7 8c fe 7b 00 00 00 \tvgatherpf0qps 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 fd 41 c7 8c fe 7b 00 00 00 \tvgatherpf0qpd 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 7d 41 c7 94 fe 7b 00 00 00 \tvgatherpf1qps 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 fd 41 c7 94 fe 7b 00 00 00 \tvgatherpf1qpd 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 7d 41 c7 ac fe 7b 00 00 00 \tvscatterpf0qps 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 fd 41 c7 ac fe 7b 00 00 00 \tvscatterpf0qpd 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 7d 41 c7 b4 fe 7b 00 00 00 \tvscatterpf1qps 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "", +"62 92 fd 41 c7 b4 fe 7b 00 00 00 \tvscatterpf1qpd 0x7b(%r14,%zmm31,8){%k1}",}, +{{0x62, 0x01, 0x95, 0x40, 0x58, 0xf4, }, 6, 0, "", "", +"62 01 95 40 58 f4 \tvaddpd %zmm28,%zmm29,%zmm30",}, +{{0x62, 0x01, 0x95, 0x47, 0x58, 0xf4, }, 6, 0, "", "", +"62 01 95 47 58 f4 \tvaddpd %zmm28,%zmm29,%zmm30{%k7}",}, +{{0x62, 0x01, 0x95, 0xc7, 0x58, 0xf4, }, 6, 0, "", "", +"62 01 95 c7 58 f4 \tvaddpd %zmm28,%zmm29,%zmm30{%k7}{z}",}, +{{0x62, 0x01, 0x95, 0x10, 0x58, 0xf4, }, 6, 0, "", "", +"62 01 95 10 58 f4 \tvaddpd {rn-sae},%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x01, 0x95, 0x50, 0x58, 0xf4, }, 6, 0, "", "", +"62 01 95 50 58 f4 \tvaddpd {ru-sae},%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x01, 0x95, 0x30, 0x58, 0xf4, }, 6, 0, "", "", +"62 01 95 30 58 f4 \tvaddpd {rd-sae},%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x01, 0x95, 0x70, 0x58, 0xf4, }, 6, 0, "", "", +"62 01 95 70 58 f4 \tvaddpd {rz-sae},%zmm28,%zmm29,%zmm30",}, +{{0x62, 0x61, 0x95, 0x40, 0x58, 0x31, }, 6, 0, "", "", +"62 61 95 40 58 31 \tvaddpd (%rcx),%zmm29,%zmm30",}, +{{0x62, 0x21, 0x95, 0x40, 0x58, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 11, 0, "", "", +"62 21 95 40 58 b4 f0 23 01 00 00 \tvaddpd 0x123(%rax,%r14,8),%zmm29,%zmm30",}, +{{0x62, 0x61, 0x95, 0x50, 0x58, 0x31, }, 6, 0, "", "", +"62 61 95 50 58 31 \tvaddpd (%rcx){1to8},%zmm29,%zmm30",}, +{{0x62, 0x61, 0x95, 0x40, 0x58, 0x72, 0x7f, }, 7, 0, "", "", +"62 61 95 40 58 72 7f \tvaddpd 0x1fc0(%rdx),%zmm29,%zmm30",}, +{{0x62, 0x61, 0x95, 0x50, 0x58, 0x72, 0x7f, }, 7, 0, "", "", +"62 61 95 50 58 72 7f \tvaddpd 0x3f8(%rdx){1to8},%zmm29,%zmm30",}, +{{0x62, 0xf1, 0x0c, 0x50, 0xc2, 0x6a, 0x7f, 0x08, }, 8, 0, "", "", +"62 f1 0c 50 c2 6a 7f 08 \tvcmpeq_uqps 0x1fc(%rdx){1to16},%zmm30,%k5",}, +{{0x62, 0xb1, 0x97, 0x07, 0xc2, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x01, }, 12, 0, "", "", +"62 b1 97 07 c2 ac f0 23 01 00 00 01 \tvcmpltsd 0x123(%rax,%r14,8),%xmm29,%k5{%k7}",}, +{{0x62, 0x91, 0x97, 0x17, 0xc2, 0xec, 0x02, }, 7, 0, "", "", +"62 91 97 17 c2 ec 02 \tvcmplesd {sae},%xmm28,%xmm29,%k5{%k7}",}, +{{0x62, 0x23, 0x15, 0x07, 0x27, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x5b, }, 12, 0, "", "", +"62 23 15 07 27 b4 f0 23 01 00 00 5b \tvgetmantss $0x5b,0x123(%rax,%r14,8),%xmm29,%xmm30{%k7}",}, {{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", "f3 0f 1b 00 \tbndmk (%rax),%bnd0",}, {{0xf3, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "", @@ -325,19 +1257,19 @@ {{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", "0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax,%rcx,1)",}, {{0xf2, 0xe8, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "call", "unconditional", -"f2 e8 00 00 00 00 \tbnd callq 3f6 <main+0x3f6>",}, +"f2 e8 00 00 00 00 \tbnd callq f22 <main+0xf22>",}, {{0x67, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", "67 f2 ff 10 \tbnd callq *(%eax)",}, {{0xf2, 0xc3, }, 2, 0, "ret", "indirect", "f2 c3 \tbnd retq ",}, {{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", -"f2 e9 00 00 00 00 \tbnd jmpq 402 <main+0x402>",}, +"f2 e9 00 00 00 00 \tbnd jmpq f2e <main+0xf2e>",}, {{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", -"f2 e9 00 00 00 00 \tbnd jmpq 408 <main+0x408>",}, +"f2 e9 00 00 00 00 \tbnd jmpq f34 <main+0xf34>",}, {{0x67, 0xf2, 0xff, 0x21, }, 4, 0, "jmp", "indirect", "67 f2 ff 21 \tbnd jmpq *(%ecx)",}, {{0xf2, 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, }, 7, 0, "jcc", "conditional", -"f2 0f 85 00 00 00 00 \tbnd jne 413 <main+0x413>",}, +"f2 0f 85 00 00 00 00 \tbnd jne f3f <main+0xf3f>",}, {{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "", "0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, {{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", @@ -764,5 +1696,3 @@ "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",}, {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",}, -{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", -"66 0f ae f8 \tpcommit ",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index 41b1b1c62660..979487dae8d4 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -19,8 +19,882 @@ int main(void) /* Following line is a marker for the awk script - do not change */ asm volatile("rdtsc"); /* Start here */ + /* Test fix for vcvtph2ps in x86-opcode-map.txt */ + + asm volatile("vcvtph2ps %xmm3,%ymm5"); + #ifdef __x86_64__ + /* AVX-512: Instructions with the same op codes as Mask Instructions */ + + asm volatile("cmovno %rax,%rbx"); + asm volatile("cmovno 0x12345678(%rax),%rcx"); + asm volatile("cmovno 0x12345678(%rax),%cx"); + + asm volatile("cmove %rax,%rbx"); + asm volatile("cmove 0x12345678(%rax),%rcx"); + asm volatile("cmove 0x12345678(%rax),%cx"); + + asm volatile("seto 0x12345678(%rax)"); + asm volatile("setno 0x12345678(%rax)"); + asm volatile("setb 0x12345678(%rax)"); + asm volatile("setc 0x12345678(%rax)"); + asm volatile("setnae 0x12345678(%rax)"); + asm volatile("setae 0x12345678(%rax)"); + asm volatile("setnb 0x12345678(%rax)"); + asm volatile("setnc 0x12345678(%rax)"); + asm volatile("sets 0x12345678(%rax)"); + asm volatile("setns 0x12345678(%rax)"); + + /* AVX-512: Mask Instructions */ + + asm volatile("kandw %k7,%k6,%k5"); + asm volatile("kandq %k7,%k6,%k5"); + asm volatile("kandb %k7,%k6,%k5"); + asm volatile("kandd %k7,%k6,%k5"); + + asm volatile("kandnw %k7,%k6,%k5"); + asm volatile("kandnq %k7,%k6,%k5"); + asm volatile("kandnb %k7,%k6,%k5"); + asm volatile("kandnd %k7,%k6,%k5"); + + asm volatile("knotw %k7,%k6"); + asm volatile("knotq %k7,%k6"); + asm volatile("knotb %k7,%k6"); + asm volatile("knotd %k7,%k6"); + + asm volatile("korw %k7,%k6,%k5"); + asm volatile("korq %k7,%k6,%k5"); + asm volatile("korb %k7,%k6,%k5"); + asm volatile("kord %k7,%k6,%k5"); + + asm volatile("kxnorw %k7,%k6,%k5"); + asm volatile("kxnorq %k7,%k6,%k5"); + asm volatile("kxnorb %k7,%k6,%k5"); + asm volatile("kxnord %k7,%k6,%k5"); + + asm volatile("kxorw %k7,%k6,%k5"); + asm volatile("kxorq %k7,%k6,%k5"); + asm volatile("kxorb %k7,%k6,%k5"); + asm volatile("kxord %k7,%k6,%k5"); + + asm volatile("kaddw %k7,%k6,%k5"); + asm volatile("kaddq %k7,%k6,%k5"); + asm volatile("kaddb %k7,%k6,%k5"); + asm volatile("kaddd %k7,%k6,%k5"); + + asm volatile("kunpckbw %k7,%k6,%k5"); + asm volatile("kunpckwd %k7,%k6,%k5"); + asm volatile("kunpckdq %k7,%k6,%k5"); + + asm volatile("kmovw %k6,%k5"); + asm volatile("kmovw (%rcx),%k5"); + asm volatile("kmovw 0x123(%rax,%r14,8),%k5"); + asm volatile("kmovw %k5,(%rcx)"); + asm volatile("kmovw %k5,0x123(%rax,%r14,8)"); + asm volatile("kmovw %eax,%k5"); + asm volatile("kmovw %ebp,%k5"); + asm volatile("kmovw %r13d,%k5"); + asm volatile("kmovw %k5,%eax"); + asm volatile("kmovw %k5,%ebp"); + asm volatile("kmovw %k5,%r13d"); + + asm volatile("kmovq %k6,%k5"); + asm volatile("kmovq (%rcx),%k5"); + asm volatile("kmovq 0x123(%rax,%r14,8),%k5"); + asm volatile("kmovq %k5,(%rcx)"); + asm volatile("kmovq %k5,0x123(%rax,%r14,8)"); + asm volatile("kmovq %rax,%k5"); + asm volatile("kmovq %rbp,%k5"); + asm volatile("kmovq %r13,%k5"); + asm volatile("kmovq %k5,%rax"); + asm volatile("kmovq %k5,%rbp"); + asm volatile("kmovq %k5,%r13"); + + asm volatile("kmovb %k6,%k5"); + asm volatile("kmovb (%rcx),%k5"); + asm volatile("kmovb 0x123(%rax,%r14,8),%k5"); + asm volatile("kmovb %k5,(%rcx)"); + asm volatile("kmovb %k5,0x123(%rax,%r14,8)"); + asm volatile("kmovb %eax,%k5"); + asm volatile("kmovb %ebp,%k5"); + asm volatile("kmovb %r13d,%k5"); + asm volatile("kmovb %k5,%eax"); + asm volatile("kmovb %k5,%ebp"); + asm volatile("kmovb %k5,%r13d"); + + asm volatile("kmovd %k6,%k5"); + asm volatile("kmovd (%rcx),%k5"); + asm volatile("kmovd 0x123(%rax,%r14,8),%k5"); + asm volatile("kmovd %k5,(%rcx)"); + asm volatile("kmovd %k5,0x123(%rax,%r14,8)"); + asm volatile("kmovd %eax,%k5"); + asm volatile("kmovd %ebp,%k5"); + asm volatile("kmovd %r13d,%k5"); + asm volatile("kmovd %k5,%eax"); + asm volatile("kmovd %k5,%ebp"); + asm volatile("kmovd %k5,%r13d"); + + asm volatile("kortestw %k6,%k5"); + asm volatile("kortestq %k6,%k5"); + asm volatile("kortestb %k6,%k5"); + asm volatile("kortestd %k6,%k5"); + + asm volatile("ktestw %k6,%k5"); + asm volatile("ktestq %k6,%k5"); + asm volatile("ktestb %k6,%k5"); + asm volatile("ktestd %k6,%k5"); + + asm volatile("kshiftrw $0x12,%k6,%k5"); + asm volatile("kshiftrq $0x5b,%k6,%k5"); + asm volatile("kshiftlw $0x12,%k6,%k5"); + asm volatile("kshiftlq $0x5b,%k6,%k5"); + + /* AVX-512: Op code 0f 5b */ + asm volatile("vcvtdq2ps %xmm5,%xmm6"); + asm volatile("vcvtqq2ps %zmm29,%ymm6{%k7}"); + asm volatile("vcvtps2dq %xmm5,%xmm6"); + asm volatile("vcvttps2dq %xmm5,%xmm6"); + + /* AVX-512: Op code 0f 6f */ + + asm volatile("movq %mm0,%mm4"); + asm volatile("vmovdqa %ymm4,%ymm6"); + asm volatile("vmovdqa32 %zmm25,%zmm26"); + asm volatile("vmovdqa64 %zmm25,%zmm26"); + asm volatile("vmovdqu %ymm4,%ymm6"); + asm volatile("vmovdqu32 %zmm29,%zmm30"); + asm volatile("vmovdqu64 %zmm25,%zmm26"); + asm volatile("vmovdqu8 %zmm29,%zmm30"); + asm volatile("vmovdqu16 %zmm25,%zmm26"); + + /* AVX-512: Op code 0f 78 */ + + asm volatile("vmread %rax,%rbx"); + asm volatile("vcvttps2udq %zmm25,%zmm26"); + asm volatile("vcvttpd2udq %zmm29,%ymm6{%k7}"); + asm volatile("vcvttsd2usi %xmm6,%rax"); + asm volatile("vcvttss2usi %xmm6,%rax"); + asm volatile("vcvttps2uqq %ymm5,%zmm26{%k7}"); + asm volatile("vcvttpd2uqq %zmm29,%zmm30"); + + /* AVX-512: Op code 0f 79 */ + + asm volatile("vmwrite %rax,%rbx"); + asm volatile("vcvtps2udq %zmm25,%zmm26"); + asm volatile("vcvtpd2udq %zmm29,%ymm6{%k7}"); + asm volatile("vcvtsd2usi %xmm6,%rax"); + asm volatile("vcvtss2usi %xmm6,%rax"); + asm volatile("vcvtps2uqq %ymm5,%zmm26{%k7}"); + asm volatile("vcvtpd2uqq %zmm29,%zmm30"); + + /* AVX-512: Op code 0f 7a */ + + asm volatile("vcvtudq2pd %ymm5,%zmm29{%k7}"); + asm volatile("vcvtuqq2pd %zmm25,%zmm26"); + asm volatile("vcvtudq2ps %zmm29,%zmm30"); + asm volatile("vcvtuqq2ps %zmm25,%ymm26{%k7}"); + asm volatile("vcvttps2qq %ymm25,%zmm26{%k7}"); + asm volatile("vcvttpd2qq %zmm29,%zmm30"); + + /* AVX-512: Op code 0f 7b */ + + asm volatile("vcvtusi2sd %eax,%xmm5,%xmm6"); + asm volatile("vcvtusi2ss %eax,%xmm5,%xmm6"); + asm volatile("vcvtps2qq %ymm5,%zmm26{%k7}"); + asm volatile("vcvtpd2qq %zmm29,%zmm30"); + + /* AVX-512: Op code 0f 7f */ + + asm volatile("movq.s %mm0,%mm4"); + asm volatile("vmovdqa %ymm8,%ymm6"); + asm volatile("vmovdqa32.s %zmm25,%zmm26"); + asm volatile("vmovdqa64.s %zmm25,%zmm26"); + asm volatile("vmovdqu %ymm8,%ymm6"); + asm volatile("vmovdqu32.s %zmm25,%zmm26"); + asm volatile("vmovdqu64.s %zmm25,%zmm26"); + asm volatile("vmovdqu8.s %zmm30,(%rcx)"); + asm volatile("vmovdqu16.s %zmm25,%zmm26"); + + /* AVX-512: Op code 0f db */ + + asm volatile("pand %mm1,%mm2"); + asm volatile("pand %xmm1,%xmm2"); + asm volatile("vpand %ymm4,%ymm6,%ymm2"); + asm volatile("vpandd %zmm24,%zmm25,%zmm26"); + asm volatile("vpandq %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f df */ + + asm volatile("pandn %mm1,%mm2"); + asm volatile("pandn %xmm1,%xmm2"); + asm volatile("vpandn %ymm4,%ymm6,%ymm2"); + asm volatile("vpandnd %zmm24,%zmm25,%zmm26"); + asm volatile("vpandnq %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f e6 */ + + asm volatile("vcvttpd2dq %xmm1,%xmm2"); + asm volatile("vcvtdq2pd %xmm5,%xmm6"); + asm volatile("vcvtdq2pd %ymm5,%zmm26{%k7}"); + asm volatile("vcvtqq2pd %zmm25,%zmm26"); + asm volatile("vcvtpd2dq %xmm1,%xmm2"); + + /* AVX-512: Op code 0f eb */ + + asm volatile("por %mm4,%mm6"); + asm volatile("vpor %ymm4,%ymm6,%ymm2"); + asm volatile("vpord %zmm24,%zmm25,%zmm26"); + asm volatile("vporq %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f ef */ + + asm volatile("pxor %mm4,%mm6"); + asm volatile("vpxor %ymm4,%ymm6,%ymm2"); + asm volatile("vpxord %zmm24,%zmm25,%zmm26"); + asm volatile("vpxorq %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f 38 10 */ + + asm volatile("pblendvb %xmm1,%xmm0"); + asm volatile("vpsrlvw %zmm27,%zmm28,%zmm29"); + asm volatile("vpmovuswb %zmm28,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 11 */ + + asm volatile("vpmovusdb %zmm28,%xmm6{%k7}"); + asm volatile("vpsravw %zmm27,%zmm28,%zmm29"); + + /* AVX-512: Op code 0f 38 12 */ + + asm volatile("vpmovusqb %zmm27,%xmm6{%k7}"); + asm volatile("vpsllvw %zmm27,%zmm28,%zmm29"); + + /* AVX-512: Op code 0f 38 13 */ + + asm volatile("vcvtph2ps %xmm3,%ymm5"); + asm volatile("vcvtph2ps %ymm5,%zmm27{%k7}"); + asm volatile("vpmovusdw %zmm27,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 14 */ + + asm volatile("blendvps %xmm1,%xmm0"); + asm volatile("vpmovusqw %zmm27,%xmm6{%k7}"); + asm volatile("vprorvd %zmm27,%zmm28,%zmm29"); + asm volatile("vprorvq %zmm27,%zmm28,%zmm29"); + + /* AVX-512: Op code 0f 38 15 */ + + asm volatile("blendvpd %xmm1,%xmm0"); + asm volatile("vpmovusqd %zmm27,%ymm6{%k7}"); + asm volatile("vprolvd %zmm27,%zmm28,%zmm29"); + asm volatile("vprolvq %zmm27,%zmm28,%zmm29"); + + /* AVX-512: Op code 0f 38 16 */ + + asm volatile("vpermps %ymm4,%ymm6,%ymm2"); + asm volatile("vpermps %ymm24,%ymm26,%ymm22{%k7}"); + asm volatile("vpermpd %ymm24,%ymm26,%ymm22{%k7}"); + + /* AVX-512: Op code 0f 38 19 */ + + asm volatile("vbroadcastsd %xmm4,%ymm6"); + asm volatile("vbroadcastf32x2 %xmm27,%zmm26"); + + /* AVX-512: Op code 0f 38 1a */ + + asm volatile("vbroadcastf128 (%rcx),%ymm4"); + asm volatile("vbroadcastf32x4 (%rcx),%zmm26"); + asm volatile("vbroadcastf64x2 (%rcx),%zmm26"); + + /* AVX-512: Op code 0f 38 1b */ + + asm volatile("vbroadcastf32x8 (%rcx),%zmm27"); + asm volatile("vbroadcastf64x4 (%rcx),%zmm26"); + + /* AVX-512: Op code 0f 38 1f */ + + asm volatile("vpabsq %zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 20 */ + + asm volatile("vpmovsxbw %xmm4,%xmm5"); + asm volatile("vpmovswb %zmm27,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 21 */ + + asm volatile("vpmovsxbd %xmm4,%ymm6"); + asm volatile("vpmovsdb %zmm27,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 22 */ + + asm volatile("vpmovsxbq %xmm4,%ymm4"); + asm volatile("vpmovsqb %zmm27,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 23 */ + + asm volatile("vpmovsxwd %xmm4,%ymm4"); + asm volatile("vpmovsdw %zmm27,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 24 */ + + asm volatile("vpmovsxwq %xmm4,%ymm6"); + asm volatile("vpmovsqw %zmm27,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 25 */ + + asm volatile("vpmovsxdq %xmm4,%ymm4"); + asm volatile("vpmovsqd %zmm27,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 26 */ + + asm volatile("vptestmb %zmm27,%zmm28,%k5"); + asm volatile("vptestmw %zmm27,%zmm28,%k5"); + asm volatile("vptestnmb %zmm26,%zmm27,%k5"); + asm volatile("vptestnmw %zmm26,%zmm27,%k5"); + + /* AVX-512: Op code 0f 38 27 */ + + asm volatile("vptestmd %zmm27,%zmm28,%k5"); + asm volatile("vptestmq %zmm27,%zmm28,%k5"); + asm volatile("vptestnmd %zmm26,%zmm27,%k5"); + asm volatile("vptestnmq %zmm26,%zmm27,%k5"); + + /* AVX-512: Op code 0f 38 28 */ + + asm volatile("vpmuldq %ymm4,%ymm6,%ymm2"); + asm volatile("vpmovm2b %k5,%zmm28"); + asm volatile("vpmovm2w %k5,%zmm28"); + + /* AVX-512: Op code 0f 38 29 */ + + asm volatile("vpcmpeqq %ymm4,%ymm6,%ymm2"); + asm volatile("vpmovb2m %zmm28,%k5"); + asm volatile("vpmovw2m %zmm28,%k5"); + + /* AVX-512: Op code 0f 38 2a */ + + asm volatile("vmovntdqa (%rcx),%ymm4"); + asm volatile("vpbroadcastmb2q %k6,%zmm30"); + + /* AVX-512: Op code 0f 38 2c */ + + asm volatile("vmaskmovps (%rcx),%ymm4,%ymm6"); + asm volatile("vscalefps %zmm24,%zmm25,%zmm26"); + asm volatile("vscalefpd %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f 38 2d */ + + asm volatile("vmaskmovpd (%rcx),%ymm4,%ymm6"); + asm volatile("vscalefss %xmm24,%xmm25,%xmm26{%k7}"); + asm volatile("vscalefsd %xmm24,%xmm25,%xmm26{%k7}"); + + /* AVX-512: Op code 0f 38 30 */ + + asm volatile("vpmovzxbw %xmm4,%ymm4"); + asm volatile("vpmovwb %zmm27,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 31 */ + + asm volatile("vpmovzxbd %xmm4,%ymm6"); + asm volatile("vpmovdb %zmm27,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 32 */ + + asm volatile("vpmovzxbq %xmm4,%ymm4"); + asm volatile("vpmovqb %zmm27,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 33 */ + + asm volatile("vpmovzxwd %xmm4,%ymm4"); + asm volatile("vpmovdw %zmm27,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 34 */ + + asm volatile("vpmovzxwq %xmm4,%ymm6"); + asm volatile("vpmovqw %zmm27,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 35 */ + + asm volatile("vpmovzxdq %xmm4,%ymm4"); + asm volatile("vpmovqd %zmm27,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 38 */ + + asm volatile("vpermd %ymm4,%ymm6,%ymm2"); + asm volatile("vpermd %ymm24,%ymm26,%ymm22{%k7}"); + asm volatile("vpermq %ymm24,%ymm26,%ymm22{%k7}"); + + /* AVX-512: Op code 0f 38 38 */ + + asm volatile("vpminsb %ymm4,%ymm6,%ymm2"); + asm volatile("vpmovm2d %k5,%zmm28"); + asm volatile("vpmovm2q %k5,%zmm28"); + + /* AVX-512: Op code 0f 38 39 */ + + asm volatile("vpminsd %xmm1,%xmm2,%xmm3"); + asm volatile("vpminsd %zmm24,%zmm25,%zmm26"); + asm volatile("vpminsq %zmm24,%zmm25,%zmm26"); + asm volatile("vpmovd2m %zmm28,%k5"); + asm volatile("vpmovq2m %zmm28,%k5"); + + /* AVX-512: Op code 0f 38 3a */ + + asm volatile("vpminuw %ymm4,%ymm6,%ymm2"); + asm volatile("vpbroadcastmw2d %k6,%zmm28"); + + /* AVX-512: Op code 0f 38 3b */ + + asm volatile("vpminud %ymm4,%ymm6,%ymm2"); + asm volatile("vpminud %zmm24,%zmm25,%zmm26"); + asm volatile("vpminuq %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f 38 3d */ + + asm volatile("vpmaxsd %ymm4,%ymm6,%ymm2"); + asm volatile("vpmaxsd %zmm24,%zmm25,%zmm26"); + asm volatile("vpmaxsq %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f 38 3f */ + + asm volatile("vpmaxud %ymm4,%ymm6,%ymm2"); + asm volatile("vpmaxud %zmm24,%zmm25,%zmm26"); + asm volatile("vpmaxuq %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f 38 42 */ + + asm volatile("vpmulld %ymm4,%ymm6,%ymm2"); + asm volatile("vpmulld %zmm24,%zmm25,%zmm26"); + asm volatile("vpmullq %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f 38 42 */ + + asm volatile("vgetexpps %zmm25,%zmm26"); + asm volatile("vgetexppd %zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 43 */ + + asm volatile("vgetexpss %xmm24,%xmm25,%xmm26{%k7}"); + asm volatile("vgetexpsd %xmm28,%xmm29,%xmm30{%k7}"); + + /* AVX-512: Op code 0f 38 44 */ + + asm volatile("vplzcntd %zmm27,%zmm28"); + asm volatile("vplzcntq %zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 46 */ + + asm volatile("vpsravd %ymm4,%ymm6,%ymm2"); + asm volatile("vpsravd %zmm24,%zmm25,%zmm26"); + asm volatile("vpsravq %zmm24,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f 38 4c */ + + asm volatile("vrcp14ps %zmm25,%zmm26"); + asm volatile("vrcp14pd %zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 4d */ + + asm volatile("vrcp14ss %xmm24,%xmm25,%xmm26{%k7}"); + asm volatile("vrcp14sd %xmm24,%xmm25,%xmm26{%k7}"); + + /* AVX-512: Op code 0f 38 4e */ + + asm volatile("vrsqrt14ps %zmm25,%zmm26"); + asm volatile("vrsqrt14pd %zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 4f */ + + asm volatile("vrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}"); + asm volatile("vrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}"); + + /* AVX-512: Op code 0f 38 59 */ + + asm volatile("vpbroadcastq %xmm4,%xmm6"); + asm volatile("vbroadcasti32x2 %xmm27,%zmm26"); + + /* AVX-512: Op code 0f 38 5a */ + + asm volatile("vbroadcasti128 (%rcx),%ymm4"); + asm volatile("vbroadcasti32x4 (%rcx),%zmm26"); + asm volatile("vbroadcasti64x2 (%rcx),%zmm26"); + + /* AVX-512: Op code 0f 38 5b */ + + asm volatile("vbroadcasti32x8 (%rcx),%zmm28"); + asm volatile("vbroadcasti64x4 (%rcx),%zmm26"); + + /* AVX-512: Op code 0f 38 64 */ + + asm volatile("vpblendmd %zmm26,%zmm27,%zmm28"); + asm volatile("vpblendmq %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 65 */ + + asm volatile("vblendmps %zmm24,%zmm25,%zmm26"); + asm volatile("vblendmpd %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 66 */ + + asm volatile("vpblendmb %zmm26,%zmm27,%zmm28"); + asm volatile("vpblendmw %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 75 */ + + asm volatile("vpermi2b %zmm24,%zmm25,%zmm26"); + asm volatile("vpermi2w %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 76 */ + + asm volatile("vpermi2d %zmm26,%zmm27,%zmm28"); + asm volatile("vpermi2q %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 77 */ + + asm volatile("vpermi2ps %zmm26,%zmm27,%zmm28"); + asm volatile("vpermi2pd %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 7a */ + + asm volatile("vpbroadcastb %eax,%xmm30"); + + /* AVX-512: Op code 0f 38 7b */ + + asm volatile("vpbroadcastw %eax,%xmm30"); + + /* AVX-512: Op code 0f 38 7c */ + + asm volatile("vpbroadcastd %eax,%xmm30"); + asm volatile("vpbroadcastq %rax,%zmm30"); + + /* AVX-512: Op code 0f 38 7d */ + + asm volatile("vpermt2b %zmm26,%zmm27,%zmm28"); + asm volatile("vpermt2w %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 7e */ + + asm volatile("vpermt2d %zmm26,%zmm27,%zmm28"); + asm volatile("vpermt2q %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 7f */ + + asm volatile("vpermt2ps %zmm26,%zmm27,%zmm28"); + asm volatile("vpermt2pd %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 83 */ + + asm volatile("vpmultishiftqb %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 88 */ + + asm volatile("vexpandps (%rcx),%zmm26"); + asm volatile("vexpandpd (%rcx),%zmm28"); + + /* AVX-512: Op code 0f 38 89 */ + + asm volatile("vpexpandd (%rcx),%zmm28"); + asm volatile("vpexpandq (%rcx),%zmm26"); + + /* AVX-512: Op code 0f 38 8a */ + + asm volatile("vcompressps %zmm28,(%rcx)"); + asm volatile("vcompresspd %zmm28,(%rcx)"); + + /* AVX-512: Op code 0f 38 8b */ + + asm volatile("vpcompressd %zmm28,(%rcx)"); + asm volatile("vpcompressq %zmm26,(%rcx)"); + + /* AVX-512: Op code 0f 38 8d */ + + asm volatile("vpermb %zmm26,%zmm27,%zmm28"); + asm volatile("vpermw %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 90 */ + + asm volatile("vpgatherdd %xmm2,0x02(%rbp,%xmm7,2),%xmm1"); + asm volatile("vpgatherdq %xmm2,0x04(%rbp,%xmm7,2),%xmm1"); + asm volatile("vpgatherdd 0x7b(%rbp,%zmm27,8),%zmm26{%k1}"); + asm volatile("vpgatherdq 0x7b(%rbp,%ymm27,8),%zmm26{%k1}"); + + /* AVX-512: Op code 0f 38 91 */ + + asm volatile("vpgatherqd %xmm2,0x02(%rbp,%xmm7,2),%xmm1"); + asm volatile("vpgatherqq %xmm2,0x02(%rbp,%xmm7,2),%xmm1"); + asm volatile("vpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}"); + asm volatile("vpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}"); + + /* AVX-512: Op code 0f 38 a0 */ + + asm volatile("vpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}"); + asm volatile("vpscatterdq %zmm26,0x7b(%rbp,%ymm27,8){%k1}"); + + /* AVX-512: Op code 0f 38 a1 */ + + asm volatile("vpscatterqd %ymm6,0x7b(%rbp,%zmm29,8){%k1}"); + asm volatile("vpscatterqq %ymm6,0x7b(%rbp,%ymm27,8){%k1}"); + + /* AVX-512: Op code 0f 38 a2 */ + + asm volatile("vscatterdps %zmm28,0x7b(%rbp,%zmm29,8){%k1}"); + asm volatile("vscatterdpd %zmm28,0x7b(%rbp,%ymm27,8){%k1}"); + + /* AVX-512: Op code 0f 38 a3 */ + + asm volatile("vscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}"); + asm volatile("vscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}"); + + /* AVX-512: Op code 0f 38 b4 */ + + asm volatile("vpmadd52luq %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 b5 */ + + asm volatile("vpmadd52huq %zmm26,%zmm27,%zmm28"); + + /* AVX-512: Op code 0f 38 c4 */ + + asm volatile("vpconflictd %zmm26,%zmm27"); + asm volatile("vpconflictq %zmm26,%zmm27"); + + /* AVX-512: Op code 0f 38 c8 */ + + asm volatile("vexp2ps %zmm29,%zmm30"); + asm volatile("vexp2pd %zmm26,%zmm27"); + + /* AVX-512: Op code 0f 38 ca */ + + asm volatile("vrcp28ps %zmm29,%zmm30"); + asm volatile("vrcp28pd %zmm26,%zmm27"); + + /* AVX-512: Op code 0f 38 cb */ + + asm volatile("vrcp28ss %xmm28,%xmm29,%xmm30{%k7}"); + asm volatile("vrcp28sd %xmm25,%xmm26,%xmm27{%k7}"); + + /* AVX-512: Op code 0f 38 cc */ + + asm volatile("vrsqrt28ps %zmm29,%zmm30"); + asm volatile("vrsqrt28pd %zmm26,%zmm27"); + + /* AVX-512: Op code 0f 38 cd */ + + asm volatile("vrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}"); + asm volatile("vrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}"); + + /* AVX-512: Op code 0f 3a 03 */ + + asm volatile("valignd $0x12,%zmm28,%zmm29,%zmm30"); + asm volatile("valignq $0x12,%zmm25,%zmm26,%zmm27"); + + /* AVX-512: Op code 0f 3a 08 */ + + asm volatile("vroundps $0x5,%ymm6,%ymm2"); + asm volatile("vrndscaleps $0x12,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f 3a 09 */ + + asm volatile("vroundpd $0x5,%ymm6,%ymm2"); + asm volatile("vrndscalepd $0x12,%zmm25,%zmm26"); + + /* AVX-512: Op code 0f 3a 1a */ + + asm volatile("vroundss $0x5,%xmm4,%xmm6,%xmm2"); + asm volatile("vrndscaless $0x12,%xmm24,%xmm25,%xmm26{%k7}"); + + /* AVX-512: Op code 0f 3a 0b */ + + asm volatile("vroundsd $0x5,%xmm4,%xmm6,%xmm2"); + asm volatile("vrndscalesd $0x12,%xmm24,%xmm25,%xmm26{%k7}"); + + /* AVX-512: Op code 0f 3a 18 */ + + asm volatile("vinsertf128 $0x5,%xmm4,%ymm4,%ymm6"); + asm volatile("vinsertf32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}"); + asm volatile("vinsertf64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}"); + + /* AVX-512: Op code 0f 3a 19 */ + + asm volatile("vextractf128 $0x5,%ymm4,%xmm4"); + asm volatile("vextractf32x4 $0x12,%zmm25,%xmm26{%k7}"); + asm volatile("vextractf64x2 $0x12,%zmm25,%xmm26{%k7}"); + + /* AVX-512: Op code 0f 3a 1a */ + + asm volatile("vinsertf32x8 $0x12,%ymm25,%zmm26,%zmm27{%k7}"); + asm volatile("vinsertf64x4 $0x12,%ymm28,%zmm29,%zmm30{%k7}"); + + /* AVX-512: Op code 0f 3a 1b */ + + asm volatile("vextractf32x8 $0x12,%zmm29,%ymm30{%k7}"); + asm volatile("vextractf64x4 $0x12,%zmm26,%ymm27{%k7}"); + + /* AVX-512: Op code 0f 3a 1e */ + + asm volatile("vpcmpud $0x12,%zmm29,%zmm30,%k5"); + asm volatile("vpcmpuq $0x12,%zmm26,%zmm27,%k5"); + + /* AVX-512: Op code 0f 3a 1f */ + + asm volatile("vpcmpd $0x12,%zmm29,%zmm30,%k5"); + asm volatile("vpcmpq $0x12,%zmm26,%zmm27,%k5"); + + /* AVX-512: Op code 0f 3a 23 */ + + asm volatile("vshuff32x4 $0x12,%zmm28,%zmm29,%zmm30"); + asm volatile("vshuff64x2 $0x12,%zmm25,%zmm26,%zmm27"); + + /* AVX-512: Op code 0f 3a 25 */ + + asm volatile("vpternlogd $0x12,%zmm28,%zmm29,%zmm30"); + asm volatile("vpternlogq $0x12,%zmm28,%zmm29,%zmm30"); + + /* AVX-512: Op code 0f 3a 26 */ + + asm volatile("vgetmantps $0x12,%zmm26,%zmm27"); + asm volatile("vgetmantpd $0x12,%zmm29,%zmm30"); + + /* AVX-512: Op code 0f 3a 27 */ + + asm volatile("vgetmantss $0x12,%xmm25,%xmm26,%xmm27{%k7}"); + asm volatile("vgetmantsd $0x12,%xmm28,%xmm29,%xmm30{%k7}"); + + /* AVX-512: Op code 0f 3a 38 */ + + asm volatile("vinserti128 $0x5,%xmm4,%ymm4,%ymm6"); + asm volatile("vinserti32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}"); + asm volatile("vinserti64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}"); + + /* AVX-512: Op code 0f 3a 39 */ + + asm volatile("vextracti128 $0x5,%ymm4,%xmm6"); + asm volatile("vextracti32x4 $0x12,%zmm25,%xmm26{%k7}"); + asm volatile("vextracti64x2 $0x12,%zmm25,%xmm26{%k7}"); + + /* AVX-512: Op code 0f 3a 3a */ + + asm volatile("vinserti32x8 $0x12,%ymm28,%zmm29,%zmm30{%k7}"); + asm volatile("vinserti64x4 $0x12,%ymm25,%zmm26,%zmm27{%k7}"); + + /* AVX-512: Op code 0f 3a 3b */ + + asm volatile("vextracti32x8 $0x12,%zmm29,%ymm30{%k7}"); + asm volatile("vextracti64x4 $0x12,%zmm26,%ymm27{%k7}"); + + /* AVX-512: Op code 0f 3a 3e */ + + asm volatile("vpcmpub $0x12,%zmm29,%zmm30,%k5"); + asm volatile("vpcmpuw $0x12,%zmm26,%zmm27,%k5"); + + /* AVX-512: Op code 0f 3a 3f */ + + asm volatile("vpcmpb $0x12,%zmm29,%zmm30,%k5"); + asm volatile("vpcmpw $0x12,%zmm26,%zmm27,%k5"); + + /* AVX-512: Op code 0f 3a 43 */ + + asm volatile("vmpsadbw $0x5,%ymm4,%ymm6,%ymm2"); + asm volatile("vdbpsadbw $0x12,%zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 3a 43 */ + + asm volatile("vshufi32x4 $0x12,%zmm25,%zmm26,%zmm27"); + asm volatile("vshufi64x2 $0x12,%zmm28,%zmm29,%zmm30"); + + /* AVX-512: Op code 0f 3a 50 */ + + asm volatile("vrangeps $0x12,%zmm25,%zmm26,%zmm27"); + asm volatile("vrangepd $0x12,%zmm28,%zmm29,%zmm30"); + + /* AVX-512: Op code 0f 3a 51 */ + + asm volatile("vrangess $0x12,%xmm25,%xmm26,%xmm27"); + asm volatile("vrangesd $0x12,%xmm28,%xmm29,%xmm30"); + + /* AVX-512: Op code 0f 3a 54 */ + + asm volatile("vfixupimmps $0x12,%zmm28,%zmm29,%zmm30"); + asm volatile("vfixupimmpd $0x12,%zmm25,%zmm26,%zmm27"); + + /* AVX-512: Op code 0f 3a 55 */ + + asm volatile("vfixupimmss $0x12,%xmm28,%xmm29,%xmm30{%k7}"); + asm volatile("vfixupimmsd $0x12,%xmm25,%xmm26,%xmm27{%k7}"); + + /* AVX-512: Op code 0f 3a 56 */ + + asm volatile("vreduceps $0x12,%zmm26,%zmm27"); + asm volatile("vreducepd $0x12,%zmm29,%zmm30"); + + /* AVX-512: Op code 0f 3a 57 */ + + asm volatile("vreducess $0x12,%xmm25,%xmm26,%xmm27"); + asm volatile("vreducesd $0x12,%xmm28,%xmm29,%xmm30"); + + /* AVX-512: Op code 0f 3a 66 */ + + asm volatile("vfpclassps $0x12,%zmm27,%k5"); + asm volatile("vfpclasspd $0x12,%zmm30,%k5"); + + /* AVX-512: Op code 0f 3a 67 */ + + asm volatile("vfpclassss $0x12,%xmm27,%k5"); + asm volatile("vfpclasssd $0x12,%xmm30,%k5"); + + /* AVX-512: Op code 0f 72 (Grp13) */ + + asm volatile("vprord $0x12,%zmm25,%zmm26"); + asm volatile("vprorq $0x12,%zmm25,%zmm26"); + asm volatile("vprold $0x12,%zmm29,%zmm30"); + asm volatile("vprolq $0x12,%zmm29,%zmm30"); + asm volatile("psrad $0x2,%mm6"); + asm volatile("vpsrad $0x5,%ymm6,%ymm2"); + asm volatile("vpsrad $0x5,%zmm26,%zmm22"); + asm volatile("vpsraq $0x5,%zmm26,%zmm22"); + + /* AVX-512: Op code 0f 38 c6 (Grp18) */ + + asm volatile("vgatherpf0dps 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vgatherpf0dpd 0x7b(%r14,%ymm31,8){%k1}"); + asm volatile("vgatherpf1dps 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vgatherpf1dpd 0x7b(%r14,%ymm31,8){%k1}"); + asm volatile("vscatterpf0dps 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vscatterpf0dpd 0x7b(%r14,%ymm31,8){%k1}"); + asm volatile("vscatterpf1dps 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vscatterpf1dpd 0x7b(%r14,%ymm31,8){%k1}"); + + /* AVX-512: Op code 0f 38 c7 (Grp19) */ + + asm volatile("vgatherpf0qps 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vgatherpf0qpd 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vgatherpf1qps 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vgatherpf1qpd 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vscatterpf0qps 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vscatterpf0qpd 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vscatterpf1qps 0x7b(%r14,%zmm31,8){%k1}"); + asm volatile("vscatterpf1qpd 0x7b(%r14,%zmm31,8){%k1}"); + + /* AVX-512: Examples */ + + asm volatile("vaddpd %zmm28,%zmm29,%zmm30"); + asm volatile("vaddpd %zmm28,%zmm29,%zmm30{%k7}"); + asm volatile("vaddpd %zmm28,%zmm29,%zmm30{%k7}{z}"); + asm volatile("vaddpd {rn-sae},%zmm28,%zmm29,%zmm30"); + asm volatile("vaddpd {ru-sae},%zmm28,%zmm29,%zmm30"); + asm volatile("vaddpd {rd-sae},%zmm28,%zmm29,%zmm30"); + asm volatile("vaddpd {rz-sae},%zmm28,%zmm29,%zmm30"); + asm volatile("vaddpd (%rcx),%zmm29,%zmm30"); + asm volatile("vaddpd 0x123(%rax,%r14,8),%zmm29,%zmm30"); + asm volatile("vaddpd (%rcx){1to8},%zmm29,%zmm30"); + asm volatile("vaddpd 0x1fc0(%rdx),%zmm29,%zmm30"); + asm volatile("vaddpd 0x3f8(%rdx){1to8},%zmm29,%zmm30"); + asm volatile("vcmpeq_uqps 0x1fc(%rdx){1to16},%zmm30,%k5"); + asm volatile("vcmpltsd 0x123(%rax,%r14,8),%xmm29,%k5{%k7}"); + asm volatile("vcmplesd {sae},%xmm28,%xmm29,%k5{%k7}"); + asm volatile("vgetmantss $0x5b,0x123(%rax,%r14,8),%xmm29,%xmm30{%k7}"); + /* bndmk m64, bnd */ asm volatile("bndmk (%rax), %bnd0"); @@ -471,6 +1345,921 @@ int main(void) #else /* #ifdef __x86_64__ */ + /* bound r32, mem (same op code as EVEX prefix) */ + + asm volatile("bound %eax, 0x12345678(%ecx)"); + asm volatile("bound %ecx, 0x12345678(%eax)"); + asm volatile("bound %edx, 0x12345678(%eax)"); + asm volatile("bound %ebx, 0x12345678(%eax)"); + asm volatile("bound %esp, 0x12345678(%eax)"); + asm volatile("bound %ebp, 0x12345678(%eax)"); + asm volatile("bound %esi, 0x12345678(%eax)"); + asm volatile("bound %edi, 0x12345678(%eax)"); + asm volatile("bound %ecx, (%eax)"); + asm volatile("bound %eax, (0x12345678)"); + asm volatile("bound %edx, (%ecx,%eax,1)"); + asm volatile("bound %edx, 0x12345678(,%eax,1)"); + asm volatile("bound %edx, (%eax,%ecx,1)"); + asm volatile("bound %edx, (%eax,%ecx,8)"); + asm volatile("bound %edx, 0x12(%eax)"); + asm volatile("bound %edx, 0x12(%ebp)"); + asm volatile("bound %edx, 0x12(%ecx,%eax,1)"); + asm volatile("bound %edx, 0x12(%ebp,%eax,1)"); + asm volatile("bound %edx, 0x12(%eax,%ecx,1)"); + asm volatile("bound %edx, 0x12(%eax,%ecx,8)"); + asm volatile("bound %edx, 0x12345678(%eax)"); + asm volatile("bound %edx, 0x12345678(%ebp)"); + asm volatile("bound %edx, 0x12345678(%ecx,%eax,1)"); + asm volatile("bound %edx, 0x12345678(%ebp,%eax,1)"); + asm volatile("bound %edx, 0x12345678(%eax,%ecx,1)"); + asm volatile("bound %edx, 0x12345678(%eax,%ecx,8)"); + + /* bound r16, mem (same op code as EVEX prefix) */ + + asm volatile("bound %ax, 0x12345678(%ecx)"); + asm volatile("bound %cx, 0x12345678(%eax)"); + asm volatile("bound %dx, 0x12345678(%eax)"); + asm volatile("bound %bx, 0x12345678(%eax)"); + asm volatile("bound %sp, 0x12345678(%eax)"); + asm volatile("bound %bp, 0x12345678(%eax)"); + asm volatile("bound %si, 0x12345678(%eax)"); + asm volatile("bound %di, 0x12345678(%eax)"); + asm volatile("bound %cx, (%eax)"); + asm volatile("bound %ax, (0x12345678)"); + asm volatile("bound %dx, (%ecx,%eax,1)"); + asm volatile("bound %dx, 0x12345678(,%eax,1)"); + asm volatile("bound %dx, (%eax,%ecx,1)"); + asm volatile("bound %dx, (%eax,%ecx,8)"); + asm volatile("bound %dx, 0x12(%eax)"); + asm volatile("bound %dx, 0x12(%ebp)"); + asm volatile("bound %dx, 0x12(%ecx,%eax,1)"); + asm volatile("bound %dx, 0x12(%ebp,%eax,1)"); + asm volatile("bound %dx, 0x12(%eax,%ecx,1)"); + asm volatile("bound %dx, 0x12(%eax,%ecx,8)"); + asm volatile("bound %dx, 0x12345678(%eax)"); + asm volatile("bound %dx, 0x12345678(%ebp)"); + asm volatile("bound %dx, 0x12345678(%ecx,%eax,1)"); + asm volatile("bound %dx, 0x12345678(%ebp,%eax,1)"); + asm volatile("bound %dx, 0x12345678(%eax,%ecx,1)"); + asm volatile("bound %dx, 0x12345678(%eax,%ecx,8)"); + + /* AVX-512: Instructions with the same op codes as Mask Instructions */ + + asm volatile("cmovno %eax,%ebx"); + asm volatile("cmovno 0x12345678(%eax),%ecx"); + asm volatile("cmovno 0x12345678(%eax),%cx"); + + asm volatile("cmove %eax,%ebx"); + asm volatile("cmove 0x12345678(%eax),%ecx"); + asm volatile("cmove 0x12345678(%eax),%cx"); + + asm volatile("seto 0x12345678(%eax)"); + asm volatile("setno 0x12345678(%eax)"); + asm volatile("setb 0x12345678(%eax)"); + asm volatile("setc 0x12345678(%eax)"); + asm volatile("setnae 0x12345678(%eax)"); + asm volatile("setae 0x12345678(%eax)"); + asm volatile("setnb 0x12345678(%eax)"); + asm volatile("setnc 0x12345678(%eax)"); + asm volatile("sets 0x12345678(%eax)"); + asm volatile("setns 0x12345678(%eax)"); + + /* AVX-512: Mask Instructions */ + + asm volatile("kandw %k7,%k6,%k5"); + asm volatile("kandq %k7,%k6,%k5"); + asm volatile("kandb %k7,%k6,%k5"); + asm volatile("kandd %k7,%k6,%k5"); + + asm volatile("kandnw %k7,%k6,%k5"); + asm volatile("kandnq %k7,%k6,%k5"); + asm volatile("kandnb %k7,%k6,%k5"); + asm volatile("kandnd %k7,%k6,%k5"); + + asm volatile("knotw %k7,%k6"); + asm volatile("knotq %k7,%k6"); + asm volatile("knotb %k7,%k6"); + asm volatile("knotd %k7,%k6"); + + asm volatile("korw %k7,%k6,%k5"); + asm volatile("korq %k7,%k6,%k5"); + asm volatile("korb %k7,%k6,%k5"); + asm volatile("kord %k7,%k6,%k5"); + + asm volatile("kxnorw %k7,%k6,%k5"); + asm volatile("kxnorq %k7,%k6,%k5"); + asm volatile("kxnorb %k7,%k6,%k5"); + asm volatile("kxnord %k7,%k6,%k5"); + + asm volatile("kxorw %k7,%k6,%k5"); + asm volatile("kxorq %k7,%k6,%k5"); + asm volatile("kxorb %k7,%k6,%k5"); + asm volatile("kxord %k7,%k6,%k5"); + + asm volatile("kaddw %k7,%k6,%k5"); + asm volatile("kaddq %k7,%k6,%k5"); + asm volatile("kaddb %k7,%k6,%k5"); + asm volatile("kaddd %k7,%k6,%k5"); + + asm volatile("kunpckbw %k7,%k6,%k5"); + asm volatile("kunpckwd %k7,%k6,%k5"); + asm volatile("kunpckdq %k7,%k6,%k5"); + + asm volatile("kmovw %k6,%k5"); + asm volatile("kmovw (%ecx),%k5"); + asm volatile("kmovw 0x123(%eax,%ecx,8),%k5"); + asm volatile("kmovw %k5,(%ecx)"); + asm volatile("kmovw %k5,0x123(%eax,%ecx,8)"); + asm volatile("kmovw %eax,%k5"); + asm volatile("kmovw %ebp,%k5"); + asm volatile("kmovw %k5,%eax"); + asm volatile("kmovw %k5,%ebp"); + + asm volatile("kmovq %k6,%k5"); + asm volatile("kmovq (%ecx),%k5"); + asm volatile("kmovq 0x123(%eax,%ecx,8),%k5"); + asm volatile("kmovq %k5,(%ecx)"); + asm volatile("kmovq %k5,0x123(%eax,%ecx,8)"); + + asm volatile("kmovb %k6,%k5"); + asm volatile("kmovb (%ecx),%k5"); + asm volatile("kmovb 0x123(%eax,%ecx,8),%k5"); + asm volatile("kmovb %k5,(%ecx)"); + asm volatile("kmovb %k5,0x123(%eax,%ecx,8)"); + asm volatile("kmovb %eax,%k5"); + asm volatile("kmovb %ebp,%k5"); + asm volatile("kmovb %k5,%eax"); + asm volatile("kmovb %k5,%ebp"); + + asm volatile("kmovd %k6,%k5"); + asm volatile("kmovd (%ecx),%k5"); + asm volatile("kmovd 0x123(%eax,%ecx,8),%k5"); + asm volatile("kmovd %k5,(%ecx)"); + asm volatile("kmovd %k5,0x123(%eax,%ecx,8)"); + asm volatile("kmovd %eax,%k5"); + asm volatile("kmovd %ebp,%k5"); + asm volatile("kmovd %k5,%eax"); + asm volatile("kmovd %k5,%ebp"); + + asm volatile("kortestw %k6,%k5"); + asm volatile("kortestq %k6,%k5"); + asm volatile("kortestb %k6,%k5"); + asm volatile("kortestd %k6,%k5"); + + asm volatile("ktestw %k6,%k5"); + asm volatile("ktestq %k6,%k5"); + asm volatile("ktestb %k6,%k5"); + asm volatile("ktestd %k6,%k5"); + + asm volatile("kshiftrw $0x12,%k6,%k5"); + asm volatile("kshiftrq $0x5b,%k6,%k5"); + asm volatile("kshiftlw $0x12,%k6,%k5"); + asm volatile("kshiftlq $0x5b,%k6,%k5"); + + /* AVX-512: Op code 0f 5b */ + asm volatile("vcvtdq2ps %xmm5,%xmm6"); + asm volatile("vcvtqq2ps %zmm5,%ymm6{%k7}"); + asm volatile("vcvtps2dq %xmm5,%xmm6"); + asm volatile("vcvttps2dq %xmm5,%xmm6"); + + /* AVX-512: Op code 0f 6f */ + + asm volatile("movq %mm0,%mm4"); + asm volatile("vmovdqa %ymm4,%ymm6"); + asm volatile("vmovdqa32 %zmm5,%zmm6"); + asm volatile("vmovdqa64 %zmm5,%zmm6"); + asm volatile("vmovdqu %ymm4,%ymm6"); + asm volatile("vmovdqu32 %zmm5,%zmm6"); + asm volatile("vmovdqu64 %zmm5,%zmm6"); + asm volatile("vmovdqu8 %zmm5,%zmm6"); + asm volatile("vmovdqu16 %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 78 */ + + asm volatile("vmread %eax,%ebx"); + asm volatile("vcvttps2udq %zmm5,%zmm6"); + asm volatile("vcvttpd2udq %zmm5,%ymm6{%k7}"); + asm volatile("vcvttsd2usi %xmm6,%eax"); + asm volatile("vcvttss2usi %xmm6,%eax"); + asm volatile("vcvttps2uqq %ymm5,%zmm6{%k7}"); + asm volatile("vcvttpd2uqq %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 79 */ + + asm volatile("vmwrite %eax,%ebx"); + asm volatile("vcvtps2udq %zmm5,%zmm6"); + asm volatile("vcvtpd2udq %zmm5,%ymm6{%k7}"); + asm volatile("vcvtsd2usi %xmm6,%eax"); + asm volatile("vcvtss2usi %xmm6,%eax"); + asm volatile("vcvtps2uqq %ymm5,%zmm6{%k7}"); + asm volatile("vcvtpd2uqq %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 7a */ + + asm volatile("vcvtudq2pd %ymm5,%zmm6{%k7}"); + asm volatile("vcvtuqq2pd %zmm5,%zmm6"); + asm volatile("vcvtudq2ps %zmm5,%zmm6"); + asm volatile("vcvtuqq2ps %zmm5,%ymm6{%k7}"); + asm volatile("vcvttps2qq %ymm5,%zmm6{%k7}"); + asm volatile("vcvttpd2qq %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 7b */ + + asm volatile("vcvtusi2sd %eax,%xmm5,%xmm6"); + asm volatile("vcvtusi2ss %eax,%xmm5,%xmm6"); + asm volatile("vcvtps2qq %ymm5,%zmm6{%k7}"); + asm volatile("vcvtpd2qq %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 7f */ + + asm volatile("movq.s %mm0,%mm4"); + asm volatile("vmovdqa.s %ymm5,%ymm6"); + asm volatile("vmovdqa32.s %zmm5,%zmm6"); + asm volatile("vmovdqa64.s %zmm5,%zmm6"); + asm volatile("vmovdqu.s %ymm5,%ymm6"); + asm volatile("vmovdqu32.s %zmm5,%zmm6"); + asm volatile("vmovdqu64.s %zmm5,%zmm6"); + asm volatile("vmovdqu8.s %zmm5,%zmm6"); + asm volatile("vmovdqu16.s %zmm5,%zmm6"); + + /* AVX-512: Op code 0f db */ + + asm volatile("pand %mm1,%mm2"); + asm volatile("pand %xmm1,%xmm2"); + asm volatile("vpand %ymm4,%ymm6,%ymm2"); + asm volatile("vpandd %zmm4,%zmm5,%zmm6"); + asm volatile("vpandq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f df */ + + asm volatile("pandn %mm1,%mm2"); + asm volatile("pandn %xmm1,%xmm2"); + asm volatile("vpandn %ymm4,%ymm6,%ymm2"); + asm volatile("vpandnd %zmm4,%zmm5,%zmm6"); + asm volatile("vpandnq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f e6 */ + + asm volatile("vcvttpd2dq %xmm1,%xmm2"); + asm volatile("vcvtdq2pd %xmm5,%xmm6"); + asm volatile("vcvtdq2pd %ymm5,%zmm6{%k7}"); + asm volatile("vcvtqq2pd %zmm5,%zmm6"); + asm volatile("vcvtpd2dq %xmm1,%xmm2"); + + /* AVX-512: Op code 0f eb */ + + asm volatile("por %mm4,%mm6"); + asm volatile("vpor %ymm4,%ymm6,%ymm2"); + asm volatile("vpord %zmm4,%zmm5,%zmm6"); + asm volatile("vporq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f ef */ + + asm volatile("pxor %mm4,%mm6"); + asm volatile("vpxor %ymm4,%ymm6,%ymm2"); + asm volatile("vpxord %zmm4,%zmm5,%zmm6"); + asm volatile("vpxorq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 10 */ + + asm volatile("pblendvb %xmm1,%xmm0"); + asm volatile("vpsrlvw %zmm4,%zmm5,%zmm6"); + asm volatile("vpmovuswb %zmm5,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 11 */ + + asm volatile("vpmovusdb %zmm5,%xmm6{%k7}"); + asm volatile("vpsravw %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 12 */ + + asm volatile("vpmovusqb %zmm5,%xmm6{%k7}"); + asm volatile("vpsllvw %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 13 */ + + asm volatile("vcvtph2ps %xmm3,%ymm5"); + asm volatile("vcvtph2ps %ymm5,%zmm6{%k7}"); + asm volatile("vpmovusdw %zmm5,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 14 */ + + asm volatile("blendvps %xmm1,%xmm0"); + asm volatile("vpmovusqw %zmm5,%xmm6{%k7}"); + asm volatile("vprorvd %zmm4,%zmm5,%zmm6"); + asm volatile("vprorvq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 15 */ + + asm volatile("blendvpd %xmm1,%xmm0"); + asm volatile("vpmovusqd %zmm5,%ymm6{%k7}"); + asm volatile("vprolvd %zmm4,%zmm5,%zmm6"); + asm volatile("vprolvq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 16 */ + + asm volatile("vpermps %ymm4,%ymm6,%ymm2"); + asm volatile("vpermps %ymm4,%ymm6,%ymm2{%k7}"); + asm volatile("vpermpd %ymm4,%ymm6,%ymm2{%k7}"); + + /* AVX-512: Op code 0f 38 19 */ + + asm volatile("vbroadcastsd %xmm4,%ymm6"); + asm volatile("vbroadcastf32x2 %xmm7,%zmm6"); + + /* AVX-512: Op code 0f 38 1a */ + + asm volatile("vbroadcastf128 (%ecx),%ymm4"); + asm volatile("vbroadcastf32x4 (%ecx),%zmm6"); + asm volatile("vbroadcastf64x2 (%ecx),%zmm6"); + + /* AVX-512: Op code 0f 38 1b */ + + asm volatile("vbroadcastf32x8 (%ecx),%zmm6"); + asm volatile("vbroadcastf64x4 (%ecx),%zmm6"); + + /* AVX-512: Op code 0f 38 1f */ + + asm volatile("vpabsq %zmm4,%zmm6"); + + /* AVX-512: Op code 0f 38 20 */ + + asm volatile("vpmovsxbw %xmm4,%xmm5"); + asm volatile("vpmovswb %zmm5,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 21 */ + + asm volatile("vpmovsxbd %xmm4,%ymm6"); + asm volatile("vpmovsdb %zmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 22 */ + + asm volatile("vpmovsxbq %xmm4,%ymm4"); + asm volatile("vpmovsqb %zmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 23 */ + + asm volatile("vpmovsxwd %xmm4,%ymm4"); + asm volatile("vpmovsdw %zmm5,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 24 */ + + asm volatile("vpmovsxwq %xmm4,%ymm6"); + asm volatile("vpmovsqw %zmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 25 */ + + asm volatile("vpmovsxdq %xmm4,%ymm4"); + asm volatile("vpmovsqd %zmm5,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 26 */ + + asm volatile("vptestmb %zmm5,%zmm6,%k5"); + asm volatile("vptestmw %zmm5,%zmm6,%k5"); + asm volatile("vptestnmb %zmm4,%zmm5,%k5"); + asm volatile("vptestnmw %zmm4,%zmm5,%k5"); + + /* AVX-512: Op code 0f 38 27 */ + + asm volatile("vptestmd %zmm5,%zmm6,%k5"); + asm volatile("vptestmq %zmm5,%zmm6,%k5"); + asm volatile("vptestnmd %zmm4,%zmm5,%k5"); + asm volatile("vptestnmq %zmm4,%zmm5,%k5"); + + /* AVX-512: Op code 0f 38 28 */ + + asm volatile("vpmuldq %ymm4,%ymm6,%ymm2"); + asm volatile("vpmovm2b %k5,%zmm6"); + asm volatile("vpmovm2w %k5,%zmm6"); + + /* AVX-512: Op code 0f 38 29 */ + + asm volatile("vpcmpeqq %ymm4,%ymm6,%ymm2"); + asm volatile("vpmovb2m %zmm6,%k5"); + asm volatile("vpmovw2m %zmm6,%k5"); + + /* AVX-512: Op code 0f 38 2a */ + + asm volatile("vmovntdqa (%ecx),%ymm4"); + asm volatile("vpbroadcastmb2q %k6,%zmm1"); + + /* AVX-512: Op code 0f 38 2c */ + + asm volatile("vmaskmovps (%ecx),%ymm4,%ymm6"); + asm volatile("vscalefps %zmm4,%zmm5,%zmm6"); + asm volatile("vscalefpd %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 2d */ + + asm volatile("vmaskmovpd (%ecx),%ymm4,%ymm6"); + asm volatile("vscalefss %xmm4,%xmm5,%xmm6{%k7}"); + asm volatile("vscalefsd %xmm4,%xmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 30 */ + + asm volatile("vpmovzxbw %xmm4,%ymm4"); + asm volatile("vpmovwb %zmm5,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 31 */ + + asm volatile("vpmovzxbd %xmm4,%ymm6"); + asm volatile("vpmovdb %zmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 32 */ + + asm volatile("vpmovzxbq %xmm4,%ymm4"); + asm volatile("vpmovqb %zmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 33 */ + + asm volatile("vpmovzxwd %xmm4,%ymm4"); + asm volatile("vpmovdw %zmm5,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 34 */ + + asm volatile("vpmovzxwq %xmm4,%ymm6"); + asm volatile("vpmovqw %zmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 35 */ + + asm volatile("vpmovzxdq %xmm4,%ymm4"); + asm volatile("vpmovqd %zmm5,%ymm6{%k7}"); + + /* AVX-512: Op code 0f 38 36 */ + + asm volatile("vpermd %ymm4,%ymm6,%ymm2"); + asm volatile("vpermd %ymm4,%ymm6,%ymm2{%k7}"); + asm volatile("vpermq %ymm4,%ymm6,%ymm2{%k7}"); + + /* AVX-512: Op code 0f 38 38 */ + + asm volatile("vpminsb %ymm4,%ymm6,%ymm2"); + asm volatile("vpmovm2d %k5,%zmm6"); + asm volatile("vpmovm2q %k5,%zmm6"); + + /* AVX-512: Op code 0f 38 39 */ + + asm volatile("vpminsd %xmm1,%xmm2,%xmm3"); + asm volatile("vpminsd %zmm4,%zmm5,%zmm6"); + asm volatile("vpminsq %zmm4,%zmm5,%zmm6"); + asm volatile("vpmovd2m %zmm6,%k5"); + asm volatile("vpmovq2m %zmm6,%k5"); + + /* AVX-512: Op code 0f 38 3a */ + + asm volatile("vpminuw %ymm4,%ymm6,%ymm2"); + asm volatile("vpbroadcastmw2d %k6,%zmm6"); + + /* AVX-512: Op code 0f 38 3b */ + + asm volatile("vpminud %ymm4,%ymm6,%ymm2"); + asm volatile("vpminud %zmm4,%zmm5,%zmm6"); + asm volatile("vpminuq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 3d */ + + asm volatile("vpmaxsd %ymm4,%ymm6,%ymm2"); + asm volatile("vpmaxsd %zmm4,%zmm5,%zmm6"); + asm volatile("vpmaxsq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 3f */ + + asm volatile("vpmaxud %ymm4,%ymm6,%ymm2"); + asm volatile("vpmaxud %zmm4,%zmm5,%zmm6"); + asm volatile("vpmaxuq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 40 */ + + asm volatile("vpmulld %ymm4,%ymm6,%ymm2"); + asm volatile("vpmulld %zmm4,%zmm5,%zmm6"); + asm volatile("vpmullq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 42 */ + + asm volatile("vgetexpps %zmm5,%zmm6"); + asm volatile("vgetexppd %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 43 */ + + asm volatile("vgetexpss %xmm4,%xmm5,%xmm6{%k7}"); + asm volatile("vgetexpsd %xmm2,%xmm3,%xmm4{%k7}"); + + /* AVX-512: Op code 0f 38 44 */ + + asm volatile("vplzcntd %zmm5,%zmm6"); + asm volatile("vplzcntq %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 46 */ + + asm volatile("vpsravd %ymm4,%ymm6,%ymm2"); + asm volatile("vpsravd %zmm4,%zmm5,%zmm6"); + asm volatile("vpsravq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 4c */ + + asm volatile("vrcp14ps %zmm5,%zmm6"); + asm volatile("vrcp14pd %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 4d */ + + asm volatile("vrcp14ss %xmm4,%xmm5,%xmm6{%k7}"); + asm volatile("vrcp14sd %xmm4,%xmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 4e */ + + asm volatile("vrsqrt14ps %zmm5,%zmm6"); + asm volatile("vrsqrt14pd %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 4f */ + + asm volatile("vrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}"); + asm volatile("vrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 38 59 */ + + asm volatile("vpbroadcastq %xmm4,%xmm6"); + asm volatile("vbroadcasti32x2 %xmm7,%zmm6"); + + /* AVX-512: Op code 0f 38 5a */ + + asm volatile("vbroadcasti128 (%ecx),%ymm4"); + asm volatile("vbroadcasti32x4 (%ecx),%zmm6"); + asm volatile("vbroadcasti64x2 (%ecx),%zmm6"); + + /* AVX-512: Op code 0f 38 5b */ + + asm volatile("vbroadcasti32x8 (%ecx),%zmm6"); + asm volatile("vbroadcasti64x4 (%ecx),%zmm6"); + + /* AVX-512: Op code 0f 38 64 */ + + asm volatile("vpblendmd %zmm4,%zmm5,%zmm6"); + asm volatile("vpblendmq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 65 */ + + asm volatile("vblendmps %zmm4,%zmm5,%zmm6"); + asm volatile("vblendmpd %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 66 */ + + asm volatile("vpblendmb %zmm4,%zmm5,%zmm6"); + asm volatile("vpblendmw %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 75 */ + + asm volatile("vpermi2b %zmm4,%zmm5,%zmm6"); + asm volatile("vpermi2w %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 76 */ + + asm volatile("vpermi2d %zmm4,%zmm5,%zmm6"); + asm volatile("vpermi2q %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 77 */ + + asm volatile("vpermi2ps %zmm4,%zmm5,%zmm6"); + asm volatile("vpermi2pd %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 7a */ + + asm volatile("vpbroadcastb %eax,%xmm3"); + + /* AVX-512: Op code 0f 38 7b */ + + asm volatile("vpbroadcastw %eax,%xmm3"); + + /* AVX-512: Op code 0f 38 7c */ + + asm volatile("vpbroadcastd %eax,%xmm3"); + + /* AVX-512: Op code 0f 38 7d */ + + asm volatile("vpermt2b %zmm4,%zmm5,%zmm6"); + asm volatile("vpermt2w %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 7e */ + + asm volatile("vpermt2d %zmm4,%zmm5,%zmm6"); + asm volatile("vpermt2q %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 7f */ + + asm volatile("vpermt2ps %zmm4,%zmm5,%zmm6"); + asm volatile("vpermt2pd %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 83 */ + + asm volatile("vpmultishiftqb %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 88 */ + + asm volatile("vexpandps (%ecx),%zmm6"); + asm volatile("vexpandpd (%ecx),%zmm6"); + + /* AVX-512: Op code 0f 38 89 */ + + asm volatile("vpexpandd (%ecx),%zmm6"); + asm volatile("vpexpandq (%ecx),%zmm6"); + + /* AVX-512: Op code 0f 38 8a */ + + asm volatile("vcompressps %zmm6,(%ecx)"); + asm volatile("vcompresspd %zmm6,(%ecx)"); + + /* AVX-512: Op code 0f 38 8b */ + + asm volatile("vpcompressd %zmm6,(%ecx)"); + asm volatile("vpcompressq %zmm6,(%ecx)"); + + /* AVX-512: Op code 0f 38 8d */ + + asm volatile("vpermb %zmm4,%zmm5,%zmm6"); + asm volatile("vpermw %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 90 */ + + asm volatile("vpgatherdd %xmm2,0x02(%ebp,%xmm7,2),%xmm1"); + asm volatile("vpgatherdq %xmm2,0x04(%ebp,%xmm7,2),%xmm1"); + asm volatile("vpgatherdd 0x7b(%ebp,%zmm7,8),%zmm6{%k1}"); + asm volatile("vpgatherdq 0x7b(%ebp,%ymm7,8),%zmm6{%k1}"); + + /* AVX-512: Op code 0f 38 91 */ + + asm volatile("vpgatherqd %xmm2,0x02(%ebp,%xmm7,2),%xmm1"); + asm volatile("vpgatherqq %xmm2,0x02(%ebp,%xmm7,2),%xmm1"); + asm volatile("vpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}"); + asm volatile("vpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}"); + + /* AVX-512: Op code 0f 38 a0 */ + + asm volatile("vpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vpscatterdq %zmm6,0x7b(%ebp,%ymm7,8){%k1}"); + + /* AVX-512: Op code 0f 38 a1 */ + + asm volatile("vpscatterqd %ymm6,0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vpscatterqq %ymm6,0x7b(%ebp,%ymm7,8){%k1}"); + + /* AVX-512: Op code 0f 38 a2 */ + + asm volatile("vscatterdps %zmm6,0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vscatterdpd %zmm6,0x7b(%ebp,%ymm7,8){%k1}"); + + /* AVX-512: Op code 0f 38 a3 */ + + asm volatile("vscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}"); + + /* AVX-512: Op code 0f 38 b4 */ + + asm volatile("vpmadd52luq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 b5 */ + + asm volatile("vpmadd52huq %zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 c4 */ + + asm volatile("vpconflictd %zmm5,%zmm6"); + asm volatile("vpconflictq %zmm5,%zmm6"); + + /* AVX-512: Op code 0f 38 c8 */ + + asm volatile("vexp2ps %zmm6,%zmm7"); + asm volatile("vexp2pd %zmm6,%zmm7"); + + /* AVX-512: Op code 0f 38 ca */ + + asm volatile("vrcp28ps %zmm6,%zmm7"); + asm volatile("vrcp28pd %zmm6,%zmm7"); + + /* AVX-512: Op code 0f 38 cb */ + + asm volatile("vrcp28ss %xmm5,%xmm6,%xmm7{%k7}"); + asm volatile("vrcp28sd %xmm5,%xmm6,%xmm7{%k7}"); + + /* AVX-512: Op code 0f 38 cc */ + + asm volatile("vrsqrt28ps %zmm6,%zmm7"); + asm volatile("vrsqrt28pd %zmm6,%zmm7"); + + /* AVX-512: Op code 0f 38 cd */ + + asm volatile("vrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}"); + asm volatile("vrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}"); + + /* AVX-512: Op code 0f 3a 03 */ + + asm volatile("valignd $0x12,%zmm5,%zmm6,%zmm7"); + asm volatile("valignq $0x12,%zmm5,%zmm6,%zmm7"); + + /* AVX-512: Op code 0f 3a 08 */ + + asm volatile("vroundps $0x5,%ymm6,%ymm2"); + asm volatile("vrndscaleps $0x12,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 3a 09 */ + + asm volatile("vroundpd $0x5,%ymm6,%ymm2"); + asm volatile("vrndscalepd $0x12,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 3a 0a */ + + asm volatile("vroundss $0x5,%xmm4,%xmm6,%xmm2"); + asm volatile("vrndscaless $0x12,%xmm4,%xmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 3a 0b */ + + asm volatile("vroundsd $0x5,%xmm4,%xmm6,%xmm2"); + asm volatile("vrndscalesd $0x12,%xmm4,%xmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 3a 18 */ + + asm volatile("vinsertf128 $0x5,%xmm4,%ymm4,%ymm6"); + asm volatile("vinsertf32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}"); + asm volatile("vinsertf64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}"); + + /* AVX-512: Op code 0f 3a 19 */ + + asm volatile("vextractf128 $0x5,%ymm4,%xmm4"); + asm volatile("vextractf32x4 $0x12,%zmm5,%xmm6{%k7}"); + asm volatile("vextractf64x2 $0x12,%zmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 3a 1a */ + + asm volatile("vinsertf32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}"); + asm volatile("vinsertf64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}"); + + /* AVX-512: Op code 0f 3a 1b */ + + asm volatile("vextractf32x8 $0x12,%zmm6,%ymm7{%k7}"); + asm volatile("vextractf64x4 $0x12,%zmm6,%ymm7{%k7}"); + + /* AVX-512: Op code 0f 3a 1e */ + + asm volatile("vpcmpud $0x12,%zmm6,%zmm7,%k5"); + asm volatile("vpcmpuq $0x12,%zmm6,%zmm7,%k5"); + + /* AVX-512: Op code 0f 3a 1f */ + + asm volatile("vpcmpd $0x12,%zmm6,%zmm7,%k5"); + asm volatile("vpcmpq $0x12,%zmm6,%zmm7,%k5"); + + /* AVX-512: Op code 0f 3a 23 */ + + asm volatile("vshuff32x4 $0x12,%zmm5,%zmm6,%zmm7"); + asm volatile("vshuff64x2 $0x12,%zmm5,%zmm6,%zmm7"); + + /* AVX-512: Op code 0f 3a 25 */ + + asm volatile("vpternlogd $0x12,%zmm5,%zmm6,%zmm7"); + asm volatile("vpternlogq $0x12,%zmm5,%zmm6,%zmm7"); + + /* AVX-512: Op code 0f 3a 26 */ + + asm volatile("vgetmantps $0x12,%zmm6,%zmm7"); + asm volatile("vgetmantpd $0x12,%zmm6,%zmm7"); + + /* AVX-512: Op code 0f 3a 27 */ + + asm volatile("vgetmantss $0x12,%xmm5,%xmm6,%xmm7{%k7}"); + asm volatile("vgetmantsd $0x12,%xmm5,%xmm6,%xmm7{%k7}"); + + /* AVX-512: Op code 0f 3a 38 */ + + asm volatile("vinserti128 $0x5,%xmm4,%ymm4,%ymm6"); + asm volatile("vinserti32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}"); + asm volatile("vinserti64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}"); + + /* AVX-512: Op code 0f 3a 39 */ + + asm volatile("vextracti128 $0x5,%ymm4,%xmm6"); + asm volatile("vextracti32x4 $0x12,%zmm5,%xmm6{%k7}"); + asm volatile("vextracti64x2 $0x12,%zmm5,%xmm6{%k7}"); + + /* AVX-512: Op code 0f 3a 3a */ + + asm volatile("vinserti32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}"); + asm volatile("vinserti64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}"); + + /* AVX-512: Op code 0f 3a 3b */ + + asm volatile("vextracti32x8 $0x12,%zmm6,%ymm7{%k7}"); + asm volatile("vextracti64x4 $0x12,%zmm6,%ymm7{%k7}"); + + /* AVX-512: Op code 0f 3a 3e */ + + asm volatile("vpcmpub $0x12,%zmm6,%zmm7,%k5"); + asm volatile("vpcmpuw $0x12,%zmm6,%zmm7,%k5"); + + /* AVX-512: Op code 0f 3a 3f */ + + asm volatile("vpcmpb $0x12,%zmm6,%zmm7,%k5"); + asm volatile("vpcmpw $0x12,%zmm6,%zmm7,%k5"); + + /* AVX-512: Op code 0f 3a 42 */ + + asm volatile("vmpsadbw $0x5,%ymm4,%ymm6,%ymm2"); + asm volatile("vdbpsadbw $0x12,%zmm4,%zmm5,%zmm6"); + + /* AVX-512: Op code 0f 3a 43 */ + + asm volatile("vshufi32x4 $0x12,%zmm5,%zmm6,%zmm7"); + asm volatile("vshufi64x2 $0x12,%zmm5,%zmm6,%zmm7"); + + /* AVX-512: Op code 0f 3a 50 */ + + asm volatile("vrangeps $0x12,%zmm5,%zmm6,%zmm7"); + asm volatile("vrangepd $0x12,%zmm5,%zmm6,%zmm7"); + + /* AVX-512: Op code 0f 3a 51 */ + + asm volatile("vrangess $0x12,%xmm5,%xmm6,%xmm7"); + asm volatile("vrangesd $0x12,%xmm5,%xmm6,%xmm7"); + + /* AVX-512: Op code 0f 3a 54 */ + + asm volatile("vfixupimmps $0x12,%zmm5,%zmm6,%zmm7"); + asm volatile("vfixupimmpd $0x12,%zmm5,%zmm6,%zmm7"); + + /* AVX-512: Op code 0f 3a 55 */ + + asm volatile("vfixupimmss $0x12,%xmm5,%xmm6,%xmm7{%k7}"); + asm volatile("vfixupimmsd $0x12,%xmm5,%xmm6,%xmm7{%k7}"); + + /* AVX-512: Op code 0f 3a 56 */ + + asm volatile("vreduceps $0x12,%zmm6,%zmm7"); + asm volatile("vreducepd $0x12,%zmm6,%zmm7"); + + /* AVX-512: Op code 0f 3a 57 */ + + asm volatile("vreducess $0x12,%xmm5,%xmm6,%xmm7"); + asm volatile("vreducesd $0x12,%xmm5,%xmm6,%xmm7"); + + /* AVX-512: Op code 0f 3a 66 */ + + asm volatile("vfpclassps $0x12,%zmm7,%k5"); + asm volatile("vfpclasspd $0x12,%zmm7,%k5"); + + /* AVX-512: Op code 0f 3a 67 */ + + asm volatile("vfpclassss $0x12,%xmm7,%k5"); + asm volatile("vfpclasssd $0x12,%xmm7,%k5"); + + /* AVX-512: Op code 0f 72 (Grp13) */ + + asm volatile("vprord $0x12,%zmm5,%zmm6"); + asm volatile("vprorq $0x12,%zmm5,%zmm6"); + asm volatile("vprold $0x12,%zmm5,%zmm6"); + asm volatile("vprolq $0x12,%zmm5,%zmm6"); + asm volatile("psrad $0x2,%mm6"); + asm volatile("vpsrad $0x5,%ymm6,%ymm2"); + asm volatile("vpsrad $0x5,%zmm6,%zmm2"); + asm volatile("vpsraq $0x5,%zmm6,%zmm2"); + + /* AVX-512: Op code 0f 38 c6 (Grp18) */ + + asm volatile("vgatherpf0dps 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vgatherpf0dpd 0x7b(%ebp,%ymm7,8){%k1}"); + asm volatile("vgatherpf1dps 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vgatherpf1dpd 0x7b(%ebp,%ymm7,8){%k1}"); + asm volatile("vscatterpf0dps 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vscatterpf0dpd 0x7b(%ebp,%ymm7,8){%k1}"); + asm volatile("vscatterpf1dps 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vscatterpf1dpd 0x7b(%ebp,%ymm7,8){%k1}"); + + /* AVX-512: Op code 0f 38 c7 (Grp19) */ + + asm volatile("vgatherpf0qps 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vgatherpf0qpd 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vgatherpf1qps 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vgatherpf1qpd 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vscatterpf0qps 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vscatterpf0qpd 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vscatterpf1qps 0x7b(%ebp,%zmm7,8){%k1}"); + asm volatile("vscatterpf1qpd 0x7b(%ebp,%zmm7,8){%k1}"); + + /* AVX-512: Examples */ + + asm volatile("vaddpd %zmm4,%zmm5,%zmm6"); + asm volatile("vaddpd %zmm4,%zmm5,%zmm6{%k7}"); + asm volatile("vaddpd %zmm4,%zmm5,%zmm6{%k7}{z}"); + asm volatile("vaddpd {rn-sae},%zmm4,%zmm5,%zmm6"); + asm volatile("vaddpd {ru-sae},%zmm4,%zmm5,%zmm6"); + asm volatile("vaddpd {rd-sae},%zmm4,%zmm5,%zmm6"); + asm volatile("vaddpd {rz-sae},%zmm4,%zmm5,%zmm6"); + asm volatile("vaddpd (%ecx),%zmm5,%zmm6"); + asm volatile("vaddpd 0x123(%eax,%ecx,8),%zmm5,%zmm6"); + asm volatile("vaddpd (%ecx){1to8},%zmm5,%zmm6"); + asm volatile("vaddpd 0x1fc0(%edx),%zmm5,%zmm6"); + asm volatile("vaddpd 0x3f8(%edx){1to8},%zmm5,%zmm6"); + asm volatile("vcmpeq_uqps 0x1fc(%edx){1to16},%zmm6,%k5"); + asm volatile("vcmpltsd 0x123(%eax,%ecx,8),%xmm3,%k5{%k7}"); + asm volatile("vcmplesd {sae},%xmm4,%xmm5,%k5{%k7}"); + asm volatile("vgetmantss $0x5b,0x123(%eax,%ecx,8),%xmm4,%xmm5{%k7}"); + /* bndmk m32, bnd */ asm volatile("bndmk (%eax), %bnd0"); @@ -866,10 +2655,6 @@ int main(void) #endif /* #ifndef __x86_64__ */ - /* pcommit */ - - asm volatile("pcommit"); - /* Following line is a marker for the awk script - do not change */ asm volatile("rdtsc"); /* Stop here */ diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 9d29ee283ac5..5c76cc83186a 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -71,7 +71,7 @@ int test__perf_time_to_tsc(int subtest __maybe_unused) CHECK__(parse_events(evlist, "cycles:u", NULL)); - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); evsel = perf_evlist__first(evlist); @@ -154,10 +154,6 @@ next_event: err = 0; out_err: - if (evlist) { - perf_evlist__disable(evlist); - perf_evlist__delete(evlist); - } - + perf_evlist__delete(evlist); return err; } diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 72193f19d6d7..500cf96db979 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -1,12 +1,16 @@ +#include <errno.h> #include <unistd.h> #include <stdlib.h> #include <signal.h> #include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> #include <linux/types.h> #include "perf.h" #include "debug.h" #include "tests/tests.h" #include "cloexec.h" +#include "util.h" #include "arch-tests.h" static u64 rdpmc(unsigned int counter) @@ -111,14 +115,14 @@ static int __test__rdpmc(void) if (fd < 0) { pr_err("Error: sys_perf_event_open() syscall returned " "with %d (%s)\n", fd, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); return -1; } addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); if (addr == (void *)(-1)) { pr_err("Error: mmap() syscall returned with (%s)\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_close; } diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 465970370f3e..f95e6f46ef0d 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -3,11 +3,12 @@ libperf-y += tsc.o libperf-y += pmu.o libperf-y += kvm-stat.o libperf-y += perf_regs.o +libperf-y += group.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 7a7805583e3f..cc1d865e31f1 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -37,7 +37,7 @@ struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist, intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); if (evlist) { - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (intel_pt_pmu && evsel->attr.type == intel_pt_pmu->type) found_pt = true; diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c new file mode 100644 index 000000000000..37f92aa39a5d --- /dev/null +++ b/tools/perf/arch/x86/util/group.c @@ -0,0 +1,27 @@ +#include <stdio.h> +#include "api/fs/fs.h" +#include "util/group.h" + +/* + * Check whether we can use a group for top down. + * Without a group may get bad results due to multiplexing. + */ +bool arch_topdown_check_group(bool *warn) +{ + int n; + + if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) + return false; + if (n > 0) { + *warn = true; + return false; + } + return true; +} + +void arch_topdown_group_warn(void) +{ + fprintf(stderr, + "nmi_watchdog enabled with topdown. May give wrong results.\n" + "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); +} diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index d66f9ad4df2e..5132775a044f 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -124,7 +124,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, btsr->evlist = evlist; btsr->snapshot_mode = opts->auxtrace_snapshot_mode; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type == intel_bts_pmu->type) { if (intel_bts_evsel) { pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n"); @@ -327,7 +327,7 @@ static int intel_bts_snapshot_start(struct auxtrace_record *itr) container_of(itr, struct intel_bts_recording, itr); struct perf_evsel *evsel; - evlist__for_each(btsr->evlist, evsel) { + evlist__for_each_entry(btsr->evlist, evsel) { if (evsel->attr.type == btsr->intel_bts_pmu->type) return perf_evsel__disable(evsel); } @@ -340,7 +340,7 @@ static int intel_bts_snapshot_finish(struct auxtrace_record *itr) container_of(itr, struct intel_bts_recording, itr); struct perf_evsel *evsel; - evlist__for_each(btsr->evlist, evsel) { + evlist__for_each_entry(btsr->evlist, evsel) { if (evsel->attr.type == btsr->intel_bts_pmu->type) return perf_evsel__enable(evsel); } @@ -422,7 +422,7 @@ static int intel_bts_read_finish(struct auxtrace_record *itr, int idx) container_of(itr, struct intel_bts_recording, itr); struct perf_evsel *evsel; - evlist__for_each(btsr->evlist, evsel) { + evlist__for_each_entry(btsr->evlist, evsel) { if (evsel->attr.type == btsr->intel_bts_pmu->type) return perf_evlist__enable_event_idx(btsr->evlist, evsel, idx); @@ -438,6 +438,11 @@ struct auxtrace_record *intel_bts_recording_init(int *err) if (!intel_bts_pmu) return NULL; + if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { + *err = -errno; + return NULL; + } + btsr = zalloc(sizeof(struct intel_bts_recording)); if (!btsr) { *err = -ENOMEM; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index a3395179c9ee..a2412e9d883b 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -131,7 +131,7 @@ static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, if (!mask) return -EINVAL; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type == intel_pt_pmu->type) { *res = intel_pt_masked_bits(mask, evsel->attr.config); return 0; @@ -501,7 +501,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, struct intel_pt_recording *ptr = container_of(itr, struct intel_pt_recording, itr); struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; - bool have_timing_info; + bool have_timing_info, need_immediate = false; struct perf_evsel *evsel, *intel_pt_evsel = NULL; const struct cpu_map *cpus = evlist->cpus; bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; @@ -511,7 +511,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, ptr->evlist = evlist; ptr->snapshot_mode = opts->auxtrace_snapshot_mode; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type == intel_pt_pmu->type) { if (intel_pt_evsel) { pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n"); @@ -655,6 +655,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, ptr->have_sched_switch = 3; } else { opts->record_switch_events = true; + need_immediate = true; if (cpu_wide) ptr->have_sched_switch = 3; else @@ -700,6 +701,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, tracking_evsel->attr.freq = 0; tracking_evsel->attr.sample_period = 1; + if (need_immediate) + tracking_evsel->immediate = true; + /* In per-cpu case, always need the time of mmap events etc */ if (!cpu_map__empty(cpus)) { perf_evsel__set_sample_bit(tracking_evsel, TIME); @@ -725,7 +729,7 @@ static int intel_pt_snapshot_start(struct auxtrace_record *itr) container_of(itr, struct intel_pt_recording, itr); struct perf_evsel *evsel; - evlist__for_each(ptr->evlist, evsel) { + evlist__for_each_entry(ptr->evlist, evsel) { if (evsel->attr.type == ptr->intel_pt_pmu->type) return perf_evsel__disable(evsel); } @@ -738,7 +742,7 @@ static int intel_pt_snapshot_finish(struct auxtrace_record *itr) container_of(itr, struct intel_pt_recording, itr); struct perf_evsel *evsel; - evlist__for_each(ptr->evlist, evsel) { + evlist__for_each_entry(ptr->evlist, evsel) { if (evsel->attr.type == ptr->intel_pt_pmu->type) return perf_evsel__enable(evsel); } @@ -1011,7 +1015,7 @@ static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) container_of(itr, struct intel_pt_recording, itr); struct perf_evsel *evsel; - evlist__for_each(ptr->evlist, evsel) { + evlist__for_each_entry(ptr->evlist, evsel) { if (evsel->attr.type == ptr->intel_pt_pmu->type) return perf_evlist__enable_event_idx(ptr->evlist, evsel, idx); @@ -1027,6 +1031,11 @@ struct auxtrace_record *intel_pt_recording_init(int *err) if (!intel_pt_pmu) return NULL; + if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { + *err = -errno; + return NULL; + } + ptr = zalloc(sizeof(struct intel_pt_recording)); if (!ptr) { *err = -ENOMEM; diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index fd2868490d00..2e5567c94e09 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -7,7 +7,6 @@ #include <linux/types.h> #include "../../util/debug.h" #include "../../util/tsc.h" -#include "tsc.h" int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion *tc) @@ -46,3 +45,36 @@ u64 rdtsc(void) return low | ((u64)high) << 32; } + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event = { + .time_conv = { + .header = { + .type = PERF_RECORD_TIME_CONV, + .size = sizeof(struct time_conv_event), + }, + }, + }; + struct perf_tsc_conversion tc; + int err; + + if (!pc) + return 0; + err = perf_read_tsc_conversion(pc, &tc); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + pr_debug2("Synthesizing TSC conversion information\n"); + + event.time_conv.time_mult = tc.time_mult; + event.time_conv.time_shift = tc.time_shift; + event.time_conv.time_zero = tc.time_zero; + + return process(tool, &event, NULL, machine); +} diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h deleted file mode 100644 index 2edc4d31065c..000000000000 --- a/tools/perf/arch/x86/util/tsc.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ -#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ - -#include <linux/types.h> - -struct perf_tsc_conversion { - u16 time_shift; - u32 time_mult; - u64 time_zero; -}; - -struct perf_event_mmap_page; - -int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, - struct perf_tsc_conversion *tc); - -#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */ diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index db25e93d989c..4f16661cbdbb 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -1,12 +1,14 @@ +#ifndef REMOTE_UNWIND_LIBUNWIND #include <errno.h> #include <libunwind.h> #include "perf_regs.h" #include "../../util/unwind.h" #include "../../util/debug.h" +#endif #ifdef HAVE_ARCH_X86_64_SUPPORT -int libunwind__arch_reg_id(int regnum) +int LIBUNWIND__ARCH_REG_ID(int regnum) { int id; @@ -70,7 +72,7 @@ int libunwind__arch_reg_id(int regnum) return id; } #else -int libunwind__arch_reg_id(int regnum) +int LIBUNWIND__ARCH_REG_ID(int regnum) { int id; diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 0999ac536d86..8024cd5febd2 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -8,18 +8,23 @@ * many threads and futexes as possible. */ -#include "../perf.h" -#include "../util/util.h" +/* For the CLR_() macros */ +#include <pthread.h> + +#include <errno.h> +#include <signal.h> +#include <stdlib.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <sys/time.h> + #include "../util/stat.h" #include <subcmd/parse-options.h> -#include "../util/header.h" #include "bench.h" #include "futex.h" #include <err.h> -#include <stdlib.h> #include <sys/time.h> -#include <pthread.h> static unsigned int nthreads = 0; static unsigned int nsecs = 10; diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 6a18ce21f865..936d89d30483 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -2,18 +2,21 @@ * Copyright (C) 2015 Davidlohr Bueso. */ -#include "../perf.h" -#include "../util/util.h" +/* For the CLR_() macros */ +#include <pthread.h> + +#include <signal.h> #include "../util/stat.h" #include <subcmd/parse-options.h> -#include "../util/header.h" +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <errno.h> #include "bench.h" #include "futex.h" #include <err.h> #include <stdlib.h> #include <sys/time.h> -#include <pthread.h> struct worker { int tid; @@ -83,7 +86,7 @@ static void *workerfn(void *arg) do { int ret; again: - ret = futex_lock_pi(w->futex, NULL, 0, futex_flag); + ret = futex_lock_pi(w->futex, NULL, futex_flag); if (ret) { /* handle lock acquisition */ if (!silent) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 718238683013..f96e22ed9f87 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -8,18 +8,21 @@ * requeues without waking up any tasks -- thus mimicking a regular futex_wait. */ -#include "../perf.h" -#include "../util/util.h" +/* For the CLR_() macros */ +#include <pthread.h> + +#include <signal.h> #include "../util/stat.h" #include <subcmd/parse-options.h> -#include "../util/header.h" +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <errno.h> #include "bench.h" #include "futex.h" #include <err.h> #include <stdlib.h> #include <sys/time.h> -#include <pthread.h> static u_int32_t futex1 = 0, futex2 = 0; diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 91aaf2a1fa90..4a2ecd7438ca 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -7,18 +7,21 @@ * it can be used to measure futex_wake() changes. */ -#include "../perf.h" -#include "../util/util.h" +/* For the CLR_() macros */ +#include <pthread.h> + +#include <signal.h> #include "../util/stat.h" #include <subcmd/parse-options.h> -#include "../util/header.h" +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <errno.h> #include "bench.h" #include "futex.h" #include <err.h> #include <stdlib.h> #include <sys/time.h> -#include <pthread.h> struct thread_data { pthread_t worker; diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index f416bd705f66..87d8f4f292d9 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -8,18 +8,21 @@ * one or more tasks, and thus the waitqueue is never empty. */ -#include "../perf.h" -#include "../util/util.h" +/* For the CLR_() macros */ +#include <pthread.h> + +#include <signal.h> #include "../util/stat.h" #include <subcmd/parse-options.h> -#include "../util/header.h" +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <errno.h> #include "bench.h" #include "futex.h" #include <err.h> #include <stdlib.h> #include <sys/time.h> -#include <pthread.h> /* all threads will block on the same futex */ static u_int32_t futex1 = 0; diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index d44de9f44281..b2e06d1190d0 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -57,13 +57,11 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) /** * futex_lock_pi() - block on uaddr as a PI mutex - * @detect: whether (1) or not (0) to perform deadlock detection */ static inline int -futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect, - int opflags) +futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags) { - return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); + return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags); } /** diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index a91aa85d80ff..2b54d0f2672a 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -6,6 +6,7 @@ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> */ +#include "debug.h" #include "../perf.h" #include "../util/util.h" #include <subcmd/parse-options.h> @@ -63,14 +64,16 @@ static struct perf_event_attr cycle_attr = { .config = PERF_COUNT_HW_CPU_CYCLES }; -static void init_cycles(void) +static int init_cycles(void) { cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); - if (cycles_fd < 0 && errno == ENOSYS) - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - else - BUG_ON(cycles_fd < 0); + if (cycles_fd < 0 && errno == ENOSYS) { + pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + return -1; + } + + return cycles_fd; } static u64 get_cycles(void) @@ -155,8 +158,13 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * argc = parse_options(argc, argv, options, info->usage, 0); - if (use_cycles) - init_cycles(); + if (use_cycles) { + i = init_cycles(); + if (i < 0) { + fprintf(stderr, "Failed to open cycles counter\n"); + return i; + } + } size = (size_t)perf_atoll((char *)size_str); size_total = (double)size * nr_loops; diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 5c3cce082cb8..f700369bb0f6 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -6,7 +6,7 @@ #define globl p2align 4; .globl #define _ASM_EXTABLE_FAULT(x, y) -#include "../../../arch/x86/lib/memcpy_64.S" +#include "../../arch/x86/lib/memcpy_64.S" /* * We need to provide note.GNU-stack section, saying that we want * NOT executable stack. Otherwise the final linking will assume that diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index de278784c866..58407aa24c1b 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,7 +1,7 @@ #define memset MEMSET /* don't hide glibc's memset() */ #define altinstr_replacement text #define globl p2align 4; .globl -#include "../../../arch/x86/lib/memset_64.S" +#include "../../arch/x86/lib/memset_64.S" /* * We need to provide note.GNU-stack section, saying that we want diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 7500d959d7eb..f7f530081aa9 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -4,6 +4,9 @@ * numa: Simulate NUMA-sensitive workload and measure their NUMA performance */ +/* For the CLR_() macros */ +#include <pthread.h> + #include "../perf.h" #include "../builtin.h" #include "../util/util.h" @@ -21,7 +24,6 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <pthread.h> #include <sys/mman.h> #include <sys/time.h> #include <sys/resource.h> diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 814158393656..9c1034d81b4f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -75,7 +75,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, sample->period = 1; sample->weight = 1; - he = __hists__add_entry(hists, al, NULL, NULL, NULL, sample, true); + he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true); if (he == NULL) return -ENOMEM; @@ -236,7 +236,7 @@ static int __cmd_annotate(struct perf_annotate *ann) perf_session__fprintf_dsos(session, stdout); total_nr_samples = 0; - evlist__for_each(session->evlist, pos) { + evlist__for_each_entry(session->evlist, pos) { struct hists *hists = evsel__hists(pos); u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; @@ -324,8 +324,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, "Skip symbols that cannot be annotated"), OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, "Interleave source code with assembly code (default)"), OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, @@ -338,6 +339,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) "Show event group information together"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), + OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode", + "'always' (default), 'never' or 'auto' only applicable to --stdio mode", + stdio__config_color, "always"), OPT_END() }; int ret = hists__init(); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 632efc6b79a0..30e2b2cb2421 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -119,8 +119,8 @@ static int build_id_cache__add_kcore(const char *filename, bool force) if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0) return -1; - scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", - buildid_dir, sbuildid); + scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s", + buildid_dir, DSO__NAME_KCORE, sbuildid); if (!force && !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { @@ -131,8 +131,8 @@ static int build_id_cache__add_kcore(const char *filename, bool force) if (build_id_cache__kcore_dir(dir, sizeof(dir))) return -1; - scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", - buildid_dir, sbuildid, dir); + scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s/%s", + buildid_dir, DSO__NAME_KCORE, sbuildid, dir); if (mkdir_p(to_dir, 0755)) return -1; @@ -209,7 +209,7 @@ static int build_id_cache__purge_path(const char *pathname) if (err) goto out; - strlist__for_each(pos, list) { + strlist__for_each_entry(pos, list) { err = build_id_cache__remove_s(pos->s); pr_debug("Removing %s %s: %s\n", pos->s, pathname, err ? "FAIL" : "Ok"); @@ -343,7 +343,7 @@ int cmd_buildid_cache(int argc, const char **argv, if (add_name_list_str) { list = strlist__new(add_name_list_str, NULL); if (list) { - strlist__for_each(pos, list) + strlist__for_each_entry(pos, list) if (build_id_cache__add_file(pos->s)) { if (errno == EEXIST) { pr_debug("%s already in the cache\n", @@ -351,7 +351,7 @@ int cmd_buildid_cache(int argc, const char **argv, continue; } pr_warning("Couldn't add %s: %s\n", - pos->s, strerror_r(errno, sbuf, sizeof(sbuf))); + pos->s, str_error_r(errno, sbuf, sizeof(sbuf))); } strlist__delete(list); @@ -361,7 +361,7 @@ int cmd_buildid_cache(int argc, const char **argv, if (remove_name_list_str) { list = strlist__new(remove_name_list_str, NULL); if (list) { - strlist__for_each(pos, list) + strlist__for_each_entry(pos, list) if (build_id_cache__remove_file(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", @@ -369,7 +369,7 @@ int cmd_buildid_cache(int argc, const char **argv, continue; } pr_warning("Couldn't remove %s: %s\n", - pos->s, strerror_r(errno, sbuf, sizeof(sbuf))); + pos->s, str_error_r(errno, sbuf, sizeof(sbuf))); } strlist__delete(list); @@ -379,7 +379,7 @@ int cmd_buildid_cache(int argc, const char **argv, if (purge_name_list_str) { list = strlist__new(purge_name_list_str, NULL); if (list) { - strlist__for_each(pos, list) + strlist__for_each_entry(pos, list) if (build_id_cache__purge_path(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", @@ -387,7 +387,7 @@ int cmd_buildid_cache(int argc, const char **argv, continue; } pr_warning("Couldn't remove %s: %s\n", - pos->s, strerror_r(errno, sbuf, sizeof(sbuf))); + pos->s, str_error_r(errno, sbuf, sizeof(sbuf))); } strlist__delete(list); @@ -400,7 +400,7 @@ int cmd_buildid_cache(int argc, const char **argv, if (update_name_list_str) { list = strlist__new(update_name_list_str, NULL); if (list) { - strlist__for_each(pos, list) + strlist__for_each_entry(pos, list) if (build_id_cache__update_file(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", @@ -408,7 +408,7 @@ int cmd_buildid_cache(int argc, const char **argv, continue; } pr_warning("Couldn't update %s: %s\n", - pos->s, strerror_r(errno, sbuf, sizeof(sbuf))); + pos->s, str_error_r(errno, sbuf, sizeof(sbuf))); } strlist__delete(list); @@ -419,8 +419,7 @@ int cmd_buildid_cache(int argc, const char **argv, pr_warning("Couldn't add %s\n", kcore_filename); out: - if (session) - perf_session__delete(session); + perf_session__delete(session); return ret; } diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index c42448ed5dfe..e4207a23b52c 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -12,6 +12,7 @@ #include <subcmd/parse-options.h> #include "util/util.h" #include "util/debug.h" +#include "util/config.h" static bool use_system_config, use_user_config; @@ -32,13 +33,21 @@ static struct option config_options[] = { OPT_END() }; -static int show_config(const char *key, const char *value, - void *cb __maybe_unused) +static int show_config(struct perf_config_set *set) { - if (value) - printf("%s=%s\n", key, value); - else - printf("%s\n", key); + struct perf_config_section *section; + struct perf_config_item *item; + + if (set == NULL) + return -1; + + perf_config_set__for_each_entry(set, section, item) { + char *value = item->value; + + if (value) + printf("%s.%s=%s\n", section->name, + item->name, value); + } return 0; } @@ -46,6 +55,7 @@ static int show_config(const char *key, const char *value, int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) { int ret = 0; + struct perf_config_set *set; char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); argc = parse_options(argc, argv, config_options, config_usage, @@ -63,13 +73,23 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) else if (use_user_config) config_exclusive_filename = user_config; + /* + * At only 'config' sub-command, individually use the config set + * because of reinitializing with options config file location. + */ + set = perf_config_set__new(); + if (!set) { + ret = -1; + goto out_err; + } + switch (actions) { case ACTION_LIST: if (argc) { pr_err("Error: takes no arguments\n"); parse_options_usage(config_usage, config_options, "l", 1); } else { - ret = perf_config(show_config, NULL); + ret = show_config(set); if (ret < 0) { const char * config_filename = config_exclusive_filename; if (!config_exclusive_filename) @@ -83,5 +103,7 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(config_usage, config_options); } + perf_config_set__delete(set); +out_err: return ret; } diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index b97bc1518b44..7ad6e17ac6b3 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -3,6 +3,7 @@ #include "perf.h" #include "debug.h" #include <subcmd/parse-options.h> +#include "data-convert.h" #include "data-convert-bt.h" typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); @@ -53,14 +54,18 @@ static int cmd_data_convert(int argc, const char **argv, const char *prefix __maybe_unused) { const char *to_ctf = NULL; - bool force = false; + struct perf_data_convert_opts opts = { + .force = false, + .all = false, + }; const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_STRING('i', "input", &input_name, "file", "input file name"), #ifdef HAVE_LIBBABELTRACE_SUPPORT OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), #endif - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"), + OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"), OPT_END() }; @@ -78,7 +83,7 @@ static int cmd_data_convert(int argc, const char **argv, if (to_ctf) { #ifdef HAVE_LIBBABELTRACE_SUPPORT - return bt_convert__perf2ctf(input_name, to_ctf, force); + return bt_convert__perf2ctf(input_name, to_ctf, &opts); #else pr_err("The libbabeltrace support is not compiled in.\n"); return -1; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 8053a8ceefda..21ee753211ad 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -310,16 +310,6 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair, return -1; } -static int hists__add_entry(struct hists *hists, - struct addr_location *al, - struct perf_sample *sample) -{ - if (__hists__add_entry(hists, al, NULL, NULL, NULL, - sample, true) != NULL) - return 0; - return -ENOMEM; -} - static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -336,7 +326,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } - if (hists__add_entry(hists, &al, sample)) { + if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, true)) { pr_warning("problem incrementing symbol period, skipping event\n"); goto out_put; } @@ -373,7 +363,7 @@ static struct perf_evsel *evsel_match(struct perf_evsel *evsel, { struct perf_evsel *e; - evlist__for_each(evlist, e) { + evlist__for_each_entry(evlist, e) { if (perf_evsel__match2(evsel, e)) return e; } @@ -385,7 +375,7 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); @@ -428,7 +418,7 @@ static void hists__baseline_only(struct hists *hists) struct rb_root *root; struct rb_node *next; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -450,7 +440,7 @@ static void hists__precompute(struct hists *hists) struct rb_root *root; struct rb_node *next; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -666,7 +656,8 @@ static void hists__process(struct hists *hists) hists__precompute(hists); hists__output_resort(hists, NULL); - hists__fprintf(hists, true, 0, 0, 0, stdout); + hists__fprintf(hists, true, 0, 0, 0, stdout, + symbol_conf.use_callchain); } static void data__fprintf(void) @@ -690,7 +681,7 @@ static void data_process(void) struct perf_evsel *evsel_base; bool first = true; - evlist__for_each(evlist_base, evsel_base) { + evlist__for_each_entry(evlist_base, evsel_base) { struct hists *hists_base = evsel__hists(evsel_base); struct data__file *d; int i; @@ -765,9 +756,7 @@ static int __cmd_diff(void) out_delete: data__for_each_file(i, d) { - if (d->session) - perf_session__delete(d->session); - + perf_session__delete(d->session); data__free(d); } @@ -812,8 +801,9 @@ static const struct option options[] = { OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."), OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", "How to display percentage of filtered entries", parse_filter_percentage), @@ -1043,7 +1033,7 @@ static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp, } static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct perf_evsel *evsel __maybe_unused) + struct hists *hists __maybe_unused) { struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); @@ -1054,7 +1044,7 @@ static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, static int hpp__width(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, - struct perf_evsel *evsel __maybe_unused) + struct hists *hists __maybe_unused) { struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 8a31f511e1a0..e09c4287fe87 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -32,7 +32,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details if (session == NULL) return -1; - evlist__for_each(session->evlist, pos) { + evlist__for_each_entry(session->evlist, pos) { perf_evsel__fprintf(pos, details, stdout); if (pos->attr.type == PERF_TYPE_TRACEPOINT) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index bc1de9b8fd67..3bdb2c78a21b 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -4,7 +4,7 @@ * Builtin help command */ #include "perf.h" -#include "util/cache.h" +#include "util/config.h" #include "builtin.h" #include <subcmd/exec-cmd.h> #include "common-cmds.h" @@ -61,6 +61,7 @@ static int check_emacsclient_version(void) struct child_process ec_process; const char *argv_ec[] = { "emacsclient", "--version", NULL }; int version; + int ret = -1; /* emacsclient prints its version number on stderr */ memset(&ec_process, 0, sizeof(ec_process)); @@ -71,7 +72,10 @@ static int check_emacsclient_version(void) fprintf(stderr, "Failed to start emacsclient.\n"); return -1; } - strbuf_read(&buffer, ec_process.err, 20); + if (strbuf_read(&buffer, ec_process.err, 20) < 0) { + fprintf(stderr, "Failed to read emacsclient version\n"); + goto out; + } close(ec_process.err); /* @@ -82,8 +86,7 @@ static int check_emacsclient_version(void) if (prefixcmp(buffer.buf, "emacsclient")) { fprintf(stderr, "Failed to parse emacsclient version.\n"); - strbuf_release(&buffer); - return -1; + goto out; } version = atoi(buffer.buf + strlen("emacsclient")); @@ -92,12 +95,11 @@ static int check_emacsclient_version(void) fprintf(stderr, "emacsclient version '%d' too old (< 22).\n", version); - strbuf_release(&buffer); - return -1; - } - + } else + ret = 0; +out: strbuf_release(&buffer); - return 0; + return ret; } static void exec_woman_emacs(const char *path, const char *page) @@ -115,7 +117,7 @@ static void exec_woman_emacs(const char *path, const char *page) free(man_page); } warning("failed to exec '%s': %s", path, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); } } @@ -148,7 +150,7 @@ static void exec_man_konqueror(const char *path, const char *page) free(man_page); } warning("failed to exec '%s': %s", path, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); } } @@ -160,7 +162,7 @@ static void exec_man_man(const char *path, const char *page) path = "man"; execlp(path, "man", page, NULL); warning("failed to exec '%s': %s", path, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); } static void exec_man_cmd(const char *cmd, const char *page) @@ -173,7 +175,7 @@ static void exec_man_cmd(const char *cmd, const char *page) free(shell_cmd); } warning("failed to exec '%s': %s", cmd, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); } static void add_man_viewer(const char *name) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index d1a2d104f2bc..73c1c4cc3600 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -562,7 +562,7 @@ static void strip_init(struct perf_inject *inject) inject->tool.context_switch = perf_event__drop; - evlist__for_each(evlist, evsel) + evlist__for_each_entry(evlist, evsel) evsel->handler = drop_sample; } @@ -590,7 +590,7 @@ static bool ok_to_remove(struct perf_evlist *evlist, if (!has_tracking(evsel_to_remove)) return true; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->handler != drop_sample) { cnt += 1; if ((evsel->attr.sample_type & COMPAT_MASK) == @@ -608,7 +608,7 @@ static void strip_fini(struct perf_inject *inject) struct perf_evsel *evsel, *tmp; /* Remove non-synthesized evsels if possible */ - evlist__for_each_safe(evlist, tmp, evsel) { + evlist__for_each_entry_safe(evlist, tmp, evsel) { if (evsel->handler == drop_sample && ok_to_remove(evlist, evsel)) { pr_debug("Deleting %s\n", perf_evsel__name(evsel)); @@ -643,7 +643,7 @@ static int __cmd_inject(struct perf_inject *inject) } else if (inject->sched_stat) { struct perf_evsel *evsel; - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { const char *name = perf_evsel__name(evsel); if (!strcmp(name, "sched:sched_switch")) { @@ -748,6 +748,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .auxtrace_info = perf_event__repipe_op2_synth, .auxtrace = perf_event__repipe_auxtrace, .auxtrace_error = perf_event__repipe_op2_synth, + .time_conv = perf_event__repipe_op2_synth, .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index c9cb3be47cff..fdde1bd3e306 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -4,7 +4,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/util.h" -#include "util/cache.h" +#include "util/config.h" #include "util/symbol.h" #include "util/thread.h" #include "util/header.h" @@ -375,7 +375,7 @@ static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample) } al.thread = machine__findnew_thread(machine, sample->pid, sample->tid); - sample__resolve_callchain(sample, NULL, evsel, &al, 16); + sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16); callchain_cursor_commit(&callchain_cursor); while (true) { @@ -608,6 +608,7 @@ static const struct { const char *compact; } gfp_compact_table[] = { { "GFP_TRANSHUGE", "THP" }, + { "GFP_TRANSHUGE_LIGHT", "THL" }, { "GFP_HIGHUSER_MOVABLE", "HUM" }, { "GFP_HIGHUSER", "HU" }, { "GFP_USER", "U" }, @@ -1354,7 +1355,7 @@ static int __cmd_kmem(struct perf_session *session) goto out; } - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") && perf_evsel__field(evsel, "pfn")) { use_pfn = true; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index bff666458b28..5e2127e04f83 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -982,13 +982,13 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) struct perf_evlist *evlist = kvm->evlist; char sbuf[STRERR_BUFSIZE]; - perf_evlist__config(evlist, &kvm->opts); + perf_evlist__config(evlist, &kvm->opts, NULL); /* * Note: exclude_{guest,host} do not apply here. * This command processes KVM tracepoints from host only */ - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { struct perf_event_attr *attr = &pos->attr; /* make sure these *are* set */ @@ -1018,13 +1018,13 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) err = perf_evlist__open(evlist); if (err < 0) { printf("Couldn't create the events: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out; } if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) { ui__error("Failed to mmap the events: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); perf_evlist__close(evlist); goto out; } @@ -1426,11 +1426,9 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, err = kvm_events_live_report(kvm); out: - if (kvm->session) - perf_session__delete(kvm->session); + perf_session__delete(kvm->session); kvm->session = NULL; - if (kvm->evlist) - perf_evlist__delete(kvm->evlist); + perf_evlist__delete(kvm->evlist); return err; } diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 5e22db4684b8..88ee419e5189 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -25,7 +25,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) OPT_END() }; const char * const list_usage[] = { - "perf list [hw|sw|cache|tracepoint|pmu|event_glob]", + "perf list [hw|sw|cache|tracepoint|pmu|sdt|event_glob]", NULL }; @@ -62,6 +62,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump); + else if (strcmp(argv[i], "sdt") == 0) + print_sdt_events(NULL, NULL, raw_dump); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; @@ -76,6 +78,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); + print_sdt_events(s, s + sep_idx + 1, raw_dump); free(s); } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { @@ -89,6 +92,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(s, raw_dump); print_pmu_events(s, raw_dump); print_tracepoint_events(NULL, s, raw_dump); + print_sdt_events(NULL, s, raw_dump); free(s); } } diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 85db3be4b3cb..d1ce29be560e 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -62,19 +62,23 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) int rec_argc, i = 0, j; const char **rec_argv; int ret; + bool all_user = false, all_kernel = false; struct option options[] = { OPT_CALLBACK('e', "event", &mem, "event", "event selector. use 'perf mem record -e list' to list available events", parse_record_events), + OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), OPT_END() }; argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); - rec_argc = argc + 7; /* max number of arguments */ + rec_argc = argc + 9; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; @@ -84,6 +88,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (mem->operation & MEM_OPERATION_LOAD) perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + if (mem->operation & MEM_OPERATION_STORE) + perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) rec_argv[i++] = "-W"; @@ -103,6 +110,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = perf_mem_events__name(j); }; + if (all_user) + rec_argv[i++] = "--all-user"; + + if (all_kernel) + rec_argv[i++] = "--all-kernel"; + for (j = 0; j < argc; j++, i++) rec_argv[i] = argv[j]; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 9af859b28b15..ee5b42173ba3 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -44,7 +44,7 @@ #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" #define DEFAULT_FUNC_FILTER "!_*" -#define DEFAULT_LIST_FILTER "*:*" +#define DEFAULT_LIST_FILTER "*" /* Session management structure */ static struct { @@ -308,7 +308,7 @@ static void pr_err_with_code(const char *msg, int err) pr_err("%s", msg); pr_debug(" Reason: %s (Code: %d)", - strerror_r(-err, sbuf, sizeof(sbuf)), err); + str_error_r(-err, sbuf, sizeof(sbuf)), err); pr_err("\n"); } @@ -363,6 +363,32 @@ out_cleanup: return ret; } +static int del_perf_probe_caches(struct strfilter *filter) +{ + struct probe_cache *cache; + struct strlist *bidlist; + struct str_node *nd; + int ret; + + bidlist = build_id_cache__list_all(false); + if (!bidlist) { + ret = -errno; + pr_debug("Failed to get buildids: %d\n", ret); + return ret ?: -ENOMEM; + } + + strlist__for_each_entry(nd, bidlist) { + cache = probe_cache__new(nd->s); + if (!cache) + continue; + if (probe_cache__filter_purge(cache, filter) < 0 || + probe_cache__commit(cache) < 0) + pr_warning("Failed to remove entries for %s\n", nd->s); + probe_cache__delete(cache); + } + return 0; +} + static int perf_del_probe_events(struct strfilter *filter) { int ret, ret2, ufd = -1, kfd = -1; @@ -375,6 +401,9 @@ static int perf_del_probe_events(struct strfilter *filter) pr_debug("Delete filter: \'%s\'\n", str); + if (probe_conf.cache) + return del_perf_probe_caches(filter); + /* Get current event names */ ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); if (ret < 0) @@ -389,7 +418,7 @@ static int perf_del_probe_events(struct strfilter *filter) ret = probe_file__get_events(kfd, filter, klist); if (ret == 0) { - strlist__for_each(ent, klist) + strlist__for_each_entry(ent, klist) pr_info("Removed event: %s\n", ent->s); ret = probe_file__del_strlist(kfd, klist); @@ -399,7 +428,7 @@ static int perf_del_probe_events(struct strfilter *filter) ret2 = probe_file__get_events(ufd, filter, ulist); if (ret2 == 0) { - strlist__for_each(ent, ulist) + strlist__for_each_entry(ent, ulist) pr_info("Removed event: %s\n", ent->s); ret2 = probe_file__del_strlist(ufd, ulist); @@ -512,6 +541,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) "Enable symbol demangling"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), + OPT_BOOLEAN(0, "cache", &probe_conf.cache, "Manipulate probe cache"), OPT_END() }; int ret; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 515510ecc76a..6355902fbfc8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -13,6 +13,7 @@ #include "util/util.h" #include <subcmd/parse-options.h> #include "util/parse-events.h" +#include "util/config.h" #include "util/callchain.h" #include "util/cgroup.h" @@ -29,15 +30,18 @@ #include "util/data.h" #include "util/perf_regs.h" #include "util/auxtrace.h" +#include "util/tsc.h" #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" #include "util/llvm-utils.h" #include "util/bpf-loader.h" +#include "util/trigger.h" #include "asm/bug.h" #include <unistd.h> #include <sched.h> #include <sys/mman.h> +#include <asm/bug.h> struct record { @@ -55,6 +59,8 @@ struct record { bool no_buildid_cache; bool no_buildid_cache_set; bool buildid_all; + bool timestamp_filename; + bool switch_output; unsigned long long samples; }; @@ -78,27 +84,87 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, event, event->header.size); } -static int record__mmap_read(struct record *rec, int idx) +static int +backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +{ + struct perf_event_header *pheader; + u64 evt_head = head; + int size = mask + 1; + + pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); + pheader = (struct perf_event_header *)(buf + (head & mask)); + *start = head; + while (true) { + if (evt_head - head >= (unsigned int)size) { + pr_debug("Finshed reading backward ring buffer: rewind\n"); + if (evt_head - head > (unsigned int)size) + evt_head -= pheader->size; + *end = evt_head; + return 0; + } + + pheader = (struct perf_event_header *)(buf + (evt_head & mask)); + + if (pheader->size == 0) { + pr_debug("Finshed reading backward ring buffer: get start\n"); + *end = evt_head; + return 0; + } + + evt_head += pheader->size; + pr_debug3("move evt_head: %"PRIx64"\n", evt_head); + } + WARN_ONCE(1, "Shouldn't get here\n"); + return -1; +} + +static int +rb_find_range(void *data, int mask, u64 head, u64 old, + u64 *start, u64 *end, bool backward) +{ + if (!backward) { + *start = old; + *end = head; + return 0; + } + + return backward_rb_find_range(data, mask, head, start, end); +} + +static int +record__mmap_read(struct record *rec, struct perf_mmap *md, + bool overwrite, bool backward) { - struct perf_mmap *md = &rec->evlist->mmap[idx]; u64 head = perf_mmap__read_head(md); u64 old = md->prev; + u64 end = head, start = old; unsigned char *data = md->base + page_size; unsigned long size; void *buf; int rc = 0; - if (old == head) + if (rb_find_range(data, md->mask, head, + old, &start, &end, backward)) + return -1; + + if (start == end) return 0; rec->samples++; - size = head - old; + size = end - start; + if (size > (unsigned long)(md->mask) + 1) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + + md->prev = head; + perf_mmap__consume(md, overwrite || backward); + return 0; + } - if ((old & md->mask) + size != (head & md->mask)) { - buf = &data[old & md->mask]; - size = md->mask + 1 - (old & md->mask); - old += size; + if ((start & md->mask) + size != (end & md->mask)) { + buf = &data[start & md->mask]; + size = md->mask + 1 - (start & md->mask); + start += size; if (record__write(rec, buf, size) < 0) { rc = -1; @@ -106,17 +172,17 @@ static int record__mmap_read(struct record *rec, int idx) } } - buf = &data[old & md->mask]; - size = head - old; - old += size; + buf = &data[start & md->mask]; + size = end - start; + start += size; if (record__write(rec, buf, size) < 0) { rc = -1; goto out; } - md->prev = old; - perf_evlist__mmap_consume(rec->evlist, idx); + md->prev = head; + perf_mmap__consume(md, overwrite || backward); out: return rc; } @@ -124,9 +190,10 @@ out: static volatile int done; static volatile int signr = -1; static volatile int child_finished; -static volatile int auxtrace_snapshot_enabled; -static volatile int auxtrace_snapshot_err; + static volatile int auxtrace_record__snapshot_started; +static DEFINE_TRIGGER(auxtrace_snapshot_trigger); +static DEFINE_TRIGGER(switch_output_trigger); static void sig_handler(int sig) { @@ -244,11 +311,12 @@ static void record__read_auxtrace_snapshot(struct record *rec) { pr_debug("Recording AUX area tracing snapshot\n"); if (record__auxtrace_read_snapshot_all(rec) < 0) { - auxtrace_snapshot_err = -1; + trigger_error(&auxtrace_snapshot_trigger); } else { - auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr); - if (!auxtrace_snapshot_err) - auxtrace_snapshot_enabled = 1; + if (auxtrace_record__snapshot_finish(rec->itr)) + trigger_error(&auxtrace_snapshot_trigger); + else + trigger_ready(&auxtrace_snapshot_trigger); } } @@ -274,6 +342,40 @@ int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) #endif +static int record__mmap_evlist(struct record *rec, + struct perf_evlist *evlist) +{ + struct record_opts *opts = &rec->opts; + char msg[512]; + + if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false, + opts->auxtrace_mmap_pages, + opts->auxtrace_snapshot_mode) < 0) { + if (errno == EPERM) { + pr_err("Permission error mapping pages.\n" + "Consider increasing " + "/proc/sys/kernel/perf_event_mlock_kb,\n" + "or try again with a smaller value of -m/--mmap_pages.\n" + "(current value: %u,%u)\n", + opts->mmap_pages, opts->auxtrace_mmap_pages); + return -errno; + } else { + pr_err("failed to mmap with %d (%s)\n", errno, + str_error_r(errno, msg, sizeof(msg))); + if (errno) + return -errno; + else + return -EINVAL; + } + } + return 0; +} + +static int record__mmap(struct record *rec) +{ + return record__mmap_evlist(rec, rec->evlist); +} + static int record__open(struct record *rec) { char msg[512]; @@ -283,9 +385,9 @@ static int record__open(struct record *rec) struct record_opts *opts = &rec->opts; int rc = 0; - perf_evlist__config(evlist, opts); + perf_evlist__config(evlist, opts, &callchain_param); - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { try_again: if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { @@ -305,32 +407,14 @@ try_again: if (perf_evlist__apply_filters(evlist, &pos)) { error("failed to set filter \"%s\" on event %s with %d (%s)\n", pos->filter, perf_evsel__name(pos), errno, - strerror_r(errno, msg, sizeof(msg))); + str_error_r(errno, msg, sizeof(msg))); rc = -1; goto out; } - if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false, - opts->auxtrace_mmap_pages, - opts->auxtrace_snapshot_mode) < 0) { - if (errno == EPERM) { - pr_err("Permission error mapping pages.\n" - "Consider increasing " - "/proc/sys/kernel/perf_event_mlock_kb,\n" - "or try again with a smaller value of -m/--mmap_pages.\n" - "(current value: %u,%u)\n", - opts->mmap_pages, opts->auxtrace_mmap_pages); - rc = -errno; - } else { - pr_err("failed to mmap with %d (%s)\n", errno, - strerror_r(errno, msg, sizeof(msg))); - if (errno) - rc = -errno; - else - rc = -EINVAL; - } + rc = record__mmap(rec); + if (rc) goto out; - } session->evlist = evlist; perf_session__set_id_hdr_size(session); @@ -414,17 +498,30 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; -static int record__mmap_read_all(struct record *rec) +static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, + bool backward) { u64 bytes_written = rec->bytes_written; int i; int rc = 0; + struct perf_mmap *maps; - for (i = 0; i < rec->evlist->nr_mmaps; i++) { - struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap; + if (!evlist) + return 0; - if (rec->evlist->mmap[i].base) { - if (record__mmap_read(rec, i) != 0) { + maps = backward ? evlist->backward_mmap : evlist->mmap; + if (!maps) + return 0; + + if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) + return 0; + + for (i = 0; i < evlist->nr_mmaps; i++) { + struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; + + if (maps[i].base) { + if (record__mmap_read(rec, &maps[i], + evlist->overwrite, backward) != 0) { rc = -1; goto out; } @@ -444,10 +541,23 @@ static int record__mmap_read_all(struct record *rec) if (bytes_written != rec->bytes_written) rc = record__write(rec, &finished_round_event, sizeof(finished_round_event)); + if (backward) + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); out: return rc; } +static int record__mmap_read_all(struct record *rec) +{ + int err; + + err = record__mmap_read_evlist(rec, rec->evlist, false); + if (err) + return err; + + return record__mmap_read_evlist(rec, rec->evlist, true); +} + static void record__init_features(struct record *rec) { struct perf_session *session = rec->session; @@ -494,6 +604,80 @@ record__finish_output(struct record *rec) return; } +static int record__synthesize_workload(struct record *rec, bool tail) +{ + struct { + struct thread_map map; + struct thread_map_data map_data; + } thread_map; + + if (rec->opts.tail_synthesize != tail) + return 0; + + thread_map.map.nr = 1; + thread_map.map.map[0].pid = rec->evlist->workload.pid; + thread_map.map.map[0].comm = NULL; + return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map, + process_synthesized_event, + &rec->session->machines.host, + rec->opts.sample_address, + rec->opts.proc_map_timeout); +} + +static int record__synthesize(struct record *rec, bool tail); + +static int +record__switch_output(struct record *rec, bool at_exit) +{ + struct perf_data_file *file = &rec->file; + int fd, err; + + /* Same Size: "2015122520103046"*/ + char timestamp[] = "InvalidTimestamp"; + + record__synthesize(rec, true); + if (target__none(&rec->opts.target)) + record__synthesize_workload(rec, true); + + rec->samples = 0; + record__finish_output(rec); + err = fetch_current_timestamp(timestamp, sizeof(timestamp)); + if (err) { + pr_err("Failed to get current timestamp\n"); + return -EINVAL; + } + + fd = perf_data_file__switch(file, timestamp, + rec->session->header.data_offset, + at_exit); + if (fd >= 0 && !at_exit) { + rec->bytes_written = 0; + rec->session->header.data_size = 0; + } + + if (!quiet) + fprintf(stderr, "[ perf record: Dump %s.%s ]\n", + file->path, timestamp); + + /* Output tracking events */ + if (!at_exit) { + record__synthesize(rec, false); + + /* + * In 'perf record --switch-output' without -a, + * record__synthesize() in record__switch_output() won't + * generate tracking events because there's no thread_map + * in evlist. Which causes newly created perf.data doesn't + * contain map and comm information. + * Create a fake thread_map and directly call + * perf_event__synthesize_thread_map() for those events. + */ + if (target__none(&rec->opts.target)) + record__synthesize_workload(rec, false); + } + return fd; +} + static volatile int workload_exec_errno; /* @@ -512,7 +696,38 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); -static int record__synthesize(struct record *rec) +int __weak +perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, + struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static const struct perf_event_mmap_page * +perf_evlist__pick_pc(struct perf_evlist *evlist) +{ + if (evlist) { + if (evlist->mmap && evlist->mmap[0].base) + return evlist->mmap[0].base; + if (evlist->backward_mmap && evlist->backward_mmap[0].base) + return evlist->backward_mmap[0].base; + } + return NULL; +} + +static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) +{ + const struct perf_event_mmap_page *pc; + + pc = perf_evlist__pick_pc(rec->evlist); + if (pc) + return pc; + return NULL; +} + +static int record__synthesize(struct record *rec, bool tail) { struct perf_session *session = rec->session; struct machine *machine = &session->machines.host; @@ -522,6 +737,9 @@ static int record__synthesize(struct record *rec) int fd = perf_data_file__fd(file); int err = 0; + if (rec->opts.tail_synthesize != tail) + return 0; + if (file->is_pipe) { err = perf_event__synthesize_attrs(tool, session, process_synthesized_event); @@ -549,6 +767,11 @@ static int record__synthesize(struct record *rec) } } + err = perf_event__synth_time_conv(record__pick_pc(rec), tool, + process_synthesized_event, machine); + if (err) + goto out; + if (rec->opts.full_auxtrace) { err = perf_event__synthesize_auxtrace_info(rec->itr, tool, session, process_synthesized_event); @@ -600,10 +823,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); - if (rec->opts.auxtrace_snapshot_mode) + + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) { signal(SIGUSR2, snapshot_sig_handler); - else + if (rec->opts.auxtrace_snapshot_mode) + trigger_on(&auxtrace_snapshot_trigger); + if (rec->switch_output) + trigger_on(&switch_output_trigger); + } else { signal(SIGUSR2, SIG_IGN); + } session = perf_session__new(file, false, tool); if (session == NULL) { @@ -674,7 +903,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) machine = &session->machines.host; - err = record__synthesize(rec); + err = record__synthesize(rec, false); if (err < 0) goto out_child; @@ -729,27 +958,74 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) perf_evlist__enable(rec->evlist); } - auxtrace_snapshot_enabled = 1; + trigger_ready(&auxtrace_snapshot_trigger); + trigger_ready(&switch_output_trigger); for (;;) { unsigned long long hits = rec->samples; + /* + * rec->evlist->bkw_mmap_state is possible to be + * BKW_MMAP_EMPTY here: when done == true and + * hits != rec->samples in previous round. + * + * perf_evlist__toggle_bkw_mmap ensure we never + * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. + */ + if (trigger_is_hit(&switch_output_trigger) || done || draining) + perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); + if (record__mmap_read_all(rec) < 0) { - auxtrace_snapshot_enabled = 0; + trigger_error(&auxtrace_snapshot_trigger); + trigger_error(&switch_output_trigger); err = -1; goto out_child; } if (auxtrace_record__snapshot_started) { auxtrace_record__snapshot_started = 0; - if (!auxtrace_snapshot_err) + if (!trigger_is_error(&auxtrace_snapshot_trigger)) record__read_auxtrace_snapshot(rec); - if (auxtrace_snapshot_err) { + if (trigger_is_error(&auxtrace_snapshot_trigger)) { pr_err("AUX area tracing snapshot failed\n"); err = -1; goto out_child; } } + if (trigger_is_hit(&switch_output_trigger)) { + /* + * If switch_output_trigger is hit, the data in + * overwritable ring buffer should have been collected, + * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. + * + * If SIGUSR2 raise after or during record__mmap_read_all(), + * record__mmap_read_all() didn't collect data from + * overwritable ring buffer. Read again. + */ + if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) + continue; + trigger_ready(&switch_output_trigger); + + /* + * Reenable events in overwrite ring buffer after + * record__mmap_read_all(): we should have collected + * data from it. + */ + perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); + + if (!quiet) + fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", + waking); + waking = 0; + fd = record__switch_output(rec, false); + if (fd < 0) { + pr_err("Failed to switch to new file\n"); + trigger_error(&switch_output_trigger); + err = fd; + goto out_child; + } + } + if (hits == rec->samples) { if (done || draining) break; @@ -772,16 +1048,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * disable events in this case. */ if (done && !disabled && !target__none(&opts->target)) { - auxtrace_snapshot_enabled = 0; + trigger_off(&auxtrace_snapshot_trigger); perf_evlist__disable(rec->evlist); disabled = true; } } - auxtrace_snapshot_enabled = 0; + trigger_off(&auxtrace_snapshot_trigger); + trigger_off(&switch_output_trigger); if (forks && workload_exec_errno) { char msg[STRERR_BUFSIZE]; - const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg)); + const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); pr_err("Workload failed: %s\n", emsg); err = -1; goto out_child; @@ -790,6 +1067,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (!quiet) fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); + if (target__none(&rec->opts.target)) + record__synthesize_workload(rec, true); + out_child: if (forks) { int exit_status; @@ -808,14 +1088,26 @@ out_child: } else status = err; + record__synthesize(rec, true); /* this will be recalculated during process_buildids() */ rec->samples = 0; - if (!err) - record__finish_output(rec); + if (!err) { + if (!rec->timestamp_filename) { + record__finish_output(rec); + } else { + fd = record__switch_output(rec, true); + if (fd < 0) { + status = fd; + goto out_delete_session; + } + } + } if (!err && !quiet) { char samples[128]; + const char *postfix = rec->timestamp_filename ? + ".<timestamp>" : ""; if (rec->samples && !rec->opts.full_auxtrace) scnprintf(samples, sizeof(samples), @@ -823,9 +1115,9 @@ out_child: else samples[0] = '\0'; - fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n", + fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", perf_data_file__size(file) / 1024.0 / 1024.0, - file->path, samples); + file->path, postfix, samples); } out_delete_session: @@ -833,58 +1125,61 @@ out_delete_session: return status; } -static void callchain_debug(void) +static void callchain_debug(struct callchain_param *callchain) { static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; - pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); + pr_debug("callchain: type %s\n", str[callchain->record_mode]); - if (callchain_param.record_mode == CALLCHAIN_DWARF) + if (callchain->record_mode == CALLCHAIN_DWARF) pr_debug("callchain: stack dump size %d\n", - callchain_param.dump_size); + callchain->dump_size); } -int record_parse_callchain_opt(const struct option *opt, - const char *arg, - int unset) +int record_opts__parse_callchain(struct record_opts *record, + struct callchain_param *callchain, + const char *arg, bool unset) { int ret; - struct record_opts *record = (struct record_opts *)opt->value; - - record->callgraph_set = true; - callchain_param.enabled = !unset; + callchain->enabled = !unset; /* --no-call-graph */ if (unset) { - callchain_param.record_mode = CALLCHAIN_NONE; + callchain->record_mode = CALLCHAIN_NONE; pr_debug("callchain: disabled\n"); return 0; } - ret = parse_callchain_record_opt(arg, &callchain_param); + ret = parse_callchain_record_opt(arg, callchain); if (!ret) { /* Enable data address sampling for DWARF unwind. */ - if (callchain_param.record_mode == CALLCHAIN_DWARF) + if (callchain->record_mode == CALLCHAIN_DWARF) record->sample_address = true; - callchain_debug(); + callchain_debug(callchain); } return ret; } +int record_parse_callchain_opt(const struct option *opt, + const char *arg, + int unset) +{ + return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); +} + int record_callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset __maybe_unused) { - struct record_opts *record = (struct record_opts *)opt->value; + struct callchain_param *callchain = opt->value; - record->callgraph_set = true; - callchain_param.enabled = true; + callchain->enabled = true; - if (callchain_param.record_mode == CALLCHAIN_NONE) - callchain_param.record_mode = CALLCHAIN_FP; + if (callchain->record_mode == CALLCHAIN_NONE) + callchain->record_mode = CALLCHAIN_FP; - callchain_debug(); + callchain_debug(callchain); return 0; } @@ -1080,6 +1375,8 @@ static struct record record = { const char record_callchain_help[] = CALLCHAIN_RECORD_HELP "\n\t\t\t\tDefault: fp"; +static bool dry_run; + /* * XXX Will stay a global variable till we fix builtin-script.c to stop messing * with it and switch to use the library functions in perf_evlist that came @@ -1116,13 +1413,16 @@ struct option __record_options[] = { OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, &record.opts.no_inherit_set, "child tasks do not inherit counters"), + OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, + "synthesize non-sample events at the end of output"), + OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", "number of mmap data pages and AUX area tracing mmap pages", record__parse_mmap_pages), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), - OPT_CALLBACK_NOOPT('g', NULL, &record.opts, + OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, NULL, "enables call-graph recording" , &record_callchain_opt), OPT_CALLBACK(0, "call-graph", &record.opts, @@ -1134,6 +1434,7 @@ struct option __record_options[] = { OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"), OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), + OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, "Record the sample timestamps"), @@ -1195,6 +1496,12 @@ struct option __record_options[] = { "file", "vmlinux pathname"), OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, "Record build-id of all DSOs regardless of hits"), + OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, + "append timestamp to output filename"), + OPT_BOOLEAN(0, "switch-output", &record.switch_output, + "Switch output when receive SIGUSR2"), + OPT_BOOLEAN(0, "dry-run", &dry_run, + "Parse options then exit"), OPT_END() }; @@ -1250,6 +1557,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) return -EINVAL; } + if (rec->switch_output) + rec->timestamp_filename = true; + if (!rec->itr) { rec->itr = auxtrace_record__init(rec->evlist, &err); if (err) @@ -1261,6 +1571,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (err) return err; + if (dry_run) + return 0; + + err = bpf__setup_stdout(rec->evlist); + if (err) { + bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); + pr_err("ERROR: Setup BPF stdout failed: %s\n", + errbuf); + return err; + } + err = -ENOMEM; symbol__init(NULL); @@ -1275,8 +1596,39 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" "even with a suitable vmlinux or kallsyms file.\n\n"); - if (rec->no_buildid_cache || rec->no_buildid) + if (rec->no_buildid_cache || rec->no_buildid) { disable_buildid_cache(); + } else if (rec->switch_output) { + /* + * In 'perf record --switch-output', disable buildid + * generation by default to reduce data file switching + * overhead. Still generate buildid if they are required + * explicitly using + * + * perf record --signal-trigger --no-no-buildid \ + * --no-no-buildid-cache + * + * Following code equals to: + * + * if ((rec->no_buildid || !rec->no_buildid_set) && + * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) + * disable_buildid_cache(); + */ + bool disable = true; + + if (rec->no_buildid_set && !rec->no_buildid) + disable = false; + if (rec->no_buildid_cache_set && !rec->no_buildid_cache) + disable = false; + if (disable) { + rec->no_buildid = true; + rec->no_buildid_cache = true; + disable_buildid_cache(); + } + } + + if (record.opts.overwrite) + record.opts.tail_synthesize = true; if (rec->evlist->nr_entries == 0 && perf_evlist__add_default(rec->evlist) < 0) { @@ -1335,9 +1687,13 @@ out_symbol_exit: static void snapshot_sig_handler(int sig __maybe_unused) { - if (!auxtrace_snapshot_enabled) - return; - auxtrace_snapshot_enabled = 0; - auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr); - auxtrace_record__snapshot_started = 1; + if (trigger_is_ready(&auxtrace_snapshot_trigger)) { + trigger_hit(&auxtrace_snapshot_trigger); + auxtrace_record__snapshot_started = 1; + if (auxtrace_record__snapshot_start(record.itr)) + trigger_error(&auxtrace_snapshot_trigger); + } + + if (trigger_is_ready(&switch_output_trigger)) + trigger_hit(&switch_output_trigger); } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 160ea23b45aa..949e5a15c960 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -8,7 +8,7 @@ #include "builtin.h" #include "util/util.h" -#include "util/cache.h" +#include "util/config.h" #include "util/annotate.h" #include "util/color.h" @@ -47,7 +47,6 @@ struct report { struct perf_tool tool; struct perf_session *session; bool use_tui, use_gtk, use_stdio; - bool dont_use_callchains; bool show_full_info; bool show_threads; bool inverted_callchain; @@ -235,7 +234,7 @@ static int report__setup_sample_type(struct report *rep) sample_type |= PERF_SAMPLE_BRANCH_STACK; if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { - if (sort__has_parent) { + if (perf_hpp_list.parent) { ui__error("Selected --sort parent, but no " "callchain data. Did you call " "'perf record' without -g?\n"); @@ -247,7 +246,7 @@ static int report__setup_sample_type(struct report *rep) "you call 'perf record' without -g?\n"); return -1; } - } else if (!rep->dont_use_callchains && + } else if (!callchain_param.enabled && callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; @@ -362,7 +361,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, struct perf_evsel *pos; fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples); - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); const char *evname = perf_evsel__name(pos); @@ -371,7 +370,8 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, continue; hists__fprintf_nr_sample_events(hists, rep, evname, stdout); - hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout); + hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout, + symbol_conf.use_callchain); fprintf(stdout, "\n\n"); } @@ -478,7 +478,7 @@ static int report__collapse_hists(struct report *rep) ui_progress__init(&prog, rep->nr_entries, "Merging related events..."); - evlist__for_each(rep->session->evlist, pos) { + evlist__for_each_entry(rep->session->evlist, pos) { struct hists *hists = evsel__hists(pos); if (pos->idx == 0) @@ -511,7 +511,7 @@ static void report__output_resort(struct report *rep) ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); - evlist__for_each(rep->session->evlist, pos) + evlist__for_each_entry(rep->session->evlist, pos) perf_evsel__output_resort(pos, &prog); ui_progress__finish(); @@ -552,7 +552,7 @@ static int __cmd_report(struct report *rep) report__warn_kptr_restrict(rep); - evlist__for_each(session->evlist, pos) + evlist__for_each_entry(session->evlist, pos) rep->nr_entries += evsel__hists(pos)->nr_entries; if (use_browser == 0) { @@ -583,7 +583,7 @@ static int __cmd_report(struct report *rep) * might be changed during the collapse phase. */ rep->nr_entries = 0; - evlist__for_each(session->evlist, pos) + evlist__for_each_entry(session->evlist, pos) rep->nr_entries += evsel__hists(pos)->nr_entries; if (rep->nr_entries == 0) { @@ -599,13 +599,15 @@ static int __cmd_report(struct report *rep) static int report_parse_callchain_opt(const struct option *opt, const char *arg, int unset) { - struct report *rep = (struct report *)opt->value; + struct callchain_param *callchain = opt->value; + callchain->enabled = !unset; /* * --no-call-graph */ if (unset) { - rep->dont_use_callchains = true; + symbol_conf.use_callchain = false; + callchain->mode = CHAIN_NONE; return 0; } @@ -734,7 +736,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &report, + OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param, "print_type,threshold[,print_limit],order,sort_key[,branch],value", report_callchain_help, &report_parse_callchain_opt, callchain_default_opt), @@ -743,7 +745,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_INTEGER(0, "max-stack", &report.max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " - "Default: " __stringify(PERF_MAX_STACK_DEPTH)), + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, "alias for inverted call graph"), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", @@ -769,8 +771,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "columns '.' is reserved."), OPT_BOOLEAN('U', "hide-unresolved", &symbol_conf.hide_unresolved, "Only display entries resolved to a symbol"), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_STRING('C', "cpu", &report.cpu_list, "cpu", "list of cpus to profile"), OPT_BOOLEAN('I', "show-info", &report.show_full_info, @@ -814,6 +817,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Show raw trace event output (do not use print fmt or plugins)"), OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, "Show entries in a hierarchy"), + OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode", + "'always' (default), 'never' or 'auto' only applicable to --stdio mode", + stdio__config_color, "always"), OPT_END() }; struct perf_data_file file = { @@ -935,7 +941,7 @@ repeat: goto error; } - sort__need_collapse = true; + perf_hpp_list.need_collapse = true; } /* Force tty output for header output and per-thread stat. */ diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 871b55ae22a4..0dfe8df2ab9b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -11,6 +11,8 @@ #include "util/session.h" #include "util/tool.h" #include "util/cloexec.h" +#include "util/thread_map.h" +#include "util/color.h" #include <subcmd/parse-options.h> #include "util/trace-event.h" @@ -122,6 +124,21 @@ struct trace_sched_handler { struct machine *machine); }; +#define COLOR_PIDS PERF_COLOR_BLUE +#define COLOR_CPUS PERF_COLOR_BG_RED + +struct perf_sched_map { + DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); + int *comp_cpus; + bool comp; + struct thread_map *color_pids; + const char *color_pids_str; + struct cpu_map *color_cpus; + const char *color_cpus_str; + struct cpu_map *cpus; + const char *cpus_str; +}; + struct perf_sched { struct perf_tool tool; const char *sort_order; @@ -173,6 +190,7 @@ struct perf_sched { struct list_head sort_list, cmp_pid; bool force; bool skip_merge; + struct perf_sched_map map; }; static u64 get_nsecs(void) @@ -476,7 +494,7 @@ force_again: } pr_err("Error: sys_perf_event_open() syscall returned " "with %d (%s)\n%s", fd, - strerror_r(errno, sbuf, sizeof(sbuf)), info); + str_error_r(errno, sbuf, sizeof(sbuf)), info); exit(EXIT_FAILURE); } return fd; @@ -1339,6 +1357,38 @@ static int process_sched_wakeup_event(struct perf_tool *tool, return 0; } +union map_priv { + void *ptr; + bool color; +}; + +static bool thread__has_color(struct thread *thread) +{ + union map_priv priv = { + .ptr = thread__priv(thread), + }; + + return priv.color; +} + +static struct thread* +map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid) +{ + struct thread *thread = machine__findnew_thread(machine, pid, tid); + union map_priv priv = { + .color = false, + }; + + if (!sched->map.color_pids || !thread || thread__priv(thread)) + return thread; + + if (thread_map__has(sched->map.color_pids, tid)) + priv.color = true; + + thread__set_priv(thread, priv.ptr); + return thread; +} + static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine) { @@ -1347,13 +1397,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, int new_shortname; u64 timestamp0, timestamp = sample->time; s64 delta; - int cpu, this_cpu = sample->cpu; + int i, this_cpu = sample->cpu; + int cpus_nr; + bool new_cpu = false; + const char *color = PERF_COLOR_NORMAL; BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); if (this_cpu > sched->max_cpu) sched->max_cpu = this_cpu; + if (sched->map.comp) { + cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); + if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) { + sched->map.comp_cpus[cpus_nr++] = this_cpu; + new_cpu = true; + } + } else + cpus_nr = sched->max_cpu; + timestamp0 = sched->cpu_last_switched[this_cpu]; sched->cpu_last_switched[this_cpu] = timestamp; if (timestamp0) @@ -1366,7 +1428,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, return -1; } - sched_in = machine__findnew_thread(machine, -1, next_pid); + sched_in = map__findnew_thread(sched, machine, -1, next_pid); if (sched_in == NULL) return -1; @@ -1400,26 +1462,52 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, new_shortname = 1; } - for (cpu = 0; cpu <= sched->max_cpu; cpu++) { + for (i = 0; i < cpus_nr; i++) { + int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; + struct thread *curr_thread = sched->curr_thread[cpu]; + const char *pid_color = color; + const char *cpu_color = color; + + if (curr_thread && thread__has_color(curr_thread)) + pid_color = COLOR_PIDS; + + if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) + continue; + + if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) + cpu_color = COLOR_CPUS; + if (cpu != this_cpu) - printf(" "); + color_fprintf(stdout, cpu_color, " "); else - printf("*"); + color_fprintf(stdout, cpu_color, "*"); if (sched->curr_thread[cpu]) - printf("%2s ", sched->curr_thread[cpu]->shortname); + color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname); else - printf(" "); + color_fprintf(stdout, color, " "); } - printf(" %12.6f secs ", (double)timestamp/1e9); + if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) + goto out; + + color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp/1e9); if (new_shortname) { - printf("%s => %s:%d\n", + const char *pid_color = color; + + if (thread__has_color(sched_in)) + pid_color = COLOR_PIDS; + + color_fprintf(stdout, pid_color, "%s => %s:%d", sched_in->shortname, thread__comm_str(sched_in), sched_in->tid); - } else { - printf("\n"); } + if (sched->map.comp && new_cpu) + color_fprintf(stdout, color, " (CPU %d)", this_cpu); + +out: + color_fprintf(stdout, color, "\n"); + thread__put(sched_in); return 0; @@ -1675,9 +1763,75 @@ static int perf_sched__lat(struct perf_sched *sched) return 0; } +static int setup_map_cpus(struct perf_sched *sched) +{ + struct cpu_map *map; + + sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); + + if (sched->map.comp) { + sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); + if (!sched->map.comp_cpus) + return -1; + } + + if (!sched->map.cpus_str) + return 0; + + map = cpu_map__new(sched->map.cpus_str); + if (!map) { + pr_err("failed to get cpus map from %s\n", sched->map.cpus_str); + return -1; + } + + sched->map.cpus = map; + return 0; +} + +static int setup_color_pids(struct perf_sched *sched) +{ + struct thread_map *map; + + if (!sched->map.color_pids_str) + return 0; + + map = thread_map__new_by_tid_str(sched->map.color_pids_str); + if (!map) { + pr_err("failed to get thread map from %s\n", sched->map.color_pids_str); + return -1; + } + + sched->map.color_pids = map; + return 0; +} + +static int setup_color_cpus(struct perf_sched *sched) +{ + struct cpu_map *map; + + if (!sched->map.color_cpus_str) + return 0; + + map = cpu_map__new(sched->map.color_cpus_str); + if (!map) { + pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str); + return -1; + } + + sched->map.color_cpus = map; + return 0; +} + static int perf_sched__map(struct perf_sched *sched) { - sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); + if (setup_map_cpus(sched)) + return -1; + + if (setup_color_pids(sched)) + return -1; + + if (setup_color_cpus(sched)) + return -1; setup_pager(); if (perf_sched__read_events(sched)) @@ -1831,6 +1985,17 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "dump raw trace in ASCII"), OPT_END() }; + const struct option map_options[] = { + OPT_BOOLEAN(0, "compact", &sched.map.comp, + "map output in compact mode"), + OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids", + "highlight given pids in map"), + OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus", + "highlight given CPUs in map"), + OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", + "display given CPUs in map"), + OPT_END() + }; const char * const latency_usage[] = { "perf sched latency [<options>]", NULL @@ -1839,6 +2004,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "perf sched replay [<options>]", NULL }; + const char * const map_usage[] = { + "perf sched map [<options>]", + NULL + }; const char *const sched_subcommands[] = { "record", "latency", "map", "replay", "script", NULL }; const char *sched_usage[] = { @@ -1887,6 +2056,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) setup_sorting(&sched, latency_options, latency_usage); return perf_sched__lat(&sched); } else if (!strcmp(argv[0], "map")) { + if (argc) { + argc = parse_options(argc, argv, map_options, map_usage, 0); + if (argc) + usage_with_options(map_usage, map_options); + } sched.tp_handler = &map_ops; setup_sorting(&sched, latency_options, latency_usage); return perf_sched__map(&sched); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 52826696c852..c859e59dfe3e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -21,7 +21,9 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/stat.h" +#include "util/thread-stack.h" #include <linux/bitmap.h> +#include <linux/stringify.h> #include "asm/bug.h" #include "util/mem-events.h" @@ -62,6 +64,7 @@ enum perf_output_field { PERF_OUTPUT_DATA_SRC = 1U << 17, PERF_OUTPUT_WEIGHT = 1U << 18, PERF_OUTPUT_BPF_OUTPUT = 1U << 19, + PERF_OUTPUT_CALLINDENT = 1U << 20, }; struct output_option { @@ -88,6 +91,7 @@ struct output_option { {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, {.str = "weight", .field = PERF_OUTPUT_WEIGHT}, {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, + {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, }; /* default set to maintain compatibility with current format */ @@ -317,19 +321,19 @@ static void set_print_ip_opts(struct perf_event_attr *attr) output[type].print_ip_opts = 0; if (PRINT_FIELD(IP)) - output[type].print_ip_opts |= PRINT_IP_OPT_IP; + output[type].print_ip_opts |= EVSEL__PRINT_IP; if (PRINT_FIELD(SYM)) - output[type].print_ip_opts |= PRINT_IP_OPT_SYM; + output[type].print_ip_opts |= EVSEL__PRINT_SYM; if (PRINT_FIELD(DSO)) - output[type].print_ip_opts |= PRINT_IP_OPT_DSO; + output[type].print_ip_opts |= EVSEL__PRINT_DSO; if (PRINT_FIELD(SYMOFFSET)) - output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET; + output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET; if (PRINT_FIELD(SRCLINE)) - output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE; + output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE; } /* @@ -338,7 +342,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr) */ static int perf_session__check_output_opt(struct perf_session *session) { - int j; + unsigned int j; struct perf_evsel *evsel; for (j = 0; j < PERF_TYPE_MAX; ++j) { @@ -367,14 +371,16 @@ static int perf_session__check_output_opt(struct perf_session *session) if (!no_callchain) { bool use_callchain = false; + bool not_pipe = false; - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { + not_pipe = true; if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { use_callchain = true; break; } } - if (!use_callchain) + if (not_pipe && !use_callchain) symbol_conf.use_callchain = false; } @@ -387,17 +393,20 @@ static int perf_session__check_output_opt(struct perf_session *session) struct perf_event_attr *attr; j = PERF_TYPE_TRACEPOINT; - evsel = perf_session__find_first_evtype(session, j); - if (evsel == NULL) - goto out; - attr = &evsel->attr; + evlist__for_each_entry(session->evlist, evsel) { + if (evsel->attr.type != j) + continue; + + attr = &evsel->attr; - if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { - output[j].fields |= PERF_OUTPUT_IP; - output[j].fields |= PERF_OUTPUT_SYM; - output[j].fields |= PERF_OUTPUT_DSO; - set_print_ip_opts(attr); + if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { + output[j].fields |= PERF_OUTPUT_IP; + output[j].fields |= PERF_OUTPUT_SYM; + output[j].fields |= PERF_OUTPUT_DSO; + set_print_ip_opts(attr); + goto out; + } } } @@ -558,6 +567,62 @@ static void print_sample_addr(struct perf_sample *sample, } } +static void print_sample_callindent(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al) +{ + struct perf_event_attr *attr = &evsel->attr; + size_t depth = thread_stack__depth(thread); + struct addr_location addr_al; + const char *name = NULL; + static int spacing; + int len = 0; + u64 ip = 0; + + /* + * The 'return' has already been popped off the stack so the depth has + * to be adjusted to match the 'call'. + */ + if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN) + depth += 1; + + if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) { + if (sample_addr_correlates_sym(attr)) { + thread__resolve(thread, &addr_al, sample); + if (addr_al.sym) + name = addr_al.sym->name; + else + ip = sample->addr; + } else { + ip = sample->addr; + } + } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) { + if (al->sym) + name = al->sym->name; + else + ip = sample->ip; + } + + if (name) + len = printf("%*s%s", (int)depth * 4, "", name); + else if (ip) + len = printf("%*s%16" PRIx64, (int)depth * 4, "", ip); + + if (len < 0) + return; + + /* + * Try to keep the output length from changing frequently so that the + * output lines up more nicely. + */ + if (len > spacing || (len && len < spacing - 52)) + spacing = round_up(len + 4, 32); + + if (len < spacing) + printf("%*s", spacing - len, ""); +} + static void print_sample_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, @@ -566,21 +631,29 @@ static void print_sample_bts(struct perf_sample *sample, struct perf_event_attr *attr = &evsel->attr; bool print_srcline_last = false; + if (PRINT_FIELD(CALLINDENT)) + print_sample_callindent(sample, evsel, thread, al); + /* print branch_from information */ if (PRINT_FIELD(IP)) { unsigned int print_opts = output[attr->type].print_ip_opts; + struct callchain_cursor *cursor = NULL; - if (symbol_conf.use_callchain && sample->callchain) { - printf("\n"); - } else { - printf(" "); - if (print_opts & PRINT_IP_OPT_SRCLINE) { + if (symbol_conf.use_callchain && sample->callchain && + thread__resolve_callchain(al->thread, &callchain_cursor, evsel, + sample, NULL, NULL, scripting_max_stack) == 0) + cursor = &callchain_cursor; + + if (cursor == NULL) { + putchar(' '); + if (print_opts & EVSEL__PRINT_SRCLINE) { print_srcline_last = true; - print_opts &= ~PRINT_IP_OPT_SRCLINE; + print_opts &= ~EVSEL__PRINT_SRCLINE; } - } - perf_evsel__print_ip(evsel, sample, al, print_opts, - scripting_max_stack); + } else + putchar('\n'); + + sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout); } /* print branch_to information */ @@ -597,13 +670,42 @@ static void print_sample_bts(struct perf_sample *sample, printf("\n"); } +static struct { + u32 flags; + const char *name; +} sample_flags[] = { + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"}, + {PERF_IP_FLAG_BRANCH, "jmp"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT, "hw int"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"}, + {0, NULL} +}; + static void print_sample_flags(u32 flags) { const char *chars = PERF_IP_FLAG_CHARS; const int n = strlen(PERF_IP_FLAG_CHARS); + bool in_tx = flags & PERF_IP_FLAG_IN_TX; + const char *name = NULL; char str[33]; int i, pos = 0; + for (i = 0; sample_flags[i].name ; i++) { + if (sample_flags[i].flags == (flags & ~PERF_IP_FLAG_IN_TX)) { + name = sample_flags[i].name; + break; + } + } + for (i = 0; i < n; i++, flags >>= 1) { if (flags & 1) str[pos++] = chars[i]; @@ -613,7 +715,11 @@ static void print_sample_flags(u32 flags) str[pos++] = '?'; } str[pos] = 0; - printf(" %-4s ", str); + + if (name) + printf(" %-7s%4s ", name, in_tx ? "(x)" : ""); + else + printf(" %-11s ", str); } struct printer_data { @@ -711,7 +817,7 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist) struct perf_evsel *evsel; int max = 0; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { int len = strlen(perf_evsel__name(evsel)); max = MAX(len, max); @@ -783,14 +889,15 @@ static void process_event(struct perf_script *script, printf("%16" PRIu64, sample->weight); if (PRINT_FIELD(IP)) { - if (!symbol_conf.use_callchain) - printf(" "); - else - printf("\n"); + struct callchain_cursor *cursor = NULL; + + if (symbol_conf.use_callchain && sample->callchain && + thread__resolve_callchain(al->thread, &callchain_cursor, evsel, + sample, NULL, NULL, scripting_max_stack) == 0) + cursor = &callchain_cursor; - perf_evsel__print_ip(evsel, sample, al, - output[attr->type].print_ip_opts, - scripting_max_stack); + putchar(cursor ? '\n' : ' '); + sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout); } if (PRINT_FIELD(IREGS)) @@ -935,7 +1042,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, if (evsel->attr.type >= PERF_TYPE_MAX) return 0; - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { if (pos->attr.type == evsel->attr.type && pos != evsel) return 0; } @@ -1585,8 +1692,13 @@ static int list_available_scripts(const struct option *opt __maybe_unused, snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path()); scripts_dir = opendir(scripts_path); - if (!scripts_dir) - return -1; + if (!scripts_dir) { + fprintf(stdout, + "open(%s) failed.\n" + "Check \"PERF_EXEC_PATH\" env to set scripts dir.\n", + scripts_path); + exit(-1); + } for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, @@ -1661,7 +1773,7 @@ static int check_ev_match(char *dir_name, char *scriptname, snprintf(evname, len + 1, "%s", p); match = 0; - evlist__for_each(session->evlist, pos) { + evlist__for_each_entry(session->evlist, pos) { if (!strcmp(perf_evsel__name(pos), evname)) { match = 1; break; @@ -1863,7 +1975,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, struct stat_round_event *round = &event->stat_round; struct perf_evsel *counter; - evlist__for_each(session->evlist, counter) { + evlist__for_each_entry(session->evlist, counter) { perf_stat_process_counter(&stat_config, counter); process_stat(counter, round->time); } @@ -1959,6 +2071,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .exit = perf_event__process_exit, .fork = perf_event__process_fork, .attr = process_attr, + .event_update = perf_event__process_event_update, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, @@ -2002,13 +2115,15 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "file", "kallsyms pathname"), OPT_BOOLEAN('G', "hide-call-graph", &no_callchain, "When printing symbols do not display call chain"), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_CALLBACK('F', "fields", NULL, "str", "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields), + "addr,symoff,period,iregs,brstack,brstacksym,flags," + "bpf-output,callindent", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", @@ -2020,6 +2135,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "only consider symbols in these pids"), OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", "only consider symbols in these tids"), + OPT_UINTEGER(0, "max-stack", &scripting_max_stack, + "Set the maximum stack depth when parsing the callchain, " + "anything beyond the specified depth will be ignored. " + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -2243,6 +2362,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) script.session = session; script__setup_sample_type(&script); + if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) + itrace_synth_opts.thread_stack = true; + session->itrace_synth_opts = &itrace_synth_opts; if (cpu_list) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 307e8a1a003c..3c7452b39f57 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,13 +59,17 @@ #include "util/thread.h" #include "util/thread_map.h" #include "util/counts.h" +#include "util/group.h" #include "util/session.h" #include "util/tool.h" +#include "util/group.h" #include "asm/bug.h" +#include <api/fs/fs.h> #include <stdlib.h> #include <sys/prctl.h> #include <locale.h> +#include <math.h> #define DEFAULT_SEPARATOR " " #define CNTR_NOT_SUPPORTED "<not supported>" @@ -97,6 +101,15 @@ static const char * transaction_limited_attrs = { "}" }; +static const char * topdown_attrs[] = { + "topdown-total-slots", + "topdown-slots-retired", + "topdown-recovery-bubbles", + "topdown-fetch-bubbles", + "topdown-slots-issued", + NULL, +}; + static struct perf_evlist *evsel_list; static struct target target = { @@ -111,6 +124,7 @@ static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; static bool transaction_run; +static bool topdown_run = false; static bool big_num = true; static int big_num_opt = -1; static const char *csv_sep = NULL; @@ -123,6 +137,7 @@ static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; +static bool force_metric_only = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -275,8 +290,12 @@ perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, static int read_counter(struct perf_evsel *counter) { int nthreads = thread_map__nr(evsel_list->threads); - int ncpus = perf_evsel__nr_cpus(counter); - int cpu, thread; + int ncpus, cpu, thread; + + if (target__has_cpu(&target)) + ncpus = perf_evsel__nr_cpus(counter); + else + ncpus = 1; if (!counter->supported) return -ENOENT; @@ -298,27 +317,30 @@ static int read_counter(struct perf_evsel *counter) return -1; } } + + if (verbose > 1) { + fprintf(stat_config.output, + "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), + cpu, + count->val, count->ena, count->run); + } } } return 0; } -static void read_counters(bool close_counters) +static void read_counters(void) { struct perf_evsel *counter; - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { if (read_counter(counter)) pr_debug("failed to read counter %s\n", counter->name); if (perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); - - if (close_counters) { - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), - thread_map__nr(evsel_list->threads)); - } } } @@ -326,7 +348,7 @@ static void process_interval(void) { struct timespec ts, rs; - read_counters(false); + read_counters(); clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); @@ -353,6 +375,17 @@ static void enable_counters(void) perf_evlist__enable(evsel_list); } +static void disable_counters(void) +{ + /* + * If we don't have tracee (attaching to task or cpu), counters may + * still be running. To get accurate group ratios, we must stop groups + * from counting before reading their constituent counters. + */ + if (!target__none(&target)) + perf_evlist__disable(evsel_list); +} + static volatile int workload_exec_errno; /* @@ -394,7 +427,7 @@ static int perf_stat_synthesize_config(bool is_pipe) * Synthesize other events stuff not carried within * attr event - unit, scale, name */ - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { if (!counter->supported) continue; @@ -527,7 +560,7 @@ static int __run_perf_stat(int argc, const char **argv) if (group) perf_evlist__set_leader(evsel_list); - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { try_again: if (create_perf_stat_counter(counter) < 0) { /* @@ -573,7 +606,7 @@ try_again: if (perf_evlist__apply_filters(evsel_list, &counter)) { error("failed to set filter \"%s\" on event %s with %d (%s)\n", counter->filter, perf_evsel__name(counter), errno, - strerror_r(errno, msg, sizeof(msg))); + str_error_r(errno, msg, sizeof(msg))); return -1; } @@ -614,7 +647,7 @@ try_again: wait(&status); if (workload_exec_errno) { - const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg)); + const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); pr_err("Workload failed: %s\n", emsg); return -1; } @@ -630,11 +663,20 @@ try_again: } } + disable_counters(); + t1 = rdclock(); update_stats(&walltime_nsecs_stats, t1 - t0); - read_counters(true); + /* + * Closing a group leader splits the group, and as we only disable + * group leaders, results in remaining events becoming enabled. To + * avoid arbitrary skew, we must read all counters before closing any + * group leaders. + */ + read_counters(); + perf_evlist__close(evsel_list); return WEXITSTATUS(status); } @@ -983,12 +1025,12 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) const char *fmt; if (csv_output) { - fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; + fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; } else { if (big_num) - fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; + fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; else - fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; + fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; } aggr_printout(evsel, id, nr); @@ -1111,7 +1153,7 @@ static void aggr_update_shadow(void) for (s = 0; s < aggr_map->nr; s++) { id = aggr_map->map[s]; - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { val = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { s2 = aggr_get_id(evsel_list->cpus, cpu); @@ -1150,7 +1192,7 @@ static void print_aggr(char *prefix) id = aggr_map->map[s]; first = true; - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { val = ena = run = 0; nr = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { @@ -1269,7 +1311,7 @@ static void print_no_aggr_metric(char *prefix) if (prefix) fputs(prefix, stat_config.output); - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { if (first) { aggr_printout(counter, cpu, 0); first = false; @@ -1293,7 +1335,15 @@ static int aggr_header_lens[] = { [AGGR_GLOBAL] = 0, }; -static void print_metric_headers(char *prefix) +static const char *aggr_header_csv[] = { + [AGGR_CORE] = "core,cpus,", + [AGGR_SOCKET] = "socket,cpus", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_GLOBAL] = "" +}; + +static void print_metric_headers(const char *prefix, bool no_indent) { struct perf_stat_output_ctx out; struct perf_evsel *counter; @@ -1304,12 +1354,18 @@ static void print_metric_headers(char *prefix) if (prefix) fprintf(stat_config.output, "%s", prefix); - if (!csv_output) + if (!csv_output && !no_indent) fprintf(stat_config.output, "%*s", aggr_header_lens[stat_config.aggr_mode], ""); + if (csv_output) { + if (stat_config.interval) + fputs("time,", stat_config.output); + fputs(aggr_header_csv[stat_config.aggr_mode], + stat_config.output); + } /* Print metrics headers only */ - evlist__for_each(evsel_list, counter) { + evlist__for_each_entry(evsel_list, counter) { os.evsel = counter; out.ctx = &os; out.print_metric = print_metric_header; @@ -1329,28 +1385,40 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output && !metric_only) { + if (num_print_interval == 0 && !csv_output) { switch (stat_config.aggr_mode) { case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time socket cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_CORE: - fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time core cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_NONE: - fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time CPU"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_THREAD: - fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time comm-pid"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_GLOBAL: default: - fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); case AGGR_UNSET: break; } } + if (num_print_interval == 0 && metric_only) + print_metric_headers(" ", true); if (++num_print_interval == 25) num_print_interval = 0; } @@ -1419,8 +1487,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (metric_only) { static int num_print_iv; - if (num_print_iv == 0) - print_metric_headers(prefix); + if (num_print_iv == 0 && !interval) + print_metric_headers(prefix, false); if (num_print_iv++ == 25) num_print_iv = 0; if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) @@ -1433,11 +1501,11 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) print_aggr(prefix); break; case AGGR_THREAD: - evlist__for_each(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) print_aggr_thread(counter, prefix); break; case AGGR_GLOBAL: - evlist__for_each(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) print_counter_aggr(counter, prefix); if (metric_only) fputc('\n', stat_config.output); @@ -1446,7 +1514,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (metric_only) print_no_aggr_metric(prefix); else { - evlist__for_each(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) print_counter(counter, prefix); } break; @@ -1511,6 +1579,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, return 0; } +static int enable_metric_only(const struct option *opt __maybe_unused, + const char *s __maybe_unused, int unset) +{ + force_metric_only = true; + metric_only = !unset; + return 0; +} + static const struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1569,8 +1645,10 @@ static const struct option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), - OPT_BOOLEAN(0, "metric-only", &metric_only, - "Only print computed metrics. No raw values"), + OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, + "Only print computed metrics. No raw values", enable_metric_only), + OPT_BOOLEAN(0, "topdown", &topdown_run, + "measure topdown level 1 statistics"), OPT_END() }; @@ -1763,12 +1841,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) return 0; } +static int topdown_filter_events(const char **attr, char **str, bool use_group) +{ + int off = 0; + int i; + int len = 0; + char *s; + + for (i = 0; attr[i]; i++) { + if (pmu_have_event("cpu", attr[i])) { + len += strlen(attr[i]) + 1; + attr[i - off] = attr[i]; + } else + off++; + } + attr[i - off] = NULL; + + *str = malloc(len + 1 + 2); + if (!*str) + return -1; + s = *str; + if (i - off == 0) { + *s = 0; + return 0; + } + if (use_group) + *s++ = '{'; + for (i = 0; attr[i]; i++) { + strcpy(s, attr[i]); + s += strlen(s); + *s++ = ','; + } + if (use_group) { + s[-1] = '}'; + *s = 0; + } else + s[-1] = 0; + return 0; +} + +__weak bool arch_topdown_check_group(bool *warn) +{ + *warn = false; + return false; +} + +__weak void arch_topdown_group_warn(void) +{ +} + /* * Add default attributes, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: */ static int add_default_attributes(void) { + int err; struct perf_event_attr default_attrs0[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, @@ -1887,7 +2015,6 @@ static int add_default_attributes(void) return 0; if (transaction_run) { - int err; if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) err = parse_events(evsel_list, transaction_attrs, NULL); @@ -1900,7 +2027,50 @@ static int add_default_attributes(void) return 0; } + if (topdown_run) { + char *str = NULL; + bool warn = false; + + if (stat_config.aggr_mode != AGGR_GLOBAL && + stat_config.aggr_mode != AGGR_CORE) { + pr_err("top down event configuration requires --per-core mode\n"); + return -1; + } + stat_config.aggr_mode = AGGR_CORE; + if (nr_cgroups || !target__has_cpu(&target)) { + pr_err("top down event configuration requires system-wide mode (-a)\n"); + return -1; + } + + if (!force_metric_only) + metric_only = true; + if (topdown_filter_events(topdown_attrs, &str, + arch_topdown_check_group(&warn)) < 0) { + pr_err("Out of memory\n"); + return -1; + } + if (topdown_attrs[0] && str) { + if (warn) + arch_topdown_group_warn(); + err = parse_events(evsel_list, str, NULL); + if (err) { + fprintf(stderr, + "Cannot set up top down events %s: %d\n", + str, err); + free(str); + return -1; + } + } else { + fprintf(stderr, "System does not support topdown\n"); + return -1; + } + free(str); + } + if (!evsel_list->nr_entries) { + if (target__has_cpu(&target)) + default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; + if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) return -1; if (pmu_have_event("cpu", "stalled-cycles-frontend")) { @@ -1992,21 +2162,21 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_session *session) { - struct stat_round_event *round = &event->stat_round; + struct stat_round_event *stat_round = &event->stat_round; struct perf_evsel *counter; struct timespec tsh, *ts = NULL; const char **argv = session->header.env.cmdline_argv; int argc = session->header.env.nr_cmdline; - evlist__for_each(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) perf_stat_process_counter(&stat_config, counter); - if (round->type == PERF_STAT_ROUND_TYPE__FINAL) - update_stats(&walltime_nsecs_stats, round->time); + if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) + update_stats(&walltime_nsecs_stats, stat_round->time); - if (stat_config.interval && round->time) { - tsh.tv_sec = round->time / NSECS_PER_SEC; - tsh.tv_nsec = round->time % NSECS_PER_SEC; + if (stat_config.interval && stat_round->time) { + tsh.tv_sec = stat_round->time / NSECS_PER_SEC; + tsh.tv_nsec = stat_round->time % NSECS_PER_SEC; ts = &tsh; } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 40cc9bb3506c..733a55422d03 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1945,8 +1945,9 @@ int cmd_timechart(int argc, const char **argv, OPT_CALLBACK('p', "process", NULL, "process", "process selector. Pass a pid or process name.", parse_process), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_INTEGER('n', "proc-num", &tchart.proc_num, "min. number of tasks to print"), OPT_BOOLEAN('t', "topology", &tchart.topology, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 833214979c4f..418ed94756d3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -22,7 +22,7 @@ #include "perf.h" #include "util/annotate.h" -#include "util/cache.h" +#include "util/config.h" #include "util/color.h" #include "util/evlist.h" #include "util/evsel.h" @@ -128,10 +128,14 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, 0); + err = symbol__disassemble(sym, map, 0); if (err == 0) { out_assign: top->sym_filter_entry = he; + } else { + char msg[BUFSIZ]; + symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); + pr_err("Couldn't annotate %s: %s\n", sym->name, msg); } pthread_mutex_unlock(¬es->lock); @@ -295,7 +299,7 @@ static void perf_top__print_sym_table(struct perf_top *top) hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); hists__fprintf(hists, false, top->print_entries - printed, win_width, - top->min_percent, stdout); + top->min_percent, stdout, symbol_conf.use_callchain); } static void prompt_integer(int *target, const char *msg) @@ -479,7 +483,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) fprintf(stderr, "\nAvailable events:"); - evlist__for_each(top->evlist, top->sym_evsel) + evlist__for_each_entry(top->evlist, top->sym_evsel) fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel)); prompt_integer(&counter, "Enter details event counter"); @@ -490,7 +494,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) sleep(1); break; } - evlist__for_each(top->evlist, top->sym_evsel) + evlist__for_each_entry(top->evlist, top->sym_evsel) if (top->sym_evsel->idx == counter) break; } else @@ -583,7 +587,7 @@ static void *display_thread_tui(void *arg) * Zooming in/out UIDs. For now juse use whatever the user passed * via --uid. */ - evlist__for_each(top->evlist, pos) { + evlist__for_each_entry(top->evlist, pos) { struct hists *hists = evsel__hists(pos); hists->uid_filter_str = top->record_opts.target.uid_str; } @@ -688,7 +692,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, struct hist_entry *he = iter->he; struct perf_evsel *evsel = iter->evsel; - if (sort__has_sym && single) + if (perf_hpp_list.sym && single) perf_top__record_precise_ip(top, he, evsel->idx, al->addr); hist__account_cycles(iter->sample->branch_stack, al, iter->sample, @@ -732,7 +736,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (machine__resolve(machine, &al, sample) < 0) return; - if (!top->kptr_restrict_warned && + if (!machine->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { ui__warning( @@ -743,7 +747,7 @@ static void perf_event__process_sample(struct perf_tool *tool, " modules" : ""); if (use_browser <= 0) sleep(5); - top->kptr_restrict_warned = true; + machine->kptr_restrict_warned = true; } if (al.sym == NULL) { @@ -759,7 +763,7 @@ static void perf_event__process_sample(struct perf_tool *tool, * --hide-kernel-symbols, even if the user specifies an * invalid --vmlinux ;-) */ - if (!top->kptr_restrict_warned && !top->vmlinux_warned && + if (!machine->kptr_restrict_warned && !top->vmlinux_warned && al.map == machine->vmlinux_maps[MAP__FUNCTION] && RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { if (symbol_conf.vmlinux_name) { @@ -886,9 +890,9 @@ static int perf_top__start_counters(struct perf_top *top) struct perf_evlist *evlist = top->evlist; struct record_opts *opts = &top->record_opts; - perf_evlist__config(evlist, opts); + perf_evlist__config(evlist, opts, &callchain_param); - evlist__for_each(evlist, counter) { + evlist__for_each_entry(evlist, counter) { try_again: if (perf_evsel__open(counter, top->evlist->cpus, top->evlist->threads) < 0) { @@ -907,7 +911,7 @@ try_again: if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { ui__error("Failed to mmap with %d (%s)\n", - errno, strerror_r(errno, msg, sizeof(msg))); + errno, str_error_r(errno, msg, sizeof(msg))); goto out_err; } @@ -917,15 +921,15 @@ out_err: return -1; } -static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused) +static int callchain_param__setup_sample_type(struct callchain_param *callchain) { - if (!sort__has_sym) { - if (symbol_conf.use_callchain) { + if (!perf_hpp_list.sym) { + if (callchain->enabled) { ui__error("Selected -g but \"sym\" not present in --sort/-s."); return -EINVAL; } - } else if (callchain_param.mode != CHAIN_NONE) { - if (callchain_register_param(&callchain_param) < 0) { + } else if (callchain->mode != CHAIN_NONE) { + if (callchain_register_param(callchain) < 0) { ui__error("Can't register callchain params.\n"); return -EINVAL; } @@ -952,7 +956,7 @@ static int __cmd_top(struct perf_top *top) goto out_delete; } - ret = perf_top__setup_sample_type(top); + ret = callchain_param__setup_sample_type(&callchain_param); if (ret) goto out_delete; @@ -962,7 +966,7 @@ static int __cmd_top(struct perf_top *top) machine__synthesize_threads(&top->session->machines.host, &opts->target, top->evlist->threads, false, opts->proc_map_timeout); - if (sort__has_socket) { + if (perf_hpp_list.socket) { ret = perf_env__read_cpu_topology_map(&perf_env); if (ret < 0) goto out_err_cpu_topo; @@ -1028,7 +1032,7 @@ out_delete: out_err_cpu_topo: { char errbuf[BUFSIZ]; - const char *err = strerror_r(-ret, errbuf, sizeof(errbuf)); + const char *err = str_error_r(-ret, errbuf, sizeof(errbuf)); ui__error("Could not read the CPU topology map: %s\n", err); goto out_delete; @@ -1045,18 +1049,17 @@ callchain_opt(const struct option *opt, const char *arg, int unset) static int parse_callchain_opt(const struct option *opt, const char *arg, int unset) { - struct record_opts *record = (struct record_opts *)opt->value; + struct callchain_param *callchain = opt->value; - record->callgraph_set = true; - callchain_param.enabled = !unset; - callchain_param.record_mode = CALLCHAIN_FP; + callchain->enabled = !unset; + callchain->record_mode = CALLCHAIN_FP; /* * --no-call-graph */ if (unset) { symbol_conf.use_callchain = false; - callchain_param.record_mode = CALLCHAIN_NONE; + callchain->record_mode = CALLCHAIN_NONE; return 0; } @@ -1104,7 +1107,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) }, .proc_map_timeout = 500, }, - .max_stack = PERF_MAX_STACK_DEPTH, + .max_stack = sysctl_perf_event_max_stack, .sym_pcnt_filter = 5, }; struct record_opts *opts = &top.record_opts; @@ -1162,17 +1165,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "output field(s): overhead, period, sample plus all of sort keys"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), - OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts, + OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, NULL, "enables call-graph recording and display", &callchain_opt), - OPT_CALLBACK(0, "call-graph", &top.record_opts, + OPT_CALLBACK(0, "call-graph", &callchain_param, "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]", top_callchain_help, &parse_callchain_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &top.max_stack, "Set the maximum stack depth when parsing the callchain. " - "Default: " __stringify(PERF_MAX_STACK_DEPTH)), + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", "ignore callees of these functions in call graphs", report_parse_ignore_callees_opt), @@ -1256,7 +1259,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ - sort__need_collapse = 1; + perf_hpp_list.need_collapse = 1; if (top.use_stdio) use_browser = 0; @@ -1296,7 +1299,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (perf_evlist__create_maps(top.evlist, target) < 0) { ui__error("Couldn't create thread/CPU maps: %s\n", - errno == ENOENT ? "No such process" : strerror_r(errno, errbuf, sizeof(errbuf))); + errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); goto out_delete_evlist; } @@ -1312,7 +1315,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) top.sym_evsel = perf_evlist__first(top.evlist); - if (!symbol_conf.use_callchain) { + if (!callchain_param.enabled) { symbol_conf.cumulate_callchain = false; perf_hpp__cancel_cumulate(); } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 93ac724fb635..b8c6766301db 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -34,79 +34,75 @@ #include "trace-event.h" #include "util/parse-events.h" #include "util/bpf-loader.h" +#include "callchain.h" +#include "syscalltbl.h" +#include "rb_resort.h" -#include <libaudit.h> +#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */ #include <stdlib.h> -#include <sys/mman.h> -#include <linux/futex.h> #include <linux/err.h> - -/* For older distros: */ -#ifndef MAP_STACK -# define MAP_STACK 0x20000 -#endif - -#ifndef MADV_HWPOISON -# define MADV_HWPOISON 100 - -#endif - -#ifndef MADV_MERGEABLE -# define MADV_MERGEABLE 12 -#endif - -#ifndef MADV_UNMERGEABLE -# define MADV_UNMERGEABLE 13 -#endif - -#ifndef EFD_SEMAPHORE -# define EFD_SEMAPHORE 1 -#endif - -#ifndef EFD_NONBLOCK -# define EFD_NONBLOCK 00004000 -#endif - -#ifndef EFD_CLOEXEC -# define EFD_CLOEXEC 02000000 -#endif +#include <linux/filter.h> +#include <linux/audit.h> +#include <linux/random.h> +#include <linux/stringify.h> #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 #endif -#ifndef SOCK_DCCP -# define SOCK_DCCP 6 -#endif - -#ifndef SOCK_CLOEXEC -# define SOCK_CLOEXEC 02000000 -#endif - -#ifndef SOCK_NONBLOCK -# define SOCK_NONBLOCK 00004000 -#endif - -#ifndef MSG_CMSG_CLOEXEC -# define MSG_CMSG_CLOEXEC 0x40000000 -#endif - -#ifndef PERF_FLAG_FD_NO_GROUP -# define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#endif - -#ifndef PERF_FLAG_FD_OUTPUT -# define PERF_FLAG_FD_OUTPUT (1UL << 1) -#endif - -#ifndef PERF_FLAG_PID_CGROUP -# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#endif - -#ifndef PERF_FLAG_FD_CLOEXEC -# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ -#endif - +struct trace { + struct perf_tool tool; + struct syscalltbl *sctbl; + struct { + int max; + struct syscall *table; + struct { + struct perf_evsel *sys_enter, + *sys_exit; + } events; + } syscalls; + struct record_opts opts; + struct perf_evlist *evlist; + struct machine *host; + struct thread *current; + u64 base_time; + FILE *output; + unsigned long nr_events; + struct strlist *ev_qualifier; + struct { + size_t nr; + int *entries; + } ev_qualifier_ids; + struct intlist *tid_list; + struct intlist *pid_list; + struct { + size_t nr; + pid_t *entries; + } filter_pids; + double duration_filter; + double runtime_ms; + struct { + u64 vfs_getname, + proc_getname; + } stats; + unsigned int max_stack; + unsigned int min_stack; + bool not_ev_qualifier; + bool live; + bool full_time; + bool sched; + bool multiple_threads; + bool summary; + bool summary_only; + bool show_comm; + bool show_tool_stats; + bool trace_syscalls; + bool kernel_syscallchains; + bool force; + bool vfs_getname; + int trace_pgfaults; + int open_id; +}; struct tp_field { int offset; @@ -337,6 +333,10 @@ static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, #define SCA_FD syscall_arg__scnprintf_fd +#ifndef AT_FDCWD +#define AT_FDCWD -100 +#endif + static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, struct syscall_arg *arg) { @@ -371,221 +371,6 @@ static size_t syscall_arg__scnprintf_int(char *bf, size_t size, #define SCA_INT syscall_arg__scnprintf_int -static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, prot = arg->val; - - if (prot == PROT_NONE) - return scnprintf(bf, size, "NONE"); -#define P_MMAP_PROT(n) \ - if (prot & PROT_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - prot &= ~PROT_##n; \ - } - - P_MMAP_PROT(EXEC); - P_MMAP_PROT(READ); - P_MMAP_PROT(WRITE); -#ifdef PROT_SEM - P_MMAP_PROT(SEM); -#endif - P_MMAP_PROT(GROWSDOWN); - P_MMAP_PROT(GROWSUP); -#undef P_MMAP_PROT - - if (prot) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); - - return printed; -} - -#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot - -static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - -#define P_MMAP_FLAG(n) \ - if (flags & MAP_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~MAP_##n; \ - } - - P_MMAP_FLAG(SHARED); - P_MMAP_FLAG(PRIVATE); -#ifdef MAP_32BIT - P_MMAP_FLAG(32BIT); -#endif - P_MMAP_FLAG(ANONYMOUS); - P_MMAP_FLAG(DENYWRITE); - P_MMAP_FLAG(EXECUTABLE); - P_MMAP_FLAG(FILE); - P_MMAP_FLAG(FIXED); - P_MMAP_FLAG(GROWSDOWN); -#ifdef MAP_HUGETLB - P_MMAP_FLAG(HUGETLB); -#endif - P_MMAP_FLAG(LOCKED); - P_MMAP_FLAG(NONBLOCK); - P_MMAP_FLAG(NORESERVE); - P_MMAP_FLAG(POPULATE); - P_MMAP_FLAG(STACK); -#ifdef MAP_UNINITIALIZED - P_MMAP_FLAG(UNINITIALIZED); -#endif -#undef P_MMAP_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags - -static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - -#define P_MREMAP_FLAG(n) \ - if (flags & MREMAP_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~MREMAP_##n; \ - } - - P_MREMAP_FLAG(MAYMOVE); -#ifdef MREMAP_FIXED - P_MREMAP_FLAG(FIXED); -#endif -#undef P_MREMAP_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags - -static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, - struct syscall_arg *arg) -{ - int behavior = arg->val; - - switch (behavior) { -#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) - P_MADV_BHV(NORMAL); - P_MADV_BHV(RANDOM); - P_MADV_BHV(SEQUENTIAL); - P_MADV_BHV(WILLNEED); - P_MADV_BHV(DONTNEED); - P_MADV_BHV(REMOVE); - P_MADV_BHV(DONTFORK); - P_MADV_BHV(DOFORK); - P_MADV_BHV(HWPOISON); -#ifdef MADV_SOFT_OFFLINE - P_MADV_BHV(SOFT_OFFLINE); -#endif - P_MADV_BHV(MERGEABLE); - P_MADV_BHV(UNMERGEABLE); -#ifdef MADV_HUGEPAGE - P_MADV_BHV(HUGEPAGE); -#endif -#ifdef MADV_NOHUGEPAGE - P_MADV_BHV(NOHUGEPAGE); -#endif -#ifdef MADV_DONTDUMP - P_MADV_BHV(DONTDUMP); -#endif -#ifdef MADV_DODUMP - P_MADV_BHV(DODUMP); -#endif -#undef P_MADV_PHV - default: break; - } - - return scnprintf(bf, size, "%#x", behavior); -} - -#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior - -static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, op = arg->val; - - if (op == 0) - return scnprintf(bf, size, "NONE"); -#define P_CMD(cmd) \ - if ((op & LOCK_##cmd) == LOCK_##cmd) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ - op &= ~LOCK_##cmd; \ - } - - P_CMD(SH); - P_CMD(EX); - P_CMD(NB); - P_CMD(UN); - P_CMD(MAND); - P_CMD(RW); - P_CMD(READ); - P_CMD(WRITE); -#undef P_OP - - if (op) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op); - - return printed; -} - -#define SCA_FLOCK syscall_arg__scnprintf_flock - -static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) -{ - enum syscall_futex_args { - SCF_UADDR = (1 << 0), - SCF_OP = (1 << 1), - SCF_VAL = (1 << 2), - SCF_TIMEOUT = (1 << 3), - SCF_UADDR2 = (1 << 4), - SCF_VAL3 = (1 << 5), - }; - int op = arg->val; - int cmd = op & FUTEX_CMD_MASK; - size_t printed = 0; - - switch (cmd) { -#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); - P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; - P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; - P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; - P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; - P_FUTEX_OP(WAKE_OP); break; - P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; - P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; - P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; - P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; - P_FUTEX_OP(WAIT_REQUEUE_PI); break; - default: printed = scnprintf(bf, size, "%#x", cmd); break; - } - - if (op & FUTEX_PRIVATE_FLAG) - printed += scnprintf(bf + printed, size - printed, "|PRIV"); - - if (op & FUTEX_CLOCK_REALTIME) - printed += scnprintf(bf + printed, size - printed, "|CLKRT"); - - return printed; -} - -#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op - static const char *bpf_cmd[] = { "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", "MAP_GET_NEXT_KEY", "PROG_LOAD", @@ -652,110 +437,6 @@ static const char *socket_families[] = { }; static DEFINE_STRARRAY(socket_families); -#ifndef SOCK_TYPE_MASK -#define SOCK_TYPE_MASK 0xf -#endif - -static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, - struct syscall_arg *arg) -{ - size_t printed; - int type = arg->val, - flags = type & ~SOCK_TYPE_MASK; - - type &= SOCK_TYPE_MASK; - /* - * Can't use a strarray, MIPS may override for ABI reasons. - */ - switch (type) { -#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; - P_SK_TYPE(STREAM); - P_SK_TYPE(DGRAM); - P_SK_TYPE(RAW); - P_SK_TYPE(RDM); - P_SK_TYPE(SEQPACKET); - P_SK_TYPE(DCCP); - P_SK_TYPE(PACKET); -#undef P_SK_TYPE - default: - printed = scnprintf(bf, size, "%#x", type); - } - -#define P_SK_FLAG(n) \ - if (flags & SOCK_##n) { \ - printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ - flags &= ~SOCK_##n; \ - } - - P_SK_FLAG(CLOEXEC); - P_SK_FLAG(NONBLOCK); -#undef P_SK_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "|%#x", flags); - - return printed; -} - -#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type - -#ifndef MSG_PROBE -#define MSG_PROBE 0x10 -#endif -#ifndef MSG_WAITFORONE -#define MSG_WAITFORONE 0x10000 -#endif -#ifndef MSG_SENDPAGE_NOTLAST -#define MSG_SENDPAGE_NOTLAST 0x20000 -#endif -#ifndef MSG_FASTOPEN -#define MSG_FASTOPEN 0x20000000 -#endif - -static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - - if (flags == 0) - return scnprintf(bf, size, "NONE"); -#define P_MSG_FLAG(n) \ - if (flags & MSG_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~MSG_##n; \ - } - - P_MSG_FLAG(OOB); - P_MSG_FLAG(PEEK); - P_MSG_FLAG(DONTROUTE); - P_MSG_FLAG(TRYHARD); - P_MSG_FLAG(CTRUNC); - P_MSG_FLAG(PROBE); - P_MSG_FLAG(TRUNC); - P_MSG_FLAG(DONTWAIT); - P_MSG_FLAG(EOR); - P_MSG_FLAG(WAITALL); - P_MSG_FLAG(FIN); - P_MSG_FLAG(SYN); - P_MSG_FLAG(CONFIRM); - P_MSG_FLAG(RST); - P_MSG_FLAG(ERRQUEUE); - P_MSG_FLAG(NOSIGNAL); - P_MSG_FLAG(MORE); - P_MSG_FLAG(WAITFORONE); - P_MSG_FLAG(SENDPAGE_NOTLAST); - P_MSG_FLAG(FASTOPEN); - P_MSG_FLAG(CMSG_CLOEXEC); -#undef P_MSG_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags - static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, struct syscall_arg *arg) { @@ -788,116 +469,6 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, #define SCA_FILENAME syscall_arg__scnprintf_filename -static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - - if (!(flags & O_CREAT)) - arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ - - if (flags == 0) - return scnprintf(bf, size, "RDONLY"); -#define P_FLAG(n) \ - if (flags & O_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~O_##n; \ - } - - P_FLAG(APPEND); - P_FLAG(ASYNC); - P_FLAG(CLOEXEC); - P_FLAG(CREAT); - P_FLAG(DIRECT); - P_FLAG(DIRECTORY); - P_FLAG(EXCL); - P_FLAG(LARGEFILE); - P_FLAG(NOATIME); - P_FLAG(NOCTTY); -#ifdef O_NONBLOCK - P_FLAG(NONBLOCK); -#elif O_NDELAY - P_FLAG(NDELAY); -#endif -#ifdef O_PATH - P_FLAG(PATH); -#endif - P_FLAG(RDWR); -#ifdef O_DSYNC - if ((flags & O_SYNC) == O_SYNC) - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); - else { - P_FLAG(DSYNC); - } -#else - P_FLAG(SYNC); -#endif - P_FLAG(TRUNC); - P_FLAG(WRONLY); -#undef P_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags - -static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - - if (flags == 0) - return 0; - -#define P_FLAG(n) \ - if (flags & PERF_FLAG_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~PERF_FLAG_##n; \ - } - - P_FLAG(FD_NO_GROUP); - P_FLAG(FD_OUTPUT); - P_FLAG(PID_CGROUP); - P_FLAG(FD_CLOEXEC); -#undef P_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags - -static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, - struct syscall_arg *arg) -{ - int printed = 0, flags = arg->val; - - if (flags == 0) - return scnprintf(bf, size, "NONE"); -#define P_FLAG(n) \ - if (flags & EFD_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~EFD_##n; \ - } - - P_FLAG(SEMAPHORE); - P_FLAG(CLOEXEC); - P_FLAG(NONBLOCK); -#undef P_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; -} - -#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags - static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -921,59 +492,6 @@ static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags -static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) -{ - int sig = arg->val; - - switch (sig) { -#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) - P_SIGNUM(HUP); - P_SIGNUM(INT); - P_SIGNUM(QUIT); - P_SIGNUM(ILL); - P_SIGNUM(TRAP); - P_SIGNUM(ABRT); - P_SIGNUM(BUS); - P_SIGNUM(FPE); - P_SIGNUM(KILL); - P_SIGNUM(USR1); - P_SIGNUM(SEGV); - P_SIGNUM(USR2); - P_SIGNUM(PIPE); - P_SIGNUM(ALRM); - P_SIGNUM(TERM); - P_SIGNUM(CHLD); - P_SIGNUM(CONT); - P_SIGNUM(STOP); - P_SIGNUM(TSTP); - P_SIGNUM(TTIN); - P_SIGNUM(TTOU); - P_SIGNUM(URG); - P_SIGNUM(XCPU); - P_SIGNUM(XFSZ); - P_SIGNUM(VTALRM); - P_SIGNUM(PROF); - P_SIGNUM(WINCH); - P_SIGNUM(IO); - P_SIGNUM(PWR); - P_SIGNUM(SYS); -#ifdef SIGEMT - P_SIGNUM(EMT); -#endif -#ifdef SIGSTKFLT - P_SIGNUM(STKFLT); -#endif -#ifdef SIGSWI - P_SIGNUM(SWI); -#endif - default: break; - } - - return scnprintf(bf, size, "%#x", sig); -} - -#define SCA_SIGNUM syscall_arg__scnprintf_signum - #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches. @@ -1001,105 +519,125 @@ static const char *tioctls[] = { static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); #endif /* defined(__i386__) || defined(__x86_64__) */ +#ifndef GRND_NONBLOCK +#define GRND_NONBLOCK 0x0001 +#endif +#ifndef GRND_RANDOM +#define GRND_RANDOM 0x0002 +#endif + +static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & GRND_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~GRND_##n; \ + } + + P_FLAG(RANDOM); + P_FLAG(NONBLOCK); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags + #define STRARRAY(arg, name, array) \ .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ .arg_parm = { [arg] = &strarray__##array, } +#include "trace/beauty/eventfd.c" +#include "trace/beauty/flock.c" +#include "trace/beauty/futex_op.c" +#include "trace/beauty/mmap.c" +#include "trace/beauty/mode_t.c" +#include "trace/beauty/msg_flags.c" +#include "trace/beauty/open_flags.c" +#include "trace/beauty/perf_event_open.c" +#include "trace/beauty/pid.c" +#include "trace/beauty/sched_policy.c" +#include "trace/beauty/seccomp.c" +#include "trace/beauty/signum.c" +#include "trace/beauty/socket_type.c" +#include "trace/beauty/waitid_options.c" + static struct syscall_fmt { const char *name; const char *alias; size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); void *arg_parm[6]; bool errmsg; + bool errpid; bool timeout; bool hexret; } syscall_fmts[] = { { .name = "access", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ - [1] = SCA_ACCMODE, /* mode */ }, }, + .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), }, { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, - { .name = "chdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "chmod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "chroot", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "chdir", .errmsg = true, }, + { .name = "chmod", .errmsg = true, }, + { .name = "chroot", .errmsg = true, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, + { .name = "clone", .errpid = true, }, { .name = "close", .errmsg = true, .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, - { .name = "creat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "dup", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "dup2", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "dup3", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "creat", .errmsg = true, }, + { .name = "dup", .errmsg = true, }, + { .name = "dup2", .errmsg = true, }, + { .name = "dup3", .errmsg = true, }, { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, - { .name = "faccessat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "fadvise64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fallocate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fchdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fchmod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "faccessat", .errmsg = true, }, + { .name = "fadvise64", .errmsg = true, }, + { .name = "fallocate", .errmsg = true, }, + { .name = "fchdir", .errmsg = true, }, + { .name = "fchmod", .errmsg = true, }, { .name = "fchmodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "fchown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "fchown", .errmsg = true, }, { .name = "fchownat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fcntl", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [1] = SCA_STRARRAY, /* cmd */ }, + .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, - { .name = "fdatasync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fdatasync", .errmsg = true, }, { .name = "flock", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [1] = SCA_FLOCK, /* cmd */ }, }, - { .name = "fsetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fstat", .errmsg = true, .alias = "newfstat", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fstatat", .errmsg = true, .alias = "newfstatat", - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "fstatfs", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fsync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "ftruncate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, }, + { .name = "fsetxattr", .errmsg = true, }, + { .name = "fstat", .errmsg = true, .alias = "newfstat", }, + { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, + { .name = "fstatfs", .errmsg = true, }, + { .name = "fsync", .errmsg = true, }, + { .name = "ftruncate", .errmsg = true, }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, { .name = "futimesat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "getdents", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "getdents64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "getdents", .errmsg = true, }, + { .name = "getdents64", .errmsg = true, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, + { .name = "getpid", .errpid = true, }, + { .name = "getpgid", .errpid = true, }, + { .name = "getppid", .errpid = true, }, + { .name = "getrandom", .errmsg = true, + .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, - { .name = "getxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "inotify_add_watch", .errmsg = true, - .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, }, + { .name = "getxattr", .errmsg = true, }, + { .name = "inotify_add_watch", .errmsg = true, }, { .name = "ioctl", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ + .arg_scnprintf = { #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches. @@ -1113,41 +651,28 @@ static struct syscall_fmt { { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), }, { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, - { .name = "lchown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "lgetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "lchown", .errmsg = true, }, + { .name = "lgetxattr", .errmsg = true, }, { .name = "linkat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, - { .name = "listxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "llistxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "lremovexattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "listxattr", .errmsg = true, }, + { .name = "llistxattr", .errmsg = true, }, + { .name = "lremovexattr", .errmsg = true, }, { .name = "lseek", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [2] = SCA_STRARRAY, /* whence */ }, + .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ }, .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, - { .name = "lsetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "lstat", .errmsg = true, .alias = "newlstat", - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "lsxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "lsetxattr", .errmsg = true, }, + { .name = "lstat", .errmsg = true, .alias = "newlstat", }, + { .name = "lsxattr", .errmsg = true, }, { .name = "madvise", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, - { .name = "mkdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "mkdir", .errmsg = true, }, { .name = "mkdirat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* pathname */ }, }, - { .name = "mknod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "mknod", .errmsg = true, }, { .name = "mknodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mlock", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mlockall", .errmsg = true, @@ -1155,8 +680,7 @@ static struct syscall_fmt { { .name = "mmap", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ [2] = SCA_MMAP_PROT, /* prot */ - [3] = SCA_MMAP_FLAGS, /* flags */ - [4] = SCA_FD, /* fd */ }, }, + [3] = SCA_MMAP_FLAGS, /* flags */ }, }, { .name = "mprotect", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MMAP_PROT, /* prot */ }, }, @@ -1173,60 +697,43 @@ static struct syscall_fmt { { .name = "name_to_handle_at", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "newfstatat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "open", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ - [1] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "openat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* filename */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "perf_event_open", .errmsg = true, - .arg_scnprintf = { [1] = SCA_INT, /* pid */ - [2] = SCA_INT, /* cpu */ + .arg_scnprintf = { [2] = SCA_INT, /* cpu */ [3] = SCA_FD, /* group_fd */ [4] = SCA_PERF_FLAGS, /* flags */ }, }, { .name = "pipe2", .errmsg = true, .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, - { .name = "pread", .errmsg = true, .alias = "pread64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "preadv", .errmsg = true, .alias = "pread", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "pread", .errmsg = true, .alias = "pread64", }, + { .name = "preadv", .errmsg = true, .alias = "pread", }, { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, - { .name = "pwrite", .errmsg = true, .alias = "pwrite64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "pwritev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "read", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "readlink", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, + { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, + { .name = "pwritev", .errmsg = true, }, + { .name = "read", .errmsg = true, }, + { .name = "readlink", .errmsg = true, }, { .name = "readlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* pathname */ }, }, - { .name = "readv", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "readv", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmmsg", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmsg", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [2] = SCA_MSG_FLAGS, /* flags */ }, }, - { .name = "removexattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "removexattr", .errmsg = true, }, { .name = "renameat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, - { .name = "rmdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "rmdir", .errmsg = true, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, @@ -1234,22 +741,24 @@ static struct syscall_fmt { .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_tgsigqueueinfo", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, + { .name = "sched_setscheduler", .errmsg = true, + .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, }, + { .name = "seccomp", .errmsg = true, + .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */ + [1] = SCA_SECCOMP_FLAGS, /* flags */ }, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "sendmmsg", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "sendmsg", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [2] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "sendto", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, + { .name = "setpgid", .errmsg = true, }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, - { .name = "setxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "shutdown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "setxattr", .errmsg = true, }, + { .name = "shutdown", .errmsg = true, }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, @@ -1258,10 +767,8 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, - { .name = "stat", .errmsg = true, .alias = "newstat", - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "statfs", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "stat", .errmsg = true, .alias = "newstat", }, + { .name = "statfs", .errmsg = true, }, { .name = "swapoff", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, { .name = "swapon", .errmsg = true, @@ -1272,25 +779,21 @@ static struct syscall_fmt { .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "tkill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, - { .name = "truncate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, + { .name = "truncate", .errmsg = true, }, { .name = "uname", .errmsg = true, .alias = "newuname", }, { .name = "unlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* pathname */ }, }, - { .name = "utime", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "utime", .errmsg = true, }, { .name = "utimensat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ - [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "utimes", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "vmsplice", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "write", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "writev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, + { .name = "utimes", .errmsg = true, }, + { .name = "vmsplice", .errmsg = true, }, + { .name = "wait4", .errpid = true, + .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, }, + { .name = "waitid", .errpid = true, + .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, }, + { .name = "write", .errmsg = true, }, + { .name = "writev", .errmsg = true, }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -1398,59 +901,6 @@ fail: static const size_t trace__entry_str_size = 2048; -struct trace { - struct perf_tool tool; - struct { - int machine; - int open_id; - } audit; - struct { - int max; - struct syscall *table; - struct { - struct perf_evsel *sys_enter, - *sys_exit; - } events; - } syscalls; - struct record_opts opts; - struct perf_evlist *evlist; - struct machine *host; - struct thread *current; - u64 base_time; - FILE *output; - unsigned long nr_events; - struct strlist *ev_qualifier; - struct { - size_t nr; - int *entries; - } ev_qualifier_ids; - struct intlist *tid_list; - struct intlist *pid_list; - struct { - size_t nr; - pid_t *entries; - } filter_pids; - double duration_filter; - double runtime_ms; - struct { - u64 vfs_getname, - proc_getname; - } stats; - bool not_ev_qualifier; - bool live; - bool full_time; - bool sched; - bool multiple_threads; - bool summary; - bool summary_only; - bool show_comm; - bool show_tool_stats; - bool trace_syscalls; - bool force; - bool vfs_getname; - int trace_pgfaults; -}; - static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) { struct thread_trace *ttrace = thread__priv(thread); @@ -1618,6 +1068,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine, color_fprintf(trace->output, PERF_COLOR_RED, "LOST %" PRIu64 " events!\n", event->lost.lost); ret = machine__process_lost_event(machine, event, sample); + break; default: ret = machine__process_event(machine, event, sample); break; @@ -1635,6 +1086,24 @@ static int trace__tool_process(struct perf_tool *tool, return trace__process_event(trace, machine, event, sample); } +static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) +{ + struct machine *machine = vmachine; + + if (machine->kptr_restrict_warned) + return NULL; + + if (symbol_conf.kptr_restrict) { + pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" + "Check /proc/sys/kernel/kptr_restrict.\n\n" + "Kernel samples will not be resolved.\n"); + machine->kptr_restrict_warned = true; + return NULL; + } + + return machine__resolve_kernel_addr(vmachine, addrp, modp); +} + static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) { int err = symbol__init(NULL); @@ -1646,7 +1115,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) if (trace->host == NULL) return -ENOMEM; - if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0) + if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0) return -errno; err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, @@ -1661,7 +1130,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) static int syscall__set_arg_fmts(struct syscall *sc) { struct format_field *field; - int idx = 0; + int idx = 0, len; sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); if (sc->arg_scnprintf == NULL) @@ -1673,8 +1142,31 @@ static int syscall__set_arg_fmts(struct syscall *sc) for (field = sc->args; field; field = field->next) { if (sc->fmt && sc->fmt->arg_scnprintf[idx]) sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; + else if (strcmp(field->type, "const char *") == 0 && + (strcmp(field->name, "filename") == 0 || + strcmp(field->name, "path") == 0 || + strcmp(field->name, "pathname") == 0)) + sc->arg_scnprintf[idx] = SCA_FILENAME; else if (field->flags & FIELD_IS_POINTER) sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; + else if (strcmp(field->type, "pid_t") == 0) + sc->arg_scnprintf[idx] = SCA_PID; + else if (strcmp(field->type, "umode_t") == 0) + sc->arg_scnprintf[idx] = SCA_MODE_T; + else if ((strcmp(field->type, "int") == 0 || + strcmp(field->type, "unsigned int") == 0 || + strcmp(field->type, "long") == 0) && + (len = strlen(field->name)) >= 2 && + strcmp(field->name + len - 2, "fd") == 0) { + /* + * /sys/kernel/tracing/events/syscalls/sys_enter* + * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c + * 65 int + * 23 unsigned int + * 7 unsigned long + */ + sc->arg_scnprintf[idx] = SCA_FD; + } ++idx; } @@ -1685,7 +1177,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) { char tp_name[128]; struct syscall *sc; - const char *name = audit_syscall_to_name(id, trace->audit.machine); + const char *name = syscalltbl__name(trace->sctbl, id); if (name == NULL) return -1; @@ -1758,9 +1250,9 @@ static int trace__validate_ev_qualifier(struct trace *trace) i = 0; - strlist__for_each(pos, trace->ev_qualifier) { + strlist__for_each_entry(pos, trace->ev_qualifier) { const char *sc = pos->s; - int id = audit_name_to_syscall(sc, trace->audit.machine); + int id = syscalltbl__id(trace->sctbl, sc); if (id < 0) { if (err == 0) { @@ -1846,7 +1338,12 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, "%ld", val); } } - } else { + } else if (IS_ERR(sc->tp_format)) { + /* + * If we managed to read the tracepoint /format file, then we + * may end up not having any args, like with gettid(), so only + * print the raw args when we didn't manage to read it. + */ int i = 0; while (i < 6) { @@ -1987,7 +1484,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, goto out_put; } - if (!trace->summary_only) + if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) trace__printf_interrupted_entry(trace, sample); ttrace->entry_time = sample->time; @@ -1998,9 +1495,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, args, trace, thread); if (sc->is_exit) { - if (!trace->duration_filter && !trace->summary_only) { + if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); - fprintf(trace->output, "%-70s\n", ttrace->entry_str); + fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } } else { ttrace->entry_pending = true; @@ -2018,6 +1515,29 @@ out_put: return err; } +static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel, + struct perf_sample *sample, + struct callchain_cursor *cursor) +{ + struct addr_location al; + + if (machine__resolve(trace->host, &al, sample) < 0 || + thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack)) + return -1; + + return 0; +} + +static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) +{ + /* TODO: user-configurable print_opts */ + const unsigned int print_opts = EVSEL__PRINT_SYM | + EVSEL__PRINT_DSO | + EVSEL__PRINT_UNKNOWN_AS_ADDR; + + return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); +} + static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2025,7 +1545,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, long ret; u64 duration = 0; struct thread *thread; - int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; + int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; @@ -2042,7 +1562,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ret = perf_evsel__sc_tp_uint(evsel, ret, sample); - if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) { + if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) { trace__set_fd_pathname(thread, ret, ttrace->filename.name); ttrace->filename.pending_open = false; ++trace->stats.vfs_getname; @@ -2057,6 +1577,15 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, } else if (trace->duration_filter) goto out; + if (sample->callchain) { + callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); + if (callchain_ret == 0) { + if (callchain_cursor.nr < trace->min_stack) + goto out; + callchain_ret = 1; + } + } + if (trace->summary_only) goto out; @@ -2073,9 +1602,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (sc->fmt == NULL) { signed_print: fprintf(trace->output, ") = %ld", ret); - } else if (ret < 0 && sc->fmt->errmsg) { + } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) { char bf[STRERR_BUFSIZE]; - const char *emsg = strerror_r(-ret, bf, sizeof(bf)), + const char *emsg = str_error_r(-ret, bf, sizeof(bf)), *e = audit_errno_to_name(-ret); fprintf(trace->output, ") = -1 %s %s", e, emsg); @@ -2083,10 +1612,24 @@ signed_print: fprintf(trace->output, ") = 0 Timeout"); else if (sc->fmt->hexret) fprintf(trace->output, ") = %#lx", ret); - else + else if (sc->fmt->errpid) { + struct thread *child = machine__find_thread(trace->host, ret, ret); + + if (child != NULL) { + fprintf(trace->output, ") = %ld", ret); + if (child->comm_set) + fprintf(trace->output, " (%s)", thread__comm_str(child)); + thread__put(child); + } + } else goto signed_print; fputc('\n', trace->output); + + if (callchain_ret > 0) + trace__fprintf_callchain(trace, sample); + else if (callchain_ret < 0) + pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); out: ttrace->entry_pending = false; err = 0; @@ -2217,6 +1760,17 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { + int callchain_ret = 0; + + if (sample->callchain) { + callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); + if (callchain_ret == 0) { + if (callchain_cursor.nr < trace->min_stack) + goto out; + callchain_ret = 1; + } + } + trace__printf_interrupted_entry(trace, sample); trace__fprintf_tstamp(trace, sample->time, trace->output); @@ -2234,6 +1788,12 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, } fprintf(trace->output, ")\n"); + + if (callchain_ret > 0) + trace__fprintf_callchain(trace, sample); + else if (callchain_ret < 0) + pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); +out: return 0; } @@ -2264,8 +1824,19 @@ static int trace__pgfault(struct trace *trace, char map_type = 'd'; struct thread_trace *ttrace; int err = -1; + int callchain_ret = 0; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + + if (sample->callchain) { + callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); + if (callchain_ret == 0) { + if (callchain_cursor.nr < trace->min_stack) + goto out_put; + callchain_ret = 1; + } + } + ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) goto out_put; @@ -2307,6 +1878,11 @@ static int trace__pgfault(struct trace *trace, print_location(trace->output, sample, &al, true, false); fprintf(trace->output, " (%c%c)\n", map_type, al.level); + + if (callchain_ret > 0) + trace__fprintf_callchain(trace, sample); + else if (callchain_ret < 0) + pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); out: err = 0; out_put: @@ -2326,6 +1902,23 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample) return false; } +static void trace__set_base_time(struct trace *trace, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + /* + * BPF events were not setting PERF_SAMPLE_TIME, so be more robust + * and don't use sample->time unconditionally, we may end up having + * some other event in the future without PERF_SAMPLE_TIME for good + * reason, i.e. we may not be interested in its timestamps, just in + * it taking place, picking some piece of information when it + * appears in our event stream (vfs_getname comes to mind). + */ + if (trace->base_time == 0 && !trace->full_time && + (evsel->attr.sample_type & PERF_SAMPLE_TIME)) + trace->base_time = sample->time; +} + static int trace__process_sample(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -2340,8 +1933,7 @@ static int trace__process_sample(struct perf_tool *tool, if (skip_sample(trace, sample)) return 0; - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample->time; + trace__set_base_time(trace, evsel, sample); if (handler) { ++trace->nr_events; @@ -2450,8 +2042,7 @@ static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) return true; } -static int perf_evlist__add_pgfault(struct perf_evlist *evlist, - u64 config) +static struct perf_evsel *perf_evsel__new_pgfault(u64 config) { struct perf_evsel *evsel; struct perf_event_attr attr = { @@ -2465,13 +2056,10 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist, event_attr_init(&attr); evsel = perf_evsel__new(&attr); - if (!evsel) - return -ENOMEM; - - evsel->handler = trace__pgfault; - perf_evlist__add(evlist, evsel); + if (evsel) + evsel->handler = trace__pgfault; - return 0; + return evsel; } static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) @@ -2479,9 +2067,6 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st const u32 type = event->header.type; struct perf_evsel *evsel; - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample->time; - if (type != PERF_RECORD_SAMPLE) { trace__process_event(trace, trace->host, event, sample); return; @@ -2493,6 +2078,8 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st return; } + trace__set_base_time(trace, evsel, sample); + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && sample->raw_data == NULL) { fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", @@ -2527,6 +2114,15 @@ static int trace__add_syscall_newtp(struct trace *trace) perf_evlist__add(evlist, sys_enter); perf_evlist__add(evlist, sys_exit); + if (callchain_param.enabled && !trace->kernel_syscallchains) { + /* + * We're interested only in the user space callchain + * leading to the syscall, allow overriding that for + * debugging reasons using --kernel_syscall_callchains + */ + sys_exit->attr.exclude_callchain_kernel = 1; + } + trace->syscalls.events.sys_enter = sys_enter; trace->syscalls.events.sys_exit = sys_exit; @@ -2565,7 +2161,7 @@ out_enomem: static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = trace->evlist; - struct perf_evsel *evsel; + struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL; int err = -1, i; unsigned long before; const bool forks = argc > 0; @@ -2579,14 +2175,19 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->trace_syscalls) trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); - if ((trace->trace_pgfaults & TRACE_PFMAJ) && - perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { - goto out_error_mem; + if ((trace->trace_pgfaults & TRACE_PFMAJ)) { + pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); + if (pgfault_maj == NULL) + goto out_error_mem; + perf_evlist__add(evlist, pgfault_maj); } - if ((trace->trace_pgfaults & TRACE_PFMIN) && - perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) - goto out_error_mem; + if ((trace->trace_pgfaults & TRACE_PFMIN)) { + pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); + if (pgfault_min == NULL) + goto out_error_mem; + perf_evlist__add(evlist, pgfault_min); + } if (trace->sched && perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", @@ -2605,7 +2206,45 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } - perf_evlist__config(evlist, &trace->opts); + perf_evlist__config(evlist, &trace->opts, NULL); + + if (callchain_param.enabled) { + bool use_identifier = false; + + if (trace->syscalls.events.sys_exit) { + perf_evsel__config_callchain(trace->syscalls.events.sys_exit, + &trace->opts, &callchain_param); + use_identifier = true; + } + + if (pgfault_maj) { + perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); + use_identifier = true; + } + + if (pgfault_min) { + perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); + use_identifier = true; + } + + if (use_identifier) { + /* + * Now we have evsels with different sample_ids, use + * PERF_SAMPLE_IDENTIFIER to map from sample to evsel + * from a fixed position in each ring buffer record. + * + * As of this the changeset introducing this comment, this + * isn't strictly needed, as the fields that can come before + * PERF_SAMPLE_ID are all used, but we'll probably disable + * some of those for things like copying the payload of + * pointer syscall arguments, and for vfs_getname we don't + * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this + * here as a warning we need to use PERF_SAMPLE_IDENTIFIER. + */ + perf_evlist__set_sample_bit(evlist, IDENTIFIER); + perf_evlist__reset_sample_bit(evlist, ID); + } + } signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); @@ -2766,7 +2405,7 @@ out_error_apply_filters: fprintf(trace->output, "Failed to set filter \"%s\" on event %s with %d (%s)\n", evsel->filter, perf_evsel__name(evsel), errno, - strerror_r(errno, errbuf, sizeof(errbuf))); + str_error_r(errno, errbuf, sizeof(errbuf))); goto out_delete_evlist; } out_error_mem: @@ -2847,7 +2486,7 @@ static int trace__replay(struct trace *trace) goto out; } - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { if (evsel->attr.type == PERF_TYPE_SOFTWARE && (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || @@ -2883,15 +2522,29 @@ static size_t trace__fprintf_threads_header(FILE *fp) return printed; } +DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, + struct stats *stats; + double msecs; + int syscall; +) +{ + struct int_node *source = rb_entry(nd, struct int_node, rb_node); + struct stats *stats = source->priv; + + entry->syscall = source->i; + entry->stats = stats; + entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; +} + static size_t thread__dump_stats(struct thread_trace *ttrace, struct trace *trace, FILE *fp) { - struct stats *stats; size_t printed = 0; struct syscall *sc; - struct int_node *inode = intlist__first(ttrace->syscall_stats); + struct rb_node *nd; + DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); - if (inode == NULL) + if (syscall_stats == NULL) return 0; printed += fprintf(fp, "\n"); @@ -2900,9 +2553,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); - /* each int_node is a syscall */ - while (inode) { - stats = inode->priv; + resort_rb__for_each_entry(nd, syscall_stats) { + struct stats *stats = syscall_stats_entry->stats; if (stats) { double min = (double)(stats->min) / NSEC_PER_MSEC; double max = (double)(stats->max) / NSEC_PER_MSEC; @@ -2913,34 +2565,23 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; avg /= NSEC_PER_MSEC; - sc = &trace->syscalls.table[inode->i]; + sc = &trace->syscalls.table[syscall_stats_entry->syscall]; printed += fprintf(fp, " %-15s", sc->name); printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", - n, avg * n, min, avg); + n, syscall_stats_entry->msecs, min, avg); printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); } - - inode = intlist__next(inode); } + resort_rb__delete(syscall_stats); printed += fprintf(fp, "\n\n"); return printed; } -/* struct used to pass data to per-thread function */ -struct summary_data { - FILE *fp; - struct trace *trace; - size_t printed; -}; - -static int trace__fprintf_one_thread(struct thread *thread, void *priv) +static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) { - struct summary_data *data = priv; - FILE *fp = data->fp; - size_t printed = data->printed; - struct trace *trace = data->trace; + size_t printed = 0; struct thread_trace *ttrace = thread__priv(thread); double ratio; @@ -2956,25 +2597,45 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); if (ttrace->pfmin) printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); - printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); + if (trace->sched) + printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); + else if (fputc('\n', fp) != EOF) + ++printed; + printed += thread__dump_stats(ttrace, trace, fp); - data->printed += printed; + return printed; +} - return 0; +static unsigned long thread__nr_events(struct thread_trace *ttrace) +{ + return ttrace ? ttrace->nr_events : 0; +} + +DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)), + struct thread *thread; +) +{ + entry->thread = rb_entry(nd, struct thread, rb_node); } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { - struct summary_data data = { - .fp = fp, - .trace = trace - }; - data.printed = trace__fprintf_threads_header(fp); + DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host); + size_t printed = trace__fprintf_threads_header(fp); + struct rb_node *nd; - machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); + if (threads == NULL) { + fprintf(fp, "%s", "Error sorting output by nr_events!\n"); + return 0; + } - return data.printed; + resort_rb__for_each_entry(nd, threads) + printed += trace__fprintf_thread(fp, threads_entry->thread, trace); + + resort_rb__delete(threads); + + return printed; } static int trace__set_duration(const struct option *opt, const char *str, @@ -3056,7 +2717,7 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) + evlist__for_each_entry(evlist, evsel) evsel->handler = handler; } @@ -3070,10 +2731,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; struct trace trace = { - .audit = { - .machine = audit_detect_machine(), - .open_id = audit_name_to_syscall("open", trace.audit.machine), - }, .syscalls = { . max = -1, }, @@ -3091,6 +2748,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .output = stderr, .show_comm = true, .trace_syscalls = true, + .kernel_syscallchains = false, + .max_stack = UINT_MAX, }; const char *output_name = NULL; const char *ev_qualifier_str = NULL; @@ -3136,10 +2795,24 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) "Trace pagefaults", parse_pagefaults, "maj"), OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), + OPT_CALLBACK(0, "call-graph", &trace.opts, + "record_mode[,record_size]", record_callchain_help, + &record_parse_callchain_opt), + OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, + "Show the kernel callchains on the syscall exit path"), + OPT_UINTEGER(0, "min-stack", &trace.min_stack, + "Set the minimum stack depth when parsing the callchain, " + "anything below the specified depth will be ignored."), + OPT_UINTEGER(0, "max-stack", &trace.max_stack, + "Set the maximum stack depth when parsing the callchain, " + "anything beyond the specified depth will be ignored. " + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), OPT_END() }; + bool __maybe_unused max_stack_user_set = true; + bool mmap_pages_user_set = true; const char * const trace_subcommands[] = { "record", NULL }; int err; char bf[BUFSIZ]; @@ -3148,8 +2821,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) signal(SIGFPE, sighandler_dump_stack); trace.evlist = perf_evlist__new(); + trace.sctbl = syscalltbl__new(); - if (trace.evlist == NULL) { + if (trace.evlist == NULL || trace.sctbl == NULL) { pr_err("Not enough memory to run!\n"); err = -ENOMEM; goto out; @@ -3158,11 +2832,40 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); + err = bpf__setup_stdout(trace.evlist); + if (err) { + bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); + pr_err("ERROR: Setup BPF stdout failed: %s\n", bf); + goto out; + } + + err = -1; + if (trace.trace_pgfaults) { trace.opts.sample_address = true; trace.opts.sample_time = true; } + if (trace.opts.mmap_pages == UINT_MAX) + mmap_pages_user_set = false; + + if (trace.max_stack == UINT_MAX) { + trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack; + max_stack_user_set = false; + } + +#ifdef HAVE_DWARF_UNWIND_SUPPORT + if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls) + record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); +#endif + + if (callchain_param.enabled) { + if (!mmap_pages_user_set && geteuid() == 0) + trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4; + + symbol_conf.use_callchain = true; + } + if (trace.evlist->nr_entries > 0) evlist__set_evsel_handler(trace.evlist, trace__event_handler); @@ -3179,6 +2882,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } + if (!trace.trace_syscalls && ev_qualifier_str) { + pr_err("The -e option can't be used with --no-syscalls.\n"); + goto out; + } + if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { @@ -3187,6 +2895,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) } } + trace.open_id = syscalltbl__id(trace.sctbl, "open"); + if (ev_qualifier_str != NULL) { const char *s = ev_qualifier_str; struct strlist_config slist_config = { diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index 6461e02ab940..55daefff0d54 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -59,7 +59,6 @@ static int get_e_machine(struct jitheader *hdr) ssize_t sret; char id[16]; int fd, ret = -1; - int m = -1; struct { uint16_t e_type; uint16_t e_machine; @@ -81,17 +80,29 @@ static int get_e_machine(struct jitheader *hdr) if (sret != sizeof(info)) goto error; - m = info.e_machine; - if (m < 0) - m = 0; /* ELF EM_NONE */ - - hdr->elf_mach = m; + hdr->elf_mach = info.e_machine; ret = 0; error: close(fd); return ret; } +static int use_arch_timestamp; + +static inline uint64_t +get_arch_timestamp(void) +{ +#if defined(__i386__) || defined(__x86_64__) + unsigned int low, high; + + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + + return low | ((uint64_t)high) << 32; +#else + return 0; +#endif +} + #define NSEC_PER_SEC 1000000000 static int perf_clk_id = CLOCK_MONOTONIC; @@ -107,6 +118,9 @@ perf_get_timestamp(void) struct timespec ts; int ret; + if (use_arch_timestamp) + return get_arch_timestamp(); + ret = clock_gettime(perf_clk_id, &ts); if (ret) return 0; @@ -203,6 +217,17 @@ perf_close_marker_file(void) munmap(marker_addr, pgsz); } +static void +init_arch_timestamp(void) +{ + char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP"); + + if (!str || !*str || !strcmp(str, "0")) + return; + + use_arch_timestamp = 1; +} + void *jvmti_open(void) { int pad_cnt; @@ -211,11 +236,17 @@ void *jvmti_open(void) int fd; FILE *fp; + init_arch_timestamp(); + /* * check if clockid is supported */ - if (!perf_get_timestamp()) - warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + if (!perf_get_timestamp()) { + if (use_arch_timestamp) + warnx("jvmti: arch timestamp not supported"); + else + warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + } memset(&header, 0, sizeof(header)); @@ -263,6 +294,9 @@ void *jvmti_open(void) header.timestamp = perf_get_timestamp(); + if (use_arch_timestamp) + header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP; + if (!fwrite(&header, sizeof(header), 1, fp)) { warn("jvmti: cannot write dumpfile header"); goto error; @@ -452,10 +486,11 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, if (sret != 1) goto error; } - if (padding_count) + if (padding_count) { sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp); if (sret != 1) goto error; + } funlockfile(fp); return 0; diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 83a25cef82fd..7ed72a475c57 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -5,35 +5,18 @@ #include <sys/types.h> #include <sys/syscall.h> #include <linux/types.h> +#include <linux/compiler.h> #include <linux/perf_event.h> #include <asm/barrier.h> #if defined(__i386__) #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC {"model name"} -#ifndef __NR_perf_event_open -# define __NR_perf_event_open 336 -#endif -#ifndef __NR_futex -# define __NR_futex 240 -#endif -#ifndef __NR_gettid -# define __NR_gettid 224 -#endif #endif #if defined(__x86_64__) #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC {"model name"} -#ifndef __NR_perf_event_open -# define __NR_perf_event_open 298 -#endif -#ifndef __NR_futex -# define __NR_futex 202 -#endif -#ifndef __NR_gettid -# define __NR_gettid 186 -#endif #endif #ifdef __powerpc__ diff --git a/tools/perf/perf.c b/tools/perf/perf.c index aaee0a782747..64c06961bfe4 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -10,13 +10,14 @@ #include "util/env.h" #include <subcmd/exec-cmd.h> -#include "util/cache.h" +#include "util/config.h" #include "util/quote.h" #include <subcmd/run-command.h> #include "util/parse-events.h" #include <subcmd/parse-options.h> #include "util/bpf-loader.h" #include "util/debug.h" +#include <api/fs/fs.h> #include <api/fs/tracing_path.h> #include <pthread.h> #include <stdlib.h> @@ -138,8 +139,6 @@ struct option options[] = { OPT_ARGUMENT("html-path", "html-path"), OPT_ARGUMENT("paginate", "paginate"), OPT_ARGUMENT("no-pager", "no-pager"), - OPT_ARGUMENT("perf-dir", "perf-dir"), - OPT_ARGUMENT("work-tree", "work-tree"), OPT_ARGUMENT("debugfs-dir", "debugfs-dir"), OPT_ARGUMENT("buildid-dir", "buildid-dir"), OPT_ARGUMENT("list-cmds", "list-cmds"), @@ -199,35 +198,6 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) use_pager = 0; if (envchanged) *envchanged = 1; - } else if (!strcmp(cmd, "--perf-dir")) { - if (*argc < 2) { - fprintf(stderr, "No directory given for --perf-dir.\n"); - usage(perf_usage_string); - } - setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1); - if (envchanged) - *envchanged = 1; - (*argv)++; - (*argc)--; - handled++; - } else if (!prefixcmp(cmd, CMD_PERF_DIR)) { - setenv(PERF_DIR_ENVIRONMENT, cmd + strlen(CMD_PERF_DIR), 1); - if (envchanged) - *envchanged = 1; - } else if (!strcmp(cmd, "--work-tree")) { - if (*argc < 2) { - fprintf(stderr, "No directory given for --work-tree.\n"); - usage(perf_usage_string); - } - setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1); - if (envchanged) - *envchanged = 1; - (*argv)++; - (*argc)--; - } else if (!prefixcmp(cmd, CMD_WORK_TREE)) { - setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + strlen(CMD_WORK_TREE), 1); - if (envchanged) - *envchanged = 1; } else if (!strcmp(cmd, "--debugfs-dir")) { if (*argc < 2) { fprintf(stderr, "No directory given for --debugfs-dir.\n"); @@ -308,9 +278,11 @@ static int handle_alias(int *argcp, const char ***argv) if (*argcp > 1) { struct strbuf buf; - strbuf_init(&buf, PATH_MAX); - strbuf_addstr(&buf, alias_string); - sq_quote_argv(&buf, (*argv) + 1, PATH_MAX); + if (strbuf_init(&buf, PATH_MAX) < 0 || + strbuf_addstr(&buf, alias_string) < 0 || + sq_quote_argv(&buf, (*argv) + 1, + PATH_MAX) < 0) + die("Failed to allocate memory."); free(alias_string); alias_string = buf.buf; } @@ -360,11 +332,6 @@ const char perf_version_string[] = PERF_VERSION; #define RUN_SETUP (1<<0) #define USE_PAGER (1<<1) -/* - * require working tree to be present -- anything uses this needs - * RUN_SETUP for reading from the configuration file. - */ -#define NEED_WORK_TREE (1<<2) static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { @@ -388,6 +355,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) perf_env__set_cmdline(&perf_env, argc, argv); status = p->fn(argc, argv, prefix); + perf_config__exit(); exit_browser(status); perf_env__exit(&perf_env); bpf__clear(); @@ -406,7 +374,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) /* Check for ENOSPC and EIO errors.. */ if (fflush(stdout)) { fprintf(stderr, "write failure on standard output: %s", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out; } if (ferror(stdout)) { @@ -415,7 +383,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) } if (fclose(stdout)) { fprintf(stderr, "close failed on standard output: %s", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out; } status = 0; @@ -529,10 +497,21 @@ void pthread__unblock_sigwinch(void) pthread_sigmask(SIG_UNBLOCK, &set, NULL); } +#ifdef _SC_LEVEL1_DCACHE_LINESIZE +#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE) +#else +static void cache_line_size(int *cacheline_sizep) +{ + if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep)) + pr_debug("cannot determine cache line size"); +} +#endif + int main(int argc, const char **argv) { const char *cmd; char sbuf[STRERR_BUFSIZE]; + int value; /* libsubcmd init */ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); @@ -540,7 +519,13 @@ int main(int argc, const char **argv) /* The page_size is placed in util object. */ page_size = sysconf(_SC_PAGE_SIZE); - cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + cache_line_size(&cacheline_size); + + if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0) + sysctl_perf_event_max_stack = value; + + if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0) + sysctl_perf_event_max_contexts_per_stack = value; cmd = extract_argv0_path(argv[0]); if (!cmd) @@ -548,7 +533,9 @@ int main(int argc, const char **argv) srandom(time(NULL)); + perf_config__init(); perf_config(perf_default_config, NULL); + set_buildid_dir(NULL); /* get debugfs/tracefs mount point from /proc/mounts */ tracing_path_mount(); @@ -572,7 +559,6 @@ int main(int argc, const char **argv) } if (!prefixcmp(cmd, "trace")) { #ifdef HAVE_LIBAUDIT_SUPPORT - set_buildid_dir(NULL); setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv, NULL); @@ -587,7 +573,6 @@ int main(int argc, const char **argv) argc--; handle_options(&argv, &argc, NULL); commit_pager_choice(); - set_buildid_dir(NULL); if (argc > 0) { if (!prefixcmp(argv[0], "--")) @@ -640,7 +625,7 @@ int main(int argc, const char **argv) } fprintf(stderr, "Failed to run command '%s': %s\n", - cmd, strerror_r(errno, sbuf, sizeof(sbuf))); + cmd, str_error_r(errno, sbuf, sizeof(sbuf))); out: return 1; } diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 5381a01c0610..cb0f1356ff81 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -52,7 +52,7 @@ struct record_opts { bool sample_weight; bool sample_time; bool sample_time_set; - bool callgraph_set; + bool sample_cpu; bool period; bool running_time; bool full_auxtrace; @@ -60,6 +60,8 @@ struct record_opts { bool record_switch_events; bool all_kernel; bool all_user; + bool tail_synthesize; + bool overwrite; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/python/tracepoint.py b/tools/perf/python/tracepoint.py new file mode 100755 index 000000000000..eb4dbed57de7 --- /dev/null +++ b/tools/perf/python/tracepoint.py @@ -0,0 +1,47 @@ +#! /usr/bin/python +# -*- python -*- +# -*- coding: utf-8 -*- + +import perf + +class tracepoint(perf.evsel): + def __init__(self, sys, name): + config = perf.tracepoint(sys, name) + perf.evsel.__init__(self, + type = perf.TYPE_TRACEPOINT, + config = config, + freq = 0, sample_period = 1, wakeup_events = 1, + sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_RAW | perf.SAMPLE_TIME) + +def main(): + tp = tracepoint("sched", "sched_switch") + cpus = perf.cpu_map() + threads = perf.thread_map(-1) + + evlist = perf.evlist(cpus, threads) + evlist.add(tp) + evlist.open() + evlist.mmap() + + while True: + evlist.poll(timeout = -1) + for cpu in cpus: + event = evlist.read_on_cpu(cpu) + if not event: + continue + + if not isinstance(event, perf.sample_event): + continue + + print "time %u prev_comm=%s prev_pid=%d prev_prio=%d prev_state=0x%x ==> next_comm=%s next_pid=%d next_prio=%d" % ( + event.sample_time, + event.prev_comm, + event.prev_pid, + event.prev_prio, + event.prev_state, + event.next_comm, + event.next_pid, + event.next_prio) + +if __name__ == '__main__': + main() diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build index 928e110179cb..34faecf774ae 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Build +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build @@ -1,3 +1,5 @@ libperf-y += Context.o -CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default +CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes +CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef +CFLAGS_Context.o += -Wno-switch-default -Wno-shadow diff --git a/tools/perf/scripts/python/bin/stackcollapse-record b/tools/perf/scripts/python/bin/stackcollapse-record new file mode 100755 index 000000000000..9d8f9f0f3a17 --- /dev/null +++ b/tools/perf/scripts/python/bin/stackcollapse-record @@ -0,0 +1,8 @@ +#!/bin/sh + +# +# stackcollapse.py can cover all type of perf samples including +# the tracepoints, so no special record requirements, just record what +# you want to analyze. +# +perf record "$@" diff --git a/tools/perf/scripts/python/bin/stackcollapse-report b/tools/perf/scripts/python/bin/stackcollapse-report new file mode 100755 index 000000000000..356b9656393d --- /dev/null +++ b/tools/perf/scripts/python/bin/stackcollapse-report @@ -0,0 +1,3 @@ +#!/bin/sh +# description: produce callgraphs in short form for scripting use +perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@" diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 1b02cdc0cab6..7656ff8aa066 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -34,10 +34,9 @@ import datetime # # ubuntu: # -# $ sudo apt-get install postgresql +# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql # $ sudo su - postgres -# $ createuser <your user id here> -# Shall the new role be a superuser? (y/n) y +# $ createuser -s <your user id here> # # An example of using this script with Intel PT: # @@ -224,11 +223,14 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \ perf_db_export_mode = True perf_db_export_calls = False +perf_db_export_callchains = False + def usage(): - print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]" + print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]" print >> sys.stderr, "where: columns 'all' or 'branches'" - print >> sys.stderr, " calls 'calls' => create calls table" + print >> sys.stderr, " calls 'calls' => create calls and call_paths table" + print >> sys.stderr, " callchains 'callchains' => create call_paths table" raise Exception("Too few arguments") if (len(sys.argv) < 2): @@ -246,9 +248,11 @@ if columns not in ("all", "branches"): branches = (columns == "branches") -if (len(sys.argv) >= 4): - if (sys.argv[3] == "calls"): +for i in range(3,len(sys.argv)): + if (sys.argv[i] == "calls"): perf_db_export_calls = True + elif (sys.argv[i] == "callchains"): + perf_db_export_callchains = True else: usage() @@ -359,14 +363,16 @@ else: 'transaction bigint,' 'data_src bigint,' 'branch_type integer,' - 'in_tx boolean)') + 'in_tx boolean,' + 'call_path_id bigint)') -if perf_db_export_calls: +if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE TABLE call_paths (' 'id bigint NOT NULL,' 'parent_id bigint,' 'symbol_id bigint,' 'ip bigint)') +if perf_db_export_calls: do_query(query, 'CREATE TABLE calls (' 'id bigint NOT NULL,' 'thread_id bigint,' @@ -428,7 +434,7 @@ do_query(query, 'CREATE VIEW comm_threads_view AS ' '(SELECT tid FROM threads WHERE id = thread_id) AS tid' ' FROM comm_threads') -if perf_db_export_calls: +if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'CREATE VIEW call_paths_view AS ' 'SELECT ' 'c.id,' @@ -444,6 +450,7 @@ if perf_db_export_calls: '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,' '(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name' ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id') +if perf_db_export_calls: do_query(query, 'CREATE VIEW calls_view AS ' 'SELECT ' 'calls.id,' @@ -541,8 +548,9 @@ dso_file = open_output_file("dso_table.bin") symbol_file = open_output_file("symbol_table.bin") branch_type_file = open_output_file("branch_type_table.bin") sample_file = open_output_file("sample_table.bin") -if perf_db_export_calls: +if perf_db_export_calls or perf_db_export_callchains: call_path_file = open_output_file("call_path_table.bin") +if perf_db_export_calls: call_file = open_output_file("call_table.bin") def trace_begin(): @@ -554,8 +562,8 @@ def trace_begin(): comm_table(0, "unknown") dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") - sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - if perf_db_export_calls: + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + if perf_db_export_calls or perf_db_export_callchains: call_path_table(0, 0, 0, 0) unhandled_count = 0 @@ -571,8 +579,9 @@ def trace_end(): copy_output_file(symbol_file, "symbols") copy_output_file(branch_type_file, "branch_types") copy_output_file(sample_file, "samples") - if perf_db_export_calls: + if perf_db_export_calls or perf_db_export_callchains: copy_output_file(call_path_file, "call_paths") + if perf_db_export_calls: copy_output_file(call_file, "calls") print datetime.datetime.today(), "Removing intermediate files..." @@ -585,8 +594,9 @@ def trace_end(): remove_output_file(symbol_file) remove_output_file(branch_type_file) remove_output_file(sample_file) - if perf_db_export_calls: + if perf_db_export_calls or perf_db_export_callchains: remove_output_file(call_path_file) + if perf_db_export_calls: remove_output_file(call_file) os.rmdir(output_dir_name) print datetime.datetime.today(), "Adding primary keys" @@ -599,8 +609,9 @@ def trace_end(): do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)') - if perf_db_export_calls: + if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)') + if perf_db_export_calls: do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)') print datetime.datetime.today(), "Adding foreign keys" @@ -623,10 +634,11 @@ def trace_end(): 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),' 'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),' 'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)') - if perf_db_export_calls: + if perf_db_export_calls or perf_db_export_callchains: do_query(query, 'ALTER TABLE call_paths ' 'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),' 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id)') + if perf_db_export_calls: do_query(query, 'ALTER TABLE calls ' 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),' 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),' @@ -694,11 +706,11 @@ def branch_type_table(branch_type, name, *x): value = struct.pack(fmt, 2, 4, branch_type, n, name) branch_type_file.write(value) -def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x): +def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x): if branches: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id) else: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id) sample_file.write(value) def call_path_table(cp_id, parent_id, symbol_id, ip, *x): diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py index 4d21ef2d601d..4c6f09ac7d12 100644 --- a/tools/perf/scripts/python/netdev-times.py +++ b/tools/perf/scripts/python/netdev-times.py @@ -252,9 +252,10 @@ def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, callchain, i event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret) all_event_list.append(event_info) -def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi, dev_name): +def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi, + dev_name, work=None, budget=None): event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, - napi, dev_name) + napi, dev_name, work, budget) all_event_list.append(event_info) def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr, @@ -354,11 +355,13 @@ def handle_irq_softirq_exit(event_info): receive_hunk_list.append(rec_data) def handle_napi_poll(event_info): - (name, context, cpu, time, pid, comm, napi, dev_name) = event_info + (name, context, cpu, time, pid, comm, napi, dev_name, + work, budget) = event_info if cpu in net_rx_dic.keys(): event_list = net_rx_dic[cpu]['event_list'] rec_data = {'event_name':'napi_poll', - 'dev':dev_name, 'event_t':time} + 'dev':dev_name, 'event_t':time, + 'work':work, 'budget':budget} event_list.append(rec_data) def handle_netif_rx(event_info): diff --git a/tools/perf/scripts/python/stackcollapse.py b/tools/perf/scripts/python/stackcollapse.py new file mode 100755 index 000000000000..5a605f70ef32 --- /dev/null +++ b/tools/perf/scripts/python/stackcollapse.py @@ -0,0 +1,125 @@ +# stackcollapse.py - format perf samples with one line per distinct call stack +# +# This script's output has two space-separated fields. The first is a semicolon +# separated stack including the program name (from the "comm" field) and the +# function names from the call stack. The second is a count: +# +# swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2 +# +# The file is sorted according to the first field. +# +# Input may be created and processed using: +# +# perf record -a -g -F 99 sleep 60 +# perf script report stackcollapse > out.stacks-folded +# +# (perf script record stackcollapse works too). +# +# Written by Paolo Bonzini <pbonzini@redhat.com> +# Based on Brendan Gregg's stackcollapse-perf.pl script. + +import os +import sys +from collections import defaultdict +from optparse import OptionParser, make_option + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from EventClass import * + +# command line parsing + +option_list = [ + # formatting options for the bottom entry of the stack + make_option("--include-tid", dest="include_tid", + action="store_true", default=False, + help="include thread id in stack"), + make_option("--include-pid", dest="include_pid", + action="store_true", default=False, + help="include process id in stack"), + make_option("--no-comm", dest="include_comm", + action="store_false", default=True, + help="do not separate stacks according to comm"), + make_option("--tidy-java", dest="tidy_java", + action="store_true", default=False, + help="beautify Java signatures"), + make_option("--kernel", dest="annotate_kernel", + action="store_true", default=False, + help="annotate kernel functions with _[k]") +] + +parser = OptionParser(option_list=option_list) +(opts, args) = parser.parse_args() + +if len(args) != 0: + parser.error("unexpected command line argument") +if opts.include_tid and not opts.include_comm: + parser.error("requesting tid but not comm is invalid") +if opts.include_pid and not opts.include_comm: + parser.error("requesting pid but not comm is invalid") + +# event handlers + +lines = defaultdict(lambda: 0) + +def process_event(param_dict): + def tidy_function_name(sym, dso): + if sym is None: + sym = '[unknown]' + + sym = sym.replace(';', ':') + if opts.tidy_java: + # the original stackcollapse-perf.pl script gives the + # example of converting this: + # Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V + # to this: + # org/mozilla/javascript/MemberBox:.init + sym = sym.replace('<', '') + sym = sym.replace('>', '') + if sym[0] == 'L' and sym.find('/'): + sym = sym[1:] + try: + sym = sym[:sym.index('(')] + except ValueError: + pass + + if opts.annotate_kernel and dso == '[kernel.kallsyms]': + return sym + '_[k]' + else: + return sym + + stack = list() + if 'callchain' in param_dict: + for entry in param_dict['callchain']: + entry.setdefault('sym', dict()) + entry['sym'].setdefault('name', None) + entry.setdefault('dso', None) + stack.append(tidy_function_name(entry['sym']['name'], + entry['dso'])) + else: + param_dict.setdefault('symbol', None) + param_dict.setdefault('dso', None) + stack.append(tidy_function_name(param_dict['symbol'], + param_dict['dso'])) + + if opts.include_comm: + comm = param_dict["comm"].replace(' ', '_') + sep = "-" + if opts.include_pid: + comm = comm + sep + str(param_dict['sample']['pid']) + sep = "/" + if opts.include_tid: + comm = comm + sep + str(param_dict['sample']['tid']) + stack.append(comm) + + stack_string = ';'.join(reversed(stack)) + lines[stack_string] = lines[stack_string] + 1 + +def trace_end(): + list = lines.keys() + list.sort() + for stack in list: + print "%s %d" % (stack, lines[stack]) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 1ba628ed049a..dc51bc570e51 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -37,6 +37,11 @@ perf-y += topology.o perf-y += cpumap.o perf-y += stat.o perf-y += event_update.o +perf-y += event-times.o +perf-y += backward-ring-buffer.o +perf-y += sdt.o +perf-y += is_printable_array.o +perf-y += bitmap.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c new file mode 100644 index 000000000000..615780cbfe1d --- /dev/null +++ b/tools/perf/tests/backward-ring-buffer.c @@ -0,0 +1,151 @@ +/* + * Test backward bit in event attribute, read ring buffer from end to + * beginning + */ + +#include <perf.h> +#include <evlist.h> +#include <sys/prctl.h> +#include "tests.h" +#include "debug.h" + +#define NR_ITERS 111 + +static void testcase(void) +{ + int i; + + for (i = 0; i < NR_ITERS; i++) { + char proc_name[10]; + + snprintf(proc_name, sizeof(proc_name), "p:%d\n", i); + prctl(PR_SET_NAME, proc_name); + } +} + +static int count_samples(struct perf_evlist *evlist, int *sample_count, + int *comm_count) +{ + int i; + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + perf_mmap__read_catchup(&evlist->backward_mmap[i]); + while ((event = perf_mmap__read_backward(&evlist->backward_mmap[i])) != NULL) { + const u32 type = event->header.type; + + switch (type) { + case PERF_RECORD_SAMPLE: + (*sample_count)++; + break; + case PERF_RECORD_COMM: + (*comm_count)++; + break; + default: + pr_err("Unexpected record of type %d\n", type); + return TEST_FAIL; + } + } + } + return TEST_OK; +} + +static int do_test(struct perf_evlist *evlist, int mmap_pages, + int *sample_count, int *comm_count) +{ + int err; + char sbuf[STRERR_BUFSIZE]; + + err = perf_evlist__mmap(evlist, mmap_pages, true); + if (err < 0) { + pr_debug("perf_evlist__mmap: %s\n", + str_error_r(errno, sbuf, sizeof(sbuf))); + return TEST_FAIL; + } + + perf_evlist__enable(evlist); + testcase(); + perf_evlist__disable(evlist); + + err = count_samples(evlist, sample_count, comm_count); + perf_evlist__munmap(evlist); + return err; +} + + +int test__backward_ring_buffer(int subtest __maybe_unused) +{ + int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0; + char pid[16], sbuf[STRERR_BUFSIZE]; + struct perf_evlist *evlist; + struct perf_evsel *evsel __maybe_unused; + struct parse_events_error parse_error; + struct record_opts opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .freq = 0, + .mmap_pages = 256, + .default_interval = 1, + }; + + snprintf(pid, sizeof(pid), "%d", getpid()); + pid[sizeof(pid) - 1] = '\0'; + opts.target.tid = opts.target.pid = pid; + + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("No ehough memory to create evlist\n"); + return TEST_FAIL; + } + + err = perf_evlist__create_maps(evlist, &opts.target); + if (err < 0) { + pr_debug("Not enough memory to create thread/cpu maps\n"); + goto out_delete_evlist; + } + + bzero(&parse_error, sizeof(parse_error)); + /* + * Set backward bit, ring buffer should be writing from end. Record + * it in aux evlist + */ + err = parse_events(evlist, "syscalls:sys_enter_prctl/overwrite/", &parse_error); + if (err) { + pr_debug("Failed to parse tracepoint event, try use root\n"); + ret = TEST_SKIP; + goto out_delete_evlist; + } + + perf_evlist__config(evlist, &opts, NULL); + + err = perf_evlist__open(evlist); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", + str_error_r(errno, sbuf, sizeof(sbuf))); + goto out_delete_evlist; + } + + ret = TEST_FAIL; + err = do_test(evlist, opts.mmap_pages, &sample_count, + &comm_count); + if (err != TEST_OK) + goto out_delete_evlist; + + if ((sample_count != NR_ITERS) || (comm_count != NR_ITERS)) { + pr_err("Unexpected counter: sample_count=%d, comm_count=%d\n", + sample_count, comm_count); + goto out_delete_evlist; + } + + err = do_test(evlist, 1, &sample_count, &comm_count); + if (err != TEST_OK) + goto out_delete_evlist; + + ret = TEST_OK; +out_delete_evlist: + perf_evlist__delete(evlist); + return ret; +} diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c new file mode 100644 index 000000000000..9abe6c13090f --- /dev/null +++ b/tools/perf/tests/bitmap.c @@ -0,0 +1,53 @@ +#include <linux/compiler.h> +#include <linux/bitmap.h> +#include "tests.h" +#include "cpumap.h" +#include "debug.h" + +#define NBITS 100 + +static unsigned long *get_bitmap(const char *str, int nbits) +{ + struct cpu_map *map = cpu_map__new(str); + unsigned long *bm = NULL; + int i; + + bm = bitmap_alloc(nbits); + + if (map && bm) { + bitmap_zero(bm, nbits); + + for (i = 0; i < map->nr; i++) + set_bit(map->map[i], bm); + } + + if (map) + cpu_map__put(map); + return bm; +} + +static int test_bitmap(const char *str) +{ + unsigned long *bm = get_bitmap(str, NBITS); + char buf[100]; + int ret; + + bitmap_scnprintf(bm, NBITS, buf, sizeof(buf)); + pr_debug("bitmap: %s\n", buf); + + ret = !strcmp(buf, str); + free(bm); + return ret; +} + +int test__bitmap_print(int subtest __maybe_unused) +{ + TEST_ASSERT_VAL("failed to convert map", test_bitmap("1")); + TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,5")); + TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3,5,7,9,11,13,15,17,19,21-40")); + TEST_ASSERT_VAL("failed to convert map", test_bitmap("2-5")); + TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3-6,8-10,24,35-37")); + TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3-6,8-10,24,35-37")); + TEST_ASSERT_VAL("failed to convert map", test_bitmap("1-10,12-20,22-30,32-40")); + return 0; +} diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index 0ec9c2c03164..268e5f8e4aa2 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -31,8 +31,8 @@ struct bpf_map_def SEC("maps") flip_table = { .max_entries = 1, }; -SEC("func=sys_epoll_pwait") -int bpf_func__sys_epoll_pwait(void *ctx) +SEC("func=SyS_epoll_wait") +int bpf_func__SyS_epoll_wait(void *ctx) { int ind =0; int *flag = bpf_map_lookup_elem(&flip_table, &ind); diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 199501c71e27..fc54064b9186 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -13,13 +13,13 @@ #ifdef HAVE_LIBBPF_SUPPORT -static int epoll_pwait_loop(void) +static int epoll_wait_loop(void) { int i; /* Should fail NR_ITERS times */ for (i = 0; i < NR_ITERS; i++) - epoll_pwait(-(i + 1), NULL, 0, 0, NULL); + epoll_wait(-(i + 1), NULL, 0, 0); return 0; } @@ -61,7 +61,7 @@ static struct { "[basic_bpf_test]", "fix 'perf test LLVM' first", "load bpf object failed", - &epoll_pwait_loop, + &epoll_wait_loop, (NR_ITERS + 1) / 2, }, #ifdef HAVE_BPF_PROLOGUE @@ -138,19 +138,19 @@ static int do_test(struct bpf_object *obj, int (*func)(void), perf_evlist__splice_list_tail(evlist, &parse_evlist.list); evlist->nr_groups = parse_evlist.nr_groups; - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); err = perf_evlist__open(evlist); if (err < 0) { pr_debug("perf_evlist__open: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } err = perf_evlist__mmap(evlist, opts.mmap_pages, false); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index f2b1dcac45d3..778668a2a966 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -14,6 +14,8 @@ #include <subcmd/parse-options.h> #include "symbol.h" +static bool dont_fork; + struct test __weak arch_tests[] = { { .func = NULL, @@ -204,6 +206,30 @@ static struct test generic_tests[] = { .func = test__event_update, }, { + .desc = "Test events times", + .func = test__event_times, + }, + { + .desc = "Test backward reading from ring buffer", + .func = test__backward_ring_buffer, + }, + { + .desc = "Test cpu map print", + .func = test__cpu_map_print, + }, + { + .desc = "Test SDT event probing", + .func = test__sdt_event, + }, + { + .desc = "Test is_printable_array function", + .func = test__is_printable_array, + }, + { + .desc = "Test bitmap print", + .func = test__bitmap_print, + }, + { .func = NULL, }, }; @@ -239,44 +265,51 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char static int run_test(struct test *test, int subtest) { - int status, err = -1, child = fork(); + int status, err = -1, child = dont_fork ? 0 : fork(); char sbuf[STRERR_BUFSIZE]; if (child < 0) { pr_err("failed to fork test: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); return -1; } if (!child) { - pr_debug("test child forked, pid %d\n", getpid()); - if (!verbose) { - int nullfd = open("/dev/null", O_WRONLY); - if (nullfd >= 0) { - close(STDERR_FILENO); - close(STDOUT_FILENO); - - dup2(nullfd, STDOUT_FILENO); - dup2(STDOUT_FILENO, STDERR_FILENO); - close(nullfd); + if (!dont_fork) { + pr_debug("test child forked, pid %d\n", getpid()); + + if (!verbose) { + int nullfd = open("/dev/null", O_WRONLY); + + if (nullfd >= 0) { + close(STDERR_FILENO); + close(STDOUT_FILENO); + + dup2(nullfd, STDOUT_FILENO); + dup2(STDOUT_FILENO, STDERR_FILENO); + close(nullfd); + } + } else { + signal(SIGSEGV, sighandler_dump_stack); + signal(SIGFPE, sighandler_dump_stack); } - } else { - signal(SIGSEGV, sighandler_dump_stack); - signal(SIGFPE, sighandler_dump_stack); } err = test->func(subtest); - exit(err); + if (!dont_fork) + exit(err); } - wait(&status); + if (!dont_fork) { + wait(&status); - if (WIFEXITED(status)) { - err = (signed char)WEXITSTATUS(status); - pr_debug("test child finished with %d\n", err); - } else if (WIFSIGNALED(status)) { - err = -1; - pr_debug("test child interrupted\n"); + if (WIFEXITED(status)) { + err = (signed char)WEXITSTATUS(status); + pr_debug("test child finished with %d\n", err); + } else if (WIFSIGNALED(status)) { + err = -1; + pr_debug("test child interrupted\n"); + } } return err; @@ -417,6 +450,8 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('s', "skip", &skip, "tests", "tests to skip"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('F', "dont-fork", &dont_fork, + "Do not fork for testcase"), OPT_END() }; const char * const test_subcommands[] = { "list", NULL }; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index abd3f0ec0c0b..2af156a8d4e5 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -33,44 +33,86 @@ static unsigned int hex(char c) return c - 'A' + 10; } -static size_t read_objdump_line(const char *line, size_t line_len, void *buf, - size_t len) +static size_t read_objdump_chunk(const char **line, unsigned char **buf, + size_t *buf_len) { - const char *p; - size_t i, j = 0; - - /* Skip to a colon */ - p = strchr(line, ':'); - if (!p) - return 0; - i = p + 1 - line; + size_t bytes_read = 0; + unsigned char *chunk_start = *buf; /* Read bytes */ - while (j < len) { + while (*buf_len > 0) { char c1, c2; - /* Skip spaces */ - for (; i < line_len; i++) { - if (!isspace(line[i])) - break; - } /* Get 2 hex digits */ - if (i >= line_len || !isxdigit(line[i])) + c1 = *(*line)++; + if (!isxdigit(c1)) break; - c1 = line[i++]; - if (i >= line_len || !isxdigit(line[i])) + c2 = *(*line)++; + if (!isxdigit(c2)) break; - c2 = line[i++]; - /* Followed by a space */ - if (i < line_len && line[i] && !isspace(line[i])) + + /* Store byte and advance buf */ + **buf = (hex(c1) << 4) | hex(c2); + (*buf)++; + (*buf_len)--; + bytes_read++; + + /* End of chunk? */ + if (isspace(**line)) break; - /* Store byte */ - *(unsigned char *)buf = (hex(c1) << 4) | hex(c2); - buf += 1; - j++; } + + /* + * objdump will display raw insn as LE if code endian + * is LE and bytes_per_chunk > 1. In that case reverse + * the chunk we just read. + * + * see disassemble_bytes() at binutils/objdump.c for details + * how objdump chooses display endian) + */ + if (bytes_read > 1 && !bigendian()) { + unsigned char *chunk_end = chunk_start + bytes_read - 1; + unsigned char tmp; + + while (chunk_start < chunk_end) { + tmp = *chunk_start; + *chunk_start = *chunk_end; + *chunk_end = tmp; + chunk_start++; + chunk_end--; + } + } + + return bytes_read; +} + +static size_t read_objdump_line(const char *line, unsigned char *buf, + size_t buf_len) +{ + const char *p; + size_t ret, bytes_read = 0; + + /* Skip to a colon */ + p = strchr(line, ':'); + if (!p) + return 0; + p++; + + /* Skip initial spaces */ + while (*p) { + if (!isspace(*p)) + break; + p++; + } + + do { + ret = read_objdump_chunk(&p, &buf, &buf_len); + bytes_read += ret; + p++; + } while (ret > 0); + /* return number of successfully read bytes */ - return j; + return bytes_read; } static int read_objdump_output(FILE *f, void *buf, size_t *len, u64 start_addr) @@ -95,7 +137,7 @@ static int read_objdump_output(FILE *f, void *buf, size_t *len, u64 start_addr) } /* read objdump data into temporary buffer */ - read_bytes = read_objdump_line(line, ret, tmp, sizeof(tmp)); + read_bytes = read_objdump_line(line, tmp, sizeof(tmp)); if (!read_bytes) continue; @@ -152,7 +194,7 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, ret = read_objdump_output(f, buf, &len, addr); if (len) { - pr_debug("objdump read too few bytes\n"); + pr_debug("objdump read too few bytes: %zd\n", len); if (!ret) ret = len; } @@ -532,7 +574,7 @@ static int do_test_code_reading(bool try_kcore) goto out_put; } - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); evsel = perf_evlist__first(evlist); diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 4cb6418a8ffc..f168a85992d0 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -1,5 +1,12 @@ #include "tests.h" +#include <stdio.h> #include "cpumap.h" +#include "event.h" +#include <string.h> +#include <linux/bitops.h> +#include "debug.h" + +struct machine; static int process_event_mask(struct perf_tool *tool __maybe_unused, union perf_event *event, @@ -86,3 +93,27 @@ int test__cpu_map_synthesize(int subtest __maybe_unused) cpu_map__put(cpus); return 0; } + +static int cpu_map_print(const char *str) +{ + struct cpu_map *map = cpu_map__new(str); + char buf[100]; + + if (!map) + return -1; + + cpu_map__snprint(map, buf, sizeof(buf)); + return !strcmp(buf, str); +} + +int test__cpu_map_print(int subtest __maybe_unused) +{ + TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1")); + TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,5")); + TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,3,5,7,9,11,13,15,17,19,21-40")); + TEST_ASSERT_VAL("failed to convert map", cpu_map_print("2-5")); + TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,3-6,8-10,24,35-37")); + TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,3-6,8-10,24,35-37")); + TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1-10,12-20,22-30,32-40")); + return 0; +} diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index dc673ff7c437..13725e09ba22 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -202,7 +202,7 @@ static int dsos__create(int cnt, int size) { int i; - dsos = malloc(sizeof(dsos) * cnt); + dsos = malloc(sizeof(*dsos) * cnt); TEST_ASSERT_VAL("failed to alloc dsos array", dsos); for (i = 0; i < cnt; i++) { @@ -251,6 +251,9 @@ int test__dso_data_cache(int subtest __maybe_unused) long nr_end, nr = open_files_cnt(); int dso_cnt, limit, i, fd; + /* Rest the internal dso open counter limit. */ + reset_fd_limit(); + memset(&machine, 0, sizeof(machine)); /* set as system limit */ @@ -312,6 +315,9 @@ int test__dso_data_reopen(int subtest __maybe_unused) #define dso_1 (dsos[1]) #define dso_2 (dsos[2]) + /* Rest the internal dso open counter limit. */ + reset_fd_limit(); + memset(&machine, 0, sizeof(machine)); /* diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c new file mode 100644 index 000000000000..19ef77bd6eb4 --- /dev/null +++ b/tools/perf/tests/event-times.c @@ -0,0 +1,235 @@ +#include <linux/compiler.h> +#include <string.h> +#include "tests.h" +#include "evlist.h" +#include "evsel.h" +#include "util.h" +#include "debug.h" +#include "thread_map.h" +#include "target.h" + +static int attach__enable_on_exec(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct target target = { + .uid = UINT_MAX, + }; + const char *argv[] = { "true", NULL, }; + char sbuf[STRERR_BUFSIZE]; + int err; + + pr_debug("attaching to spawned child, enable on exec\n"); + + err = perf_evlist__create_maps(evlist, &target); + if (err < 0) { + pr_debug("Not enough memory to create thread/cpu maps\n"); + return err; + } + + err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL); + if (err < 0) { + pr_debug("Couldn't run the workload!\n"); + return err; + } + + evsel->attr.enable_on_exec = 1; + + err = perf_evlist__open(evlist); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", + str_error_r(errno, sbuf, sizeof(sbuf))); + return err; + } + + return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL; +} + +static int detach__enable_on_exec(struct perf_evlist *evlist) +{ + waitpid(evlist->workload.pid, NULL, 0); + return 0; +} + +static int attach__current_disabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct thread_map *threads; + int err; + + pr_debug("attaching to current thread as disabled\n"); + + threads = thread_map__new(-1, getpid(), UINT_MAX); + if (threads == NULL) { + pr_debug("thread_map__new\n"); + return -1; + } + + evsel->attr.disabled = 1; + + err = perf_evsel__open_per_thread(evsel, threads); + if (err) { + pr_debug("Failed to open event cpu-clock:u\n"); + return err; + } + + thread_map__put(threads); + return perf_evsel__enable(evsel) == 0 ? TEST_OK : TEST_FAIL; +} + +static int attach__current_enabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct thread_map *threads; + int err; + + pr_debug("attaching to current thread as enabled\n"); + + threads = thread_map__new(-1, getpid(), UINT_MAX); + if (threads == NULL) { + pr_debug("failed to call thread_map__new\n"); + return -1; + } + + err = perf_evsel__open_per_thread(evsel, threads); + + thread_map__put(threads); + return err == 0 ? TEST_OK : TEST_FAIL; +} + +static int detach__disable(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + + return perf_evsel__enable(evsel); +} + +static int attach__cpu_disabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct cpu_map *cpus; + int err; + + pr_debug("attaching to CPU 0 as enabled\n"); + + cpus = cpu_map__new("0"); + if (cpus == NULL) { + pr_debug("failed to call cpu_map__new\n"); + return -1; + } + + evsel->attr.disabled = 1; + + err = perf_evsel__open_per_cpu(evsel, cpus); + if (err) { + if (err == -EACCES) + return TEST_SKIP; + + pr_debug("Failed to open event cpu-clock:u\n"); + return err; + } + + cpu_map__put(cpus); + return perf_evsel__enable(evsel); +} + +static int attach__cpu_enabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct cpu_map *cpus; + int err; + + pr_debug("attaching to CPU 0 as enabled\n"); + + cpus = cpu_map__new("0"); + if (cpus == NULL) { + pr_debug("failed to call cpu_map__new\n"); + return -1; + } + + err = perf_evsel__open_per_cpu(evsel, cpus); + if (err == -EACCES) + return TEST_SKIP; + + cpu_map__put(cpus); + return err ? TEST_FAIL : TEST_OK; +} + +static int test_times(int (attach)(struct perf_evlist *), + int (detach)(struct perf_evlist *)) +{ + struct perf_counts_values count; + struct perf_evlist *evlist = NULL; + struct perf_evsel *evsel; + int err = -1, i; + + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("failed to create event list\n"); + goto out_err; + } + + err = parse_events(evlist, "cpu-clock:u", NULL); + if (err) { + pr_debug("failed to parse event cpu-clock:u\n"); + goto out_err; + } + + evsel = perf_evlist__last(evlist); + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + + err = attach(evlist); + if (err == TEST_SKIP) { + pr_debug(" SKIP : not enough rights\n"); + return err; + } + + TEST_ASSERT_VAL("failed to attach", !err); + + for (i = 0; i < 100000000; i++) { } + + TEST_ASSERT_VAL("failed to detach", !detach(evlist)); + + perf_evsel__read(evsel, 0, 0, &count); + + err = !(count.ena == count.run); + + pr_debug(" %s: ena %" PRIu64", run %" PRIu64"\n", + !err ? "OK " : "FAILED", + count.ena, count.run); + +out_err: + perf_evlist__delete(evlist); + return !err ? TEST_OK : TEST_FAIL; +} + +/* + * This test creates software event 'cpu-clock' + * attaches it in several ways (explained below) + * and checks that enabled and running times + * match. + */ +int test__event_times(int subtest __maybe_unused) +{ + int err, ret = 0; + +#define _T(attach, detach) \ + err = test_times(attach, detach); \ + if (err && (ret == TEST_OK || ret == TEST_SKIP)) \ + ret = err; + + /* attach on newly spawned process after exec */ + _T(attach__enable_on_exec, detach__enable_on_exec) + /* attach on current process as enabled */ + _T(attach__current_enabled, detach__disable) + /* attach on current process as disabled */ + _T(attach__current_disabled, detach__disable) + /* attach on cpu as disabled */ + _T(attach__cpu_disabled, detach__disable) + /* attach on cpu as enabled */ + _T(attach__cpu_enabled, detach__disable) + +#undef _T + return ret; +} diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 012eab5d1df1..63ecf21750eb 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -30,7 +30,7 @@ static int process_event_scale(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong id", ev->id == 123); TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE); - TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123); + TEST_ASSERT_VAL("wrong scale", ev_data->scale == 0.123); return 0; } diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 2de4a4f2c3ed..60926a1f6fd7 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -80,7 +80,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names) } err = 0; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) { --err; pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]); diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index c809463edbe5..a2b5ff9bf83d 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -1,4 +1,5 @@ #include <api/fd/array.h> +#include <poll.h> #include "util/debug.h" #include "tests/tests.h" @@ -36,7 +37,7 @@ int test__fdarray__filter(int subtest __maybe_unused) } fdarray__init_revents(fda, POLLIN); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); if (nr_fds != fda->nr_alloc) { pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything", nr_fds, fda->nr_alloc); @@ -44,7 +45,7 @@ int test__fdarray__filter(int subtest __maybe_unused) } fdarray__init_revents(fda, POLLHUP); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); if (nr_fds != 0) { pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds", nr_fds, fda->nr_alloc); @@ -57,7 +58,7 @@ int test__fdarray__filter(int subtest __maybe_unused) pr_debug("\nfiltering all but fda->entries[2]:"); fdarray__fprintf_prefix(fda, "before", stderr); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); fdarray__fprintf_prefix(fda, " after", stderr); if (nr_fds != 1) { pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds); @@ -78,7 +79,7 @@ int test__fdarray__filter(int subtest __maybe_unused) pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):"); fdarray__fprintf_prefix(fda, "before", stderr); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); fdarray__fprintf_prefix(fda, " after", stderr); if (nr_fds != 2) { pr_debug("\nfdarray__filter()=%d != 2, should have left just two events", diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index f55f4bd47932..6b21746d6eec 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -161,7 +161,7 @@ void print_hists_in(struct hists *hists) struct rb_root *root; struct rb_node *node; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index ed5aa9eaeb6c..9fd54b79a788 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -101,7 +101,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) if (machine__resolve(machine, &al, &sample) < 0) goto out; - if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack, NULL) < 0) { addr_location__put(&al); goto out; @@ -126,7 +126,7 @@ static void del_hist_entries(struct hists *hists) struct rb_root *root_out; struct rb_node *node; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root_in = &hists->entries_collapsed; else root_in = hists->entries_in; @@ -216,6 +216,8 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec /* check callchain entries */ root = &he->callchain->node.rb_root; + + TEST_ASSERT_VAL("callchains expected", !RB_EMPTY_ROOT(root)); cnode = rb_entry(rb_first(root), struct callchain_node, rb_node); c = 0; @@ -666,6 +668,8 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) perf_evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); + + callchain_param = callchain_param_default; callchain_register_param(&callchain_param); err = add_hist_entries(hists, machine); diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index b825d24f8186..62efb14f3a5a 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -56,7 +56,7 @@ static int add_hist_entries(struct perf_evlist *evlist, * (perf [perf] main) will be collapsed to an existing entry * so total 9 entries will be in the tree. */ - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { struct hist_entry_iter iter = { .evsel = evsel, @@ -81,7 +81,7 @@ static int add_hist_entries(struct perf_evlist *evlist, al.socket = fake_samples[i].socket; if (hist_entry_iter__add(&iter, &al, - PERF_MAX_STACK_DEPTH, NULL) < 0) { + sysctl_perf_event_max_stack, NULL) < 0) { addr_location__put(&al); goto out; } @@ -136,7 +136,7 @@ int test__hists_filter(int subtest __maybe_unused) if (err < 0) goto out; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 358324e47805..eddc7407ff8a 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -72,7 +72,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) * However the second evsel also has a collapsed entry for * "bash [libc] malloc" so total 9 entries will be in the tree. */ - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { struct hists *hists = evsel__hists(evsel); for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) { @@ -84,7 +84,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) if (machine__resolve(machine, &al, &sample) < 0) goto out; - he = __hists__add_entry(hists, &al, NULL, + he = hists__add_entry(hists, &al, NULL, NULL, NULL, &sample, true); if (he == NULL) { addr_location__put(&al); @@ -103,7 +103,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) if (machine__resolve(machine, &al, &sample) < 0) goto out; - he = __hists__add_entry(hists, &al, NULL, + he = hists__add_entry(hists, &al, NULL, NULL, NULL, &sample, true); if (he == NULL) { addr_location__put(&al); @@ -145,7 +145,7 @@ static int __validate_match(struct hists *hists) /* * Only entries from fake_common_samples should have a pair. */ - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -197,7 +197,7 @@ static int __validate_link(struct hists *hists, int idx) * and some entries will have no pair. However every entry * in other hists should have (dummy) pair. */ - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -301,7 +301,7 @@ int test__hists_link(int subtest __maybe_unused) if (err < 0) goto out; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index d3556fbe8c5c..63c5efaba1b5 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -67,7 +67,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) if (machine__resolve(machine, &al, &sample) < 0) goto out; - if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack, NULL) < 0) { addr_location__put(&al); goto out; @@ -92,7 +92,7 @@ static void del_hist_entries(struct hists *hists) struct rb_root *root_out; struct rb_node *node; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root_in = &hists->entries_collapsed; else root_in = hists->entries_in; diff --git a/tools/perf/tests/is_printable_array.c b/tools/perf/tests/is_printable_array.c new file mode 100644 index 000000000000..42e13393e502 --- /dev/null +++ b/tools/perf/tests/is_printable_array.c @@ -0,0 +1,36 @@ +#include <linux/compiler.h> +#include "tests.h" +#include "debug.h" +#include "util.h" + +int test__is_printable_array(int subtest __maybe_unused) +{ + char buf1[] = { 'k', 'r', 4, 'v', 'a', 0 }; + char buf2[] = { 'k', 'r', 'a', 'v', 4, 0 }; + struct { + char *buf; + unsigned int len; + int ret; + } t[] = { + { (char *) "krava", sizeof("krava"), 1 }, + { (char *) "krava", sizeof("krava") - 1, 0 }, + { (char *) "", sizeof(""), 1 }, + { (char *) "", 0, 0 }, + { NULL, 0, 0 }, + { buf1, sizeof(buf1), 0 }, + { buf2, sizeof(buf2), 0 }, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(t); i++) { + int ret; + + ret = is_printable_array((char *) t[i].buf, t[i].len); + if (ret != t[i].ret) { + pr_err("failed: test %u\n", i); + return TEST_FAIL; + } + } + + return TEST_OK; +} diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index ddb78fae064a..614e45a3c603 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -80,7 +80,7 @@ int test__keep_tracking(int subtest __maybe_unused) CHECK__(parse_events(evlist, "dummy:u", NULL)); CHECK__(parse_events(evlist, "cycles:u", NULL)); - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); evsel = perf_evlist__first(evlist); diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c index d2af78193153..76f41f249944 100644 --- a/tools/perf/tests/kmod-path.c +++ b/tools/perf/tests/kmod-path.c @@ -1,4 +1,5 @@ #include <stdbool.h> +#include <stdlib.h> #include "tests.h" #include "dso.h" #include "debug.h" diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index cff564fb4b66..b798a4bfd238 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -5,6 +5,7 @@ #include "llvm.h" #include "tests.h" #include "debug.h" +#include "util.h" #ifdef HAVE_LIBBPF_SUPPORT static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index cac15d93aea6..143f4d549769 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -81,6 +81,8 @@ make_no_libbionic := NO_LIBBIONIC=1 make_no_auxtrace := NO_AUXTRACE=1 make_no_libbpf := NO_LIBBPF=1 make_no_libcrypto := NO_LIBCRYPTO=1 +make_with_babeltrace:= LIBBABELTRACE=1 +make_no_sdt := NO_SDT=1 make_tags := tags make_cscope := cscope make_help := help @@ -104,7 +106,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 -make_minimal += NO_LIBCRYPTO=1 +make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 # $(run) contains all available tests run := make_pure @@ -136,6 +138,7 @@ run += make_no_libaudit run += make_no_libbionic run += make_no_auxtrace run += make_no_libbpf +run += make_with_babeltrace run += make_help run += make_doc run += make_perf_o diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 359e98fcd94c..634bce9caebd 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -1,3 +1,6 @@ +/* For the CLR_() macros */ +#include <pthread.h> + #include "evlist.h" #include "evsel.h" #include "thread_map.h" @@ -49,7 +52,7 @@ int test__basic_mmap(int subtest __maybe_unused) sched_setaffinity(0, sizeof(cpu_set), &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[0], strerror_r(errno, sbuf, sizeof(sbuf))); + cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf))); goto out_free_cpus; } @@ -79,7 +82,7 @@ int test__basic_mmap(int subtest __maybe_unused) if (perf_evsel__open(evsels[i], cpus, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } @@ -89,7 +92,7 @@ int test__basic_mmap(int subtest __maybe_unused) if (perf_evlist__mmap(evlist, 128, true) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } @@ -126,7 +129,7 @@ int test__basic_mmap(int subtest __maybe_unused) } err = 0; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) { pr_debug("expected %d %s events, got %d\n", expected_nr_events[evsel->idx], diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 53c2273e8859..c8d9592eb142 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -1,3 +1,6 @@ +/* For the CPU_* macros */ +#include <pthread.h> + #include <api/fs/fs.h> #include <linux/err.h> #include "evsel.h" @@ -41,7 +44,7 @@ int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused) if (perf_evsel__open(evsel, cpus, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_evsel_delete; } @@ -62,7 +65,7 @@ int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused) if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", cpus->map[cpu], - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_close_fd; } for (i = 0; i < ncalls; ++i) { @@ -73,7 +76,7 @@ int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused) } /* - * Here we need to explicitely preallocate the counts, as if + * Here we need to explicitly preallocate the counts, as if * we use the auto allocation it will allocate just for 1 cpu, * as we start by cpu 0. */ diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index eb99a105f31c..f52239fed361 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -6,6 +6,13 @@ #include "tests.h" #include "debug.h" +#ifndef O_DIRECTORY +#define O_DIRECTORY 00200000 +#endif +#ifndef AT_FDCWD +#define AT_FDCWD -100 +#endif + int test__syscall_openat_tp_fields(int subtest __maybe_unused) { struct record_opts opts = { @@ -44,21 +51,21 @@ int test__syscall_openat_tp_fields(int subtest __maybe_unused) goto out_delete_evlist; } - perf_evsel__config(evsel, &opts); + perf_evsel__config(evsel, &opts, NULL); thread_map__set_pid(evlist->threads, 0, getpid()); err = perf_evlist__open(evlist); if (err < 0) { pr_debug("perf_evlist__open: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } err = perf_evlist__mmap(evlist, UINT_MAX, false); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 1184f9ba6499..d7414128d7fe 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -29,7 +29,7 @@ int test__openat_syscall_event(int subtest __maybe_unused) if (perf_evsel__open_per_thread(evsel, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_evsel_delete; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7865f68dc0d8..20c2e641c422 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -32,7 +32,7 @@ static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1); TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups); - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); TEST_ASSERT_VAL("wrong sample_type", @@ -207,7 +207,7 @@ test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1); - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", @@ -1783,8 +1783,8 @@ static int test_pmu_events(void) struct evlist_test e; char name[MAX_NAME]; - if (!strcmp(ent->d_name, ".") || - !strcmp(ent->d_name, "..")) + /* Names containing . are special and cannot be used directly */ + if (strchr(ent->d_name, '.')) continue; snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name); diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index 294c76b01b41..81c6eeaca0f5 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -44,8 +44,7 @@ static int process_events(union perf_event **events, size_t count) for (i = 0; i < count && !err; i++) err = process_event(&evlist, events[i]); - if (evlist) - perf_evlist__delete(evlist); + perf_evlist__delete(evlist); return err; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 1cc78cefe399..8f2e1de6d0ea 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -1,3 +1,6 @@ +/* For the CLR_() macros */ +#include <pthread.h> + #include <sched.h> #include "evlist.h" #include "evsel.h" @@ -99,12 +102,12 @@ int test__PERF_RECORD(int subtest __maybe_unused) perf_evsel__set_sample_bit(evsel, CPU); perf_evsel__set_sample_bit(evsel, TID); perf_evsel__set_sample_bit(evsel, TIME); - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); if (err < 0) { pr_debug("sched__get_first_possible_cpu: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } @@ -115,7 +118,7 @@ int test__PERF_RECORD(int subtest __maybe_unused) */ if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) { pr_debug("sched_setaffinity: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } @@ -126,7 +129,7 @@ int test__PERF_RECORD(int subtest __maybe_unused) err = perf_evlist__open(evlist); if (err < 0) { pr_debug("perf_evlist__open: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } @@ -138,7 +141,7 @@ int test__PERF_RECORD(int subtest __maybe_unused) err = perf_evlist__mmap(evlist, opts.mmap_pages, false); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c new file mode 100644 index 000000000000..f59d210e1baf --- /dev/null +++ b/tools/perf/tests/sdt.c @@ -0,0 +1,115 @@ +#include <stdio.h> +#include <sys/epoll.h> +#include <util/util.h> +#include <util/evlist.h> +#include <linux/filter.h> +#include "tests.h" +#include "debug.h" +#include "probe-file.h" +#include "build-id.h" + +/* To test SDT event, we need libelf support to scan elf binary */ +#if defined(HAVE_SDT_EVENT) && defined(HAVE_LIBELF_SUPPORT) + +#include <sys/sdt.h> + +static int target_function(void) +{ + DTRACE_PROBE(perf, test_target); + return TEST_OK; +} + +/* Copied from builtin-buildid-cache.c */ +static int build_id_cache__add_file(const char *filename) +{ + char sbuild_id[SBUILD_ID_SIZE]; + u8 build_id[BUILD_ID_SIZE]; + int err; + + err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + if (err < 0) { + pr_debug("Failed to read build id of %s\n", filename); + return err; + } + + build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + err = build_id_cache__add_s(sbuild_id, filename, false, false); + if (err < 0) + pr_debug("Failed to add build id cache of %s\n", filename); + return err; +} + +static char *get_self_path(void) +{ + char *buf = calloc(PATH_MAX, sizeof(char)); + + if (buf && readlink("/proc/self/exe", buf, PATH_MAX) < 0) { + pr_debug("Failed to get correct path of perf\n"); + free(buf); + return NULL; + } + return buf; +} + +static int search_cached_probe(const char *target, + const char *group, const char *event) +{ + struct probe_cache *cache = probe_cache__new(target); + int ret = 0; + + if (!cache) { + pr_debug("Failed to open probe cache of %s\n", target); + return -EINVAL; + } + + if (!probe_cache__find_by_name(cache, group, event)) { + pr_debug("Failed to find %s:%s in the cache\n", group, event); + ret = -ENOENT; + } + probe_cache__delete(cache); + + return ret; +} + +int test__sdt_event(int subtests __maybe_unused) +{ + int ret = TEST_FAIL; + char __tempdir[] = "./test-buildid-XXXXXX"; + char *tempdir = NULL, *myself = get_self_path(); + + if (myself == NULL || mkdtemp(__tempdir) == NULL) { + pr_debug("Failed to make a tempdir for build-id cache\n"); + goto error; + } + /* Note that buildid_dir must be an absolute path */ + tempdir = realpath(__tempdir, NULL); + + /* At first, scan itself */ + set_buildid_dir(tempdir); + if (build_id_cache__add_file(myself) < 0) + goto error_rmdir; + + /* Open a cache and make sure the SDT is stored */ + if (search_cached_probe(myself, "sdt_perf", "test_target") < 0) + goto error_rmdir; + + /* TBD: probing on the SDT event and collect logs */ + + /* Call the target and get an event */ + ret = target_function(); + +error_rmdir: + /* Cleanup temporary buildid dir */ + rm_rf(tempdir); +error: + free(tempdir); + free(myself); + return ret; +} +#else +int test__sdt_event(int subtests __maybe_unused) +{ + pr_debug("Skip SDT event test because SDT support is not compiled\n"); + return TEST_SKIP; +} +#endif diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 36e8ce1550e3..4c9fd046d57b 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -70,7 +70,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) err = -errno; pr_debug("Couldn't open evlist: %s\nHint: check %s, using %" PRIu64 " in this test.\n", - strerror_r(errno, sbuf, sizeof(sbuf)), + str_error_r(errno, sbuf, sizeof(sbuf)), knob, (u64)attr.sample_freq); goto out_delete_evlist; } @@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) err = perf_evlist__mmap(evlist, 128, true); if (err < 0) { pr_debug("failed to mmap event: %d (%s)\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index ebd80168d51e..7ddbe267d0ac 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -417,7 +417,7 @@ int test__switch_tracking(int subtest __maybe_unused) perf_evsel__set_sample_bit(tracking_evsel, TIME); /* Config events */ - perf_evlist__config(evlist, &opts); + perf_evlist__config(evlist, &opts, NULL); /* Check moved event is still at the front */ if (cycles_evsel != perf_evlist__first(evlist)) { @@ -432,7 +432,7 @@ int test__switch_tracking(int subtest __maybe_unused) } /* Check non-tracking events are not tracking */ - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel != tracking_evsel) { if (evsel->attr.mmap || evsel->attr.comm) { pr_debug("Non-tracking event is tracking\n"); diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 2dfff7ac8ef3..01a5ba2788c6 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -91,13 +91,13 @@ int test__task_exit(int subtest __maybe_unused) err = perf_evlist__open(evlist); if (err < 0) { pr_debug("Couldn't open the evlist: %s\n", - strerror_r(-err, sbuf, sizeof(sbuf))); + str_error_r(-err, sbuf, sizeof(sbuf))); goto out_delete_evlist; } if (perf_evlist__mmap(evlist, 128, true) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 82b2b5e6ba7c..7c196c585472 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -85,6 +85,12 @@ int test__synthesize_stat_config(int subtest); int test__synthesize_stat(int subtest); int test__synthesize_stat_round(int subtest); int test__event_update(int subtest); +int test__event_times(int subtest); +int test__backward_ring_buffer(int subtest); +int test__cpu_map_print(int subtest); +int test__sdt_event(int subtest); +int test__is_printable_array(int subtest); +int test__bitmap_print(int subtest); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index fccde848fe9c..cee2a2cdc933 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -1,13 +1,20 @@ #include <sys/types.h> #include <unistd.h> +#include <sys/prctl.h> #include "tests.h" #include "thread_map.h" #include "debug.h" +#define NAME (const char *) "perf" +#define NAMEUL (unsigned long) NAME + int test__thread_map(int subtest __maybe_unused) { struct thread_map *map; + TEST_ASSERT_VAL("failed to set process name", + !prctl(PR_SET_NAME, NAMEUL, 0, 0, 0)); + /* test map on current pid */ map = thread_map__new_by_pid(getpid()); TEST_ASSERT_VAL("failed to alloc map", map); @@ -19,7 +26,7 @@ int test__thread_map(int subtest __maybe_unused) thread_map__pid(map, 0) == getpid()); TEST_ASSERT_VAL("wrong comm", thread_map__comm(map, 0) && - !strcmp(thread_map__comm(map, 0), "perf")); + !strcmp(thread_map__comm(map, 0), NAME)); TEST_ASSERT_VAL("wrong refcnt", atomic_read(&map->refcnt) == 1); thread_map__put(map); @@ -51,7 +58,7 @@ static int process_event(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", map->nr == 1); TEST_ASSERT_VAL("wrong pid", map->entries[0].pid == (u64) getpid()); - TEST_ASSERT_VAL("wrong comm", !strcmp(map->entries[0].comm, "perf")); + TEST_ASSERT_VAL("wrong comm", !strcmp(map->entries[0].comm, NAME)); threads = thread_map__new_event(&event->thread_map); TEST_ASSERT_VAL("failed to alloc map", threads); @@ -61,7 +68,7 @@ static int process_event(struct perf_tool *tool __maybe_unused, thread_map__pid(threads, 0) == getpid()); TEST_ASSERT_VAL("wrong comm", thread_map__comm(threads, 0) && - !strcmp(thread_map__comm(threads, 0), "perf")); + !strcmp(thread_map__comm(threads, 0), NAME)); TEST_ASSERT_VAL("wrong refcnt", atomic_read(&threads->refcnt) == 1); thread_map__put(threads); @@ -72,6 +79,9 @@ int test__thread_map_synthesize(int subtest __maybe_unused) { struct thread_map *threads; + TEST_ASSERT_VAL("failed to set process name", + !prctl(PR_SET_NAME, NAMEUL, 0, 0, 0)); + /* test map on current pid */ threads = thread_map__new_by_pid(getpid()); TEST_ASSERT_VAL("failed to alloc map", threads); diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 630b0b409b97..e63abab7d5a1 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -54,8 +54,14 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) * Step 3: * * Load and split /proc/kallsyms into multiple maps, one per module. + * Do not use kcore, as this test was designed before kcore support + * and has parts that only make sense if using the non-kcore code. + * XXX: extend it to stress the kcorre code as well, hint: the list + * of modules extracted from /proc/kcore, in its current form, can't + * be compacted against the list of modules found in the "vmlinux" + * code and with the one got from /proc/modules from the "kallsyms" code. */ - if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, NULL) <= 0) { + if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) { pr_debug("dso__load_kallsyms "); goto out; } @@ -157,6 +163,9 @@ next_pair: pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", mem_start, sym->name, pair->name); + } else { + pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", + mem_start, sym->name, first_pair->name); } } } else diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c new file mode 100644 index 000000000000..b08f21eb6f4d --- /dev/null +++ b/tools/perf/trace/beauty/eventfd.c @@ -0,0 +1,36 @@ +#ifndef EFD_SEMAPHORE +#define EFD_SEMAPHORE 1 +#endif + +#ifndef EFD_NONBLOCK +#define EFD_NONBLOCK 00004000 +#endif + +#ifndef EFD_CLOEXEC +#define EFD_CLOEXEC 02000000 +#endif + +static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return scnprintf(bf, size, "NONE"); +#define P_FLAG(n) \ + if (flags & EFD_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~EFD_##n; \ + } + + P_FLAG(SEMAPHORE); + P_FLAG(CLOEXEC); + P_FLAG(NONBLOCK); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c new file mode 100644 index 000000000000..74613703a14e --- /dev/null +++ b/tools/perf/trace/beauty/flock.c @@ -0,0 +1,48 @@ +#include <fcntl.h> + +#ifndef LOCK_MAND +#define LOCK_MAND 32 +#endif + +#ifndef LOCK_READ +#define LOCK_READ 64 +#endif + +#ifndef LOCK_WRITE +#define LOCK_WRITE 128 +#endif + +#ifndef LOCK_RW +#define LOCK_RW 192 +#endif + +static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, op = arg->val; + + if (op == 0) + return scnprintf(bf, size, "NONE"); +#define P_CMD(cmd) \ + if ((op & LOCK_##cmd) == LOCK_##cmd) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ + op &= ~LOCK_##cmd; \ + } + + P_CMD(SH); + P_CMD(EX); + P_CMD(NB); + P_CMD(UN); + P_CMD(MAND); + P_CMD(RW); + P_CMD(READ); + P_CMD(WRITE); +#undef P_OP + + if (op) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op); + + return printed; +} + +#define SCA_FLOCK syscall_arg__scnprintf_flock diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c new file mode 100644 index 000000000000..bfd3359b09b6 --- /dev/null +++ b/tools/perf/trace/beauty/futex_op.c @@ -0,0 +1,60 @@ +#include <linux/futex.h> + +#ifndef FUTEX_WAIT_BITSET +#define FUTEX_WAIT_BITSET 9 +#endif +#ifndef FUTEX_WAKE_BITSET +#define FUTEX_WAKE_BITSET 10 +#endif +#ifndef FUTEX_WAIT_REQUEUE_PI +#define FUTEX_WAIT_REQUEUE_PI 11 +#endif +#ifndef FUTEX_CMP_REQUEUE_PI +#define FUTEX_CMP_REQUEUE_PI 12 +#endif +#ifndef FUTEX_CLOCK_REALTIME +#define FUTEX_CLOCK_REALTIME 256 +#endif + +static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) +{ + enum syscall_futex_args { + SCF_UADDR = (1 << 0), + SCF_OP = (1 << 1), + SCF_VAL = (1 << 2), + SCF_TIMEOUT = (1 << 3), + SCF_UADDR2 = (1 << 4), + SCF_VAL3 = (1 << 5), + }; + int op = arg->val; + int cmd = op & FUTEX_CMD_MASK; + size_t printed = 0; + + switch (cmd) { +#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); + P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; + P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; + P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; + P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; + P_FUTEX_OP(WAKE_OP); break; + P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; + P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; + P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; + P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; + P_FUTEX_OP(WAIT_REQUEUE_PI); break; + default: printed = scnprintf(bf, size, "%#x", cmd); break; + } + + if (op & FUTEX_PRIVATE_FLAG) + printed += scnprintf(bf + printed, size - printed, "|PRIV"); + + if (op & FUTEX_CLOCK_REALTIME) + printed += scnprintf(bf + printed, size - printed, "|CLKRT"); + + return printed; +} + +#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c new file mode 100644 index 000000000000..d0a3a8e402e7 --- /dev/null +++ b/tools/perf/trace/beauty/mmap.c @@ -0,0 +1,191 @@ +#include <sys/mman.h> + +#ifndef PROT_SEM +#define PROT_SEM 0x8 +#endif + +static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, prot = arg->val; + + if (prot == PROT_NONE) + return scnprintf(bf, size, "NONE"); +#define P_MMAP_PROT(n) \ + if (prot & PROT_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + prot &= ~PROT_##n; \ + } + + P_MMAP_PROT(EXEC); + P_MMAP_PROT(READ); + P_MMAP_PROT(WRITE); + P_MMAP_PROT(SEM); + P_MMAP_PROT(GROWSDOWN); + P_MMAP_PROT(GROWSUP); +#undef P_MMAP_PROT + + if (prot) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); + + return printed; +} + +#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot + +#ifndef MAP_FIXED +#define MAP_FIXED 0x10 +#endif + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS 0x20 +#endif + +#ifndef MAP_32BIT +#define MAP_32BIT 0x40 +#endif + +#ifndef MAP_STACK +#define MAP_STACK 0x20000 +#endif + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 +#endif + +#ifndef MAP_UNINITIALIZED +#define MAP_UNINITIALIZED 0x4000000 +#endif + + +static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_MMAP_FLAG(n) \ + if (flags & MAP_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~MAP_##n; \ + } + + P_MMAP_FLAG(SHARED); + P_MMAP_FLAG(PRIVATE); + P_MMAP_FLAG(32BIT); + P_MMAP_FLAG(ANONYMOUS); + P_MMAP_FLAG(DENYWRITE); + P_MMAP_FLAG(EXECUTABLE); + P_MMAP_FLAG(FILE); + P_MMAP_FLAG(FIXED); + P_MMAP_FLAG(GROWSDOWN); + P_MMAP_FLAG(HUGETLB); + P_MMAP_FLAG(LOCKED); + P_MMAP_FLAG(NONBLOCK); + P_MMAP_FLAG(NORESERVE); + P_MMAP_FLAG(POPULATE); + P_MMAP_FLAG(STACK); + P_MMAP_FLAG(UNINITIALIZED); +#undef P_MMAP_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags + +#ifndef MREMAP_MAYMOVE +#define MREMAP_MAYMOVE 1 +#endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif + +static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_MREMAP_FLAG(n) \ + if (flags & MREMAP_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~MREMAP_##n; \ + } + + P_MREMAP_FLAG(MAYMOVE); + P_MREMAP_FLAG(FIXED); +#undef P_MREMAP_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags + +#ifndef MADV_HWPOISON +#define MADV_HWPOISON 100 +#endif + +#ifndef MADV_SOFT_OFFLINE +#define MADV_SOFT_OFFLINE 101 +#endif + +#ifndef MADV_MERGEABLE +#define MADV_MERGEABLE 12 +#endif + +#ifndef MADV_UNMERGEABLE +#define MADV_UNMERGEABLE 13 +#endif + +#ifndef MADV_HUGEPAGE +#define MADV_HUGEPAGE 14 +#endif + +#ifndef MADV_NOHUGEPAGE +#define MADV_NOHUGEPAGE 15 +#endif + +#ifndef MADV_DONTDUMP +#define MADV_DONTDUMP 16 +#endif + +#ifndef MADV_DODUMP +#define MADV_DODUMP 17 +#endif + + +static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, + struct syscall_arg *arg) +{ + int behavior = arg->val; + + switch (behavior) { +#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) + P_MADV_BHV(NORMAL); + P_MADV_BHV(RANDOM); + P_MADV_BHV(SEQUENTIAL); + P_MADV_BHV(WILLNEED); + P_MADV_BHV(DONTNEED); + P_MADV_BHV(REMOVE); + P_MADV_BHV(DONTFORK); + P_MADV_BHV(DOFORK); + P_MADV_BHV(HWPOISON); + P_MADV_BHV(SOFT_OFFLINE); + P_MADV_BHV(MERGEABLE); + P_MADV_BHV(UNMERGEABLE); + P_MADV_BHV(HUGEPAGE); + P_MADV_BHV(NOHUGEPAGE); + P_MADV_BHV(DONTDUMP); + P_MADV_BHV(DODUMP); +#undef P_MADV_BHV + default: break; + } + + return scnprintf(bf, size, "%#x", behavior); +} + +#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c new file mode 100644 index 000000000000..930d8fef2400 --- /dev/null +++ b/tools/perf/trace/beauty/mode_t.c @@ -0,0 +1,68 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +/* From include/linux/stat.h */ +#ifndef S_IRWXUGO +#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) +#endif +#ifndef S_IALLUGO +#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO) +#endif +#ifndef S_IRUGO +#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH) +#endif +#ifndef S_IWUGO +#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH) +#endif +#ifndef S_IXUGO +#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH) +#endif + +static size_t syscall_arg__scnprintf_mode_t(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, mode = arg->val; + +#define P_MODE(n) \ + if ((mode & S_##n) == S_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + mode &= ~S_##n; \ + } + + P_MODE(IALLUGO); + P_MODE(IRWXUGO); + P_MODE(IRUGO); + P_MODE(IWUGO); + P_MODE(IXUGO); + P_MODE(IFMT); + P_MODE(IFSOCK); + P_MODE(IFLNK); + P_MODE(IFREG); + P_MODE(IFBLK); + P_MODE(IFDIR); + P_MODE(IFCHR); + P_MODE(IFIFO); + P_MODE(ISUID); + P_MODE(ISGID); + P_MODE(ISVTX); + P_MODE(IRWXU); + P_MODE(IRUSR); + P_MODE(IWUSR); + P_MODE(IXUSR); + P_MODE(IRWXG); + P_MODE(IRGRP); + P_MODE(IWGRP); + P_MODE(IXGRP); + P_MODE(IRWXO); + P_MODE(IROTH); + P_MODE(IWOTH); + P_MODE(IXOTH); +#undef P_MODE + + if (mode) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", mode); + + return printed; +} + +#define SCA_MODE_T syscall_arg__scnprintf_mode_t diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c new file mode 100644 index 000000000000..1106c8960cc4 --- /dev/null +++ b/tools/perf/trace/beauty/msg_flags.c @@ -0,0 +1,61 @@ +#include <sys/types.h> +#include <sys/socket.h> + +#ifndef MSG_PROBE +#define MSG_PROBE 0x10 +#endif +#ifndef MSG_WAITFORONE +#define MSG_WAITFORONE 0x10000 +#endif +#ifndef MSG_SENDPAGE_NOTLAST +#define MSG_SENDPAGE_NOTLAST 0x20000 +#endif +#ifndef MSG_FASTOPEN +#define MSG_FASTOPEN 0x20000000 +#endif +#ifndef MSG_CMSG_CLOEXEC +# define MSG_CMSG_CLOEXEC 0x40000000 +#endif + +static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return scnprintf(bf, size, "NONE"); +#define P_MSG_FLAG(n) \ + if (flags & MSG_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~MSG_##n; \ + } + + P_MSG_FLAG(OOB); + P_MSG_FLAG(PEEK); + P_MSG_FLAG(DONTROUTE); + P_MSG_FLAG(CTRUNC); + P_MSG_FLAG(PROBE); + P_MSG_FLAG(TRUNC); + P_MSG_FLAG(DONTWAIT); + P_MSG_FLAG(EOR); + P_MSG_FLAG(WAITALL); + P_MSG_FLAG(FIN); + P_MSG_FLAG(SYN); + P_MSG_FLAG(CONFIRM); + P_MSG_FLAG(RST); + P_MSG_FLAG(ERRQUEUE); + P_MSG_FLAG(NOSIGNAL); + P_MSG_FLAG(MORE); + P_MSG_FLAG(WAITFORONE); + P_MSG_FLAG(SENDPAGE_NOTLAST); + P_MSG_FLAG(FASTOPEN); + P_MSG_FLAG(CMSG_CLOEXEC); +#undef P_MSG_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c new file mode 100644 index 000000000000..f55a4597fc38 --- /dev/null +++ b/tools/perf/trace/beauty/open_flags.c @@ -0,0 +1,71 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#ifndef O_DIRECT +#define O_DIRECT 00040000 +#endif + +#ifndef O_DIRECTORY +#define O_DIRECTORY 00200000 +#endif + +#ifndef O_NOATIME +#define O_NOATIME 01000000 +#endif + +static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (!(flags & O_CREAT)) + arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ + + if (flags == 0) + return scnprintf(bf, size, "RDONLY"); +#define P_FLAG(n) \ + if (flags & O_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~O_##n; \ + } + + P_FLAG(APPEND); + P_FLAG(ASYNC); + P_FLAG(CLOEXEC); + P_FLAG(CREAT); + P_FLAG(DIRECT); + P_FLAG(DIRECTORY); + P_FLAG(EXCL); + P_FLAG(LARGEFILE); + P_FLAG(NOATIME); + P_FLAG(NOCTTY); +#ifdef O_NONBLOCK + P_FLAG(NONBLOCK); +#elif O_NDELAY + P_FLAG(NDELAY); +#endif +#ifdef O_PATH + P_FLAG(PATH); +#endif + P_FLAG(RDWR); +#ifdef O_DSYNC + if ((flags & O_SYNC) == O_SYNC) + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); + else { + P_FLAG(DSYNC); + } +#else + P_FLAG(SYNC); +#endif + P_FLAG(TRUNC); + P_FLAG(WRONLY); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c new file mode 100644 index 000000000000..311f09dd718d --- /dev/null +++ b/tools/perf/trace/beauty/perf_event_open.c @@ -0,0 +1,43 @@ +#ifndef PERF_FLAG_FD_NO_GROUP +# define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#endif + +#ifndef PERF_FLAG_FD_OUTPUT +# define PERF_FLAG_FD_OUTPUT (1UL << 1) +#endif + +#ifndef PERF_FLAG_PID_CGROUP +# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ +#endif + +#ifndef PERF_FLAG_FD_CLOEXEC +# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#endif + +static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return 0; + +#define P_FLAG(n) \ + if (flags & PERF_FLAG_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~PERF_FLAG_##n; \ + } + + P_FLAG(FD_NO_GROUP); + P_FLAG(FD_OUTPUT); + P_FLAG(PID_CGROUP); + P_FLAG(FD_CLOEXEC); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c new file mode 100644 index 000000000000..07486ea65ae3 --- /dev/null +++ b/tools/perf/trace/beauty/pid.c @@ -0,0 +1,21 @@ +static size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg) +{ + int pid = arg->val; + struct trace *trace = arg->trace; + size_t printed = scnprintf(bf, size, "%d", pid); + struct thread *thread = machine__findnew_thread(trace->host, pid, pid); + + if (thread != NULL) { + if (!thread->comm_set) + thread__set_comm_from_proc(thread); + + if (thread->comm_set) + printed += scnprintf(bf + printed, size - printed, + " (%s)", thread__comm_str(thread)); + thread__put(thread); + } + + return printed; +} + +#define SCA_PID syscall_arg__scnprintf_pid diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c new file mode 100644 index 000000000000..34775295b9b3 --- /dev/null +++ b/tools/perf/trace/beauty/sched_policy.c @@ -0,0 +1,47 @@ +#include <sched.h> + +/* + * Not defined anywhere else, probably, just to make sure we + * catch future flags + */ +#define SCHED_POLICY_MASK 0xff + +#ifndef SCHED_DEADLINE +#define SCHED_DEADLINE 6 +#endif +#ifndef SCHED_RESET_ON_FORK +#define SCHED_RESET_ON_FORK 0x40000000 +#endif + +static size_t syscall_arg__scnprintf_sched_policy(char *bf, size_t size, + struct syscall_arg *arg) +{ + const char *policies[] = { + "NORMAL", "FIFO", "RR", "BATCH", "ISO", "IDLE", "DEADLINE", + }; + size_t printed; + int policy = arg->val, + flags = policy & ~SCHED_POLICY_MASK; + + policy &= SCHED_POLICY_MASK; + if (policy <= SCHED_DEADLINE) + printed = scnprintf(bf, size, "%s", policies[policy]); + else + printed = scnprintf(bf, size, "%#x", policy); + +#define P_POLICY_FLAG(n) \ + if (flags & SCHED_##n) { \ + printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ + flags &= ~SCHED_##n; \ + } + + P_POLICY_FLAG(RESET_ON_FORK); +#undef P_POLICY_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "|%#x", flags); + + return printed; +} + +#define SCA_SCHED_POLICY syscall_arg__scnprintf_sched_policy diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c new file mode 100644 index 000000000000..356441bce27d --- /dev/null +++ b/tools/perf/trace/beauty/seccomp.c @@ -0,0 +1,50 @@ +#ifndef SECCOMP_SET_MODE_STRICT +#define SECCOMP_SET_MODE_STRICT 0 +#endif +#ifndef SECCOMP_SET_MODE_FILTER +#define SECCOMP_SET_MODE_FILTER 1 +#endif + +static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg) +{ + int op = arg->val; + size_t printed = 0; + + switch (op) { +#define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break + P_SECCOMP_SET_MODE_OP(STRICT); + P_SECCOMP_SET_MODE_OP(FILTER); +#undef P_SECCOMP_SET_MODE_OP + default: printed = scnprintf(bf, size, "%#x", op); break; + } + + return printed; +} + +#define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op + +#ifndef SECCOMP_FILTER_FLAG_TSYNC +#define SECCOMP_FILTER_FLAG_TSYNC 1 +#endif + +static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & SECCOMP_FILTER_FLAG_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~SECCOMP_FILTER_FLAG_##n; \ + } + + P_FLAG(TSYNC); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c new file mode 100644 index 000000000000..d3b0b1fab077 --- /dev/null +++ b/tools/perf/trace/beauty/signum.c @@ -0,0 +1,53 @@ + +static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) +{ + int sig = arg->val; + + switch (sig) { +#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) + P_SIGNUM(HUP); + P_SIGNUM(INT); + P_SIGNUM(QUIT); + P_SIGNUM(ILL); + P_SIGNUM(TRAP); + P_SIGNUM(ABRT); + P_SIGNUM(BUS); + P_SIGNUM(FPE); + P_SIGNUM(KILL); + P_SIGNUM(USR1); + P_SIGNUM(SEGV); + P_SIGNUM(USR2); + P_SIGNUM(PIPE); + P_SIGNUM(ALRM); + P_SIGNUM(TERM); + P_SIGNUM(CHLD); + P_SIGNUM(CONT); + P_SIGNUM(STOP); + P_SIGNUM(TSTP); + P_SIGNUM(TTIN); + P_SIGNUM(TTOU); + P_SIGNUM(URG); + P_SIGNUM(XCPU); + P_SIGNUM(XFSZ); + P_SIGNUM(VTALRM); + P_SIGNUM(PROF); + P_SIGNUM(WINCH); + P_SIGNUM(IO); + P_SIGNUM(PWR); + P_SIGNUM(SYS); +#ifdef SIGEMT + P_SIGNUM(EMT); +#endif +#ifdef SIGSTKFLT + P_SIGNUM(STKFLT); +#endif +#ifdef SIGSWI + P_SIGNUM(SWI); +#endif + default: break; + } + + return scnprintf(bf, size, "%#x", sig); +} + +#define SCA_SIGNUM syscall_arg__scnprintf_signum diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c new file mode 100644 index 000000000000..0a5ce818131c --- /dev/null +++ b/tools/perf/trace/beauty/socket_type.c @@ -0,0 +1,60 @@ +#include <sys/types.h> +#include <sys/socket.h> + +#ifndef SOCK_DCCP +# define SOCK_DCCP 6 +#endif + +#ifndef SOCK_CLOEXEC +# define SOCK_CLOEXEC 02000000 +#endif + +#ifndef SOCK_NONBLOCK +# define SOCK_NONBLOCK 00004000 +#endif + +#ifndef SOCK_TYPE_MASK +#define SOCK_TYPE_MASK 0xf +#endif + +static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, struct syscall_arg *arg) +{ + size_t printed; + int type = arg->val, + flags = type & ~SOCK_TYPE_MASK; + + type &= SOCK_TYPE_MASK; + /* + * Can't use a strarray, MIPS may override for ABI reasons. + */ + switch (type) { +#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; + P_SK_TYPE(STREAM); + P_SK_TYPE(DGRAM); + P_SK_TYPE(RAW); + P_SK_TYPE(RDM); + P_SK_TYPE(SEQPACKET); + P_SK_TYPE(DCCP); + P_SK_TYPE(PACKET); +#undef P_SK_TYPE + default: + printed = scnprintf(bf, size, "%#x", type); + } + +#define P_SK_FLAG(n) \ + if (flags & SOCK_##n) { \ + printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ + flags &= ~SOCK_##n; \ + } + + P_SK_FLAG(CLOEXEC); + P_SK_FLAG(NONBLOCK); +#undef P_SK_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "|%#x", flags); + + return printed; +} + +#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c new file mode 100644 index 000000000000..7942724adec8 --- /dev/null +++ b/tools/perf/trace/beauty/waitid_options.c @@ -0,0 +1,26 @@ +#include <sys/types.h> +#include <sys/wait.h> + +static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, options = arg->val; + +#define P_OPTION(n) \ + if (options & W##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + options &= ~W##n; \ + } + + P_OPTION(NOHANG); + P_OPTION(UNTRACED); + P_OPTION(CONTINUED); +#undef P_OPTION + + if (options) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", options); + + return printed; +} + +#define SCA_WAITID_OPTIONS syscall_arg__scnprintf_waitid_options diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index af68a9d488bf..3eb3edb307a4 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -1,5 +1,5 @@ #include "../util.h" -#include "../cache.h" +#include "../config.h" #include "../../perf.h" #include "libslang.h" #include "ui.h" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 4fc208e82c6f..2e2d10022355 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -8,6 +8,7 @@ #include "../../util/sort.h" #include "../../util/symbol.h" #include "../../util/evsel.h" +#include "../../util/config.h" #include <pthread.h> struct disasm_line_samples { @@ -222,16 +223,14 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } else if (ins__is_call(dl->ins)) { ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); SLsmg_write_char(' '); + } else if (ins__is_ret(dl->ins)) { + ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); + SLsmg_write_char(' '); } else { ui_browser__write_nstring(browser, " ", 2); } } else { - if (strcmp(dl->name, "retq")) { - ui_browser__write_nstring(browser, " ", 2); - } else { - ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); - SLsmg_write_char(' '); - } + ui_browser__write_nstring(browser, " ", 2); } disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); @@ -842,14 +841,14 @@ show_help: ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org"); else if (browser->selection->offset == -1) ui_helpline__puts("Actions are only available for assembly lines."); - else if (!browser->selection->ins) { - if (strcmp(browser->selection->name, "retq")) - goto show_sup_ins; + else if (!browser->selection->ins) + goto show_sup_ins; + else if (ins__is_ret(browser->selection->ins)) goto out; - } else if (!(annotate_browser__jump(browser) || + else if (!(annotate_browser__jump(browser) || annotate_browser__callq(browser, evsel, hbt))) { show_sup_ins: - ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions."); + ui_helpline__puts("Actions are only available for function call/return & jump/branch instructions."); } continue; case 't': @@ -1027,7 +1026,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, .use_navkeypressed = true, }, }; - int ret = -1; + int ret = -1, err; int nr_pcnt = 1; size_t sizeof_bdl = sizeof(struct browser_disasm_line); @@ -1051,8 +1050,11 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - if (symbol__annotate(sym, map, sizeof_bdl) < 0) { - ui__error("%s", ui_helpline__last_msg); + err = symbol__disassemble(sym, map, sizeof_bdl); + if (err) { + char msg[BUFSIZ]; + symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); goto out_free_offsets; } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 2a83414159a6..13d414384739 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -12,35 +12,17 @@ #include "../../util/top.h" #include "../../arch/common.h" -#include "../browser.h" +#include "../browsers/hists.h" #include "../helpline.h" #include "../util.h" #include "../ui.h" #include "map.h" #include "annotate.h" -struct hist_browser { - struct ui_browser b; - struct hists *hists; - struct hist_entry *he_selection; - struct map_symbol *selection; - struct hist_browser_timer *hbt; - struct pstack *pstack; - struct perf_env *env; - int print_seq; - bool show_dso; - bool show_headers; - float min_pcnt; - u64 nr_non_filtered_entries; - u64 nr_hierarchy_entries; - u64 nr_callchain_rows; -}; - extern void hist_browser__init_hpp(void); -static int hists__browser_title(struct hists *hists, - struct hist_browser_timer *hbt, - char *bf, size_t size); +static int perf_evsel_browser_title(struct hist_browser *browser, + char *bf, size_t size); static void hist_browser__update_nr_entries(struct hist_browser *hb); static struct rb_node *hists__filter_entries(struct rb_node *nd, @@ -585,7 +567,12 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser) "Or reduce the sampling frequency."); } -static int hist_browser__run(struct hist_browser *browser, const char *help) +static int hist_browser__title(struct hist_browser *browser, char *bf, size_t size) +{ + return browser->title ? browser->title(browser, bf, size) : 0; +} + +int hist_browser__run(struct hist_browser *browser, const char *help) { int key; char title[160]; @@ -595,7 +582,7 @@ static int hist_browser__run(struct hist_browser *browser, const char *help) browser->b.entries = &browser->hists->entries; browser->b.nr_entries = hist_browser__nr_entries(browser); - hists__browser_title(browser->hists, hbt, title, sizeof(title)); + hist_browser__title(browser, title, sizeof(title)); if (ui_browser__show(&browser->b, title, "%s", help) < 0) return -1; @@ -621,8 +608,7 @@ static int hist_browser__run(struct hist_browser *browser, const char *help) ui_browser__warn_lost_events(&browser->b); } - hists__browser_title(browser->hists, - hbt, title, sizeof(title)); + hist_browser__title(browser, title, sizeof(title)); ui_browser__show_title(&browser->b, title); continue; } @@ -1470,7 +1456,7 @@ static int hist_browser__show_no_entry(struct hist_browser *browser, column++ < browser->b.horiz_scroll) continue; - ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists)); + ret = fmt->width(fmt, NULL, browser->hists); if (first) { /* for folded sign */ @@ -1531,7 +1517,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll) continue; - ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + ret = fmt->header(fmt, &dummy_hpp, hists); if (advance_hpp_check(&dummy_hpp, ret)) break; @@ -1568,7 +1554,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows if (column++ < browser->b.horiz_scroll) continue; - ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + ret = fmt->header(fmt, &dummy_hpp, hists); if (advance_hpp_check(&dummy_hpp, ret)) break; @@ -1605,11 +1591,10 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows } first_col = false; - ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + ret = fmt->header(fmt, &dummy_hpp, hists); dummy_hpp.buf[ret] = '\0'; - rtrim(dummy_hpp.buf); - start = ltrim(dummy_hpp.buf); + start = trim(dummy_hpp.buf); ret = strlen(start); if (start != dummy_hpp.buf) @@ -1623,21 +1608,38 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows return ret; } -static void hist_browser__show_headers(struct hist_browser *browser) +static void hists_browser__hierarchy_headers(struct hist_browser *browser) { char headers[1024]; - if (symbol_conf.report_hierarchy) - hists_browser__scnprintf_hierarchy_headers(browser, headers, - sizeof(headers)); - else - hists_browser__scnprintf_headers(browser, headers, - sizeof(headers)); + hists_browser__scnprintf_hierarchy_headers(browser, headers, + sizeof(headers)); + ui_browser__gotorc(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); } +static void hists_browser__headers(struct hist_browser *browser) +{ + char headers[1024]; + + hists_browser__scnprintf_headers(browser, headers, + sizeof(headers)); + + ui_browser__gotorc(&browser->b, 0, 0); + ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); + ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); +} + +static void hist_browser__show_headers(struct hist_browser *browser) +{ + if (symbol_conf.report_hierarchy) + hists_browser__hierarchy_headers(browser); + else + hists_browser__headers(browser); +} + static void ui_browser__hists_init_top(struct ui_browser *browser) { if (browser->top == NULL) { @@ -1897,11 +1899,10 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, bool first = true; int ret; - if (symbol_conf.use_callchain) + if (symbol_conf.use_callchain) { folded_sign = hist_entry__folded(he); - - if (symbol_conf.use_callchain) printed += fprintf(fp, "%c ", folded_sign); + } hists__for_each_format(browser->hists, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) @@ -2028,7 +2029,7 @@ static int hist_browser__dump(struct hist_browser *browser) fp = fopen(filename, "w"); if (fp == NULL) { char bf[64]; - const char *err = strerror_r(errno, bf, sizeof(bf)); + const char *err = str_error_r(errno, bf, sizeof(bf)); ui_helpline__fpush("Couldn't write to %s: %s", filename, err); return -1; } @@ -2041,27 +2042,50 @@ static int hist_browser__dump(struct hist_browser *browser) return 0; } -static struct hist_browser *hist_browser__new(struct hists *hists, - struct hist_browser_timer *hbt, - struct perf_env *env) +void hist_browser__init(struct hist_browser *browser, + struct hists *hists) +{ + struct perf_hpp_fmt *fmt; + + browser->hists = hists; + browser->b.refresh = hist_browser__refresh; + browser->b.refresh_dimensions = hist_browser__refresh_dimensions; + browser->b.seek = ui_browser__hists_seek; + browser->b.use_navkeypressed = true; + browser->show_headers = symbol_conf.show_hist_headers; + + hists__for_each_format(hists, fmt) { + perf_hpp__reset_width(fmt, hists); + ++browser->b.columns; + } +} + +struct hist_browser *hist_browser__new(struct hists *hists) { struct hist_browser *browser = zalloc(sizeof(*browser)); + if (browser) + hist_browser__init(browser, hists); + + return browser; +} + +static struct hist_browser * +perf_evsel_browser__new(struct perf_evsel *evsel, + struct hist_browser_timer *hbt, + struct perf_env *env) +{ + struct hist_browser *browser = hist_browser__new(evsel__hists(evsel)); + if (browser) { - browser->hists = hists; - browser->b.refresh = hist_browser__refresh; - browser->b.refresh_dimensions = hist_browser__refresh_dimensions; - browser->b.seek = ui_browser__hists_seek; - browser->b.use_navkeypressed = true; - browser->show_headers = symbol_conf.show_hist_headers; - browser->hbt = hbt; - browser->env = env; + browser->hbt = hbt; + browser->env = env; + browser->title = perf_evsel_browser_title; } - return browser; } -static void hist_browser__delete(struct hist_browser *browser) +void hist_browser__delete(struct hist_browser *browser) { free(browser); } @@ -2082,10 +2106,11 @@ static inline bool is_report_browser(void *timer) return timer == NULL; } -static int hists__browser_title(struct hists *hists, - struct hist_browser_timer *hbt, +static int perf_evsel_browser_title(struct hist_browser *browser, char *bf, size_t size) { + struct hist_browser_timer *hbt = browser->hbt; + struct hists *hists = browser->hists; char unit; int printed; const struct dso *dso = hists->dso_filter; @@ -2137,7 +2162,7 @@ static int hists__browser_title(struct hists *hists, printed += snprintf(bf + printed, size - printed, ", UID: %s", hists->uid_filter_str); if (thread) { - if (sort__has_thread) { + if (hists__has(hists, thread)) { printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", (thread->comm_set ? thread__comm_str(thread) : ""), @@ -2322,7 +2347,8 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) { struct thread *thread = act->thread; - if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + if ((!hists__has(browser->hists, thread) && + !hists__has(browser->hists, comm)) || thread == NULL) return 0; if (browser->hists->thread_filter) { @@ -2331,7 +2357,7 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) thread__zput(browser->hists->thread_filter); ui_helpline__pop(); } else { - if (sort__has_thread) { + if (hists__has(browser->hists, thread)) { ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); @@ -2356,10 +2382,11 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act, { int ret; - if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + if ((!hists__has(browser->hists, thread) && + !hists__has(browser->hists, comm)) || thread == NULL) return 0; - if (sort__has_thread) { + if (hists__has(browser->hists, thread)) { ret = asprintf(optstr, "Zoom %s %s(%d) thread", browser->hists->thread_filter ? "out of" : "into", thread->comm_set ? thread__comm_str(thread) : "", @@ -2382,7 +2409,7 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { struct map *map = act->ms.map; - if (!sort__has_dso || map == NULL) + if (!hists__has(browser->hists, dso) || map == NULL) return 0; if (browser->hists->dso_filter) { @@ -2409,7 +2436,7 @@ static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (!sort__has_dso || map == NULL) + if (!hists__has(browser->hists, dso) || map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", @@ -2431,10 +2458,10 @@ do_browse_map(struct hist_browser *browser __maybe_unused, } static int -add_map_opt(struct hist_browser *browser __maybe_unused, +add_map_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (!sort__has_dso || map == NULL) + if (!hists__has(browser->hists, dso) || map == NULL) return 0; if (asprintf(optstr, "Browse map details") < 0) @@ -2536,7 +2563,7 @@ add_exit_opt(struct hist_browser *browser __maybe_unused, static int do_zoom_socket(struct hist_browser *browser, struct popup_action *act) { - if (!sort__has_socket || act->socket < 0) + if (!hists__has(browser->hists, socket) || act->socket < 0) return 0; if (browser->hists->socket_filter > -1) { @@ -2558,7 +2585,7 @@ static int add_socket_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, int socket_id) { - if (!sort__has_socket || socket_id < 0) + if (!hists__has(browser->hists, socket) || socket_id < 0) return 0; if (asprintf(optstr, "Zoom %s Processor Socket %d", @@ -2640,7 +2667,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, struct perf_env *env) { struct hists *hists = evsel__hists(evsel); - struct hist_browser *browser = hist_browser__new(hists, hbt, env); + struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); struct branch_info *bi; #define MAX_OPTIONS 16 char *options[MAX_OPTIONS]; @@ -2649,7 +2676,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, int key = -1; char buf[64]; int delay_secs = hbt ? hbt->refresh : 0; - struct perf_hpp_fmt *fmt; #define HIST_BROWSER_HELP_COMMON \ "h/?/F1 Show this window\n" \ @@ -2708,18 +2734,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, memset(options, 0, sizeof(options)); memset(actions, 0, sizeof(actions)); - hists__for_each_format(browser->hists, fmt) { - perf_hpp__reset_width(fmt, hists); - /* - * This is done just once, and activates the horizontal scrolling - * code in the ui_browser code, it would be better to have a the - * counter in the perf_hpp code, but I couldn't find doing it here - * works, FIXME by setting this in hist_browser__new, for now, be - * clever 8-) - */ - ++browser->b.columns; - } - if (symbol_conf.col_width_list_str) perf_hpp__set_user_width(symbol_conf.col_width_list_str); @@ -2749,7 +2763,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, */ goto out_free_stack; case 'a': - if (!sort__has_sym) { + if (!hists__has(hists, sym)) { ui_browser__warning(&browser->b, delay_secs * 2, "Annotation is only available for symbolic views, " "include \"sym*\" in --sort to use it."); @@ -2912,7 +2926,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } - if (!sort__has_sym || browser->selection == NULL) + if (!hists__has(hists, sym) || browser->selection == NULL) goto skip_annotation; if (sort__mode == SORT_MODE__BRANCH) { @@ -2956,7 +2970,7 @@ skip_annotation: goto skip_scripting; if (browser->he_selection) { - if (sort__has_thread && thread) { + if (hists__has(hists, thread) && thread) { nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], @@ -2971,7 +2985,7 @@ skip_annotation: * * See hist_browser__show_entry. */ - if (sort__has_sym && browser->selection->sym) { + if (hists__has(hists, sym) && browser->selection->sym) { nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], @@ -3185,7 +3199,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, ui_helpline__push("Press ESC to exit"); - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { const char *ev_name = perf_evsel__name(pos); size_t line_len = strlen(ev_name) + 7; @@ -3216,7 +3230,7 @@ single_entry: struct perf_evsel *pos; nr_entries = 0; - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { if (perf_evsel__is_group_leader(pos)) nr_entries++; } diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h new file mode 100644 index 000000000000..39bd0f28f211 --- /dev/null +++ b/tools/perf/ui/browsers/hists.h @@ -0,0 +1,32 @@ +#ifndef _PERF_UI_BROWSER_HISTS_H_ +#define _PERF_UI_BROWSER_HISTS_H_ 1 + +#include "ui/browser.h" + +struct hist_browser { + struct ui_browser b; + struct hists *hists; + struct hist_entry *he_selection; + struct map_symbol *selection; + struct hist_browser_timer *hbt; + struct pstack *pstack; + struct perf_env *env; + int print_seq; + bool show_dso; + bool show_headers; + float min_pcnt; + u64 nr_non_filtered_entries; + u64 nr_hierarchy_entries; + u64 nr_callchain_rows; + + /* Get title string. */ + int (*title)(struct hist_browser *browser, + char *bf, size_t size); +}; + +struct hist_browser *hist_browser__new(struct hists *hists); +void hist_browser__delete(struct hist_browser *browser); +int hist_browser__run(struct hist_browser *browser, const char *help); +void hist_browser__init(struct hist_browser *browser, + struct hists *hists); +#endif /* _PERF_UI_BROWSER_HISTS_H_ */ diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 9c7ff8d31b27..42d319927762 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -162,12 +162,16 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, GtkWidget *notebook; GtkWidget *scrolled_window; GtkWidget *tab_label; + int err; if (map->dso->annotate_warned) return -1; - if (symbol__annotate(sym, map, 0) < 0) { - ui__error("%s", ui_helpline__current); + err = symbol__disassemble(sym, map, 0); + if (err) { + char msg[BUFSIZ]; + symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s: %s\n", sym->name, msg); return -1; } diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 2aa45b606fa4..c5f3677f6679 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -379,7 +379,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, gtk_tree_store_set(store, &iter, col_idx++, s, -1); } - if (symbol_conf.use_callchain && sort__has_sym) { + if (symbol_conf.use_callchain && hists__has(hists, sym)) { if (callchain_param.mode == CHAIN_GRAPH_REL) total = symbol_conf.cumulate_callchain ? h->stat_acc->period : h->stat.period; @@ -549,7 +549,7 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, strcat(buf, "+"); first_col = false; - fmt->header(fmt, &hpp, hists_to_evsel(hists)); + fmt->header(fmt, &hpp, hists); strcat(buf, ltrim(rtrim(hpp.buf))); } } @@ -627,7 +627,7 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, gtk_container_add(GTK_CONTAINER(window), vbox); - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); const char *evname = perf_evsel__name(pos); GtkWidget *scrolled_window; diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index 52e7fc48af9f..00b91921edb1 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -1,4 +1,5 @@ #include "../util.h" +#include "../../util/util.h" #include "../../util/debug.h" #include "gtk.h" diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c index 700fb3cfa1c7..5b74a7eba210 100644 --- a/tools/perf/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -5,6 +5,7 @@ #include "../debug.h" #include "helpline.h" #include "ui.h" +#include "../util.h" char ui_helpline__current[512]; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 3baeaa6e71b5..4274969ddc89 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -215,9 +215,10 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, static int hpp__width_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, - struct perf_evsel *evsel) + struct hists *hists) { int len = fmt->user_len ?: fmt->len; + struct perf_evsel *evsel = hists_to_evsel(hists); if (symbol_conf.event_group) len = max(len, evsel->nr_members * fmt->len); @@ -229,9 +230,9 @@ static int hpp__width_fn(struct perf_hpp_fmt *fmt, } static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct perf_evsel *evsel) + struct hists *hists) { - int len = hpp__width_fn(fmt, hpp, evsel); + int len = hpp__width_fn(fmt, hpp, hists); return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name); } @@ -632,10 +633,10 @@ unsigned int hists__sort_list_width(struct hists *hists) else ret += 2; - ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); + ret += fmt->width(fmt, &dummy_hpp, hists); } - if (verbose && sort__has_sym) /* Addr + origin */ + if (verbose && hists__has(hists, sym)) /* Addr + origin */ ret += 3 + BITS_PER_LONG / 4; return ret; @@ -657,7 +658,7 @@ unsigned int hists__overhead_width(struct hists *hists) else ret += 2; - ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); + ret += fmt->width(fmt, &dummy_hpp, hists); } return ret; @@ -765,7 +766,7 @@ int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, if (!symbol_conf.report_hierarchy) return 0; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { hists = evsel__hists(evsel); perf_hpp_list__for_each_sort_list(list, fmt) { diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index ba51fa8a1176..1f6b0994f4f4 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -60,6 +60,13 @@ static inline int setup_gtk_browser(void) { return -1; } static inline void exit_gtk_browser(bool wait_for_ok __maybe_unused) {} #endif +int stdio__config_color(const struct option *opt __maybe_unused, + const char *mode, int unset __maybe_unused) +{ + perf_use_color_default = perf_config_colorbool("color.ui", mode, -1); + return 0; +} + void setup_browser(bool fallback_to_pager) { if (use_browser < 2 && (!isatty(1) || dump_trace)) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 7aff5acf3265..f04a63112079 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -492,14 +492,15 @@ out: } static int hist_entry__fprintf(struct hist_entry *he, size_t size, - struct hists *hists, - char *bf, size_t bfsz, FILE *fp) + char *bf, size_t bfsz, FILE *fp, + bool use_callchain) { int ret; struct perf_hpp hpp = { .buf = bf, .size = size, }; + struct hists *hists = he->hists; u64 total_period = hists->stats.total_period; if (size == 0 || size > bfsz) @@ -512,7 +513,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, ret = fprintf(fp, "%s\n", bf); - if (symbol_conf.use_callchain) + if (use_callchain) ret += hist_entry_callchain__fprintf(he, total_period, 0, fp); return ret; @@ -548,7 +549,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, struct perf_hpp_list_node, list); perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { - fmt->header(fmt, hpp, hists_to_evsel(hists)); + fmt->header(fmt, hpp, hists); fprintf(fp, "%s%s", hpp->buf, sep ?: " "); } @@ -568,10 +569,9 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, header_width += fprintf(fp, "+"); first_col = false; - fmt->header(fmt, hpp, hists_to_evsel(hists)); - rtrim(hpp->buf); + fmt->header(fmt, hpp, hists); - header_width += fprintf(fp, "%s", ltrim(hpp->buf)); + header_width += fprintf(fp, "%s", trim(hpp->buf)); } } @@ -590,7 +590,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, fprintf(fp, "%s", sep ?: ".."); first_col = false; - width = fmt->width(fmt, hpp, hists_to_evsel(hists)); + width = fmt->width(fmt, hpp, hists); fprintf(fp, "%.*s", width, dots); } @@ -607,7 +607,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, width++; /* for '+' sign between column header */ first_col = false; - width += fmt->width(fmt, hpp, hists_to_evsel(hists)); + width += fmt->width(fmt, hpp, hists); } if (width > header_width) @@ -623,47 +623,31 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, return 2; } -size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, - int max_cols, float min_pcnt, FILE *fp) +static int +hists__fprintf_hierarchy_headers(struct hists *hists, + struct perf_hpp *hpp, + FILE *fp) { - struct perf_hpp_fmt *fmt; struct perf_hpp_list_node *fmt_node; - struct rb_node *nd; - size_t ret = 0; - unsigned int width; - const char *sep = symbol_conf.field_sep; - int nr_rows = 0; - char bf[96]; - struct perf_hpp dummy_hpp = { - .buf = bf, - .size = sizeof(bf), - }; - bool first = true; - size_t linesz; - char *line = NULL; - unsigned indent; - - init_rem_hits(); - - hists__for_each_format(hists, fmt) - perf_hpp__reset_width(fmt, hists); - - if (symbol_conf.col_width_list_str) - perf_hpp__set_user_width(symbol_conf.col_width_list_str); + struct perf_hpp_fmt *fmt; - if (!show_header) - goto print_entries; + list_for_each_entry(fmt_node, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) + perf_hpp__reset_width(fmt, hists); + } - fprintf(fp, "# "); + return print_hierarchy_header(hists, hpp, symbol_conf.field_sep, fp); +} - if (symbol_conf.report_hierarchy) { - list_for_each_entry(fmt_node, &hists->hpp_formats, list) { - perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) - perf_hpp__reset_width(fmt, hists); - } - nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp); - goto print_entries; - } +static int +hists__fprintf_standard_headers(struct hists *hists, + struct perf_hpp *hpp, + FILE *fp) +{ + struct perf_hpp_fmt *fmt; + unsigned int width; + const char *sep = symbol_conf.field_sep; + bool first = true; hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) @@ -674,16 +658,14 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, else first = false; - fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); - fprintf(fp, "%s", bf); + fmt->header(fmt, hpp, hists); + fprintf(fp, "%s", hpp->buf); } fprintf(fp, "\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; if (sep) - goto print_entries; + return 1; first = true; @@ -700,20 +682,60 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, else first = false; - width = fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); + width = fmt->width(fmt, hpp, hists); for (i = 0; i < width; i++) fprintf(fp, "."); } fprintf(fp, "\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - fprintf(fp, "#\n"); - if (max_rows && ++nr_rows >= max_rows) + return 3; +} + +static int hists__fprintf_headers(struct hists *hists, FILE *fp) +{ + char bf[96]; + struct perf_hpp dummy_hpp = { + .buf = bf, + .size = sizeof(bf), + }; + + fprintf(fp, "# "); + + if (symbol_conf.report_hierarchy) + return hists__fprintf_hierarchy_headers(hists, &dummy_hpp, fp); + else + return hists__fprintf_standard_headers(hists, &dummy_hpp, fp); + +} + +size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, + int max_cols, float min_pcnt, FILE *fp, + bool use_callchain) +{ + struct perf_hpp_fmt *fmt; + struct rb_node *nd; + size_t ret = 0; + const char *sep = symbol_conf.field_sep; + int nr_rows = 0; + size_t linesz; + char *line = NULL; + unsigned indent; + + init_rem_hits(); + + hists__for_each_format(hists, fmt) + perf_hpp__reset_width(fmt, hists); + + if (symbol_conf.col_width_list_str) + perf_hpp__set_user_width(symbol_conf.col_width_list_str); + + if (show_header) + nr_rows += hists__fprintf_headers(hists, fp); + + if (max_rows && nr_rows >= max_rows) goto out; -print_entries: linesz = hists__sort_list_width(hists) + 3 + 1; linesz += perf_hpp__color_overhead(); line = malloc(linesz); @@ -735,7 +757,7 @@ print_entries: if (percent < min_pcnt) continue; - ret += hist_entry__fprintf(h, max_cols, hists, line, linesz, fp); + ret += hist_entry__fprintf(h, max_cols, line, linesz, fp, use_callchain); if (max_rows && ++nr_rows >= max_rows) break; diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 7dfeba0a91f3..4ea2ba861fc2 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -1,3 +1,4 @@ +#include <errno.h> #include <signal.h> #include <stdbool.h> #ifdef HAVE_BACKTRACE_SUPPORT @@ -6,6 +7,7 @@ #include "../../util/cache.h" #include "../../util/debug.h" +#include "../../util/util.h" #include "../browser.h" #include "../helpline.h" #include "../ui.h" diff --git a/tools/perf/ui/ui.h b/tools/perf/ui/ui.h index ab88383f8be8..4b6fb6c7a542 100644 --- a/tools/perf/ui/ui.h +++ b/tools/perf/ui/ui.h @@ -26,4 +26,8 @@ static inline void ui__exit(bool wait_for_ok __maybe_unused) {} void ui__refresh_dimensions(bool force); +struct option; + +int stdio__config_color(const struct option *opt, const char *mode, int unset); + #endif /* _PERF_UI_H_ */ diff --git a/tools/perf/util/Build b/tools/perf/util/Build index da48fd843438..91c5f6e1af59 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -8,6 +8,7 @@ libperf-y += env.o libperf-y += event.o libperf-y += evlist.o libperf-y += evsel.o +libperf-y += evsel_fprintf.o libperf-y += find_bit.o libperf-y += kallsyms.o libperf-y += levenshtein.o @@ -26,9 +27,9 @@ libperf-y += strlist.o libperf-y += strfilter.o libperf-y += top.o libperf-y += usage.o -libperf-y += wrapper.o libperf-y += dso.o libperf-y += symbol.o +libperf-y += symbol_fprintf.o libperf-y += color.o libperf-y += header.o libperf-y += callchain.o @@ -38,6 +39,7 @@ libperf-y += machine.o libperf-y += map.o libperf-y += pstack.o libperf-y += session.o +libperf-$(CONFIG_AUDIT) += syscalltbl.o libperf-y += ordered-events.o libperf-y += comm.o libperf-y += thread.o @@ -69,9 +71,9 @@ libperf-y += stat-shadow.o libperf-y += record.o libperf-y += srcline.o libperf-y += data.o -libperf-$(CONFIG_X86) += tsc.o -libperf-$(CONFIG_AUXTRACE) += tsc.o +libperf-y += tsc.o libperf-y += cloexec.o +libperf-y += call-path.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ @@ -82,6 +84,7 @@ libperf-y += parse-regs-options.o libperf-y += term.o libperf-y += help-unknown-cmd.o libperf-y += mem-events.o +libperf-y += vsprintf.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o @@ -97,7 +100,10 @@ libperf-$(CONFIG_DWARF) += probe-finder.o libperf-$(CONFIG_DWARF) += dwarf-aux.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o +libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o @@ -106,6 +112,7 @@ libperf-y += scripting-engines/ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o libperf-y += demangle-java.o +libperf-y += demangle-rust.o ifdef CONFIG_JITDUMP libperf-$(CONFIG_LIBELF) += jitdump.o @@ -171,3 +178,7 @@ $(OUTPUT)util/libstring.o: ../lib/string.c FORCE $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c index c0b43ee40d95..6455471d9cd1 100644 --- a/tools/perf/util/alias.c +++ b/tools/perf/util/alias.c @@ -1,4 +1,6 @@ #include "cache.h" +#include "util.h" +#include "config.h" static const char *alias_key; static char *alias_val; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b795b6994144..4024d309bb00 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -354,9 +354,15 @@ static struct ins_ops nop_ops = { .scnprintf = nop__scnprintf, }; -/* - * Must be sorted by name! - */ +static struct ins_ops ret_ops = { + .scnprintf = ins__raw_scnprintf, +}; + +bool ins__is_ret(const struct ins *ins) +{ + return ins->ops == &ret_ops; +} + static struct ins instructions[] = { { .name = "add", .ops = &mov_ops, }, { .name = "addl", .ops = &mov_ops, }, @@ -372,8 +378,8 @@ static struct ins instructions[] = { { .name = "bgt", .ops = &jump_ops, }, { .name = "bhi", .ops = &jump_ops, }, { .name = "bl", .ops = &call_ops, }, - { .name = "blt", .ops = &jump_ops, }, { .name = "bls", .ops = &jump_ops, }, + { .name = "blt", .ops = &jump_ops, }, { .name = "blx", .ops = &call_ops, }, { .name = "bne", .ops = &jump_ops, }, #endif @@ -447,20 +453,42 @@ static struct ins instructions[] = { { .name = "xadd", .ops = &mov_ops, }, { .name = "xbeginl", .ops = &jump_ops, }, { .name = "xbeginq", .ops = &jump_ops, }, + { .name = "retq", .ops = &ret_ops, }, }; -static int ins__cmp(const void *name, const void *insp) +static int ins__key_cmp(const void *name, const void *insp) { const struct ins *ins = insp; return strcmp(name, ins->name); } +static int ins__cmp(const void *a, const void *b) +{ + const struct ins *ia = a; + const struct ins *ib = b; + + return strcmp(ia->name, ib->name); +} + +static void ins__sort(void) +{ + const int nmemb = ARRAY_SIZE(instructions); + + qsort(instructions, nmemb, sizeof(struct ins), ins__cmp); +} + static struct ins *ins__find(const char *name) { const int nmemb = ARRAY_SIZE(instructions); + static bool sorted; + + if (!sorted) { + ins__sort(); + sorted = true; + } - return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__cmp); + return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp); } int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym) @@ -1095,7 +1123,46 @@ static void delete_last_nop(struct symbol *sym) } } -int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) +int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map *map, + int errnum, char *buf, size_t buflen) +{ + struct dso *dso = map->dso; + + BUG_ON(buflen == 0); + + if (errnum >= 0) { + str_error_r(errnum, buf, buflen); + return 0; + } + + switch (errnum) { + case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { + char bf[SBUILD_ID_SIZE + 15] = " with build id "; + char *build_id_msg = NULL; + + if (dso->has_build_id) { + build_id__sprintf(dso->build_id, + sizeof(dso->build_id), bf + 15); + build_id_msg = bf; + } + scnprintf(buf, buflen, + "No vmlinux file%s\nwas found in the path.\n\n" + "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" + "Please use:\n\n" + " perf buildid-cache -vu vmlinux\n\n" + "or:\n\n" + " --vmlinux vmlinux\n", build_id_msg ?: ""); + } + break; + default: + scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); + break; + } + + return 0; +} + +int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize) { struct dso *dso = map->dso; char *filename = dso__build_id_filename(dso, NULL, 0); @@ -1106,23 +1173,21 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) char symfs_filename[PATH_MAX]; struct kcore_extract kce; bool delete_extract = false; + int stdout_fd[2]; int lineno = 0; int nline; + pid_t pid; if (filename) symbol__join_symfs(symfs_filename, filename); if (filename == NULL) { - if (dso->has_build_id) { - pr_err("Can't annotate %s: not enough memory\n", - sym->name); - return -ENOMEM; - } - goto fallback; - } else if (dso__is_kcore(dso)) { + if (dso->has_build_id) + return ENOMEM; goto fallback; - } else if (readlink(symfs_filename, command, sizeof(command)) < 0 || - strstr(command, "[kernel.kallsyms]") || + } else if (dso__is_kcore(dso) || + readlink(symfs_filename, command, sizeof(command)) < 0 || + strstr(command, DSO__NAME_KALLSYMS) || access(symfs_filename, R_OK)) { free(filename); fallback: @@ -1138,27 +1203,7 @@ fallback: if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) { - char bf[BUILD_ID_SIZE * 2 + 16] = " with build id "; - char *build_id_msg = NULL; - - if (dso->annotate_warned) - goto out_free_filename; - - if (dso->has_build_id) { - build_id__sprintf(dso->build_id, - sizeof(dso->build_id), bf + 15); - build_id_msg = bf; - } - err = -ENOENT; - dso->annotate_warned = 1; - pr_err("Can't annotate %s:\n\n" - "No vmlinux file%s\nwas found in the path.\n\n" - "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" - "Please use:\n\n" - " perf buildid-cache -vu vmlinux\n\n" - "or:\n\n" - " --vmlinux vmlinux\n", - sym->name, build_id_msg ?: ""); + err = SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; goto out_free_filename; } @@ -1230,9 +1275,32 @@ fallback: pr_debug("Executing: %s\n", command); - file = popen(command, "r"); + err = -1; + if (pipe(stdout_fd) < 0) { + pr_err("Failure creating the pipe to run %s\n", command); + goto out_remove_tmp; + } + + pid = fork(); + if (pid < 0) { + pr_err("Failure forking to run %s\n", command); + goto out_close_stdout; + } + + if (pid == 0) { + close(stdout_fd[0]); + dup2(stdout_fd[1], 1); + close(stdout_fd[1]); + execl("/bin/sh", "sh", "-c", command, NULL); + perror(command); + exit(-1); + } + + close(stdout_fd[1]); + + file = fdopen(stdout_fd[0], "r"); if (!file) { - pr_err("Failure running %s\n", command); + pr_err("Failure creating FILE stream for %s\n", command); /* * If we were using debug info should retry with * original binary. @@ -1258,9 +1326,11 @@ fallback: if (dso__is_kcore(dso)) delete_last_nop(sym); - pclose(file); - + fclose(file); + err = 0; out_remove_tmp: + close(stdout_fd[0]); + if (dso__needs_decompress(dso)) unlink(symfs_filename); out_free_filename: @@ -1269,6 +1339,10 @@ out_free_filename: if (free_filename) free(filename); return err; + +out_close_stdout: + close(stdout_fd[1]); + goto out_remove_tmp; } static void insert_source_line(struct rb_root *root, struct source_line *src_line) @@ -1494,13 +1568,14 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, const char *d_filename; const char *evsel_name = perf_evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); + struct sym_hist *h = annotation__histogram(notes, evsel->idx); struct disasm_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0; int more = 0; u64 len; int width = 8; - int namelen, evsel_name_len, graph_dotted_len; + int graph_dotted_len; filename = strdup(dso->long_name); if (!filename) @@ -1512,17 +1587,14 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, d_filename = basename(filename); len = symbol__size(sym); - namelen = strlen(d_filename); - evsel_name_len = strlen(evsel_name); if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - printf(" %-*.*s| Source code & Disassembly of %s for %s\n", - width, width, "Percent", d_filename, evsel_name); + graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n", + width, width, "Percent", d_filename, evsel_name, h->sum); - graph_dotted_len = width + namelen + evsel_name_len; - printf("-%-*.*s-----------------------------------------\n", + printf("%-*.*s----\n", graph_dotted_len, graph_dotted_len, graph_dotted_line); if (verbose) @@ -1637,7 +1709,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__annotate(sym, map, 0) < 0) + if (symbol__disassemble(sym, map, 0) < 0) return -1; len = symbol__size(sym); @@ -1658,12 +1730,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, return 0; } -int hist_entry__annotate(struct hist_entry *he, size_t privsize) -{ - return symbol__annotate(he->ms.sym, he->ms.map, privsize); -} - bool ui__has_annotation(void) { - return use_browser == 1 && sort__has_sym; + return use_browser == 1 && perf_hpp_list.sym; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 9241f8c2b7e1..f67ccb027561 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -48,6 +48,7 @@ struct ins { bool ins__is_jump(const struct ins *ins); bool ins__is_call(const struct ins *ins); +bool ins__is_ret(const struct ins *ins); int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); struct annotation; @@ -154,9 +155,27 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); +int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize); -int hist_entry__annotate(struct hist_entry *he, size_t privsize); +enum symbol_disassemble_errno { + SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, + + /* + * Choose an arbitrary negative big number not to clash with standard + * errno since SUS requires the errno has distinct positive values. + * See 'Issue 6' in the link below. + * + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + */ + __SYMBOL_ANNOTATE_ERRNO__START = -10000, + + SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX = __SYMBOL_ANNOTATE_ERRNO__START, + + __SYMBOL_ANNOTATE_ERRNO__END, +}; + +int symbol__strerror_disassemble(struct symbol *sym, struct map *map, + int errnum, char *buf, size_t buflen); int symbol__annotate_init(struct map *map, struct symbol *sym); int symbol__annotate_printf(struct symbol *sym, struct map *map, diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index ec164fe70718..c9169011e55e 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -940,6 +940,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; + synth_opts->initial_skip = 0; } /* @@ -1064,6 +1065,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->last_branch_sz = val; } break; + case 's': + synth_opts->initial_skip = strtoul(p, &endptr, 10); + if (p == endptr) + goto out_err; + p = endptr; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 57ff31ecb8e4..ac5f0d7167e6 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -63,11 +63,13 @@ enum itrace_period_type { * @calls: limit branch samples to calls (can be combined with @returns) * @returns: limit branch samples to returns (can be combined with @calls) * @callchain: add callchain to 'instructions' events + * @thread_stack: feed branches to the thread_stack * @last_branch: add branch context to 'instruction' events * @callchain_sz: maximum callchain size * @last_branch_sz: branch context size * @period: 'instructions' events period * @period_type: 'instructions' events period type + * @initial_skip: skip N events at the beginning. */ struct itrace_synth_opts { bool set; @@ -81,11 +83,13 @@ struct itrace_synth_opts { bool calls; bool returns; bool callchain; + bool thread_stack; bool last_branch; unsigned int callchain_sz; unsigned int last_branch_sz; unsigned long long period; enum itrace_period_type period_type; + unsigned long initial_skip; }; /** diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 0967ce601931..1f12e4e40006 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -37,6 +37,9 @@ DEFINE_PRINT_FN(info, 1) DEFINE_PRINT_FN(debug, 1) struct bpf_prog_priv { + bool is_tp; + char *sys_name; + char *evt_name; struct perf_probe_event pev; bool need_prologue; struct bpf_insn *insns_buf; @@ -118,6 +121,8 @@ clear_prog_priv(struct bpf_program *prog __maybe_unused, cleanup_perf_probe_events(&priv->pev, 1); zfree(&priv->insns_buf); zfree(&priv->type_mapping); + zfree(&priv->sys_name); + zfree(&priv->evt_name); free(priv); } @@ -269,7 +274,8 @@ nextline: } static int -parse_prog_config(const char *config_str, struct perf_probe_event *pev) +parse_prog_config(const char *config_str, const char **p_main_str, + bool *is_tp, struct perf_probe_event *pev) { int err; const char *main_str = parse_prog_config_kvpair(config_str, pev); @@ -277,6 +283,22 @@ parse_prog_config(const char *config_str, struct perf_probe_event *pev) if (IS_ERR(main_str)) return PTR_ERR(main_str); + *p_main_str = main_str; + if (!strchr(main_str, '=')) { + /* Is a tracepoint event? */ + const char *s = strchr(main_str, ':'); + + if (!s) { + pr_debug("bpf: '%s' is not a valid tracepoint\n", + config_str); + return -BPF_LOADER_ERRNO__CONFIG; + } + + *is_tp = true; + return 0; + } + + *is_tp = false; err = parse_perf_probe_command(main_str, pev); if (err < 0) { pr_debug("bpf: '%s' is not a valid config string\n", @@ -292,7 +314,8 @@ config_bpf_program(struct bpf_program *prog) { struct perf_probe_event *pev = NULL; struct bpf_prog_priv *priv = NULL; - const char *config_str; + const char *config_str, *main_str; + bool is_tp = false; int err; /* Initialize per-program probing setting */ @@ -313,10 +336,19 @@ config_bpf_program(struct bpf_program *prog) pev = &priv->pev; pr_debug("bpf: config program '%s'\n", config_str); - err = parse_prog_config(config_str, pev); + err = parse_prog_config(config_str, &main_str, &is_tp, pev); if (err) goto errout; + if (is_tp) { + char *s = strchr(main_str, ':'); + + priv->is_tp = true; + priv->sys_name = strndup(main_str, s - main_str); + priv->evt_name = strdup(s + 1); + goto set_priv; + } + if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) { pr_debug("bpf: '%s': group for event is set and not '%s'.\n", config_str, PERF_BPF_PROBE_GROUP); @@ -339,7 +371,8 @@ config_bpf_program(struct bpf_program *prog) } pr_debug("bpf: config '%s' is ok\n", config_str); - err = bpf_program__set_private(prog, priv, clear_prog_priv); +set_priv: + err = bpf_program__set_priv(prog, priv, clear_prog_priv); if (err) { pr_debug("Failed to set priv for program '%s'\n", config_str); goto errout; @@ -380,15 +413,14 @@ preproc_gen_prologue(struct bpf_program *prog, int n, struct bpf_insn *orig_insns, int orig_insns_cnt, struct bpf_prog_prep_result *res) { + struct bpf_prog_priv *priv = bpf_program__priv(prog); struct probe_trace_event *tev; struct perf_probe_event *pev; - struct bpf_prog_priv *priv; struct bpf_insn *buf; size_t prologue_cnt = 0; int i, err; - err = bpf_program__get_private(prog, (void **)&priv); - if (err || !priv) + if (IS_ERR(priv) || !priv || priv->is_tp) goto errout; pev = &priv->pev; @@ -535,17 +567,21 @@ static int map_prologue(struct perf_probe_event *pev, int *mapping, static int hook_load_preprocessor(struct bpf_program *prog) { + struct bpf_prog_priv *priv = bpf_program__priv(prog); struct perf_probe_event *pev; - struct bpf_prog_priv *priv; bool need_prologue = false; int err, i; - err = bpf_program__get_private(prog, (void **)&priv); - if (err || !priv) { + if (IS_ERR(priv) || !priv) { pr_debug("Internal error when hook preprocessor\n"); return -BPF_LOADER_ERRNO__INTERNAL; } + if (priv->is_tp) { + priv->need_prologue = false; + return 0; + } + pev = &priv->pev; for (i = 0; i < pev->ntevs; i++) { struct probe_trace_event *tev = &pev->tevs[i]; @@ -607,9 +643,18 @@ int bpf__probe(struct bpf_object *obj) if (err) goto out; - err = bpf_program__get_private(prog, (void **)&priv); - if (err || !priv) + priv = bpf_program__priv(prog); + if (IS_ERR(priv) || !priv) { + err = PTR_ERR(priv); goto out; + } + + if (priv->is_tp) { + bpf_program__set_tracepoint(prog); + continue; + } + + bpf_program__set_kprobe(prog); pev = &priv->pev; err = convert_perf_probe_events(pev, 1); @@ -645,13 +690,12 @@ int bpf__unprobe(struct bpf_object *obj) { int err, ret = 0; struct bpf_program *prog; - struct bpf_prog_priv *priv; bpf_object__for_each_program(prog, obj) { + struct bpf_prog_priv *priv = bpf_program__priv(prog); int i; - err = bpf_program__get_private(prog, (void **)&priv); - if (err || !priv) + if (IS_ERR(priv) || !priv || priv->is_tp) continue; for (i = 0; i < priv->pev.ntevs; i++) { @@ -694,26 +738,34 @@ int bpf__load(struct bpf_object *obj) return 0; } -int bpf__foreach_tev(struct bpf_object *obj, - bpf_prog_iter_callback_t func, - void *arg) +int bpf__foreach_event(struct bpf_object *obj, + bpf_prog_iter_callback_t func, + void *arg) { struct bpf_program *prog; int err; bpf_object__for_each_program(prog, obj) { + struct bpf_prog_priv *priv = bpf_program__priv(prog); struct probe_trace_event *tev; struct perf_probe_event *pev; - struct bpf_prog_priv *priv; int i, fd; - err = bpf_program__get_private(prog, - (void **)&priv); - if (err || !priv) { + if (IS_ERR(priv) || !priv) { pr_debug("bpf: failed to get private field\n"); return -BPF_LOADER_ERRNO__INTERNAL; } + if (priv->is_tp) { + fd = bpf_program__fd(prog); + err = (*func)(priv->sys_name, priv->evt_name, fd, arg); + if (err) { + pr_debug("bpf: tracepoint call back failed, stop iterate\n"); + return err; + } + continue; + } + pev = &priv->pev; for (i = 0; i < pev->ntevs; i++) { tev = &pev->tevs[i]; @@ -731,7 +783,7 @@ int bpf__foreach_tev(struct bpf_object *obj, return fd; } - err = (*func)(tev, fd, arg); + err = (*func)(tev->group, tev->event, fd, arg); if (err) { pr_debug("bpf: call back failed, stop iterate\n"); return err; @@ -842,18 +894,67 @@ bpf_map_op__new(struct parse_events_term *term) return op; } +static struct bpf_map_op * +bpf_map_op__clone(struct bpf_map_op *op) +{ + struct bpf_map_op *newop; + + newop = memdup(op, sizeof(*op)); + if (!newop) { + pr_debug("Failed to alloc bpf_map_op\n"); + return NULL; + } + + INIT_LIST_HEAD(&newop->list); + if (op->key_type == BPF_MAP_KEY_RANGES) { + size_t memsz = op->k.array.nr_ranges * + sizeof(op->k.array.ranges[0]); + + newop->k.array.ranges = memdup(op->k.array.ranges, memsz); + if (!newop->k.array.ranges) { + pr_debug("Failed to alloc indices for map\n"); + free(newop); + return NULL; + } + } + + return newop; +} + +static struct bpf_map_priv * +bpf_map_priv__clone(struct bpf_map_priv *priv) +{ + struct bpf_map_priv *newpriv; + struct bpf_map_op *pos, *newop; + + newpriv = zalloc(sizeof(*newpriv)); + if (!newpriv) { + pr_debug("No enough memory to alloc map private\n"); + return NULL; + } + INIT_LIST_HEAD(&newpriv->ops_list); + + list_for_each_entry(pos, &priv->ops_list, list) { + newop = bpf_map_op__clone(pos); + if (!newop) { + bpf_map_priv__purge(newpriv); + return NULL; + } + list_add_tail(&newop->list, &newpriv->ops_list); + } + + return newpriv; +} + static int bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) { - struct bpf_map_priv *priv; - const char *map_name; - int err; + const char *map_name = bpf_map__name(map); + struct bpf_map_priv *priv = bpf_map__priv(map); - map_name = bpf_map__get_name(map); - err = bpf_map__get_private(map, (void **)&priv); - if (err) { + if (IS_ERR(priv)) { pr_debug("Failed to get private from map %s\n", map_name); - return err; + return PTR_ERR(priv); } if (!priv) { @@ -864,7 +965,7 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) } INIT_LIST_HEAD(&priv->ops_list); - if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) { + if (bpf_map__set_priv(map, priv, bpf_map_priv__clear)) { free(priv); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -896,30 +997,26 @@ static int __bpf_map__config_value(struct bpf_map *map, struct parse_events_term *term) { - struct bpf_map_def def; struct bpf_map_op *op; - const char *map_name; - int err; + const char *map_name = bpf_map__name(map); + const struct bpf_map_def *def = bpf_map__def(map); - map_name = bpf_map__get_name(map); - - err = bpf_map__get_def(map, &def); - if (err) { + if (IS_ERR(def)) { pr_debug("Unable to get map definition from '%s'\n", map_name); return -BPF_LOADER_ERRNO__INTERNAL; } - if (def.type != BPF_MAP_TYPE_ARRAY) { + if (def->type != BPF_MAP_TYPE_ARRAY) { pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; } - if (def.key_size < sizeof(unsigned int)) { + if (def->key_size < sizeof(unsigned int)) { pr_debug("Map %s has incorrect key size\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; } - switch (def.value_size) { + switch (def->value_size) { case 1: case 2: case 4: @@ -962,12 +1059,10 @@ __bpf_map__config_event(struct bpf_map *map, struct perf_evlist *evlist) { struct perf_evsel *evsel; - struct bpf_map_def def; + const struct bpf_map_def *def; struct bpf_map_op *op; - const char *map_name; - int err; + const char *map_name = bpf_map__name(map); - map_name = bpf_map__get_name(map); evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str); if (!evsel) { pr_debug("Event (for '%s') '%s' doesn't exist\n", @@ -975,18 +1070,18 @@ __bpf_map__config_event(struct bpf_map *map, return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; } - err = bpf_map__get_def(map, &def); - if (err) { + def = bpf_map__def(map); + if (IS_ERR(def)) { pr_debug("Unable to get map definition from '%s'\n", map_name); - return err; + return PTR_ERR(def); } /* * No need to check key_size and value_size: * kernel has already checked them. */ - if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; @@ -1035,9 +1130,8 @@ config_map_indices_range_check(struct parse_events_term *term, const char *map_name) { struct parse_events_array *array = &term->array; - struct bpf_map_def def; + const struct bpf_map_def *def; unsigned int i; - int err; if (!array->nr_ranges) return 0; @@ -1047,8 +1141,8 @@ config_map_indices_range_check(struct parse_events_term *term, return -BPF_LOADER_ERRNO__INTERNAL; } - err = bpf_map__get_def(map, &def); - if (err) { + def = bpf_map__def(map); + if (IS_ERR(def)) { pr_debug("ERROR: Unable to get map definition from '%s'\n", map_name); return -BPF_LOADER_ERRNO__INTERNAL; @@ -1059,7 +1153,7 @@ config_map_indices_range_check(struct parse_events_term *term, size_t length = array->ranges[i].length; unsigned int idx = start + length - 1; - if (idx >= def.max_entries) { + if (idx >= def->max_entries) { pr_debug("ERROR: index %d too large\n", idx); return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; } @@ -1095,7 +1189,7 @@ bpf__obj_config_map(struct bpf_object *obj, goto out; } - map = bpf_object__get_map_by_name(obj, map_name); + map = bpf_object__find_map_by_name(obj, map_name); if (!map) { pr_debug("ERROR: Map %s doesn't exist\n", map_name); err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST; @@ -1152,14 +1246,14 @@ out: } typedef int (*map_config_func_t)(const char *name, int map_fd, - struct bpf_map_def *pdef, + const struct bpf_map_def *pdef, struct bpf_map_op *op, void *pkey, void *arg); static int foreach_key_array_all(map_config_func_t func, void *arg, const char *name, - int map_fd, struct bpf_map_def *pdef, + int map_fd, const struct bpf_map_def *pdef, struct bpf_map_op *op) { unsigned int i; @@ -1179,7 +1273,7 @@ foreach_key_array_all(map_config_func_t func, static int foreach_key_array_ranges(map_config_func_t func, void *arg, const char *name, int map_fd, - struct bpf_map_def *pdef, + const struct bpf_map_def *pdef, struct bpf_map_op *op) { unsigned int i, j; @@ -1209,15 +1303,12 @@ bpf_map_config_foreach_key(struct bpf_map *map, void *arg) { int err, map_fd; - const char *name; struct bpf_map_op *op; - struct bpf_map_def def; - struct bpf_map_priv *priv; + const struct bpf_map_def *def; + const char *name = bpf_map__name(map); + struct bpf_map_priv *priv = bpf_map__priv(map); - name = bpf_map__get_name(map); - - err = bpf_map__get_private(map, (void **)&priv); - if (err) { + if (IS_ERR(priv)) { pr_debug("ERROR: failed to get private from map %s\n", name); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -1226,29 +1317,29 @@ bpf_map_config_foreach_key(struct bpf_map *map, return 0; } - err = bpf_map__get_def(map, &def); - if (err) { + def = bpf_map__def(map); + if (IS_ERR(def)) { pr_debug("ERROR: failed to get definition from map %s\n", name); return -BPF_LOADER_ERRNO__INTERNAL; } - map_fd = bpf_map__get_fd(map); + map_fd = bpf_map__fd(map); if (map_fd < 0) { pr_debug("ERROR: failed to get fd from map %s\n", name); return map_fd; } list_for_each_entry(op, &priv->ops_list, list) { - switch (def.type) { + switch (def->type) { case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PERF_EVENT_ARRAY: switch (op->key_type) { case BPF_MAP_KEY_ALL: err = foreach_key_array_all(func, arg, name, - map_fd, &def, op); + map_fd, def, op); break; case BPF_MAP_KEY_RANGES: err = foreach_key_array_ranges(func, arg, name, - map_fd, &def, + map_fd, def, op); break; default: @@ -1358,7 +1449,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, static int apply_obj_config_map_for_key(const char *name, int map_fd, - struct bpf_map_def *pdef __maybe_unused, + const struct bpf_map_def *pdef, struct bpf_map_op *op, void *pkey, void *arg __maybe_unused) { @@ -1417,6 +1508,87 @@ int bpf__apply_obj_config(void) return 0; } +#define bpf__for_each_map(pos, obj, objtmp) \ + bpf_object__for_each_safe(obj, objtmp) \ + bpf_map__for_each(pos, obj) + +#define bpf__for_each_stdout_map(pos, obj, objtmp) \ + bpf__for_each_map(pos, obj, objtmp) \ + if (bpf_map__name(pos) && \ + (strcmp("__bpf_stdout__", \ + bpf_map__name(pos)) == 0)) + +int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) +{ + struct bpf_map_priv *tmpl_priv = NULL; + struct bpf_object *obj, *tmp; + struct perf_evsel *evsel = NULL; + struct bpf_map *map; + int err; + bool need_init = false; + + bpf__for_each_stdout_map(map, obj, tmp) { + struct bpf_map_priv *priv = bpf_map__priv(map); + + if (IS_ERR(priv)) + return -BPF_LOADER_ERRNO__INTERNAL; + + /* + * No need to check map type: type should have been + * verified by kernel. + */ + if (!need_init && !priv) + need_init = !priv; + if (!tmpl_priv && priv) + tmpl_priv = priv; + } + + if (!need_init) + return 0; + + if (!tmpl_priv) { + err = parse_events(evlist, "bpf-output/no-inherit=1,name=__bpf_stdout__/", + NULL); + if (err) { + pr_debug("ERROR: failed to create bpf-output event\n"); + return -err; + } + + evsel = perf_evlist__last(evlist); + } + + bpf__for_each_stdout_map(map, obj, tmp) { + struct bpf_map_priv *priv = bpf_map__priv(map); + + if (IS_ERR(priv)) + return -BPF_LOADER_ERRNO__INTERNAL; + if (priv) + continue; + + if (tmpl_priv) { + priv = bpf_map_priv__clone(tmpl_priv); + if (!priv) + return -ENOMEM; + + err = bpf_map__set_priv(map, priv, bpf_map_priv__clear); + if (err) { + bpf_map_priv__clear(map, priv); + return err; + } + } else if (evsel) { + struct bpf_map_op *op; + + op = bpf_map__add_newop(map, NULL); + if (IS_ERR(op)) + return PTR_ERR(op); + op->op_type = BPF_MAP_OP_SET_EVSEL; + op->v.evsel = evsel; + } + } + + return 0; +} + #define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) #define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c) #define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START) @@ -1472,7 +1644,7 @@ bpf_loader_strerror(int err, char *buf, size_t size) snprintf(buf, size, "Unknown bpf loader error %d", err); else snprintf(buf, size, "%s", - strerror_r(err, sbuf, sizeof(sbuf))); + str_error_r(err, sbuf, sizeof(sbuf))); buf[size - 1] = '\0'; return -1; @@ -1542,7 +1714,7 @@ int bpf__strerror_load(struct bpf_object *obj, { bpf__strerror_head(err, buf, size); case LIBBPF_ERRNO__KVER: { - unsigned int obj_kver = bpf_object__get_kversion(obj); + unsigned int obj_kver = bpf_object__kversion(obj); unsigned int real_kver; if (fetch_kernel_version(&real_kver, NULL, 0)) { @@ -1590,3 +1762,11 @@ int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) bpf__strerror_end(buf, size); return 0; } + +int bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused, + int err, char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_end(buf, size); + return 0; +} diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index be4311944e3d..f2b737b225f2 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -46,7 +46,7 @@ struct bpf_object; struct parse_events_term; #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" -typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev, +typedef int (*bpf_prog_iter_callback_t)(const char *group, const char *event, int fd, void *arg); #ifdef HAVE_LIBBPF_SUPPORT @@ -67,8 +67,8 @@ int bpf__strerror_probe(struct bpf_object *obj, int err, int bpf__load(struct bpf_object *obj); int bpf__strerror_load(struct bpf_object *obj, int err, char *buf, size_t size); -int bpf__foreach_tev(struct bpf_object *obj, - bpf_prog_iter_callback_t func, void *arg); +int bpf__foreach_event(struct bpf_object *obj, + bpf_prog_iter_callback_t func, void *arg); int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term, struct perf_evlist *evlist, int *error_pos); @@ -79,6 +79,11 @@ int bpf__strerror_config_obj(struct bpf_object *obj, size_t size); int bpf__apply_obj_config(void); int bpf__strerror_apply_obj_config(int err, char *buf, size_t size); + +int bpf__setup_stdout(struct perf_evlist *evlist); +int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err, + char *buf, size_t size); + #else static inline struct bpf_object * bpf__prepare_load(const char *filename __maybe_unused, @@ -102,9 +107,9 @@ static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused) { return 0 static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; } static inline int -bpf__foreach_tev(struct bpf_object *obj __maybe_unused, - bpf_prog_iter_callback_t func __maybe_unused, - void *arg __maybe_unused) +bpf__foreach_event(struct bpf_object *obj __maybe_unused, + bpf_prog_iter_callback_t func __maybe_unused, + void *arg __maybe_unused) { return 0; } @@ -125,6 +130,12 @@ bpf__apply_obj_config(void) } static inline int +bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) +{ + return 0; +} + +static inline int __bpf_strerror(char *buf, size_t size) { if (!size) @@ -177,5 +188,13 @@ bpf__strerror_apply_obj_config(int err __maybe_unused, { return __bpf_strerror(buf, size); } + +static inline int +bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused, + int err __maybe_unused, char *buf, + size_t size) +{ + return __bpf_strerror(buf, size); +} #endif #endif diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 0573c2ec861d..5651f3c12f93 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -17,6 +17,7 @@ #include "tool.h" #include "header.h" #include "vdso.h" +#include "probe-file.h" static bool no_buildid_cache; @@ -144,7 +145,28 @@ static int asnprintf(char **strp, size_t size, const char *fmt, ...) return ret; } -static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) +char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, + size_t size) +{ + bool retry_old = true; + + snprintf(bf, size, "%s/%s/%s/kallsyms", + buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); +retry: + if (!access(bf, F_OK)) + return bf; + if (retry_old) { + /* Try old style kallsyms cache */ + snprintf(bf, size, "%s/%s/%s", + buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); + retry_old = false; + goto retry; + } + + return NULL; +} + +char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size) { char *tmp = bf; int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir, @@ -154,23 +176,107 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) return bf; } +char *build_id_cache__origname(const char *sbuild_id) +{ + char *linkname; + char buf[PATH_MAX]; + char *ret = NULL, *p; + size_t offs = 5; /* == strlen("../..") */ + + linkname = build_id_cache__linkname(sbuild_id, NULL, 0); + if (!linkname) + return NULL; + + if (readlink(linkname, buf, PATH_MAX) < 0) + goto out; + /* The link should be "../..<origpath>/<sbuild_id>" */ + p = strrchr(buf, '/'); /* Cut off the "/<sbuild_id>" */ + if (p && (p > buf + offs)) { + *p = '\0'; + if (buf[offs + 1] == '[') + offs++; /* + * This is a DSO name, like [kernel.kallsyms]. + * Skip the first '/', since this is not the + * cache of a regular file. + */ + ret = strdup(buf + offs); /* Skip "../..[/]" */ + } +out: + free(linkname); + return ret; +} + +/* Check if the given build_id cache is valid on current running system */ +static bool build_id_cache__valid_id(char *sbuild_id) +{ + char real_sbuild_id[SBUILD_ID_SIZE] = ""; + char *pathname; + int ret = 0; + bool result = false; + + pathname = build_id_cache__origname(sbuild_id); + if (!pathname) + return false; + + if (!strcmp(pathname, DSO__NAME_KALLSYMS)) + ret = sysfs__sprintf_build_id("/", real_sbuild_id); + else if (pathname[0] == '/') + ret = filename__sprintf_build_id(pathname, real_sbuild_id); + else + ret = -EINVAL; /* Should we support other special DSO cache? */ + if (ret >= 0) + result = (strcmp(sbuild_id, real_sbuild_id) == 0); + free(pathname); + + return result; +} + +static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso) +{ + return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : "elf"); +} + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { - char build_id_hex[SBUILD_ID_SIZE]; + bool is_kallsyms = dso__is_kallsyms((struct dso *)dso); + bool is_vdso = dso__is_vdso((struct dso *)dso); + char sbuild_id[SBUILD_ID_SIZE]; + char *linkname; + bool alloc = (bf == NULL); + int ret; if (!dso->has_build_id) return NULL; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); - return build_id__filename(build_id_hex, bf, size); + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + linkname = build_id_cache__linkname(sbuild_id, NULL, 0); + if (!linkname) + return NULL; + + /* Check if old style build_id cache */ + if (is_regular_file(linkname)) + ret = asnprintf(&bf, size, "%s", linkname); + else + ret = asnprintf(&bf, size, "%s/%s", linkname, + build_id_cache__basename(is_kallsyms, is_vdso)); + if (ret < 0 || (!alloc && size < (unsigned int)ret)) + bf = NULL; + free(linkname); + + return bf; } bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) { - char *id_name, *ch; + char *id_name = NULL, *ch; struct stat sb; + char sbuild_id[SBUILD_ID_SIZE]; + + if (!dso->has_build_id) + goto err; - id_name = dso__build_id_filename(dso, bf, size); + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + id_name = build_id_cache__linkname(sbuild_id, NULL, 0); if (!id_name) goto err; if (access(id_name, F_OK)) @@ -194,18 +300,14 @@ bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) if (ch - 3 < bf) goto err; + free(id_name); return strncmp(".ko", ch - 3, 3) == 0; err: - /* - * If dso__build_id_filename work, get id_name again, - * because id_name points to bf and is broken. - */ - if (id_name) - id_name = dso__build_id_filename(dso, bf, size); pr_err("Invalid build id: %s\n", id_name ? : dso->long_name ? : dso->short_name ? : "[unknown]"); + free(id_name); return false; } @@ -256,19 +358,19 @@ static int machine__write_buildid_table(struct machine *machine, int fd) size_t name_len; bool in_kernel = false; - if (!pos->hit) + if (!pos->hit && !dso__is_vdso(pos)) continue; if (dso__is_vdso(pos)) { name = pos->short_name; - name_len = pos->short_name_len + 1; + name_len = pos->short_name_len; } else if (dso__is_kcore(pos)) { machine__mmap_name(machine, nm, sizeof(nm)); name = nm; - name_len = strlen(nm) + 1; + name_len = strlen(nm); } else { name = pos->long_name; - name_len = pos->long_name_len + 1; + name_len = pos->long_name_len; } in_kernel = pos->kernel || @@ -340,8 +442,132 @@ void disable_buildid_cache(void) no_buildid_cache = true; } -static char *build_id_cache__dirname_from_path(const char *name, - bool is_kallsyms, bool is_vdso) +static bool lsdir_bid_head_filter(const char *name __maybe_unused, + struct dirent *d __maybe_unused) +{ + return (strlen(d->d_name) == 2) && + isxdigit(d->d_name[0]) && isxdigit(d->d_name[1]); +} + +static bool lsdir_bid_tail_filter(const char *name __maybe_unused, + struct dirent *d __maybe_unused) +{ + int i = 0; + while (isxdigit(d->d_name[i]) && i < SBUILD_ID_SIZE - 3) + i++; + return (i == SBUILD_ID_SIZE - 3) && (d->d_name[i] == '\0'); +} + +struct strlist *build_id_cache__list_all(bool validonly) +{ + struct strlist *toplist, *linklist = NULL, *bidlist; + struct str_node *nd, *nd2; + char *topdir, *linkdir = NULL; + char sbuild_id[SBUILD_ID_SIZE]; + + /* for filename__ functions */ + if (validonly) + symbol__init(NULL); + + /* Open the top-level directory */ + if (asprintf(&topdir, "%s/.build-id/", buildid_dir) < 0) + return NULL; + + bidlist = strlist__new(NULL, NULL); + if (!bidlist) + goto out; + + toplist = lsdir(topdir, lsdir_bid_head_filter); + if (!toplist) { + pr_debug("Error in lsdir(%s): %d\n", topdir, errno); + /* If there is no buildid cache, return an empty list */ + if (errno == ENOENT) + goto out; + goto err_out; + } + + strlist__for_each_entry(nd, toplist) { + if (asprintf(&linkdir, "%s/%s", topdir, nd->s) < 0) + goto err_out; + /* Open the lower-level directory */ + linklist = lsdir(linkdir, lsdir_bid_tail_filter); + if (!linklist) { + pr_debug("Error in lsdir(%s): %d\n", linkdir, errno); + goto err_out; + } + strlist__for_each_entry(nd2, linklist) { + if (snprintf(sbuild_id, SBUILD_ID_SIZE, "%s%s", + nd->s, nd2->s) != SBUILD_ID_SIZE - 1) + goto err_out; + if (validonly && !build_id_cache__valid_id(sbuild_id)) + continue; + if (strlist__add(bidlist, sbuild_id) < 0) + goto err_out; + } + strlist__delete(linklist); + zfree(&linkdir); + } + +out_free: + strlist__delete(toplist); +out: + free(topdir); + + return bidlist; + +err_out: + strlist__delete(linklist); + zfree(&linkdir); + strlist__delete(bidlist); + bidlist = NULL; + goto out_free; +} + +static bool str_is_build_id(const char *maybe_sbuild_id, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (!isxdigit(maybe_sbuild_id[i])) + return false; + } + return true; +} + +/* Return the valid complete build-id */ +char *build_id_cache__complement(const char *incomplete_sbuild_id) +{ + struct strlist *bidlist; + struct str_node *nd, *cand = NULL; + char *sbuild_id = NULL; + size_t len = strlen(incomplete_sbuild_id); + + if (len >= SBUILD_ID_SIZE || + !str_is_build_id(incomplete_sbuild_id, len)) + return NULL; + + bidlist = build_id_cache__list_all(true); + if (!bidlist) + return NULL; + + strlist__for_each_entry(nd, bidlist) { + if (strncmp(nd->s, incomplete_sbuild_id, len) != 0) + continue; + if (cand) { /* Error: There are more than 2 candidates. */ + cand = NULL; + break; + } + cand = nd; + } + if (cand) + sbuild_id = strdup(cand->s); + strlist__delete(bidlist); + + return sbuild_id; +} + +char *build_id_cache__cachedir(const char *sbuild_id, const char *name, + bool is_kallsyms, bool is_vdso) { char *realname = (char *)name, *filename; bool slash = is_kallsyms || is_vdso; @@ -352,8 +578,9 @@ static char *build_id_cache__dirname_from_path(const char *name, return NULL; } - if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "", - is_vdso ? DSO__NAME_VDSO : realname) < 0) + if (asprintf(&filename, "%s%s%s%s%s", buildid_dir, slash ? "/" : "", + is_vdso ? DSO__NAME_VDSO : realname, + sbuild_id ? "/" : "", sbuild_id ?: "") < 0) filename = NULL; if (!slash) @@ -365,49 +592,51 @@ static char *build_id_cache__dirname_from_path(const char *name, int build_id_cache__list_build_ids(const char *pathname, struct strlist **result) { - struct strlist *list; char *dir_name; - DIR *dir; - struct dirent *d; int ret = 0; - list = strlist__new(NULL, NULL); - dir_name = build_id_cache__dirname_from_path(pathname, false, false); - if (!list || !dir_name) { - ret = -ENOMEM; - goto out; - } + dir_name = build_id_cache__cachedir(NULL, pathname, false, false); + if (!dir_name) + return -ENOMEM; - /* List up all dirents */ - dir = opendir(dir_name); - if (!dir) { + *result = lsdir(dir_name, lsdir_no_dot_filter); + if (!*result) ret = -errno; - goto out; - } + free(dir_name); - while ((d = readdir(dir)) != NULL) { - if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) - continue; - strlist__add(list, d->d_name); - } - closedir(dir); + return ret; +} -out: - free(dir_name); - if (ret) - strlist__delete(list); - else - *result = list; +#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_GELF_GETNOTE_SUPPORT) +static int build_id_cache__add_sdt_cache(const char *sbuild_id, + const char *realname) +{ + struct probe_cache *cache; + int ret; + cache = probe_cache__new(sbuild_id); + if (!cache) + return -1; + + ret = probe_cache__scan_sdt(cache, realname); + if (ret >= 0) { + pr_debug("Found %d SDTs in %s\n", ret, realname); + if (probe_cache__commit(cache) < 0) + ret = -1; + } + probe_cache__delete(cache); return ret; } +#else +#define build_id_cache__add_sdt_cache(sbuild_id, realname) (0) +#endif int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso) { const size_t size = PATH_MAX; char *realname = NULL, *filename = NULL, *dir_name = NULL, - *linkname = zalloc(size), *targetname, *tmp; + *linkname = zalloc(size), *tmp; int err = -1; if (!is_kallsyms) { @@ -416,14 +645,22 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; } - dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso); + dir_name = build_id_cache__cachedir(sbuild_id, name, + is_kallsyms, is_vdso); if (!dir_name) goto out_free; + /* Remove old style build-id cache */ + if (is_regular_file(dir_name)) + if (unlink(dir_name)) + goto out_free; + if (mkdir_p(dir_name, 0755)) goto out_free; - if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) { + /* Save the allocated buildid dirname */ + if (asprintf(&filename, "%s/%s", dir_name, + build_id_cache__basename(is_kallsyms, is_vdso)) < 0) { filename = NULL; goto out_free; } @@ -437,7 +674,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; } - if (!build_id__filename(sbuild_id, linkname, size)) + if (!build_id_cache__linkname(sbuild_id, linkname, size)) goto out_free; tmp = strrchr(linkname, '/'); *tmp = '\0'; @@ -446,11 +683,16 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; *tmp = '/'; - targetname = filename + strlen(buildid_dir) - 5; - memcpy(targetname, "../..", 5); + tmp = dir_name + strlen(buildid_dir) - 5; + memcpy(tmp, "../..", 5); - if (symlink(targetname, linkname) == 0) + if (symlink(tmp, linkname) == 0) err = 0; + + /* Update SDT cache : error is just warned */ + if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) + pr_debug("Failed to update/scan SDT cache for %s\n", realname); + out_free: if (!is_kallsyms) free(realname); @@ -474,7 +716,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, bool build_id_cache__cached(const char *sbuild_id) { bool ret = false; - char *filename = build_id__filename(sbuild_id, NULL, 0); + char *filename = build_id_cache__linkname(sbuild_id, NULL, 0); if (filename && !access(filename, F_OK)) ret = true; @@ -493,7 +735,7 @@ int build_id_cache__remove_s(const char *sbuild_id) if (filename == NULL || linkname == NULL) goto out_free; - if (!build_id__filename(sbuild_id, linkname, size)) + if (!build_id_cache__linkname(sbuild_id, linkname, size)) goto out_free; if (access(linkname, F_OK)) @@ -511,7 +753,7 @@ int build_id_cache__remove_s(const char *sbuild_id) tmp = strrchr(linkname, '/') + 1; snprintf(tmp, size - (tmp - linkname), "%s", filename); - if (unlink(linkname)) + if (rm_rf(linkname)) goto out_free; err = 0; @@ -523,7 +765,7 @@ out_free: static int dso__cache_build_id(struct dso *dso, struct machine *machine) { - bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; + bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); const char *name = dso->long_name; char nm[PATH_MAX]; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 64af3e20610d..d27990610f9f 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -14,6 +14,8 @@ struct dso; int build_id__sprintf(const u8 *build_id, int len, char *bf); int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); +char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, + size_t size); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); @@ -28,6 +30,12 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, int fd); int perf_session__cache_build_ids(struct perf_session *session); +char *build_id_cache__origname(const char *sbuild_id); +char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size); +char *build_id_cache__cachedir(const char *sbuild_id, const char *name, + bool is_kallsyms, bool is_vdso); +struct strlist *build_id_cache__list_all(bool validonly); +char *build_id_cache__complement(const char *incomplete_sbuild_id); int build_id_cache__list_build_ids(const char *pathname, struct strlist **result); bool build_id_cache__cached(const char *sbuild_id); diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 1f5a93c2c9a2..512c0c83fbc6 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -1,72 +1,30 @@ #ifndef __PERF_CACHE_H #define __PERF_CACHE_H -#include <stdbool.h> -#include "util.h" #include "strbuf.h" #include <subcmd/pager.h> -#include "../perf.h" #include "../ui/ui.h" #include <linux/string.h> #define CMD_EXEC_PATH "--exec-path" -#define CMD_PERF_DIR "--perf-dir=" -#define CMD_WORK_TREE "--work-tree=" #define CMD_DEBUGFS_DIR "--debugfs-dir=" -#define PERF_DIR_ENVIRONMENT "PERF_DIR" -#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" -#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" #define PERF_PAGER_ENVIRONMENT "PERF_PAGER" -extern const char *config_exclusive_filename; - -typedef int (*config_fn_t)(const char *, const char *, void *); -int perf_default_config(const char *, const char *, void *); -int perf_config(config_fn_t fn, void *); -int perf_config_int(const char *, const char *); -u64 perf_config_u64(const char *, const char *); -int perf_config_bool(const char *, const char *); -int config_error_nonbool(const char *); -const char *perf_config_dirname(const char *, const char *); -const char *perf_etc_perfconfig(void); - char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); #define alloc_nr(x) (((x)+16)*3/2) -/* - * Realloc the buffer pointed at by variable 'x' so that it can hold - * at least 'nr' entries; the number of entries currently allocated - * is 'alloc', using the standard growing factor alloc_nr() macro. - * - * DO NOT USE any expression with side-effect for 'x' or 'alloc'. - */ -#define ALLOC_GROW(x, nr, alloc) \ - do { \ - if ((nr) > alloc) { \ - if (alloc_nr(alloc) < (nr)) \ - alloc = (nr); \ - else \ - alloc = alloc_nr(alloc); \ - x = xrealloc((x), alloc * sizeof(*(x))); \ - } \ - } while(0) - - static inline int is_absolute_path(const char *path) { return path[0] == '/'; } -char *strip_path_suffix(const char *path, const char *suffix); - char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/call-path.c b/tools/perf/util/call-path.c new file mode 100644 index 000000000000..904a17052e38 --- /dev/null +++ b/tools/perf/util/call-path.c @@ -0,0 +1,122 @@ +/* + * call-path.h: Manipulate a tree data structure containing function call paths + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/rbtree.h> +#include <linux/list.h> + +#include "util.h" +#include "call-path.h" + +static void call_path__init(struct call_path *cp, struct call_path *parent, + struct symbol *sym, u64 ip, bool in_kernel) +{ + cp->parent = parent; + cp->sym = sym; + cp->ip = sym ? 0 : ip; + cp->db_id = 0; + cp->in_kernel = in_kernel; + RB_CLEAR_NODE(&cp->rb_node); + cp->children = RB_ROOT; +} + +struct call_path_root *call_path_root__new(void) +{ + struct call_path_root *cpr; + + cpr = zalloc(sizeof(struct call_path_root)); + if (!cpr) + return NULL; + call_path__init(&cpr->call_path, NULL, NULL, 0, false); + INIT_LIST_HEAD(&cpr->blocks); + return cpr; +} + +void call_path_root__free(struct call_path_root *cpr) +{ + struct call_path_block *pos, *n; + + list_for_each_entry_safe(pos, n, &cpr->blocks, node) { + list_del(&pos->node); + free(pos); + } + free(cpr); +} + +static struct call_path *call_path__new(struct call_path_root *cpr, + struct call_path *parent, + struct symbol *sym, u64 ip, + bool in_kernel) +{ + struct call_path_block *cpb; + struct call_path *cp; + size_t n; + + if (cpr->next < cpr->sz) { + cpb = list_last_entry(&cpr->blocks, struct call_path_block, + node); + } else { + cpb = zalloc(sizeof(struct call_path_block)); + if (!cpb) + return NULL; + list_add_tail(&cpb->node, &cpr->blocks); + cpr->sz += CALL_PATH_BLOCK_SIZE; + } + + n = cpr->next++ & CALL_PATH_BLOCK_MASK; + cp = &cpb->cp[n]; + + call_path__init(cp, parent, sym, ip, in_kernel); + + return cp; +} + +struct call_path *call_path__findnew(struct call_path_root *cpr, + struct call_path *parent, + struct symbol *sym, u64 ip, u64 ks) +{ + struct rb_node **p; + struct rb_node *node_parent = NULL; + struct call_path *cp; + bool in_kernel = ip >= ks; + + if (sym) + ip = 0; + + if (!parent) + return call_path__new(cpr, parent, sym, ip, in_kernel); + + p = &parent->children.rb_node; + while (*p != NULL) { + node_parent = *p; + cp = rb_entry(node_parent, struct call_path, rb_node); + + if (cp->sym == sym && cp->ip == ip) + return cp; + + if (sym < cp->sym || (sym == cp->sym && ip < cp->ip)) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + cp = call_path__new(cpr, parent, sym, ip, in_kernel); + if (!cp) + return NULL; + + rb_link_node(&cp->rb_node, node_parent, p); + rb_insert_color(&cp->rb_node, &parent->children); + + return cp; +} diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h new file mode 100644 index 000000000000..477f6d03b659 --- /dev/null +++ b/tools/perf/util/call-path.h @@ -0,0 +1,77 @@ +/* + * call-path.h: Manipulate a tree data structure containing function call paths + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __PERF_CALL_PATH_H +#define __PERF_CALL_PATH_H + +#include <sys/types.h> + +#include <linux/types.h> +#include <linux/rbtree.h> + +/** + * struct call_path - node in list of calls leading to a function call. + * @parent: call path to the parent function call + * @sym: symbol of function called + * @ip: only if sym is null, the ip of the function + * @db_id: id used for db-export + * @in_kernel: whether function is a in the kernel + * @rb_node: node in parent's tree of called functions + * @children: tree of call paths of functions called + * + * In combination with the call_return structure, the call_path structure + * defines a context-sensitve call-graph. + */ +struct call_path { + struct call_path *parent; + struct symbol *sym; + u64 ip; + u64 db_id; + bool in_kernel; + struct rb_node rb_node; + struct rb_root children; +}; + +#define CALL_PATH_BLOCK_SHIFT 8 +#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT) +#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1) + +struct call_path_block { + struct call_path cp[CALL_PATH_BLOCK_SIZE]; + struct list_head node; +}; + +/** + * struct call_path_root - root of all call paths. + * @call_path: root call path + * @blocks: list of blocks to store call paths + * @next: next free space + * @sz: number of spaces + */ +struct call_path_root { + struct call_path call_path; + struct list_head blocks; + size_t next; + size_t sz; +}; + +struct call_path_root *call_path_root__new(void); +void call_path_root__free(struct call_path_root *cpr); + +struct call_path *call_path__findnew(struct call_path_root *cpr, + struct call_path *parent, + struct symbol *sym, u64 ip, u64 ks); + +#endif diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 24b4bd0d7754..07fd30bc2f81 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -109,6 +109,7 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt) bool record_opt_set = false; bool try_stack_size = false; + callchain_param.enabled = true; symbol_conf.use_callchain = true; if (!arg) @@ -117,6 +118,7 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt) while ((tok = strtok((char *)arg, ",")) != NULL) { if (!strncmp(tok, "none", strlen(tok))) { callchain_param.mode = CHAIN_NONE; + callchain_param.enabled = false; symbol_conf.use_callchain = false; return 0; } @@ -788,7 +790,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, return 0; } -int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent, +int sample__resolve_callchain(struct perf_sample *sample, + struct callchain_cursor *cursor, struct symbol **parent, struct perf_evsel *evsel, struct addr_location *al, int max_stack) { @@ -796,8 +799,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent return 0; if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || - sort__has_parent) { - return thread__resolve_callchain(al->thread, evsel, sample, + perf_hpp_list.parent) { + return thread__resolve_callchain(al->thread, cursor, evsel, sample, parent, al, max_stack); } return 0; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index d2a9e694810c..13e75549c440 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -94,6 +94,7 @@ struct callchain_param { enum perf_call_graph_mode record_mode; u32 dump_size; enum chain_mode mode; + u16 max_stack; u32 print_limit; double min_percent; sort_chain_func_t sort; @@ -105,6 +106,7 @@ struct callchain_param { }; extern struct callchain_param callchain_param; +extern struct callchain_param callchain_param_default; struct callchain_list { u64 ip; @@ -212,7 +214,14 @@ struct hist_entry; int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset); int record_callchain_opt(const struct option *opt, const char *arg, int unset); -int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent, +struct record_opts; + +int record_opts__parse_callchain(struct record_opts *record, + struct callchain_param *callchain, + const char *arg, bool unset); + +int sample__resolve_callchain(struct perf_sample *sample, + struct callchain_cursor *cursor, struct symbol **parent, struct perf_evsel *evsel, struct addr_location *al, int max_stack); int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 90aa1b46b2e5..8fdee24725a7 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -81,7 +81,7 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) /* * check if cgrp is already defined, if so we reuse it */ - evlist__for_each(evlist, counter) { + evlist__for_each_entry(evlist, counter) { cgrp = counter->cgrp; if (!cgrp) continue; @@ -110,7 +110,7 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) * if add cgroup N, then need to find event N */ n = 0; - evlist__for_each(evlist, counter) { + evlist__for_each_entry(evlist, counter) { if (n == nr_cgroups) goto found; n++; diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 2babddaa2481..f0dcd0ee0afa 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -4,18 +4,24 @@ #include "cloexec.h" #include "asm/bug.h" #include "debug.h" +#include <unistd.h> +#include <asm/unistd.h> +#include <sys/syscall.h> static unsigned long flag = PERF_FLAG_FD_CLOEXEC; -#ifdef __GLIBC_PREREQ -#if !__GLIBC_PREREQ(2, 6) int __weak sched_getcpu(void) { +#ifdef __NR_getcpu + unsigned cpu; + int err = syscall(__NR_getcpu, &cpu, NULL, NULL); + if (!err) + return cpu; +#else errno = ENOSYS; +#endif return -1; } -#endif -#endif static int perf_flag_probe(void) { @@ -58,7 +64,7 @@ static int perf_flag_probe(void) WARN_ONCE(err != EINVAL && err != EBUSY, "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n", - err, strerror_r(err, sbuf, sizeof(sbuf))); + err, str_error_r(err, sbuf, sizeof(sbuf))); /* not supported, confirm error related to PERF_FLAG_FD_CLOEXEC */ while (1) { @@ -76,7 +82,7 @@ static int perf_flag_probe(void) if (WARN_ONCE(fd < 0 && err != EBUSY, "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n", - err, strerror_r(err, sbuf, sizeof(sbuf)))) + err, str_error_r(err, sbuf, sizeof(sbuf)))) return -1; return 0; diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 43e84aa27e4a..dbbf89b050a5 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -1,7 +1,11 @@ #include <linux/kernel.h> #include "cache.h" +#include "config.h" +#include <stdlib.h> +#include <stdio.h> #include "color.h" #include <math.h> +#include <unistd.h> int perf_use_color_default = -1; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 4e727635476e..18dae745034f 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -13,6 +13,7 @@ #include <subcmd/exec-cmd.h> #include "util/hist.h" /* perf_hist_config */ #include "util/llvm-utils.h" /* perf_llvm_config */ +#include "config.h" #define MAXNAME (256) @@ -25,6 +26,7 @@ static FILE *config_file; static const char *config_file_name; static int config_linenr; static int config_file_eof; +static struct perf_config_set *config_set; const char *config_exclusive_filename; @@ -274,7 +276,8 @@ static int perf_parse_file(config_fn_t fn, void *data) break; } } - die("bad config file line %d in %s", config_linenr, config_file_name); + pr_err("bad config file line %d in %s\n", config_linenr, config_file_name); + return -1; } static int parse_unit_factor(const char *end, unsigned long *val) @@ -370,13 +373,28 @@ int perf_config_bool(const char *name, const char *value) return !!perf_config_bool_or_int(name, value, &discard); } -const char *perf_config_dirname(const char *name, const char *value) +static const char *perf_config_dirname(const char *name, const char *value) { if (!name) return NULL; return value; } +static int perf_buildid_config(const char *var, const char *value) +{ + /* same dir for all commands */ + if (!strcmp(var, "buildid.dir")) { + const char *dir = perf_config_dirname(var, value); + + if (!dir) + return -1; + strncpy(buildid_dir, dir, MAXPATHLEN-1); + buildid_dir[MAXPATHLEN-1] = '\0'; + } + + return 0; +} + static int perf_default_core_config(const char *var __maybe_unused, const char *value __maybe_unused) { @@ -412,6 +430,9 @@ int perf_default_config(const char *var, const char *value, if (!prefixcmp(var, "llvm.")) return perf_llvm_config(var, value); + if (!prefixcmp(var, "buildid.")) + return perf_buildid_config(var, value); + /* Add other config variables here. */ return 0; } @@ -458,18 +479,140 @@ static int perf_config_global(void) return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0); } -int perf_config(config_fn_t fn, void *data) +static struct perf_config_section *find_section(struct list_head *sections, + const char *section_name) +{ + struct perf_config_section *section; + + list_for_each_entry(section, sections, node) + if (!strcmp(section->name, section_name)) + return section; + + return NULL; +} + +static struct perf_config_item *find_config_item(const char *name, + struct perf_config_section *section) +{ + struct perf_config_item *item; + + list_for_each_entry(item, §ion->items, node) + if (!strcmp(item->name, name)) + return item; + + return NULL; +} + +static struct perf_config_section *add_section(struct list_head *sections, + const char *section_name) +{ + struct perf_config_section *section = zalloc(sizeof(*section)); + + if (!section) + return NULL; + + INIT_LIST_HEAD(§ion->items); + section->name = strdup(section_name); + if (!section->name) { + pr_debug("%s: strdup failed\n", __func__); + free(section); + return NULL; + } + + list_add_tail(§ion->node, sections); + return section; +} + +static struct perf_config_item *add_config_item(struct perf_config_section *section, + const char *name) +{ + struct perf_config_item *item = zalloc(sizeof(*item)); + + if (!item) + return NULL; + + item->name = strdup(name); + if (!item->name) { + pr_debug("%s: strdup failed\n", __func__); + free(item); + return NULL; + } + + list_add_tail(&item->node, §ion->items); + return item; +} + +static int set_value(struct perf_config_item *item, const char *value) +{ + char *val = strdup(value); + + if (!val) + return -1; + + zfree(&item->value); + item->value = val; + return 0; +} + +static int collect_config(const char *var, const char *value, + void *perf_config_set) +{ + int ret = -1; + char *ptr, *key; + char *section_name, *name; + struct perf_config_section *section = NULL; + struct perf_config_item *item = NULL; + struct perf_config_set *set = perf_config_set; + struct list_head *sections; + + if (set == NULL) + return -1; + + sections = &set->sections; + key = ptr = strdup(var); + if (!key) { + pr_debug("%s: strdup failed\n", __func__); + return -1; + } + + section_name = strsep(&ptr, "."); + name = ptr; + if (name == NULL || value == NULL) + goto out_free; + + section = find_section(sections, section_name); + if (!section) { + section = add_section(sections, section_name); + if (!section) + goto out_free; + } + + item = find_config_item(name, section); + if (!item) { + item = add_config_item(section, name); + if (!item) + goto out_free; + } + + ret = set_value(item, value); + return ret; + +out_free: + free(key); + return -1; +} + +static int perf_config_set__init(struct perf_config_set *set) { - int ret = 0, found = 0; + int ret = -1; const char *home = NULL; /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ if (config_exclusive_filename) - return perf_config_from_file(fn, config_exclusive_filename, data); + return perf_config_from_file(collect_config, config_exclusive_filename, set); if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { - ret += perf_config_from_file(fn, perf_etc_perfconfig(), - data); - found += 1; + if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0) + goto out; } home = getenv("HOME"); @@ -495,52 +638,126 @@ int perf_config(config_fn_t fn, void *data) if (!st.st_size) goto out_free; - ret += perf_config_from_file(fn, user_config, data); - found += 1; + ret = perf_config_from_file(collect_config, user_config, set); + out_free: free(user_config); } out: - if (found == 0) + return ret; +} + +struct perf_config_set *perf_config_set__new(void) +{ + struct perf_config_set *set = zalloc(sizeof(*set)); + + if (set) { + INIT_LIST_HEAD(&set->sections); + if (perf_config_set__init(set) < 0) { + perf_config_set__delete(set); + set = NULL; + } + } + + return set; +} + +int perf_config(config_fn_t fn, void *data) +{ + int ret = 0; + char key[BUFSIZ]; + struct perf_config_section *section; + struct perf_config_item *item; + + if (config_set == NULL) return -1; + + perf_config_set__for_each_entry(config_set, section, item) { + char *value = item->value; + + if (value) { + scnprintf(key, sizeof(key), "%s.%s", + section->name, item->name); + ret = fn(key, value, data); + if (ret < 0) { + pr_err("Error: wrong config key-value pair %s=%s\n", + key, value); + break; + } + } + } + return ret; } -/* - * Call this to report error for your variable that should not - * get a boolean value (i.e. "[my] var" means "true"). - */ -int config_error_nonbool(const char *var) +void perf_config__init(void) { - return error("Missing value for '%s'", var); + if (config_set == NULL) + config_set = perf_config_set__new(); } -struct buildid_dir_config { - char *dir; -}; +void perf_config__exit(void) +{ + perf_config_set__delete(config_set); + config_set = NULL; +} -static int buildid_dir_command_config(const char *var, const char *value, - void *data) +void perf_config__refresh(void) { - struct buildid_dir_config *c = data; - const char *v; + perf_config__exit(); + perf_config__init(); +} - /* same dir for all commands */ - if (!strcmp(var, "buildid.dir")) { - v = perf_config_dirname(var, value); - if (!v) - return -1; - strncpy(c->dir, v, MAXPATHLEN-1); - c->dir[MAXPATHLEN-1] = '\0'; +static void perf_config_item__delete(struct perf_config_item *item) +{ + zfree(&item->name); + zfree(&item->value); + free(item); +} + +static void perf_config_section__purge(struct perf_config_section *section) +{ + struct perf_config_item *item, *tmp; + + list_for_each_entry_safe(item, tmp, §ion->items, node) { + list_del_init(&item->node); + perf_config_item__delete(item); } - return 0; } -static void check_buildid_dir_config(void) +static void perf_config_section__delete(struct perf_config_section *section) +{ + perf_config_section__purge(section); + zfree(§ion->name); + free(section); +} + +static void perf_config_set__purge(struct perf_config_set *set) { - struct buildid_dir_config c; - c.dir = buildid_dir; - perf_config(buildid_dir_command_config, &c); + struct perf_config_section *section, *tmp; + + list_for_each_entry_safe(section, tmp, &set->sections, node) { + list_del_init(§ion->node); + perf_config_section__delete(section); + } +} + +void perf_config_set__delete(struct perf_config_set *set) +{ + if (set == NULL) + return; + + perf_config_set__purge(set); + free(set); +} + +/* + * Call this to report error for your variable that should not + * get a boolean value (i.e. "[my] var" means "true"). + */ +int config_error_nonbool(const char *var) +{ + return error("Missing value for '%s'", var); } void set_buildid_dir(const char *dir) @@ -548,16 +765,13 @@ void set_buildid_dir(const char *dir) if (dir) scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir); - /* try config file */ - if (buildid_dir[0] == '\0') - check_buildid_dir_config(); - /* default to $HOME/.debug */ if (buildid_dir[0] == '\0') { - char *v = getenv("HOME"); - if (v) { + char *home = getenv("HOME"); + + if (home) { snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s", - v, DEBUG_CACHE_DIR); + home, DEBUG_CACHE_DIR); } else { strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1); } diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h new file mode 100644 index 000000000000..6f813d46045e --- /dev/null +++ b/tools/perf/util/config.h @@ -0,0 +1,66 @@ +#ifndef __PERF_CONFIG_H +#define __PERF_CONFIG_H + +#include <stdbool.h> +#include <linux/list.h> + +struct perf_config_item { + char *name; + char *value; + struct list_head node; +}; + +struct perf_config_section { + char *name; + struct list_head items; + struct list_head node; +}; + +struct perf_config_set { + struct list_head sections; +}; + +extern const char *config_exclusive_filename; + +typedef int (*config_fn_t)(const char *, const char *, void *); +int perf_default_config(const char *, const char *, void *); +int perf_config(config_fn_t fn, void *); +int perf_config_int(const char *, const char *); +u64 perf_config_u64(const char *, const char *); +int perf_config_bool(const char *, const char *); +int config_error_nonbool(const char *); +const char *perf_etc_perfconfig(void); + +struct perf_config_set *perf_config_set__new(void); +void perf_config_set__delete(struct perf_config_set *set); +void perf_config__init(void); +void perf_config__exit(void); +void perf_config__refresh(void); + +/** + * perf_config_sections__for_each - iterate thru all the sections + * @list: list_head instance to iterate + * @section: struct perf_config_section iterator + */ +#define perf_config_sections__for_each_entry(list, section) \ + list_for_each_entry(section, list, node) + +/** + * perf_config_items__for_each - iterate thru all the items + * @list: list_head instance to iterate + * @item: struct perf_config_item iterator + */ +#define perf_config_items__for_each_entry(list, item) \ + list_for_each_entry(item, list, node) + +/** + * perf_config_set__for_each - iterate thru all the config section-item pairs + * @set: evlist instance to iterate + * @section: struct perf_config_section iterator + * @item: struct perf_config_item iterator + */ +#define perf_config_set__for_each_entry(set, section, item) \ + perf_config_sections__for_each_entry(&set->sections, section) \ + perf_config_items__for_each_entry(§ion->items, item) + +#endif /* __PERF_CONFIG_H */ diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 9bcf2bed3a6d..2c0b52264a46 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -236,13 +236,12 @@ struct cpu_map *cpu_map__new_data(struct cpu_map_data *data) size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp) { - int i; - size_t printed = fprintf(fp, "%d cpu%s: ", - map->nr, map->nr > 1 ? "s" : ""); - for (i = 0; i < map->nr; ++i) - printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]); +#define BUFSIZE 1024 + char buf[BUFSIZE]; - return printed + fprintf(fp, "\n"); + cpu_map__snprint(map, buf, sizeof(buf)); + return fprintf(fp, "%s\n", buf); +#undef BUFSIZE } struct cpu_map *cpu_map__dummy_new(void) @@ -587,3 +586,68 @@ int cpu__setup_cpunode_map(void) closedir(dir1); return 0; } + +bool cpu_map__has(struct cpu_map *cpus, int cpu) +{ + return cpu_map__idx(cpus, cpu) != -1; +} + +int cpu_map__idx(struct cpu_map *cpus, int cpu) +{ + int i; + + for (i = 0; i < cpus->nr; ++i) { + if (cpus->map[i] == cpu) + return i; + } + + return -1; +} + +int cpu_map__cpu(struct cpu_map *cpus, int idx) +{ + return cpus->map[idx]; +} + +size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size) +{ + int i, cpu, start = -1; + bool first = true; + size_t ret = 0; + +#define COMMA first ? "" : "," + + for (i = 0; i < map->nr + 1; i++) { + bool last = i == map->nr; + + cpu = last ? INT_MAX : map->map[i]; + + if (start == -1) { + start = i; + if (last) { + ret += snprintf(buf + ret, size - ret, + "%s%d", COMMA, + map->map[i]); + } + } else if (((i - start) != (cpu - map->map[start])) || last) { + int end = i - 1; + + if (start == end) { + ret += snprintf(buf + ret, size - ret, + "%s%d", COMMA, + map->map[start]); + } else { + ret += snprintf(buf + ret, size - ret, + "%s%d-%d", COMMA, + map->map[start], map->map[end]); + } + first = false; + start = i; + } + } + +#undef COMMA + + pr_debug("cpumask list: %s\n", buf); + return ret; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 81a2562aaa2b..06bd689f5989 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -19,6 +19,7 @@ struct cpu_map *cpu_map__empty_new(int nr); struct cpu_map *cpu_map__dummy_new(void); struct cpu_map *cpu_map__new_data(struct cpu_map_data *data); struct cpu_map *cpu_map__read(FILE *file); +size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); @@ -66,4 +67,8 @@ int cpu__get_node(int cpu); int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, int (*f)(struct cpu_map *map, int cpu, void *data), void *data); + +int cpu_map__cpu(struct cpu_map *cpus, int idx); +bool cpu_map__has(struct cpu_map *cpus, int cpu); +int cpu_map__idx(struct cpu_map *cpus, int cpu); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index bbf69d248ec5..4f979bb27b6c 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -26,6 +26,7 @@ #include "evlist.h" #include "evsel.h" #include "machine.h" +#include "config.h" #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) @@ -68,6 +69,9 @@ struct ctf_writer { }; struct bt_ctf_field_type *array[6]; } data; + struct bt_ctf_event_class *comm_class; + struct bt_ctf_event_class *exit_class; + struct bt_ctf_event_class *fork_class; }; struct convert { @@ -76,6 +80,7 @@ struct convert { u64 events_size; u64 events_count; + u64 non_sample_count; /* Ordered events configured queue size. */ u64 queue_size; @@ -140,6 +145,36 @@ FUNC_VALUE_SET(s64) FUNC_VALUE_SET(u64) __FUNC_VALUE_SET(u64_hex, u64) +static int string_set_value(struct bt_ctf_field *field, const char *string); +static __maybe_unused int +value_set_string(struct ctf_writer *cw, struct bt_ctf_event *event, + const char *name, const char *string) +{ + struct bt_ctf_field_type *type = cw->data.string; + struct bt_ctf_field *field; + int ret = 0; + + field = bt_ctf_field_create(type); + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + ret = string_set_value(field, string); + if (ret) { + pr_err("failed to set value %s\n", name); + goto err_put_field; + } + + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) + pr_err("failed to set payload %s\n", name); + +err_put_field: + bt_ctf_field_put(field); + return ret; +} + static struct bt_ctf_field_type* get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field) { @@ -204,6 +239,44 @@ static unsigned long long adjust_signedness(unsigned long long value_int, int si return (value_int & value_mask) | ~value_mask; } +static int string_set_value(struct bt_ctf_field *field, const char *string) +{ + char *buffer = NULL; + size_t len = strlen(string), i, p; + int err; + + for (i = p = 0; i < len; i++, p++) { + if (isprint(string[i])) { + if (!buffer) + continue; + buffer[p] = string[i]; + } else { + char numstr[5]; + + snprintf(numstr, sizeof(numstr), "\\x%02x", + (unsigned int)(string[i]) & 0xff); + + if (!buffer) { + buffer = zalloc(i + (len - i) * 4 + 2); + if (!buffer) { + pr_err("failed to set unprintable string '%s'\n", string); + return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING"); + } + if (i > 0) + strncpy(buffer, string, i); + } + strncat(buffer + p, numstr, 4); + p += 3; + } + } + + if (!buffer) + return bt_ctf_field_string_set_value(field, string); + err = bt_ctf_field_string_set_value(field, buffer); + free(buffer); + return err; +} + static int add_tracepoint_field_value(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, @@ -270,8 +343,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, } if (flags & FIELD_IS_STRING) - ret = bt_ctf_field_string_set_value(field, - data + offset + i * len); + ret = string_set_value(field, data + offset + i * len); else { unsigned long long value_int; @@ -694,6 +766,72 @@ static int process_sample_event(struct perf_tool *tool, return cs ? 0 : -1; } +#define __NON_SAMPLE_SET_FIELD(_name, _type, _field) \ +do { \ + ret = value_set_##_type(cw, event, #_field, _event->_name._field);\ + if (ret) \ + return -1; \ +} while(0) + +#define __FUNC_PROCESS_NON_SAMPLE(_name, body) \ +static int process_##_name##_event(struct perf_tool *tool, \ + union perf_event *_event, \ + struct perf_sample *sample, \ + struct machine *machine) \ +{ \ + struct convert *c = container_of(tool, struct convert, tool);\ + struct ctf_writer *cw = &c->writer; \ + struct bt_ctf_event_class *event_class = cw->_name##_class;\ + struct bt_ctf_event *event; \ + struct ctf_stream *cs; \ + int ret; \ + \ + c->non_sample_count++; \ + c->events_size += _event->header.size; \ + event = bt_ctf_event_create(event_class); \ + if (!event) { \ + pr_err("Failed to create an CTF event\n"); \ + return -1; \ + } \ + \ + bt_ctf_clock_set_time(cw->clock, sample->time); \ + body \ + cs = ctf_stream(cw, 0); \ + if (cs) { \ + if (is_flush_needed(cs)) \ + ctf_stream__flush(cs); \ + \ + cs->count++; \ + bt_ctf_stream_append_event(cs->stream, event); \ + } \ + bt_ctf_event_put(event); \ + \ + return perf_event__process_##_name(tool, _event, sample, machine);\ +} + +__FUNC_PROCESS_NON_SAMPLE(comm, + __NON_SAMPLE_SET_FIELD(comm, u32, pid); + __NON_SAMPLE_SET_FIELD(comm, u32, tid); + __NON_SAMPLE_SET_FIELD(comm, string, comm); +) +__FUNC_PROCESS_NON_SAMPLE(fork, + __NON_SAMPLE_SET_FIELD(fork, u32, pid); + __NON_SAMPLE_SET_FIELD(fork, u32, ppid); + __NON_SAMPLE_SET_FIELD(fork, u32, tid); + __NON_SAMPLE_SET_FIELD(fork, u32, ptid); + __NON_SAMPLE_SET_FIELD(fork, u64, time); +) + +__FUNC_PROCESS_NON_SAMPLE(exit, + __NON_SAMPLE_SET_FIELD(fork, u32, pid); + __NON_SAMPLE_SET_FIELD(fork, u32, ppid); + __NON_SAMPLE_SET_FIELD(fork, u32, tid); + __NON_SAMPLE_SET_FIELD(fork, u32, ptid); + __NON_SAMPLE_SET_FIELD(fork, u64, time); +) +#undef __NON_SAMPLE_SET_FIELD +#undef __FUNC_PROCESS_NON_SAMPLE + /* If dup < 0, add a prefix. Else, add _dupl_X suffix. */ static char *change_name(char *name, char *orig_name, int dup) { @@ -960,7 +1098,7 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session) struct perf_evsel *evsel; int ret; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { ret = add_event(cw, evsel); if (ret) return ret; @@ -968,12 +1106,86 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session) return 0; } +#define __NON_SAMPLE_ADD_FIELD(t, n) \ + do { \ + pr2(" field '%s'\n", #n); \ + if (bt_ctf_event_class_add_field(event_class, cw->data.t, #n)) {\ + pr_err("Failed to add field '%s';\n", #n);\ + return -1; \ + } \ + } while(0) + +#define __FUNC_ADD_NON_SAMPLE_EVENT_CLASS(_name, body) \ +static int add_##_name##_event(struct ctf_writer *cw) \ +{ \ + struct bt_ctf_event_class *event_class; \ + int ret; \ + \ + pr("Adding "#_name" event\n"); \ + event_class = bt_ctf_event_class_create("perf_" #_name);\ + if (!event_class) \ + return -1; \ + body \ + \ + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);\ + if (ret) { \ + pr("Failed to add event class '"#_name"' into stream.\n");\ + return ret; \ + } \ + \ + cw->_name##_class = event_class; \ + bt_ctf_event_class_put(event_class); \ + return 0; \ +} + +__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(comm, + __NON_SAMPLE_ADD_FIELD(u32, pid); + __NON_SAMPLE_ADD_FIELD(u32, tid); + __NON_SAMPLE_ADD_FIELD(string, comm); +) + +__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(fork, + __NON_SAMPLE_ADD_FIELD(u32, pid); + __NON_SAMPLE_ADD_FIELD(u32, ppid); + __NON_SAMPLE_ADD_FIELD(u32, tid); + __NON_SAMPLE_ADD_FIELD(u32, ptid); + __NON_SAMPLE_ADD_FIELD(u64, time); +) + +__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(exit, + __NON_SAMPLE_ADD_FIELD(u32, pid); + __NON_SAMPLE_ADD_FIELD(u32, ppid); + __NON_SAMPLE_ADD_FIELD(u32, tid); + __NON_SAMPLE_ADD_FIELD(u32, ptid); + __NON_SAMPLE_ADD_FIELD(u64, time); +) + +#undef __NON_SAMPLE_ADD_FIELD +#undef __FUNC_ADD_NON_SAMPLE_EVENT_CLASS + +static int setup_non_sample_events(struct ctf_writer *cw, + struct perf_session *session __maybe_unused) +{ + int ret; + + ret = add_comm_event(cw); + if (ret) + return ret; + ret = add_exit_event(cw); + if (ret) + return ret; + ret = add_fork_event(cw); + if (ret) + return ret; + return 0; +} + static void cleanup_events(struct perf_session *session) { struct perf_evlist *evlist = session->evlist; struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { struct evsel_priv *priv; priv = evsel->priv; @@ -1236,13 +1448,14 @@ static int convert__config(const char *var, const char *value, void *cb) return 0; } -int bt_convert__perf2ctf(const char *input, const char *path, bool force) +int bt_convert__perf2ctf(const char *input, const char *path, + struct perf_data_convert_opts *opts) { struct perf_session *session; struct perf_data_file file = { .path = input, .mode = PERF_DATA_MODE_READ, - .force = force, + .force = opts->force, }; struct convert c = { .tool = { @@ -1262,6 +1475,12 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force) struct ctf_writer *cw = &c.writer; int err = -1; + if (opts->all) { + c.tool.comm = process_comm_event; + c.tool.exit = process_exit_event; + c.tool.fork = process_fork_event; + } + perf_config(convert__config, &c); /* CTF writer */ @@ -1286,6 +1505,9 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force) if (setup_events(cw, session)) goto free_session; + if (opts->all && setup_non_sample_events(cw, session)) + goto free_session; + if (setup_streams(cw, session)) goto free_session; @@ -1300,10 +1522,15 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force) file.path, path); fprintf(stderr, - "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n", + "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", (double) c.events_size / 1024.0 / 1024.0, c.events_count); + if (!c.non_sample_count) + fprintf(stderr, ") ]\n"); + else + fprintf(stderr, ", %" PRIu64 " non-samples) ]\n", c.non_sample_count); + cleanup_events(session); perf_session__delete(session); ctf_writer__cleanup(cw); diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h index 4c204342a9d8..9a3b587f76c1 100644 --- a/tools/perf/util/data-convert-bt.h +++ b/tools/perf/util/data-convert-bt.h @@ -1,8 +1,10 @@ #ifndef __DATA_CONVERT_BT_H #define __DATA_CONVERT_BT_H +#include "data-convert.h" #ifdef HAVE_LIBBABELTRACE_SUPPORT -int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, bool force); +int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, + struct perf_data_convert_opts *opts); #endif /* HAVE_LIBBABELTRACE_SUPPORT */ #endif /* __DATA_CONVERT_BT_H */ diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h new file mode 100644 index 000000000000..5314962fe95b --- /dev/null +++ b/tools/perf/util/data-convert.h @@ -0,0 +1,9 @@ +#ifndef __DATA_CONVERT_H +#define __DATA_CONVERT_H + +struct perf_data_convert_opts { + bool force; + bool all; +}; + +#endif /* __DATA_CONVERT_H */ diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 1921942fc2e0..60bfc9ca1e22 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -57,7 +57,7 @@ static int open_file_read(struct perf_data_file *file) int err = errno; pr_err("failed to open %s: %s", file->path, - strerror_r(err, sbuf, sizeof(sbuf))); + str_error_r(err, sbuf, sizeof(sbuf))); if (err == ENOENT && !strcmp(file->path, "perf.data")) pr_err(" (try 'perf record' first)"); pr_err("\n"); @@ -99,7 +99,7 @@ static int open_file_write(struct perf_data_file *file) if (fd < 0) pr_err("failed to open %s : %s\n", file->path, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); return fd; } @@ -136,3 +136,44 @@ ssize_t perf_data_file__write(struct perf_data_file *file, { return writen(file->fd, buf, size); } + +int perf_data_file__switch(struct perf_data_file *file, + const char *postfix, + size_t pos, bool at_exit) +{ + char *new_filepath; + int ret; + + if (check_pipe(file)) + return -EINVAL; + if (perf_data_file__is_read(file)) + return -EINVAL; + + if (asprintf(&new_filepath, "%s.%s", file->path, postfix) < 0) + return -ENOMEM; + + /* + * Only fire a warning, don't return error, continue fill + * original file. + */ + if (rename(file->path, new_filepath)) + pr_warning("Failed to rename %s to %s\n", file->path, new_filepath); + + if (!at_exit) { + close(file->fd); + ret = perf_data_file__open(file); + if (ret < 0) + goto out; + + if (lseek(file->fd, pos, SEEK_SET) == (off_t)-1) { + ret = -errno; + pr_debug("Failed to lseek to %zu: %s", + pos, strerror(errno)); + goto out; + } + } + ret = file->fd; +out: + free(new_filepath); + return ret; +} diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 2b15d0c95c7f..ae510ce16cb1 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -46,5 +46,14 @@ int perf_data_file__open(struct perf_data_file *file); void perf_data_file__close(struct perf_data_file *file); ssize_t perf_data_file__write(struct perf_data_file *file, void *buf, size_t size); - +/* + * If at_exit is set, only rename current perf.data to + * perf.data.<postfix>, continue write on original file. + * Set at_exit when flushing the last output. + * + * Return value is fd of new output. + */ +int perf_data_file__switch(struct perf_data_file *file, + const char *postfix, + size_t pos, bool at_exit); #endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 049438d51b9a..b0c2b5c5d337 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -23,6 +23,8 @@ #include "event.h" #include "util.h" #include "thread-stack.h" +#include "callchain.h" +#include "call-path.h" #include "db-export.h" struct deferred_export { @@ -231,17 +233,6 @@ int db_export__symbol(struct db_export *dbe, struct symbol *sym, return 0; } -static struct thread *get_main_thread(struct machine *machine, struct thread *thread) -{ - if (thread->pid_ == thread->tid) - return thread__get(thread); - - if (thread->pid_ == -1) - return NULL; - - return machine__find_thread(machine, thread->pid_, thread->pid_); -} - static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, u64 *dso_db_id, u64 *sym_db_id, u64 *offset) { @@ -258,8 +249,7 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, if (!al->sym) { al->sym = symbol__new(al->addr, 0, 0, "unknown"); if (al->sym) - symbols__insert(&dso->symbols[al->map->type], - al->sym); + dso__insert_symbol(dso, al->map->type, al->sym); } if (al->sym) { @@ -276,6 +266,79 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, return 0; } +static struct call_path *call_path_from_sample(struct db_export *dbe, + struct machine *machine, + struct thread *thread, + struct perf_sample *sample, + struct perf_evsel *evsel) +{ + u64 kernel_start = machine__kernel_start(machine); + struct call_path *current = &dbe->cpr->call_path; + enum chain_order saved_order = callchain_param.order; + int err; + + if (!symbol_conf.use_callchain || !sample->callchain) + return NULL; + + /* + * Since the call path tree must be built starting with the root, we + * must use ORDER_CALL for call chain resolution, in order to process + * the callchain starting with the root node and ending with the leaf. + */ + callchain_param.order = ORDER_CALLER; + err = thread__resolve_callchain(thread, &callchain_cursor, evsel, + sample, NULL, NULL, PERF_MAX_STACK_DEPTH); + if (err) { + callchain_param.order = saved_order; + return NULL; + } + callchain_cursor_commit(&callchain_cursor); + + while (1) { + struct callchain_cursor_node *node; + struct addr_location al; + u64 dso_db_id = 0, sym_db_id = 0, offset = 0; + + memset(&al, 0, sizeof(al)); + + node = callchain_cursor_current(&callchain_cursor); + if (!node) + break; + /* + * Handle export of symbol and dso for this node by + * constructing an addr_location struct and then passing it to + * db_ids_from_al() to perform the export. + */ + al.sym = node->sym; + al.map = node->map; + al.machine = machine; + al.addr = node->ip; + + if (al.map && !al.sym) + al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION, + al.addr); + + db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset); + + /* add node to the call path tree if it doesn't exist */ + current = call_path__findnew(dbe->cpr, current, + al.sym, node->ip, + kernel_start); + + callchain_cursor_advance(&callchain_cursor); + } + + /* Reset the callchain order to its prior value. */ + callchain_param.order = saved_order; + + if (current == &dbe->cpr->call_path) { + /* Bail because the callchain was empty. */ + return NULL; + } + + return current; +} + int db_export__branch_type(struct db_export *dbe, u32 branch_type, const char *name) { @@ -308,7 +371,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, if (err) return err; - main_thread = get_main_thread(al->machine, thread); + main_thread = thread__main_thread(al->machine, thread); if (main_thread) comm = machine__thread_exec_comm(al->machine, main_thread); @@ -329,6 +392,16 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, if (err) goto out_put; + if (dbe->cpr) { + struct call_path *cp = call_path_from_sample(dbe, al->machine, + thread, sample, + evsel); + if (cp) { + db_export__call_path(dbe, cp); + es.call_path_id = cp->db_id; + } + } + if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && sample_addr_correlates_sym(&evsel->attr)) { struct addr_location addr_al; diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index 25e22fd76aca..67bc6b8ad2d6 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -27,6 +27,7 @@ struct dso; struct perf_sample; struct addr_location; struct call_return_processor; +struct call_path_root; struct call_path; struct call_return; @@ -43,6 +44,7 @@ struct export_sample { u64 addr_dso_db_id; u64 addr_sym_db_id; u64 addr_offset; /* addr offset from symbol start */ + u64 call_path_id; }; struct db_export { @@ -64,6 +66,7 @@ struct db_export { int (*export_call_return)(struct db_export *dbe, struct call_return *cr); struct call_return_processor *crp; + struct call_path_root *cpr; u64 evsel_last_db_id; u64 machine_last_db_id; u64 thread_last_db_id; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 14bafda79eda..d242adc3d5a2 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -38,7 +38,7 @@ extern int debug_data_convert; #define pr_oe_time(t, fmt, ...) pr_time_N(1, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__) #define pr_oe_time2(t, fmt, ...) pr_time_N(2, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__) -#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ +#define STRERR_BUFSIZE 128 /* For the buffer size of str_error_r */ int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); void trace_event(union perf_event *event); diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c new file mode 100644 index 000000000000..f9dafa888c06 --- /dev/null +++ b/tools/perf/util/demangle-rust.c @@ -0,0 +1,269 @@ +#include <string.h> +#include "util.h" +#include "debug.h" + +#include "demangle-rust.h" + +/* + * Mangled Rust symbols look like this: + * + * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a + * + * The original symbol is: + * + * <std::sys::fd::FileDesc as core::ops::Drop>::drop + * + * The last component of the path is a 64-bit hash in lowercase hex, prefixed + * with "h". Rust does not have a global namespace between crates, an illusion + * which Rust maintains by using the hash to distinguish things that would + * otherwise have the same symbol. + * + * Any path component not starting with a XID_Start character is prefixed with + * "_". + * + * The following escape sequences are used: + * + * "," => $C$ + * "@" => $SP$ + * "*" => $BP$ + * "&" => $RF$ + * "<" => $LT$ + * ">" => $GT$ + * "(" => $LP$ + * ")" => $RP$ + * " " => $u20$ + * "'" => $u27$ + * "[" => $u5b$ + * "]" => $u5d$ + * "~" => $u7e$ + * + * A double ".." means "::" and a single "." means "-". + * + * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$ + */ + +static const char *hash_prefix = "::h"; +static const size_t hash_prefix_len = 3; +static const size_t hash_len = 16; + +static bool is_prefixed_hash(const char *start); +static bool looks_like_rust(const char *sym, size_t len); +static bool unescape(const char **in, char **out, const char *seq, char value); + +/* + * INPUT: + * sym: symbol that has been through BFD-demangling + * + * This function looks for the following indicators: + * + * 1. The hash must consist of "h" followed by 16 lowercase hex digits. + * + * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible + * hex digits. This is true of 99.9998% of hashes so once in your life you + * may see a false negative. The point is to notice path components that + * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In + * this case a false positive (non-Rust symbol has an important path + * component removed because it looks like a Rust hash) is worse than a + * false negative (the rare Rust symbol is not demangled) so this sets the + * balance in favor of false negatives. + * + * 3. There must be no characters other than a-zA-Z0-9 and _.:$ + * + * 4. There must be no unrecognized $-sign sequences. + * + * 5. There must be no sequence of three or more dots in a row ("..."). + */ +bool +rust_is_mangled(const char *sym) +{ + size_t len, len_without_hash; + + if (!sym) + return false; + + len = strlen(sym); + if (len <= hash_prefix_len + hash_len) + /* Not long enough to contain "::h" + hash + something else */ + return false; + + len_without_hash = len - (hash_prefix_len + hash_len); + if (!is_prefixed_hash(sym + len_without_hash)) + return false; + + return looks_like_rust(sym, len_without_hash); +} + +/* + * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex + * digits must comprise between 5 and 15 (inclusive) distinct digits. + */ +static bool is_prefixed_hash(const char *str) +{ + const char *end; + bool seen[16]; + size_t i; + int count; + + if (strncmp(str, hash_prefix, hash_prefix_len)) + return false; + str += hash_prefix_len; + + memset(seen, false, sizeof(seen)); + for (end = str + hash_len; str < end; str++) + if (*str >= '0' && *str <= '9') + seen[*str - '0'] = true; + else if (*str >= 'a' && *str <= 'f') + seen[*str - 'a' + 10] = true; + else + return false; + + /* Count how many distinct digits seen */ + count = 0; + for (i = 0; i < 16; i++) + if (seen[i]) + count++; + + return count >= 5 && count <= 15; +} + +static bool looks_like_rust(const char *str, size_t len) +{ + const char *end = str + len; + + while (str < end) + switch (*str) { + case '$': + if (!strncmp(str, "$C$", 3)) + str += 3; + else if (!strncmp(str, "$SP$", 4) + || !strncmp(str, "$BP$", 4) + || !strncmp(str, "$RF$", 4) + || !strncmp(str, "$LT$", 4) + || !strncmp(str, "$GT$", 4) + || !strncmp(str, "$LP$", 4) + || !strncmp(str, "$RP$", 4)) + str += 4; + else if (!strncmp(str, "$u20$", 5) + || !strncmp(str, "$u27$", 5) + || !strncmp(str, "$u5b$", 5) + || !strncmp(str, "$u5d$", 5) + || !strncmp(str, "$u7e$", 5)) + str += 5; + else + return false; + break; + case '.': + /* Do not allow three or more consecutive dots */ + if (!strncmp(str, "...", 3)) + return false; + /* Fall through */ + case 'a' ... 'z': + case 'A' ... 'Z': + case '0' ... '9': + case '_': + case ':': + str++; + break; + default: + return false; + } + + return true; +} + +/* + * INPUT: + * sym: symbol for which rust_is_mangled(sym) returns true + * + * The input is demangled in-place because the mangled name is always longer + * than the demangled one. + */ +void +rust_demangle_sym(char *sym) +{ + const char *in; + char *out; + const char *end; + + if (!sym) + return; + + in = sym; + out = sym; + end = sym + strlen(sym) - (hash_prefix_len + hash_len); + + while (in < end) + switch (*in) { + case '$': + if (!(unescape(&in, &out, "$C$", ',') + || unescape(&in, &out, "$SP$", '@') + || unescape(&in, &out, "$BP$", '*') + || unescape(&in, &out, "$RF$", '&') + || unescape(&in, &out, "$LT$", '<') + || unescape(&in, &out, "$GT$", '>') + || unescape(&in, &out, "$LP$", '(') + || unescape(&in, &out, "$RP$", ')') + || unescape(&in, &out, "$u20$", ' ') + || unescape(&in, &out, "$u27$", '\'') + || unescape(&in, &out, "$u5b$", '[') + || unescape(&in, &out, "$u5d$", ']') + || unescape(&in, &out, "$u7e$", '~'))) { + pr_err("demangle-rust: unexpected escape sequence"); + goto done; + } + break; + case '_': + /* + * If this is the start of a path component and the next + * character is an escape sequence, ignore the + * underscore. The mangler inserts an underscore to make + * sure the path component begins with a XID_Start + * character. + */ + if ((in == sym || in[-1] == ':') && in[1] == '$') + in++; + else + *out++ = *in++; + break; + case '.': + if (in[1] == '.') { + /* ".." becomes "::" */ + *out++ = ':'; + *out++ = ':'; + in += 2; + } else { + /* "." becomes "-" */ + *out++ = '-'; + in++; + } + break; + case 'a' ... 'z': + case 'A' ... 'Z': + case '0' ... '9': + case ':': + *out++ = *in++; + break; + default: + pr_err("demangle-rust: unexpected character '%c' in symbol\n", + *in); + goto done; + } + +done: + *out = '\0'; +} + +static bool unescape(const char **in, char **out, const char *seq, char value) +{ + size_t len = strlen(seq); + + if (strncmp(*in, seq, len)) + return false; + + **out = value; + + *in += len; + *out += 1; + + return true; +} diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h new file mode 100644 index 000000000000..7b41ead7e0dd --- /dev/null +++ b/tools/perf/util/demangle-rust.h @@ -0,0 +1,7 @@ +#ifndef __PERF_DEMANGLE_RUST +#define __PERF_DEMANGLE_RUST 1 + +bool rust_is_mangled(const char *str); +void rust_demangle_sym(char *str); + +#endif /* __PERF_DEMANGLE_RUST */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 8e6395439ca0..774f6ec884d5 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -7,6 +7,7 @@ #include "auxtrace.h" #include "util.h" #include "debug.h" +#include "vdso.h" char dso__symtab_origin(const struct dso *dso) { @@ -38,7 +39,7 @@ int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size) { - char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + char build_id_hex[SBUILD_ID_SIZE]; int ret = 0; size_t len; @@ -62,9 +63,7 @@ int dso__read_binary_type_filename(const struct dso *dso, } break; case DSO_BINARY_TYPE__BUILD_ID_CACHE: - /* skip the locally configured cache if a symfs is given */ - if (symbol_conf.symfs[0] || - (dso__build_id_filename(dso, filename, size) == NULL)) + if (dso__build_id_filename(dso, filename, size) == NULL) ret = -1; break; @@ -336,7 +335,7 @@ static int do_open(char *name) return fd; pr_debug("dso open failed: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); if (!dso__data_open_cnt || errno != EMFILE) break; @@ -443,17 +442,27 @@ static rlim_t get_fd_limit(void) return limit; } -static bool may_cache_fd(void) +static rlim_t fd_limit; + +/* + * Used only by tests/dso-data.c to reset the environment + * for tests. I dont expect we should change this during + * standard runtime. + */ +void reset_fd_limit(void) { - static rlim_t limit; + fd_limit = 0; +} - if (!limit) - limit = get_fd_limit(); +static bool may_cache_fd(void) +{ + if (!fd_limit) + fd_limit = get_fd_limit(); - if (limit == RLIM_INFINITY) + if (fd_limit == RLIM_INFINITY) return true; - return limit > (rlim_t) dso__data_open_cnt; + return fd_limit > (rlim_t) dso__data_open_cnt; } /* @@ -777,7 +786,7 @@ static int data_file_size(struct dso *dso, struct machine *machine) if (fstat(dso->data.fd, &st) < 0) { ret = -errno; pr_err("dso cache fstat failed: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); dso->data.status = DSO_DATA_STATUS_ERROR; goto out; } @@ -1169,7 +1178,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) struct dso *pos; list_for_each_entry(pos, head, node) { - if (with_hits && !pos->hit) + if (with_hits && !pos->hit && !dso__is_vdso(pos)) continue; if (pos->has_build_id) { have_build_id = true; @@ -1301,7 +1310,7 @@ size_t __dsos__fprintf(struct list_head *head, FILE *fp) size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); return fprintf(fp, "%s", sbuild_id); @@ -1357,7 +1366,7 @@ int dso__strerror_load(struct dso *dso, char *buf, size_t buflen) BUG_ON(buflen == 0); if (errnum >= 0) { - const char *err = strerror_r(errnum, buf, buflen); + const char *err = str_error_r(errnum, buf, buflen); if (err != buf) scnprintf(buf, buflen, "%s", err); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 0953280629cf..ecc4bbd3f82e 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -4,6 +4,7 @@ #include <linux/atomic.h> #include <linux/types.h> #include <linux/rbtree.h> +#include <sys/types.h> #include <stdbool.h> #include <pthread.h> #include <linux/types.h> @@ -349,10 +350,17 @@ static inline bool dso__is_kcore(struct dso *dso) dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE; } +static inline bool dso__is_kallsyms(struct dso *dso) +{ + return dso->kernel && dso->long_name[0] != '/'; +} + void dso__free_a2l(struct dso *dso); enum dso_type dso__type(struct dso *dso, struct machine *machine); int dso__strerror_load(struct dso *dso, char *buf, size_t buflen); +void reset_fd_limit(void); + #endif /* __PERF_DSO */ diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index aea189b41cc8..a347b19c961a 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -915,8 +915,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) tmp = "*"; else if (tag == DW_TAG_subroutine_type) { /* Function pointer */ - strbuf_add(buf, "(function_type)", 15); - return 0; + return strbuf_add(buf, "(function_type)", 15); } else { if (!dwarf_diename(&type)) return -ENOENT; @@ -927,14 +926,10 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) else if (tag == DW_TAG_enumeration_type) tmp = "enum "; /* Write a base name */ - strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type)); - return 0; + return strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type)); } ret = die_get_typename(&type, buf); - if (ret == 0) - strbuf_addstr(buf, tmp); - - return ret; + return ret ? ret : strbuf_addstr(buf, tmp); } /** @@ -951,12 +946,10 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) ret = die_get_typename(vr_die, buf); if (ret < 0) { pr_debug("Failed to get type, make it unknown.\n"); - strbuf_add(buf, " (unknown_type)", 14); + ret = strbuf_add(buf, " (unknown_type)", 14); } - strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); - - return 0; + return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } #ifdef HAVE_DWARF_GETLOCATIONS @@ -999,22 +992,24 @@ static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die, } while ((offset = dwarf_ranges(&scopes[1], offset, &base, - &start, &end)) > 0) { + &start, &end)) > 0) { start -= entry; end -= entry; if (first) { - strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, - name, start, end); + ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, + name, start, end); first = false; } else { - strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, - start, end); + ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, + start, end); } + if (ret < 0) + goto out; } if (!first) - strbuf_add(buf, "]>", 2); + ret = strbuf_add(buf, "]>", 2); out: free(scopes); @@ -1054,31 +1049,32 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL) return -EINVAL; - while ((offset = dwarf_getlocations( - &attr, offset, &base, - &start, &end, &op, &nops)) > 0) { + while ((offset = dwarf_getlocations(&attr, offset, &base, + &start, &end, &op, &nops)) > 0) { if (start == 0) { /* Single Location Descriptions */ ret = die_get_var_innermost_scope(sp_die, vr_die, buf); - return ret; + goto out; } /* Location Lists */ start -= entry; end -= entry; if (first) { - strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, - name, start, end); + ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, + name, start, end); first = false; } else { - strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, - start, end); + ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, + start, end); } + if (ret < 0) + goto out; } if (!first) - strbuf_add(buf, "]>", 2); - + ret = strbuf_add(buf, "]>", 2); +out: return ret; } #else diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 49a11d9d8b8f..bb964e86b09d 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -18,10 +18,13 @@ void perf_env__exit(struct perf_env *env) zfree(&env->cmdline_argv); zfree(&env->sibling_cores); zfree(&env->sibling_threads); - zfree(&env->numa_nodes); zfree(&env->pmu_mappings); zfree(&env->cpu); + for (i = 0; i < env->nr_numa_nodes; i++) + cpu_map__put(env->numa_nodes[i].map); + zfree(&env->numa_nodes); + for (i = 0; i < env->caches_cnt; i++) cpu_cache_level__free(&env->caches[i]); zfree(&env->caches); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 56cffb60a0b4..b164dfd2dcbf 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -2,6 +2,7 @@ #define __PERF_ENV_H #include <linux/types.h> +#include "cpumap.h" struct cpu_topology_map { int socket_id; @@ -18,6 +19,13 @@ struct cpu_cache_level { char *map; }; +struct numa_node { + u32 node; + u64 mem_total; + u64 mem_free; + struct cpu_map *map; +}; + struct perf_env { char *hostname; char *os_release; @@ -40,11 +48,11 @@ struct perf_env { const char **cmdline_argv; char *sibling_cores; char *sibling_threads; - char *numa_nodes; char *pmu_mappings; struct cpu_topology_map *cpu; struct cpu_cache_level *caches; int caches_cnt; + struct numa_node *numa_nodes; }; extern struct perf_env perf_env; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index edcf4ed4e99c..e20438b784be 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -45,6 +45,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_STAT] = "STAT", [PERF_RECORD_STAT_ROUND] = "STAT_ROUND", [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE", + [PERF_RECORD_TIME_CONV] = "TIME_CONV", }; const char *perf_event__name(unsigned int id) @@ -672,6 +673,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, int err; union perf_event *event; + if (symbol_conf.kptr_restrict) + return -1; if (map == NULL) return -1; @@ -1089,7 +1092,7 @@ size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp) struct cpu_map *cpus = cpu_map__new_data(&event->cpu_map.data); size_t ret; - ret = fprintf(fp, " nr: "); + ret = fprintf(fp, ": "); if (cpus) ret += cpu_map__fprintf(cpus, fp); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 6bb1c928350d..8d363d5e65a2 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -233,6 +233,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_STAT = 76, PERF_RECORD_STAT_ROUND = 77, PERF_RECORD_EVENT_UPDATE = 78, + PERF_RECORD_TIME_CONV = 79, PERF_RECORD_HEADER_MAX }; @@ -469,6 +470,13 @@ struct stat_round_event { u64 time; }; +struct time_conv_event { + struct perf_event_header header; + u64 time_shift; + u64 time_mult; + u64 time_zero; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -497,6 +505,7 @@ union perf_event { struct stat_config_event stat_config; struct stat_event stat; struct stat_round_event stat_round; + struct time_conv_event time_conv; }; void perf_event__print_totals(void); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 86a03836a83f..097b3ed77fdd 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -15,6 +15,7 @@ #include "evlist.h" #include "evsel.h" #include "debug.h" +#include "asm/bug.h" #include <unistd.h> #include "parse-events.h" @@ -27,8 +28,8 @@ #include <linux/log2.h> #include <linux/err.h> -static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx); -static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); +static void perf_mmap__munmap(struct perf_mmap *map); +static void perf_mmap__put(struct perf_mmap *map); #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) @@ -44,6 +45,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, perf_evlist__set_maps(evlist, cpus, threads); fdarray__init(&evlist->pollfd, 64); evlist->workload.pid = -1; + evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; } struct perf_evlist *perf_evlist__new(void) @@ -99,7 +101,7 @@ static void perf_evlist__update_id_pos(struct perf_evlist *evlist) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) + evlist__for_each_entry(evlist, evsel) perf_evsel__calc_id_pos(evsel); perf_evlist__set_id_pos(evlist); @@ -109,7 +111,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) { struct perf_evsel *pos, *n; - evlist__for_each_safe(evlist, n, pos) { + evlist__for_each_entry_safe(evlist, n, pos) { list_del_init(&pos->node); pos->evlist = NULL; perf_evsel__delete(pos); @@ -121,11 +123,15 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { zfree(&evlist->mmap); + zfree(&evlist->backward_mmap); fdarray__exit(&evlist->pollfd); } void perf_evlist__delete(struct perf_evlist *evlist) { + if (evlist == NULL) + return; + perf_evlist__munmap(evlist); perf_evlist__close(evlist); cpu_map__put(evlist->cpus); @@ -160,7 +166,7 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) + evlist__for_each_entry(evlist, evsel) __perf_evlist__propagate_maps(evlist, evsel); } @@ -189,7 +195,7 @@ void perf_evlist__splice_list_tail(struct perf_evlist *evlist, { struct perf_evsel *evsel, *temp; - __evlist__for_each_safe(list, temp, evsel) { + __evlist__for_each_entry_safe(list, temp, evsel) { list_del_init(&evsel->node); perf_evlist__add(evlist, evsel); } @@ -204,7 +210,7 @@ void __perf_evlist__set_leader(struct list_head *list) leader->nr_members = evsel->idx - leader->idx + 1; - __evlist__for_each(list, evsel) { + __evlist__for_each_entry(list, evsel) { evsel->leader = leader; } } @@ -233,31 +239,13 @@ void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) int perf_evlist__add_default(struct perf_evlist *evlist) { - struct perf_event_attr attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - }; - struct perf_evsel *evsel; - - event_attr_init(&attr); + struct perf_evsel *evsel = perf_evsel__new_cycles(); - perf_event_attr__set_max_precise_ip(&attr); - - evsel = perf_evsel__new(&attr); if (evsel == NULL) - goto error; - - /* use asprintf() because free(evsel) assumes name is allocated */ - if (asprintf(&evsel->name, "cycles%.*s", - attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0) - goto error_free; + return -ENOMEM; perf_evlist__add(evlist, evsel); return 0; -error_free: - perf_evsel__delete(evsel); -error: - return -ENOMEM; } int perf_evlist__add_dummy(struct perf_evlist *evlist) @@ -295,7 +283,7 @@ static int perf_evlist__add_attrs(struct perf_evlist *evlist, return 0; out_delete_partial_list: - __evlist__for_each_safe(&head, n, evsel) + __evlist__for_each_entry_safe(&head, n, evsel) perf_evsel__delete(evsel); return -1; } @@ -316,7 +304,7 @@ perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type == PERF_TYPE_TRACEPOINT && (int)evsel->attr.config == id) return evsel; @@ -331,7 +319,7 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) && (strcmp(evsel->name, name) == 0)) return evsel; @@ -366,7 +354,7 @@ void perf_evlist__disable(struct perf_evlist *evlist) { struct perf_evsel *pos; - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { if (!perf_evsel__is_group_leader(pos) || !pos->fd) continue; perf_evsel__disable(pos); @@ -379,7 +367,7 @@ void perf_evlist__enable(struct perf_evlist *evlist) { struct perf_evsel *pos; - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { if (!perf_evsel__is_group_leader(pos) || !pos->fd) continue; perf_evsel__enable(pos); @@ -447,7 +435,7 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) int nfds = 0; struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->system_wide) nfds += nr_cpus; else @@ -461,15 +449,16 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) return 0; } -static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx) +static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, + struct perf_mmap *map, short revent) { - int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP); + int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); /* * Save the idx so that when we filter out fds POLLHUP'ed we can * close the associated evlist->mmap[] entry. */ if (pos >= 0) { - evlist->pollfd.priv[pos].idx = idx; + evlist->pollfd.priv[pos].ptr = map; fcntl(fd, F_SETFL, O_NONBLOCK); } @@ -479,20 +468,22 @@ static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) { - return __perf_evlist__add_pollfd(evlist, fd, -1); + return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); } -static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd) +static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, + void *arg __maybe_unused) { - struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd); + struct perf_mmap *map = fda->priv[fd].ptr; - perf_evlist__mmap_put(evlist, fda->priv[fd].idx); + if (map) + perf_mmap__put(map); } int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) { return fdarray__filter(&evlist->pollfd, revents_and_mask, - perf_evlist__munmap_filtered); + perf_evlist__munmap_filtered, NULL); } int perf_evlist__poll(struct perf_evlist *evlist, int timeout) @@ -646,8 +637,8 @@ static int perf_evlist__event2id(struct perf_evlist *evlist, return 0; } -static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, - union perf_event *event) +struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, + union perf_event *event) { struct perf_evsel *first = perf_evlist__first(evlist); struct hlist_head *head; @@ -679,53 +670,82 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, return NULL; } -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) +{ + int i; + + if (!evlist->backward_mmap) + return 0; + + for (i = 0; i < evlist->nr_mmaps; i++) { + int fd = evlist->backward_mmap[i].fd; + int err; + + if (fd < 0) + continue; + err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0); + if (err) + return err; + } + return 0; +} + +static int perf_evlist__pause(struct perf_evlist *evlist) +{ + return perf_evlist__set_paused(evlist, true); +} + +static int perf_evlist__resume(struct perf_evlist *evlist) +{ + return perf_evlist__set_paused(evlist, false); +} + +/* When check_messup is true, 'end' must points to a good entry */ +static union perf_event * +perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, + u64 end, u64 *prev) { - struct perf_mmap *md = &evlist->mmap[idx]; - u64 head; - u64 old = md->prev; unsigned char *data = md->base + page_size; union perf_event *event = NULL; + int diff = end - start; - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!atomic_read(&md->refcnt)) - return NULL; - - head = perf_mmap__read_head(md); - if (evlist->overwrite) { + if (check_messup) { /* * If we're further behind than half the buffer, there's a chance * the writer will bite our tail and mess up the samples under us. * - * If we somehow ended up ahead of the head, we got messed up. + * If we somehow ended up ahead of the 'end', we got messed up. * - * In either case, truncate and restart at head. + * In either case, truncate and restart at 'end'. */ - int diff = head - old; if (diff > md->mask / 2 || diff < 0) { fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); /* - * head points to a known good entry, start there. + * 'end' points to a known good entry, start there. */ - old = head; + start = end; + diff = 0; } } - if (old != head) { + if (diff >= (int)sizeof(event->header)) { size_t size; - event = (union perf_event *)&data[old & md->mask]; + event = (union perf_event *)&data[start & md->mask]; size = event->header.size; + if (size < sizeof(event->header) || diff < (int)size) { + event = NULL; + goto broken_event; + } + /* * Event straddles the mmap boundary -- header should always * be inside due to u64 alignment of output. */ - if ((old & md->mask) + size != ((old + size) & md->mask)) { - unsigned int offset = old; + if ((start & md->mask) + size != ((start + size) & md->mask)) { + unsigned int offset = start; unsigned int len = min(sizeof(*event), size), cpy; void *dst = md->event_copy; @@ -740,44 +760,148 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) event = (union perf_event *) md->event_copy; } - old += size; + start += size; } - md->prev = old; +broken_event: + if (prev) + *prev = start; return event; } -static bool perf_mmap__empty(struct perf_mmap *md) +union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup) { - return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; + u64 head; + u64 old = md->prev; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!atomic_read(&md->refcnt)) + return NULL; + + head = perf_mmap__read_head(md); + + return perf_mmap__read(md, check_messup, old, head, &md->prev); } -static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) +union perf_event * +perf_mmap__read_backward(struct perf_mmap *md) { - atomic_inc(&evlist->mmap[idx].refcnt); + u64 head, end; + u64 start = md->prev; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!atomic_read(&md->refcnt)) + return NULL; + + head = perf_mmap__read_head(md); + if (!head) + return NULL; + + /* + * 'head' pointer starts from 0. Kernel minus sizeof(record) form + * it each time when kernel writes to it, so in fact 'head' is + * negative. 'end' pointer is made manually by adding the size of + * the ring buffer to 'head' pointer, means the validate data can + * read is the whole ring buffer. If 'end' is positive, the ring + * buffer has not fully filled, so we must adjust 'end' to 0. + * + * However, since both 'head' and 'end' is unsigned, we can't + * simply compare 'end' against 0. Here we compare '-head' and + * the size of the ring buffer, where -head is the number of bytes + * kernel write to the ring buffer. + */ + if (-head < (u64)(md->mask + 1)) + end = 0; + else + end = head + md->mask + 1; + + return perf_mmap__read(md, false, start, end, &md->prev); } -static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx) +union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) { - BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0); + struct perf_mmap *md = &evlist->mmap[idx]; - if (atomic_dec_and_test(&evlist->mmap[idx].refcnt)) - __perf_evlist__munmap(evlist, idx); + /* + * Check messup is required for forward overwritable ring buffer: + * memory pointed by md->prev can be overwritten in this case. + * No need for read-write ring buffer: kernel stop outputting when + * it hit md->prev (perf_mmap__consume()). + */ + return perf_mmap__read_forward(md, evlist->overwrite); } -void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) +union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; - if (!evlist->overwrite) { + /* + * No need to check messup for backward ring buffer: + * We can always read arbitrary long data from a backward + * ring buffer unless we forget to pause it before reading. + */ + return perf_mmap__read_backward(md); +} + +union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +{ + return perf_evlist__mmap_read_forward(evlist, idx); +} + +void perf_mmap__read_catchup(struct perf_mmap *md) +{ + u64 head; + + if (!atomic_read(&md->refcnt)) + return; + + head = perf_mmap__read_head(md); + md->prev = head; +} + +void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) +{ + perf_mmap__read_catchup(&evlist->mmap[idx]); +} + +static bool perf_mmap__empty(struct perf_mmap *md) +{ + return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; +} + +static void perf_mmap__get(struct perf_mmap *map) +{ + atomic_inc(&map->refcnt); +} + +static void perf_mmap__put(struct perf_mmap *md) +{ + BUG_ON(md->base && atomic_read(&md->refcnt) == 0); + + if (atomic_dec_and_test(&md->refcnt)) + perf_mmap__munmap(md); +} + +void perf_mmap__consume(struct perf_mmap *md, bool overwrite) +{ + if (!overwrite) { u64 old = md->prev; perf_mmap__write_tail(md, old); } if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md)) - perf_evlist__mmap_put(evlist, idx); + perf_mmap__put(md); +} + +void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) +{ + perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); } int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, @@ -808,36 +932,52 @@ void __weak auxtrace_mmap_params__set_idx( { } -static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) +static void perf_mmap__munmap(struct perf_mmap *map) { - if (evlist->mmap[idx].base != NULL) { - munmap(evlist->mmap[idx].base, evlist->mmap_len); - evlist->mmap[idx].base = NULL; - atomic_set(&evlist->mmap[idx].refcnt, 0); + if (map->base != NULL) { + munmap(map->base, perf_mmap__mmap_len(map)); + map->base = NULL; + map->fd = -1; + atomic_set(&map->refcnt, 0); } - auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap); + auxtrace_mmap__munmap(&map->auxtrace_mmap); } -void perf_evlist__munmap(struct perf_evlist *evlist) +static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) { int i; - if (evlist->mmap == NULL) - return; + if (evlist->mmap) + for (i = 0; i < evlist->nr_mmaps; i++) + perf_mmap__munmap(&evlist->mmap[i]); - for (i = 0; i < evlist->nr_mmaps; i++) - __perf_evlist__munmap(evlist, i); + if (evlist->backward_mmap) + for (i = 0; i < evlist->nr_mmaps; i++) + perf_mmap__munmap(&evlist->backward_mmap[i]); +} +void perf_evlist__munmap(struct perf_evlist *evlist) +{ + perf_evlist__munmap_nofree(evlist); zfree(&evlist->mmap); + zfree(&evlist->backward_mmap); } -static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) +static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) { + int i; + struct perf_mmap *map; + evlist->nr_mmaps = cpu_map__nr(evlist->cpus); if (cpu_map__empty(evlist->cpus)) evlist->nr_mmaps = thread_map__nr(evlist->threads); - evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); - return evlist->mmap != NULL ? 0 : -ENOMEM; + map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); + if (!map) + return NULL; + + for (i = 0; i < evlist->nr_mmaps; i++) + map[i].fd = -1; + return map; } struct mmap_params { @@ -846,8 +986,8 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, - struct mmap_params *mp, int fd) +static int perf_mmap__mmap(struct perf_mmap *map, + struct mmap_params *mp, int fd) { /* * The last one will be done at perf_evlist__mmap_consume(), so that we @@ -862,34 +1002,61 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, * evlist layer can't just drop it when filtering events in * perf_evlist__filter_pollfd(). */ - atomic_set(&evlist->mmap[idx].refcnt, 2); - evlist->mmap[idx].prev = 0; - evlist->mmap[idx].mask = mp->mask; - evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot, - MAP_SHARED, fd, 0); - if (evlist->mmap[idx].base == MAP_FAILED) { + atomic_set(&map->refcnt, 2); + map->prev = 0; + map->mask = mp->mask; + map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + MAP_SHARED, fd, 0); + if (map->base == MAP_FAILED) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", errno); - evlist->mmap[idx].base = NULL; + map->base = NULL; return -1; } + map->fd = fd; - if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap, - &mp->auxtrace_mp, evlist->mmap[idx].base, fd)) + if (auxtrace_mmap__mmap(&map->auxtrace_mmap, + &mp->auxtrace_mp, map->base, fd)) return -1; return 0; } +static bool +perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, + struct perf_evsel *evsel) +{ + if (evsel->attr.write_backward) + return false; + return true; +} + static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int cpu, - int thread, int *output) + int thread, int *_output, int *_output_backward) { struct perf_evsel *evsel; + int revent; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { + struct perf_mmap *maps = evlist->mmap; + int *output = _output; int fd; + if (evsel->attr.write_backward) { + output = _output_backward; + maps = evlist->backward_mmap; + + if (!maps) { + maps = perf_evlist__alloc_mmap(evlist); + if (!maps) + return -1; + evlist->backward_mmap = maps; + if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); + } + } + if (evsel->system_wide && thread) continue; @@ -897,15 +1064,18 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, if (*output == -1) { *output = fd; - if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0) + + if (perf_mmap__mmap(&maps[idx], mp, *output) < 0) return -1; } else { if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) return -1; - perf_evlist__mmap_get(evlist, idx); + perf_mmap__get(&maps[idx]); } + revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; + /* * The system_wide flag causes a selected event to be opened * always without a pid. Consequently it will never get a @@ -914,8 +1084,8 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, * Therefore don't add it for polling. */ if (!evsel->system_wide && - __perf_evlist__add_pollfd(evlist, fd, idx) < 0) { - perf_evlist__mmap_put(evlist, idx); + __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { + perf_mmap__put(&maps[idx]); return -1; } @@ -941,13 +1111,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per cpu\n"); for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; + int output_backward = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, true); for (thread = 0; thread < nr_threads; thread++) { if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, - thread, &output)) + thread, &output, &output_backward)) goto out_unmap; } } @@ -955,8 +1126,7 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, return 0; out_unmap: - for (cpu = 0; cpu < nr_cpus; cpu++) - __perf_evlist__munmap(evlist, cpu); + perf_evlist__munmap_nofree(evlist); return -1; } @@ -969,43 +1139,51 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per thread\n"); for (thread = 0; thread < nr_threads; thread++) { int output = -1; + int output_backward = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, false); if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, - &output)) + &output, &output_backward)) goto out_unmap; } return 0; out_unmap: - for (thread = 0; thread < nr_threads; thread++) - __perf_evlist__munmap(evlist, thread); + perf_evlist__munmap_nofree(evlist); return -1; } -static size_t perf_evlist__mmap_size(unsigned long pages) +unsigned long perf_event_mlock_kb_in_pages(void) { - if (pages == UINT_MAX) { - int max; + unsigned long pages; + int max; - if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { - /* - * Pick a once upon a time good value, i.e. things look - * strange since we can't read a sysctl value, but lets not - * die yet... - */ - max = 512; - } else { - max -= (page_size / 1024); - } + if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { + /* + * Pick a once upon a time good value, i.e. things look + * strange since we can't read a sysctl value, but lets not + * die yet... + */ + max = 512; + } else { + max -= (page_size / 1024); + } + + pages = (max * 1024) / page_size; + if (!is_power_of_2(pages)) + pages = rounddown_pow_of_two(pages); - pages = (max * 1024) / page_size; - if (!is_power_of_2(pages)) - pages = rounddown_pow_of_two(pages); - } else if (!is_power_of_2(pages)) + return pages; +} + +static size_t perf_evlist__mmap_size(unsigned long pages) +{ + if (pages == UINT_MAX) + pages = perf_event_mlock_kb_in_pages(); + else if (!is_power_of_2(pages)) return 0; return (pages + 1) * page_size; @@ -1107,7 +1285,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), }; - if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) + if (!evlist->mmap) + evlist->mmap = perf_evlist__alloc_mmap(evlist); + if (!evlist->mmap) return -ENOMEM; if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) @@ -1121,7 +1301,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, auxtrace_pages, auxtrace_overwrite); - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) @@ -1192,6 +1372,24 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, perf_evlist__propagate_maps(evlist); } +void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, + enum perf_event_sample_format bit) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + __perf_evsel__set_sample_bit(evsel, bit); +} + +void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, + enum perf_event_sample_format bit) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + __perf_evsel__reset_sample_bit(evsel, bit); +} + int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) { struct perf_evsel *evsel; @@ -1199,7 +1397,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e const int ncpus = cpu_map__nr(evlist->cpus), nthreads = thread_map__nr(evlist->threads); - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->filter == NULL) continue; @@ -1222,7 +1420,7 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) struct perf_evsel *evsel; int err = 0; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type != PERF_TYPE_TRACEPOINT) continue; @@ -1276,7 +1474,7 @@ bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) if (evlist->id_pos < 0 || evlist->is_pos < 0) return false; - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { if (pos->id_pos != evlist->id_pos || pos->is_pos != evlist->is_pos) return false; @@ -1292,7 +1490,7 @@ u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist) if (evlist->combined_sample_type) return evlist->combined_sample_type; - evlist__for_each(evlist, evsel) + evlist__for_each_entry(evlist, evsel) evlist->combined_sample_type |= evsel->attr.sample_type; return evlist->combined_sample_type; @@ -1309,7 +1507,7 @@ u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) struct perf_evsel *evsel; u64 branch_type = 0; - evlist__for_each(evlist, evsel) + evlist__for_each_entry(evlist, evsel) branch_type |= evsel->attr.branch_sample_type; return branch_type; } @@ -1320,7 +1518,7 @@ bool perf_evlist__valid_read_format(struct perf_evlist *evlist) u64 read_format = first->attr.read_format; u64 sample_type = first->attr.sample_type; - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { if (read_format != pos->attr.read_format) return false; } @@ -1377,7 +1575,7 @@ bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) { struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; - evlist__for_each_continue(evlist, pos) { + evlist__for_each_entry_continue(evlist, pos) { if (first->attr.sample_id_all != pos->attr.sample_id_all) return false; } @@ -1404,7 +1602,7 @@ void perf_evlist__close(struct perf_evlist *evlist) int nthreads = thread_map__nr(evlist->threads); int n; - evlist__for_each_reverse(evlist, evsel) { + evlist__for_each_entry_reverse(evlist, evsel) { n = evsel->cpus ? evsel->cpus->nr : ncpus; perf_evsel__close(evsel, n, nthreads); } @@ -1458,7 +1656,7 @@ int perf_evlist__open(struct perf_evlist *evlist) perf_evlist__update_id_pos(evlist); - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); if (err < 0) goto out_err; @@ -1619,7 +1817,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) struct perf_evsel *evsel; size_t printed = 0; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "", perf_evsel__name(evsel)); } @@ -1631,7 +1829,7 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size) { int printed, value; - char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); + char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); switch (err) { case EACCES: @@ -1683,7 +1881,7 @@ out_default: int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) { - char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); + char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; switch (err) { @@ -1721,7 +1919,7 @@ void perf_evlist__to_front(struct perf_evlist *evlist, if (move_evsel == perf_evlist__first(evlist)) return; - evlist__for_each_safe(evlist, n, evsel) { + evlist__for_each_entry_safe(evlist, n, evsel) { if (evsel->leader == move_evsel->leader) list_move_tail(&evsel->node, &move); } @@ -1737,7 +1935,7 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist, if (tracking_evsel->tracking) return; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel != tracking_evsel) evsel->tracking = false; } @@ -1751,7 +1949,7 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (!evsel->name) continue; if (strcmp(str, evsel->name) == 0) @@ -1760,3 +1958,61 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, return NULL; } + +void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, + enum bkw_mmap_state state) +{ + enum bkw_mmap_state old_state = evlist->bkw_mmap_state; + enum action { + NONE, + PAUSE, + RESUME, + } action = NONE; + + if (!evlist->backward_mmap) + return; + + switch (old_state) { + case BKW_MMAP_NOTREADY: { + if (state != BKW_MMAP_RUNNING) + goto state_err;; + break; + } + case BKW_MMAP_RUNNING: { + if (state != BKW_MMAP_DATA_PENDING) + goto state_err; + action = PAUSE; + break; + } + case BKW_MMAP_DATA_PENDING: { + if (state != BKW_MMAP_EMPTY) + goto state_err; + break; + } + case BKW_MMAP_EMPTY: { + if (state != BKW_MMAP_RUNNING) + goto state_err; + action = RESUME; + break; + } + default: + WARN_ONCE(1, "Shouldn't get there\n"); + } + + evlist->bkw_mmap_state = state; + + switch (action) { + case PAUSE: + perf_evlist__pause(evlist); + break; + case RESUME: + perf_evlist__resume(evlist); + break; + case NONE: + default: + break; + } + +state_err: + return; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a0d15221db6e..4fd034f22d2f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -28,12 +28,47 @@ struct record_opts; struct perf_mmap { void *base; int mask; + int fd; atomic_t refcnt; u64 prev; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); }; +static inline size_t +perf_mmap__mmap_len(struct perf_mmap *map) +{ + return map->mask + 1 + page_size; +} + +/* + * State machine of bkw_mmap_state: + * + * .________________(forbid)_____________. + * | V + * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY + * ^ ^ | ^ | + * | |__(forbid)____/ |___(forbid)___/| + * | | + * \_________________(3)_______________/ + * + * NOTREADY : Backward ring buffers are not ready + * RUNNING : Backward ring buffers are recording + * DATA_PENDING : We are required to collect data from backward ring buffers + * EMPTY : We have collected data from backward ring buffers. + * + * (0): Setup backward ring buffer + * (1): Pause ring buffers for reading + * (2): Read from ring buffers + * (3): Resume ring buffers for recording + */ +enum bkw_mmap_state { + BKW_MMAP_NOTREADY, + BKW_MMAP_RUNNING, + BKW_MMAP_DATA_PENDING, + BKW_MMAP_EMPTY, +}; + struct perf_evlist { struct list_head entries; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; @@ -47,12 +82,14 @@ struct perf_evlist { int id_pos; int is_pos; u64 combined_sample_type; + enum bkw_mmap_state bkw_mmap_state; struct { int cork_fd; pid_t pid; } workload; struct fdarray pollfd; struct perf_mmap *mmap; + struct perf_mmap *backward_mmap; struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; @@ -87,6 +124,17 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist); int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); +void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, + enum perf_event_sample_format bit); +void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, + enum perf_event_sample_format bit); + +#define perf_evlist__set_sample_bit(evlist, bit) \ + __perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit) + +#define perf_evlist__reset_sample_bit(evlist, bit) \ + __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit) + int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid); int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids); @@ -116,18 +164,35 @@ struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); +void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state); + +union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); +union perf_event *perf_mmap__read_backward(struct perf_mmap *map); + +void perf_mmap__read_catchup(struct perf_mmap *md); +void perf_mmap__consume(struct perf_mmap *md, bool overwrite); + union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); +union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, + int idx); +union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, + int idx); +void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx); + void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__close(struct perf_evlist *evlist); +struct callchain_param; + void perf_evlist__set_id_pos(struct perf_evlist *evlist); bool perf_can_sample_identifier(void); bool perf_can_record_switch_events(void); bool perf_can_record_cpu_wide(void); -void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); +void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts, + struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); int perf_evlist__prepare_workload(struct perf_evlist *evlist, @@ -144,6 +209,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset); +unsigned long perf_event_mlock_kb_in_pages(void); + int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, bool overwrite, unsigned int auxtrace_pages, bool auxtrace_overwrite); @@ -225,70 +292,70 @@ void perf_evlist__to_front(struct perf_evlist *evlist, struct perf_evsel *move_evsel); /** - * __evlist__for_each - iterate thru all the evsels + * __evlist__for_each_entry - iterate thru all the evsels * @list: list_head instance to iterate * @evsel: struct evsel iterator */ -#define __evlist__for_each(list, evsel) \ +#define __evlist__for_each_entry(list, evsel) \ list_for_each_entry(evsel, list, node) /** - * evlist__for_each - iterate thru all the evsels + * evlist__for_each_entry - iterate thru all the evsels * @evlist: evlist instance to iterate * @evsel: struct evsel iterator */ -#define evlist__for_each(evlist, evsel) \ - __evlist__for_each(&(evlist)->entries, evsel) +#define evlist__for_each_entry(evlist, evsel) \ + __evlist__for_each_entry(&(evlist)->entries, evsel) /** - * __evlist__for_each_continue - continue iteration thru all the evsels + * __evlist__for_each_entry_continue - continue iteration thru all the evsels * @list: list_head instance to iterate * @evsel: struct evsel iterator */ -#define __evlist__for_each_continue(list, evsel) \ +#define __evlist__for_each_entry_continue(list, evsel) \ list_for_each_entry_continue(evsel, list, node) /** - * evlist__for_each_continue - continue iteration thru all the evsels + * evlist__for_each_entry_continue - continue iteration thru all the evsels * @evlist: evlist instance to iterate * @evsel: struct evsel iterator */ -#define evlist__for_each_continue(evlist, evsel) \ - __evlist__for_each_continue(&(evlist)->entries, evsel) +#define evlist__for_each_entry_continue(evlist, evsel) \ + __evlist__for_each_entry_continue(&(evlist)->entries, evsel) /** - * __evlist__for_each_reverse - iterate thru all the evsels in reverse order + * __evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order * @list: list_head instance to iterate * @evsel: struct evsel iterator */ -#define __evlist__for_each_reverse(list, evsel) \ +#define __evlist__for_each_entry_reverse(list, evsel) \ list_for_each_entry_reverse(evsel, list, node) /** - * evlist__for_each_reverse - iterate thru all the evsels in reverse order + * evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order * @evlist: evlist instance to iterate * @evsel: struct evsel iterator */ -#define evlist__for_each_reverse(evlist, evsel) \ - __evlist__for_each_reverse(&(evlist)->entries, evsel) +#define evlist__for_each_entry_reverse(evlist, evsel) \ + __evlist__for_each_entry_reverse(&(evlist)->entries, evsel) /** - * __evlist__for_each_safe - safely iterate thru all the evsels + * __evlist__for_each_entry_safe - safely iterate thru all the evsels * @list: list_head instance to iterate * @tmp: struct evsel temp iterator * @evsel: struct evsel iterator */ -#define __evlist__for_each_safe(list, tmp, evsel) \ +#define __evlist__for_each_entry_safe(list, tmp, evsel) \ list_for_each_entry_safe(evsel, tmp, list, node) /** - * evlist__for_each_safe - safely iterate thru all the evsels + * evlist__for_each_entry_safe - safely iterate thru all the evsels * @evlist: evlist instance to iterate * @evsel: struct evsel iterator * @tmp: struct evsel temp iterator */ -#define evlist__for_each_safe(evlist, tmp, evsel) \ - __evlist__for_each_safe(&(evlist)->entries, tmp, evsel) +#define evlist__for_each_entry_safe(evlist, tmp, evsel) \ + __evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel) void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); @@ -297,4 +364,7 @@ void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); struct perf_evsel * perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); + +struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, + union perf_event *event); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 645dc1828836..21fd573106ed 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -37,6 +37,7 @@ static struct { bool clockid; bool clockid_wrong; bool lbr_flags; + bool write_backward; } perf_missing_features; static clockid_t clockid; @@ -199,6 +200,24 @@ void perf_evsel__set_sample_id(struct perf_evsel *evsel, evsel->attr.read_format |= PERF_FORMAT_ID; } +/** + * perf_evsel__is_function_event - Return whether given evsel is a function + * trace event + * + * @evsel - evsel selector to be tested + * + * Return %true if event is function trace event + */ +bool perf_evsel__is_function_event(struct perf_evsel *evsel) +{ +#define FUNCTION_EVENT "ftrace:function" + + return evsel->name && + !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT)); + +#undef FUNCTION_EVENT +} + void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { @@ -226,13 +245,42 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) perf_evsel__init(evsel, attr, idx); if (perf_evsel__is_bpf_output(evsel)) { - evsel->attr.sample_type |= PERF_SAMPLE_RAW; + evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), evsel->attr.sample_period = 1; } return evsel; } +struct perf_evsel *perf_evsel__new_cycles(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + }; + struct perf_evsel *evsel; + + event_attr_init(&attr); + + perf_event_attr__set_max_precise_ip(&attr); + + evsel = perf_evsel__new(&attr); + if (evsel == NULL) + goto out; + + /* use asprintf() because free(evsel) assumes name is allocated */ + if (asprintf(&evsel->name, "cycles%.*s", + attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0) + goto error_free; +out: + return evsel; +error_free: + perf_evsel__delete(evsel); + evsel = NULL; + goto out; +} + /* * Returns pointer with encoded error via <linux/err.h> interface. */ @@ -459,17 +507,17 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) u8 op, result, type = (config >> 0) & 0xff; const char *err = "unknown-ext-hardware-cache-type"; - if (type > PERF_COUNT_HW_CACHE_MAX) + if (type >= PERF_COUNT_HW_CACHE_MAX) goto out_err; op = (config >> 8) & 0xff; err = "unknown-ext-hardware-cache-op"; - if (op > PERF_COUNT_HW_CACHE_OP_MAX) + if (op >= PERF_COUNT_HW_CACHE_OP_MAX) goto out_err; result = (config >> 16) & 0xff; err = "unknown-ext-hardware-cache-result"; - if (result > PERF_COUNT_HW_CACHE_RESULT_MAX) + if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX) goto out_err; err = "invalid-cache"; @@ -561,16 +609,17 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) return ret; } -static void -perf_evsel__config_callgraph(struct perf_evsel *evsel, - struct record_opts *opts, - struct callchain_param *param) +void perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *param) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); + attr->sample_max_stack = param->max_stack; + if (param->record_mode == CALLCHAIN_LBR) { if (!opts->branch_stack) { if (attr->exclude_user) { @@ -634,7 +683,8 @@ static void apply_config_terms(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; struct callchain_param param; u32 dump_size = 0; - char *callgraph_buf = NULL; + int max_stack = 0; + const char *callgraph_buf = NULL; /* callgraph default */ param.record_mode = callchain_param.record_mode; @@ -661,6 +711,9 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; break; + case PERF_EVSEL__CONFIG_TERM_MAX_STACK: + max_stack = term->val.max_stack; + break; case PERF_EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by @@ -670,13 +723,21 @@ static void apply_config_terms(struct perf_evsel *evsel, */ attr->inherit = term->val.inherit ? 1 : 0; break; + case PERF_EVSEL__CONFIG_TERM_OVERWRITE: + attr->write_backward = term->val.overwrite ? 1 : 0; + break; default: break; } } /* User explicitly set per-event callgraph, clear the old setting and reset. */ - if ((callgraph_buf != NULL) || (dump_size > 0)) { + if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) { + if (max_stack) { + param.max_stack = max_stack; + if (callgraph_buf == NULL) + callgraph_buf = "fp"; + } /* parse callgraph parameters */ if (callgraph_buf != NULL) { @@ -704,7 +765,7 @@ static void apply_config_terms(struct perf_evsel *evsel, /* set perf-event callgraph */ if (param.enabled) - perf_evsel__config_callgraph(evsel, opts, ¶m); + perf_evsel__config_callchain(evsel, opts, ¶m); } } @@ -736,7 +797,8 @@ static void apply_config_terms(struct perf_evsel *evsel, * enable/disable events specifically, as there's no * initial traced exec call. */ -void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) +void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain) { struct perf_evsel *leader = evsel->leader; struct perf_event_attr *attr = &evsel->attr; @@ -745,6 +807,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; attr->inherit = !opts->no_inherit; + attr->write_backward = opts->overwrite ? 1 : 0; perf_evsel__set_sample_bit(evsel, IP); perf_evsel__set_sample_bit(evsel, TID); @@ -811,22 +874,22 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (perf_evsel__is_function_event(evsel)) evsel->attr.exclude_callchain_user = 1; - if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel, opts, &callchain_param); + if (callchain && callchain->enabled && !evsel->no_aux_samples) + perf_evsel__config_callchain(evsel, opts, callchain); if (opts->sample_intr_regs) { attr->sample_regs_intr = opts->sample_intr_regs; perf_evsel__set_sample_bit(evsel, REGS_INTR); } - if (target__has_cpu(&opts->target)) + if (target__has_cpu(&opts->target) || opts->sample_cpu) perf_evsel__set_sample_bit(evsel, CPU); if (opts->period) perf_evsel__set_sample_bit(evsel, PERIOD); /* - * When the user explicitely disabled time don't force it here. + * When the user explicitly disabled time don't force it here. */ if (opts->sample_time && (!perf_missing_features.sample_id_all && @@ -1230,6 +1293,21 @@ static void __p_sample_type(char *buf, size_t size, u64 value) __p_bits(buf, size, value, bits); } +static void __p_branch_sample_type(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n } + struct bit_names bits[] = { + bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY), + bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL), + bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), + bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), + bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + static void __p_read_format(char *buf, size_t size, u64 value) { #define bit_name(n) { PERF_FORMAT_##n, #n } @@ -1248,6 +1326,7 @@ static void __p_read_format(char *buf, size_t size, u64 value) #define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) #define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) #define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) +#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) #define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) #define PRINT_ATTRn(_n, _f, _p) \ @@ -1299,17 +1378,19 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(comm_exec, p_unsigned); PRINT_ATTRf(use_clockid, p_unsigned); PRINT_ATTRf(context_switch, p_unsigned); + PRINT_ATTRf(write_backward, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); - PRINT_ATTRf(branch_sample_type, p_unsigned); + PRINT_ATTRf(branch_sample_type, p_branch_sample_type); PRINT_ATTRf(sample_regs_user, p_hex); PRINT_ATTRf(sample_stack_user, p_unsigned); PRINT_ATTRf(clockid, p_signed); PRINT_ATTRf(sample_regs_intr, p_hex); PRINT_ATTRf(aux_watermark, p_unsigned); + PRINT_ATTRf(sample_max_stack, p_unsigned); return ret; } @@ -1328,6 +1409,9 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, int pid = -1, err; enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE; + if (perf_missing_features.write_backward && evsel->attr.write_backward) + return -EINVAL; + if (evsel->system_wide) nthreads = 1; else @@ -1457,7 +1541,10 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) { + if (!perf_missing_features.write_backward && evsel->attr.write_backward) { + perf_missing_features.write_backward = true; + goto out_close; + } else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) { perf_missing_features.clockid_wrong = true; goto fallback_missing_features; } else if (!perf_missing_features.clockid && evsel->attr.use_clockid) { @@ -1483,7 +1570,6 @@ try_fallback: perf_missing_features.lbr_flags = true; goto fallback_missing_features; } - out_close: do { while (--thread >= 0) { @@ -2208,17 +2294,11 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, return sample->raw_data + offset; } -u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, - const char *name) +u64 format_field__intval(struct format_field *field, struct perf_sample *sample, + bool needs_swap) { - struct format_field *field = perf_evsel__field(evsel, name); - void *ptr; u64 value; - - if (!field) - return 0; - - ptr = sample->raw_data + field->offset; + void *ptr = sample->raw_data + field->offset; switch (field->size) { case 1: @@ -2236,7 +2316,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, return 0; } - if (!evsel->needs_swap) + if (!needs_swap) return value; switch (field->size) { @@ -2253,93 +2333,15 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, return 0; } -static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) -{ - va_list args; - int ret = 0; - - if (!*first) { - ret += fprintf(fp, ","); - } else { - ret += fprintf(fp, ":"); - *first = false; - } - - va_start(args, fmt); - ret += vfprintf(fp, fmt, args); - va_end(args); - return ret; -} - -static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv) -{ - return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val); -} - -int perf_evsel__fprintf(struct perf_evsel *evsel, - struct perf_attr_details *details, FILE *fp) +u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name) { - bool first = true; - int printed = 0; - - if (details->event_group) { - struct perf_evsel *pos; - - if (!perf_evsel__is_group_leader(evsel)) - return 0; - - if (evsel->nr_members > 1) - printed += fprintf(fp, "%s{", evsel->group_name ?: ""); - - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); - for_each_group_member(pos, evsel) - printed += fprintf(fp, ",%s", perf_evsel__name(pos)); - - if (evsel->nr_members > 1) - printed += fprintf(fp, "}"); - goto out; - } - - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); - - if (details->verbose) { - printed += perf_event_attr__fprintf(fp, &evsel->attr, - __print_attr__fprintf, &first); - } else if (details->freq) { - const char *term = "sample_freq"; - - if (!evsel->attr.freq) - term = "sample_period"; - - printed += comma_fprintf(fp, &first, " %s=%" PRIu64, - term, (u64)evsel->attr.sample_freq); - } - - if (details->trace_fields) { - struct format_field *field; - - if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { - printed += comma_fprintf(fp, &first, " (not a tracepoint)"); - goto out; - } - - field = evsel->tp_format->format.fields; - if (field == NULL) { - printed += comma_fprintf(fp, &first, " (no trace field)"); - goto out; - } + struct format_field *field = perf_evsel__field(evsel, name); - printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name); + if (!field) + return 0; - field = field->next; - while (field) { - printed += comma_fprintf(fp, &first, "%s", field->name); - field = field->next; - } - } -out: - fputc('\n', fp); - return ++printed; + return field ? format_field__intval(field, sample, evsel->needs_swap) : 0; } bool perf_evsel__fallback(struct perf_evsel *evsel, int err, @@ -2416,12 +2418,23 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "Probably the maximum number of open file descriptors has been reached.\n" "Hint: Try again after reducing the number of events.\n" "Hint: Try increasing the limit with 'ulimit -n <limit>'"); + case ENOMEM: + if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 && + access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0) + return scnprintf(msg, size, + "Not enough memory to setup event with callchain.\n" + "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n" + "Hint: Current value: %d", sysctl_perf_event_max_stack); + break; case ENODEV: if (target->cpu_list) return scnprintf(msg, size, "%s", - "No such device - did you specify an out-of-range profile CPU?\n"); + "No such device - did you specify an out-of-range profile CPU?"); break; case EOPNOTSUPP: + if (evsel->attr.sample_period != 0) + return scnprintf(msg, size, "%s", + "PMU Hardware doesn't support sampling/overflow-interrupts."); if (evsel->attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. Try removing 'p' modifier."); @@ -2439,6 +2452,8 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "We found oprofile daemon running, please stop it and try again."); break; case EINVAL: + if (evsel->attr.write_backward && perf_missing_features.write_backward) + return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel."); if (perf_missing_features.clockid) return scnprintf(msg, size, "clockid feature not supported."); if (perf_missing_features.clockid_wrong) @@ -2451,7 +2466,14 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" "/bin/dmesg may provide additional information.\n" - "No CONFIG_PERF_EVENTS=y kernel support configured?\n", - err, strerror_r(err, sbuf, sizeof(sbuf)), + "No CONFIG_PERF_EVENTS=y kernel support configured?", + err, str_error_r(err, sbuf, sizeof(sbuf)), perf_evsel__name(evsel)); } + +char *perf_evsel__env_arch(struct perf_evsel *evsel) +{ + if (evsel && evsel->evlist && evsel->evlist->env) + return evsel->evlist->env->arch; + return NULL; +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 501ea6e565f1..4d44129e050b 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -44,6 +44,8 @@ enum { PERF_EVSEL__CONFIG_TERM_CALLGRAPH, PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, + PERF_EVSEL__CONFIG_TERM_MAX_STACK, + PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -56,7 +58,9 @@ struct perf_evsel_config_term { bool time; char *callgraph; u64 stack_user; + int max_stack; bool inherit; + bool overwrite; } val; }; @@ -171,6 +175,8 @@ static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char * return perf_evsel__newtp_idx(sys, name, 0); } +struct perf_evsel *perf_evsel__new_cycles(void); + struct event_format *event_format__new(const char *sys, const char *name); void perf_evsel__init(struct perf_evsel *evsel, @@ -178,8 +184,14 @@ void perf_evsel__init(struct perf_evsel *evsel, void perf_evsel__exit(struct perf_evsel *evsel); void perf_evsel__delete(struct perf_evsel *evsel); +struct callchain_param; + void perf_evsel__config(struct perf_evsel *evsel, - struct record_opts *opts); + struct record_opts *opts, + struct callchain_param *callchain); +void perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *callchain); int __perf_evsel__sample_size(u64 sample_type); void perf_evsel__calc_id_pos(struct perf_evsel *evsel); @@ -252,6 +264,8 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel, struct format_field; +u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap); + struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); #define perf_evsel__match(evsel, t, c) \ @@ -344,23 +358,7 @@ static inline bool perf_evsel__is_group_event(struct perf_evsel *evsel) return perf_evsel__is_group_leader(evsel) && evsel->nr_members > 1; } -/** - * perf_evsel__is_function_event - Return whether given evsel is a function - * trace event - * - * @evsel - evsel selector to be tested - * - * Return %true if event is function trace event - */ -static inline bool perf_evsel__is_function_event(struct perf_evsel *evsel) -{ -#define FUNCTION_EVENT "ftrace:function" - - return evsel->name && - !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT)); - -#undef FUNCTION_EVENT -} +bool perf_evsel__is_function_event(struct perf_evsel *evsel); static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel) { @@ -381,6 +379,24 @@ struct perf_attr_details { int perf_evsel__fprintf(struct perf_evsel *evsel, struct perf_attr_details *details, FILE *fp); +#define EVSEL__PRINT_IP (1<<0) +#define EVSEL__PRINT_SYM (1<<1) +#define EVSEL__PRINT_DSO (1<<2) +#define EVSEL__PRINT_SYMOFFSET (1<<3) +#define EVSEL__PRINT_ONELINE (1<<4) +#define EVSEL__PRINT_SRCLINE (1<<5) +#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) + +struct callchain_cursor; + +int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, + unsigned int print_opts, + struct callchain_cursor *cursor, FILE *fp); + +int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, + int left_alignment, unsigned int print_opts, + struct callchain_cursor *cursor, FILE *fp); + bool perf_evsel__fallback(struct perf_evsel *evsel, int err, char *msg, size_t msgsize); int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, @@ -396,7 +412,7 @@ for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) -static inline bool has_branch_callstack(struct perf_evsel *evsel) +static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evsel) { return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } @@ -406,4 +422,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); +char *perf_evsel__env_arch(struct perf_evsel *evsel); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c new file mode 100644 index 000000000000..3674e77ad640 --- /dev/null +++ b/tools/perf/util/evsel_fprintf.c @@ -0,0 +1,212 @@ +#include <stdio.h> +#include <stdbool.h> +#include <traceevent/event-parse.h> +#include "evsel.h" +#include "callchain.h" +#include "map.h" +#include "symbol.h" + +static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) +{ + va_list args; + int ret = 0; + + if (!*first) { + ret += fprintf(fp, ","); + } else { + ret += fprintf(fp, ":"); + *first = false; + } + + va_start(args, fmt); + ret += vfprintf(fp, fmt, args); + va_end(args); + return ret; +} + +static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv) +{ + return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val); +} + +int perf_evsel__fprintf(struct perf_evsel *evsel, + struct perf_attr_details *details, FILE *fp) +{ + bool first = true; + int printed = 0; + + if (details->event_group) { + struct perf_evsel *pos; + + if (!perf_evsel__is_group_leader(evsel)) + return 0; + + if (evsel->nr_members > 1) + printed += fprintf(fp, "%s{", evsel->group_name ?: ""); + + printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + for_each_group_member(pos, evsel) + printed += fprintf(fp, ",%s", perf_evsel__name(pos)); + + if (evsel->nr_members > 1) + printed += fprintf(fp, "}"); + goto out; + } + + printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + + if (details->verbose) { + printed += perf_event_attr__fprintf(fp, &evsel->attr, + __print_attr__fprintf, &first); + } else if (details->freq) { + const char *term = "sample_freq"; + + if (!evsel->attr.freq) + term = "sample_period"; + + printed += comma_fprintf(fp, &first, " %s=%" PRIu64, + term, (u64)evsel->attr.sample_freq); + } + + if (details->trace_fields) { + struct format_field *field; + + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { + printed += comma_fprintf(fp, &first, " (not a tracepoint)"); + goto out; + } + + field = evsel->tp_format->format.fields; + if (field == NULL) { + printed += comma_fprintf(fp, &first, " (no trace field)"); + goto out; + } + + printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name); + + field = field->next; + while (field) { + printed += comma_fprintf(fp, &first, "%s", field->name); + field = field->next; + } + } +out: + fputc('\n', fp); + return ++printed; +} + +int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, + unsigned int print_opts, struct callchain_cursor *cursor, + FILE *fp) +{ + int printed = 0; + struct callchain_cursor_node *node; + int print_ip = print_opts & EVSEL__PRINT_IP; + int print_sym = print_opts & EVSEL__PRINT_SYM; + int print_dso = print_opts & EVSEL__PRINT_DSO; + int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET; + int print_oneline = print_opts & EVSEL__PRINT_ONELINE; + int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; + int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; + char s = print_oneline ? ' ' : '\t'; + + if (sample->callchain) { + struct addr_location node_al; + + callchain_cursor_commit(cursor); + + while (1) { + u64 addr = 0; + + node = callchain_cursor_current(cursor); + if (!node) + break; + + if (node->sym && node->sym->ignore) + goto next; + + printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); + + if (print_ip) + printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); + + if (node->map) + addr = node->map->map_ip(node->map, node->ip); + + if (print_sym) { + printed += fprintf(fp, " "); + node_al.addr = addr; + node_al.map = node->map; + + if (print_symoffset) { + printed += __symbol__fprintf_symname_offs(node->sym, &node_al, + print_unknown_as_addr, fp); + } else { + printed += __symbol__fprintf_symname(node->sym, &node_al, + print_unknown_as_addr, fp); + } + } + + if (print_dso) { + printed += fprintf(fp, " ("); + printed += map__fprintf_dsoname(node->map, fp); + printed += fprintf(fp, ")"); + } + + if (print_srcline) + printed += map__fprintf_srcline(node->map, addr, "\n ", fp); + + if (!print_oneline) + printed += fprintf(fp, "\n"); +next: + callchain_cursor_advance(cursor); + } + } + + return printed; +} + +int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, + int left_alignment, unsigned int print_opts, + struct callchain_cursor *cursor, FILE *fp) +{ + int printed = 0; + int print_ip = print_opts & EVSEL__PRINT_IP; + int print_sym = print_opts & EVSEL__PRINT_SYM; + int print_dso = print_opts & EVSEL__PRINT_DSO; + int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET; + int print_srcline = print_opts & EVSEL__PRINT_SRCLINE; + int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; + + if (cursor != NULL) { + printed += sample__fprintf_callchain(sample, left_alignment, + print_opts, cursor, fp); + } else if (!(al->sym && al->sym->ignore)) { + printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); + + if (print_ip) + printed += fprintf(fp, "%16" PRIx64, sample->ip); + + if (print_sym) { + printed += fprintf(fp, " "); + if (print_symoffset) { + printed += __symbol__fprintf_symname_offs(al->sym, al, + print_unknown_as_addr, fp); + } else { + printed += __symbol__fprintf_symname(al->sym, al, + print_unknown_as_addr, fp); + } + } + + if (print_dso) { + printed += fprintf(fp, " ("); + printed += map__fprintf_dsoname(al->map, fp); + printed += fprintf(fp, ")"); + } + + if (print_srcline) + printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); + } + + return printed; +} diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h new file mode 100644 index 000000000000..116debe7a995 --- /dev/null +++ b/tools/perf/util/group.h @@ -0,0 +1,7 @@ +#ifndef GROUP_H +#define GROUP_H 1 + +bool arch_topdown_check_group(bool *warn); +void arch_topdown_group_warn(void); + +#endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 90680ec9f8b8..8f0db4007282 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -336,7 +336,7 @@ static int write_event_desc(int fd, struct perf_header *h __maybe_unused, if (ret < 0) return ret; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { ret = do_write(fd, &evsel->attr, sz); if (ret < 0) return ret; @@ -801,7 +801,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused, if (ret < 0) return ret; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (perf_evsel__is_group_leader(evsel) && evsel->nr_members > 1) { const char *name = evsel->group_name ?: "{anon_group}"; @@ -1306,42 +1306,19 @@ static void print_total_mem(struct perf_header *ph, int fd __maybe_unused, static void print_numa_topology(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { - u32 nr, c, i; - char *str, *tmp; - uint64_t mem_total, mem_free; - - /* nr nodes */ - nr = ph->env.nr_numa_nodes; - str = ph->env.numa_nodes; - - for (i = 0; i < nr; i++) { - /* node number */ - c = strtoul(str, &tmp, 0); - if (*tmp != ':') - goto error; - - str = tmp + 1; - mem_total = strtoull(str, &tmp, 0); - if (*tmp != ':') - goto error; + int i; + struct numa_node *n; - str = tmp + 1; - mem_free = strtoull(str, &tmp, 0); - if (*tmp != ':') - goto error; + for (i = 0; i < ph->env.nr_numa_nodes; i++) { + n = &ph->env.numa_nodes[i]; fprintf(fp, "# node%u meminfo : total = %"PRIu64" kB," " free = %"PRIu64" kB\n", - c, mem_total, mem_free); - - str = tmp + 1; - fprintf(fp, "# node%u cpu list : %s\n", c, str); + n->node, n->mem_total, n->mem_free); - str += strlen(str) + 1; + fprintf(fp, "# node%u cpu list : ", n->node); + cpu_map__fprintf(n->map, fp); } - return; -error: - fprintf(fp, "# numa topology : not available\n"); } static void print_cpuid(struct perf_header *ph, int fd __maybe_unused, FILE *fp) @@ -1425,7 +1402,7 @@ static void print_group_desc(struct perf_header *ph, int fd __maybe_unused, session = container_of(ph, struct perf_session, header); - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { if (perf_evsel__is_group_leader(evsel) && evsel->nr_members > 1) { fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", @@ -1474,7 +1451,7 @@ static int __event_process_build_id(struct build_id_event *bev, dso = machine__findnew_dso(machine, filename); if (dso != NULL) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; dso__set_build_id(dso, &bev->build_id); @@ -1703,7 +1680,7 @@ perf_evlist__find_by_index(struct perf_evlist *evlist, int idx) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->idx == idx) return evsel; } @@ -1819,7 +1796,8 @@ static int process_cpu_topology(struct perf_file_section *section, ph->env.nr_sibling_cores = nr; size += sizeof(u32); - strbuf_init(&sb, 128); + if (strbuf_init(&sb, 128) < 0) + goto free_cpu; for (i = 0; i < nr; i++) { str = do_read_string(fd, ph); @@ -1827,7 +1805,8 @@ static int process_cpu_topology(struct perf_file_section *section, goto error; /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + if (strbuf_add(&sb, str, strlen(str) + 1) < 0) + goto error; size += string_size(str); free(str); } @@ -1849,7 +1828,8 @@ static int process_cpu_topology(struct perf_file_section *section, goto error; /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + if (strbuf_add(&sb, str, strlen(str) + 1) < 0) + goto error; size += string_size(str); free(str); } @@ -1903,59 +1883,61 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse struct perf_header *ph, int fd, void *data __maybe_unused) { + struct numa_node *nodes, *n; ssize_t ret; - u32 nr, node, i; + u32 nr, i; char *str; - uint64_t mem_total, mem_free; - struct strbuf sb; /* nr nodes */ ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) - goto error; + return -1; if (ph->needs_swap) nr = bswap_32(nr); ph->env.nr_numa_nodes = nr; - strbuf_init(&sb, 256); + nodes = zalloc(sizeof(*nodes) * nr); + if (!nodes) + return -ENOMEM; for (i = 0; i < nr; i++) { + n = &nodes[i]; + /* node number */ - ret = readn(fd, &node, sizeof(node)); - if (ret != sizeof(node)) + ret = readn(fd, &n->node, sizeof(u32)); + if (ret != sizeof(n->node)) goto error; - ret = readn(fd, &mem_total, sizeof(u64)); + ret = readn(fd, &n->mem_total, sizeof(u64)); if (ret != sizeof(u64)) goto error; - ret = readn(fd, &mem_free, sizeof(u64)); + ret = readn(fd, &n->mem_free, sizeof(u64)); if (ret != sizeof(u64)) goto error; if (ph->needs_swap) { - node = bswap_32(node); - mem_total = bswap_64(mem_total); - mem_free = bswap_64(mem_free); + n->node = bswap_32(n->node); + n->mem_total = bswap_64(n->mem_total); + n->mem_free = bswap_64(n->mem_free); } - strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":", - node, mem_total, mem_free); - str = do_read_string(fd, ph); if (!str) goto error; - /* include a NULL character at the end */ - strbuf_add(&sb, str, strlen(str) + 1); + n->map = cpu_map__new(str); + if (!n->map) + goto error; + free(str); } - ph->env.numa_nodes = strbuf_detach(&sb, NULL); + ph->env.numa_nodes = nodes; return 0; error: - strbuf_release(&sb); + free(nodes); return -1; } @@ -1982,7 +1964,8 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused } ph->env.nr_pmu_mappings = pmu_num; - strbuf_init(&sb, 128); + if (strbuf_init(&sb, 128) < 0) + return -1; while (pmu_num) { if (readn(fd, &type, sizeof(type)) != sizeof(type)) @@ -1994,9 +1977,11 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused if (!name) goto error; - strbuf_addf(&sb, "%u:%s", type, name); + if (strbuf_addf(&sb, "%u:%s", type, name) < 0) + goto error; /* include a NULL character at the end */ - strbuf_add(&sb, "", 1); + if (strbuf_add(&sb, "", 1) < 0) + goto error; if (!strcmp(name, "msr")) ph->env.msr_pmu_type = type; @@ -2066,7 +2051,7 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, session->evlist->nr_groups = nr_groups; i = nr = 0; - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { if (evsel->idx == (int) desc[i].leader_idx) { evsel->leader = evsel; /* {anon_group} is a dummy name */ @@ -2374,7 +2359,7 @@ int perf_session__write_header(struct perf_session *session, lseek(fd, sizeof(f_header), SEEK_SET); - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { evsel->id_offset = lseek(fd, 0, SEEK_CUR); err = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); if (err < 0) { @@ -2385,7 +2370,7 @@ int perf_session__write_header(struct perf_session *session, attr_offset = lseek(fd, 0, SEEK_CUR); - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { f_attr = (struct perf_file_attr){ .attr = evsel->attr, .ids = { @@ -2819,7 +2804,7 @@ static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist, { struct perf_evsel *pos; - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { if (pos->attr.type == PERF_TYPE_TRACEPOINT && perf_evsel__prepare_tracepoint_event(pos, pevent)) return -1; @@ -3118,7 +3103,7 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, struct perf_evsel *evsel; int err = 0; - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids, evsel->id, process); if (err) { diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 43a98a4dc1e1..2821f8d77e52 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -1,4 +1,6 @@ #include "cache.h" +#include "config.h" +#include <stdio.h> #include <subcmd/help.h> #include "../builtin.h" #include "levenshtein.h" @@ -27,16 +29,27 @@ static int levenshtein_compare(const void *p1, const void *p2) return l1 != l2 ? l1 - l2 : strcmp(s1, s2); } -static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) +static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) { - unsigned int i; - - ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); - + unsigned int i, nr = cmds->cnt + old->cnt; + void *tmp; + + if (nr > cmds->alloc) { + /* Choose bigger one to alloc */ + if (alloc_nr(cmds->alloc) < nr) + cmds->alloc = nr; + else + cmds->alloc = alloc_nr(cmds->alloc); + tmp = realloc(cmds->names, cmds->alloc * sizeof(*cmds->names)); + if (!tmp) + return -1; + cmds->names = tmp; + } for (i = 0; i < old->cnt; i++) cmds->names[cmds->cnt++] = old->names[i]; zfree(&old->names); old->cnt = 0; + return 0; } const char *help_unknown_cmd(const char *cmd) @@ -52,8 +65,11 @@ const char *help_unknown_cmd(const char *cmd) load_command_list("perf-", &main_cmds, &other_cmds); - add_cmd_list(&main_cmds, &aliases); - add_cmd_list(&main_cmds, &other_cmds); + if (add_cmd_list(&main_cmds, &aliases) < 0 || + add_cmd_list(&main_cmds, &other_cmds) < 0) { + fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n"); + goto end; + } qsort(main_cmds.names, main_cmds.cnt, sizeof(main_cmds.names), cmdname_compare); uniq(&main_cmds); @@ -99,6 +115,6 @@ const char *help_unknown_cmd(const char *cmd) for (i = 0; i < n; i++) fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); } - +end: exit(1); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 31c4641fe5ff..de15dbcdcecf 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -79,7 +79,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) len = thread__comm_len(h->thread); if (hists__new_col_len(hists, HISTC_COMM, len)) - hists__set_col_len(hists, HISTC_THREAD, len + 6); + hists__set_col_len(hists, HISTC_THREAD, len + 8); if (h->ms.map) { len = dso__name_len(h->ms.map->dso); @@ -117,6 +117,13 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); } + + if (h->branch_info->srcline_from) + hists__new_col_len(hists, HISTC_SRCLINE_FROM, + strlen(h->branch_info->srcline_from)); + if (h->branch_info->srcline_to) + hists__new_col_len(hists, HISTC_SRCLINE_TO, + strlen(h->branch_info->srcline_to)); } if (h->mem_info) { @@ -295,7 +302,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he) root_in = &he->parent_he->hroot_in; root_out = &he->parent_he->hroot_out; } else { - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root_in = &hists->entries_collapsed; else root_in = hists->entries_in; @@ -345,86 +352,114 @@ void hists__delete_entries(struct hists *hists) * histogram, sorted on item, collects periods */ -static struct hist_entry *hist_entry__new(struct hist_entry *template, - bool sample_self) +static int hist_entry__init(struct hist_entry *he, + struct hist_entry *template, + bool sample_self) { - size_t callchain_size = 0; - struct hist_entry *he; + *he = *template; - if (symbol_conf.use_callchain) - callchain_size = sizeof(struct callchain_root); + if (symbol_conf.cumulate_callchain) { + he->stat_acc = malloc(sizeof(he->stat)); + if (he->stat_acc == NULL) + return -ENOMEM; + memcpy(he->stat_acc, &he->stat, sizeof(he->stat)); + if (!sample_self) + memset(&he->stat, 0, sizeof(he->stat)); + } + + map__get(he->ms.map); + + if (he->branch_info) { + /* + * This branch info is (a part of) allocated from + * sample__resolve_bstack() and will be freed after + * adding new entries. So we need to save a copy. + */ + he->branch_info = malloc(sizeof(*he->branch_info)); + if (he->branch_info == NULL) { + map__zput(he->ms.map); + free(he->stat_acc); + return -ENOMEM; + } + + memcpy(he->branch_info, template->branch_info, + sizeof(*he->branch_info)); - he = zalloc(sizeof(*he) + callchain_size); + map__get(he->branch_info->from.map); + map__get(he->branch_info->to.map); + } + + if (he->mem_info) { + map__get(he->mem_info->iaddr.map); + map__get(he->mem_info->daddr.map); + } - if (he != NULL) { - *he = *template; + if (symbol_conf.use_callchain) + callchain_init(he->callchain); + + if (he->raw_data) { + he->raw_data = memdup(he->raw_data, he->raw_size); - if (symbol_conf.cumulate_callchain) { - he->stat_acc = malloc(sizeof(he->stat)); - if (he->stat_acc == NULL) { - free(he); - return NULL; + if (he->raw_data == NULL) { + map__put(he->ms.map); + if (he->branch_info) { + map__put(he->branch_info->from.map); + map__put(he->branch_info->to.map); + free(he->branch_info); } - memcpy(he->stat_acc, &he->stat, sizeof(he->stat)); - if (!sample_self) - memset(&he->stat, 0, sizeof(he->stat)); + if (he->mem_info) { + map__put(he->mem_info->iaddr.map); + map__put(he->mem_info->daddr.map); + } + free(he->stat_acc); + return -ENOMEM; } + } + INIT_LIST_HEAD(&he->pairs.node); + thread__get(he->thread); - map__get(he->ms.map); + if (!symbol_conf.report_hierarchy) + he->leaf = true; - if (he->branch_info) { - /* - * This branch info is (a part of) allocated from - * sample__resolve_bstack() and will be freed after - * adding new entries. So we need to save a copy. - */ - he->branch_info = malloc(sizeof(*he->branch_info)); - if (he->branch_info == NULL) { - map__zput(he->ms.map); - free(he->stat_acc); - free(he); - return NULL; - } + return 0; +} + +static void *hist_entry__zalloc(size_t size) +{ + return zalloc(size + sizeof(struct hist_entry)); +} - memcpy(he->branch_info, template->branch_info, - sizeof(*he->branch_info)); +static void hist_entry__free(void *ptr) +{ + free(ptr); +} - map__get(he->branch_info->from.map); - map__get(he->branch_info->to.map); - } +static struct hist_entry_ops default_ops = { + .new = hist_entry__zalloc, + .free = hist_entry__free, +}; - if (he->mem_info) { - map__get(he->mem_info->iaddr.map); - map__get(he->mem_info->daddr.map); - } +static struct hist_entry *hist_entry__new(struct hist_entry *template, + bool sample_self) +{ + struct hist_entry_ops *ops = template->ops; + size_t callchain_size = 0; + struct hist_entry *he; + int err = 0; - if (symbol_conf.use_callchain) - callchain_init(he->callchain); + if (!ops) + ops = template->ops = &default_ops; - if (he->raw_data) { - he->raw_data = memdup(he->raw_data, he->raw_size); + if (symbol_conf.use_callchain) + callchain_size = sizeof(struct callchain_root); - if (he->raw_data == NULL) { - map__put(he->ms.map); - if (he->branch_info) { - map__put(he->branch_info->from.map); - map__put(he->branch_info->to.map); - free(he->branch_info); - } - if (he->mem_info) { - map__put(he->mem_info->iaddr.map); - map__put(he->mem_info->daddr.map); - } - free(he->stat_acc); - free(he); - return NULL; - } + he = ops->new(callchain_size); + if (he) { + err = hist_entry__init(he, template, sample_self); + if (err) { + ops->free(he); + he = NULL; } - INIT_LIST_HEAD(&he->pairs.node); - thread__get(he->thread); - - if (!symbol_conf.report_hierarchy) - he->leaf = true; } return he; @@ -524,13 +559,15 @@ out: return he; } -struct hist_entry *__hists__add_entry(struct hists *hists, - struct addr_location *al, - struct symbol *sym_parent, - struct branch_info *bi, - struct mem_info *mi, - struct perf_sample *sample, - bool sample_self) +static struct hist_entry* +__hists__add_entry(struct hists *hists, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + struct mem_info *mi, + struct perf_sample *sample, + bool sample_self, + struct hist_entry_ops *ops) { struct hist_entry entry = { .thread = al->thread, @@ -557,11 +594,37 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .transaction = sample->transaction, .raw_data = sample->raw_data, .raw_size = sample->raw_size, + .ops = ops, }; return hists__findnew_entry(hists, &entry, al, sample_self); } +struct hist_entry *hists__add_entry(struct hists *hists, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + struct mem_info *mi, + struct perf_sample *sample, + bool sample_self) +{ + return __hists__add_entry(hists, al, sym_parent, bi, mi, + sample, sample_self, NULL); +} + +struct hist_entry *hists__add_entry_ops(struct hists *hists, + struct hist_entry_ops *ops, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + struct mem_info *mi, + struct perf_sample *sample, + bool sample_self) +{ + return __hists__add_entry(hists, al, sym_parent, bi, mi, + sample, sample_self, ops); +} + static int iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, struct addr_location *al __maybe_unused) @@ -615,8 +678,8 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al */ sample->period = cost; - he = __hists__add_entry(hists, al, iter->parent, NULL, mi, - sample, true); + he = hists__add_entry(hists, al, iter->parent, NULL, mi, + sample, true); if (!he) return -ENOMEM; @@ -720,8 +783,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a sample->period = 1; sample->weight = bi->flags.cycles ? bi->flags.cycles : 1; - he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, - sample, true); + he = hists__add_entry(hists, al, iter->parent, &bi[i], NULL, + sample, true); if (he == NULL) return -ENOMEM; @@ -757,8 +820,8 @@ iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location struct perf_sample *sample = iter->sample; struct hist_entry *he; - he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL, - sample, true); + he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL, + sample, true); if (he == NULL) return -ENOMEM; @@ -818,8 +881,8 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, struct hist_entry *he; int err = 0; - he = __hists__add_entry(hists, al, iter->parent, NULL, NULL, - sample, true); + he = hists__add_entry(hists, al, iter->parent, NULL, NULL, + sample, true); if (he == NULL) return -ENOMEM; @@ -893,8 +956,8 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, } } - he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL, - sample, false); + he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL, + sample, false); if (he == NULL) return -ENOMEM; @@ -953,7 +1016,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, { int err, err2; - err = sample__resolve_callchain(iter->sample, &iter->parent, + err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); if (err) return err; @@ -1036,12 +1099,16 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) void hist_entry__delete(struct hist_entry *he) { + struct hist_entry_ops *ops = he->ops; + thread__zput(he->thread); map__zput(he->ms.map); if (he->branch_info) { map__zput(he->branch_info->from.map); map__zput(he->branch_info->to.map); + free_srcline(he->branch_info->srcline_from); + free_srcline(he->branch_info->srcline_to); zfree(&he->branch_info); } @@ -1058,7 +1125,7 @@ void hist_entry__delete(struct hist_entry *he) free_callchain(he->callchain); free(he->trace_output); free(he->raw_data); - free(he); + ops->free(he); } /* @@ -1072,7 +1139,7 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, struct perf_hpp_fmt *fmt, int printed) { if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) { - const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists)); + const int width = fmt->width(fmt, hpp, he->hists); if (printed < width) { advance_hpp(hpp, printed); printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " "); @@ -1295,8 +1362,9 @@ static int hists__hierarchy_insert_entry(struct hists *hists, return ret; } -int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, - struct hist_entry *he) +static int hists__collapse_insert_entry(struct hists *hists, + struct rb_root *root, + struct hist_entry *he) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -1372,7 +1440,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) struct hist_entry *n; int ret; - if (!sort__need_collapse) + if (!hists__has(hists, need_collapse)) return 0; hists->nr_entries = 0; @@ -1604,7 +1672,7 @@ static void __hists__insert_output_entry(struct rb_root *entries, } static void output_resort(struct hists *hists, struct ui_progress *prog, - bool use_callchain) + bool use_callchain, hists__resort_cb_t cb) { struct rb_root *root; struct rb_node *next; @@ -1631,7 +1699,7 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, return; } - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -1643,6 +1711,9 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); + if (cb && cb(n)) + continue; + __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); hists__inc_stats(hists, n); @@ -1663,12 +1734,18 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro else use_callchain = symbol_conf.use_callchain; - output_resort(evsel__hists(evsel), prog, use_callchain); + output_resort(evsel__hists(evsel), prog, use_callchain, NULL); } void hists__output_resort(struct hists *hists, struct ui_progress *prog) { - output_resort(hists, prog, symbol_conf.use_callchain); + output_resort(hists, prog, symbol_conf.use_callchain, NULL); +} + +void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, + hists__resort_cb_t cb) +{ + output_resort(hists, prog, symbol_conf.use_callchain, cb); } static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd) @@ -2035,7 +2112,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; @@ -2061,6 +2138,8 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, if (he) { memset(&he->stat, 0, sizeof(he->stat)); he->hists = hists; + if (symbol_conf.cumulate_callchain) + memset(he->stat_acc, 0, sizeof(he->stat)); rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, root); hists__inc_stats(hists, he); @@ -2075,7 +2154,7 @@ static struct hist_entry *hists__find_entry(struct hists *hists, { struct rb_node *n; - if (sort__need_collapse) + if (hists__has(hists, need_collapse)) n = hists->entries_collapsed.rb_node; else n = hists->entries_in->rb_node; @@ -2104,7 +2183,7 @@ void hists__match(struct hists *leader, struct hists *other) struct rb_node *nd; struct hist_entry *pos, *pair; - if (sort__need_collapse) + if (hists__has(leader, need_collapse)) root = &leader->entries_collapsed; else root = leader->entries_in; @@ -2129,7 +2208,7 @@ int hists__link(struct hists *leader, struct hists *other) struct rb_node *nd; struct hist_entry *pos, *pair; - if (sort__need_collapse) + if (hists__has(other, need_collapse)) root = &other->entries_collapsed; else root = other->entries_in; @@ -2187,7 +2266,7 @@ size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) struct perf_evsel *pos; size_t ret = 0; - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos)); ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp); } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index bec0cd660fbd..0a1edf1ab450 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -10,6 +10,7 @@ #include "ui/progress.h" struct hist_entry; +struct hist_entry_ops; struct addr_location; struct symbol; @@ -52,6 +53,8 @@ enum hist_column { HISTC_MEM_IADDR_SYMBOL, HISTC_TRANSACTION, HISTC_CYCLES, + HISTC_SRCLINE_FROM, + HISTC_SRCLINE_TO, HISTC_TRACE, HISTC_NR_COLS, /* Last entry */ }; @@ -82,6 +85,8 @@ struct hists { int nr_hpp_node; }; +#define hists__has(__h, __f) (__h)->hpp_list->__f + struct hist_entry_iter; struct hist_iter_ops { @@ -116,13 +121,23 @@ extern const struct hist_iter_ops hist_iter_branch; extern const struct hist_iter_ops hist_iter_mem; extern const struct hist_iter_ops hist_iter_cumulative; -struct hist_entry *__hists__add_entry(struct hists *hists, - struct addr_location *al, - struct symbol *parent, - struct branch_info *bi, - struct mem_info *mi, - struct perf_sample *sample, - bool sample_self); +struct hist_entry *hists__add_entry(struct hists *hists, + struct addr_location *al, + struct symbol *parent, + struct branch_info *bi, + struct mem_info *mi, + struct perf_sample *sample, + bool sample_self); + +struct hist_entry *hists__add_entry_ops(struct hists *hists, + struct hist_entry_ops *ops, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + struct mem_info *mi, + struct perf_sample *sample, + bool sample_self); + int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg); @@ -138,8 +153,12 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, struct perf_hpp_fmt *fmt, int printed); void hist_entry__delete(struct hist_entry *he); +typedef int (*hists__resort_cb_t)(struct hist_entry *he); + void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); +void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, + hists__resort_cb_t cb); int hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); @@ -155,7 +174,8 @@ void events_stats__inc(struct events_stats *stats, u32 type); size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, - int max_cols, float min_pcnt, FILE *fp); + int max_cols, float min_pcnt, FILE *fp, + bool use_callchain); size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp); void hists__filter_by_dso(struct hists *hists); @@ -199,8 +219,6 @@ int hists__init(void); int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); struct rb_root *hists__get_rotate_entries_in(struct hists *hists); -int hists__collapse_insert_entry(struct hists *hists, - struct rb_root *root, struct hist_entry *he); struct perf_hpp { char *buf; @@ -212,9 +230,9 @@ struct perf_hpp { struct perf_hpp_fmt { const char *name; int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct perf_evsel *evsel); + struct hists *hists); int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct perf_evsel *evsel); + struct hists *hists); int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he); int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, @@ -240,6 +258,14 @@ struct perf_hpp_fmt { struct perf_hpp_list { struct list_head fields; struct list_head sorts; + + int need_collapse; + int parent; + int sym; + int dso; + int socket; + int thread; + int comm; }; extern struct perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h deleted file mode 100644 index 3a3a0f16456a..000000000000 --- a/tools/perf/util/include/asm/alternative-asm.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _PERF_ASM_ALTERNATIVE_ASM_H -#define _PERF_ASM_ALTERNATIVE_ASM_H - -/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ - -#define altinstruction_entry # -#define ALTERNATIVE_2 # - -#endif diff --git a/tools/perf/util/include/asm/byteorder.h b/tools/perf/util/include/asm/byteorder.h deleted file mode 100644 index 2a9bdc066307..000000000000 --- a/tools/perf/util/include/asm/byteorder.h +++ /dev/null @@ -1,2 +0,0 @@ -#include <asm/types.h> -#include "../../../../include/uapi/linux/swab.h" diff --git a/tools/perf/util/include/asm/unistd_32.h b/tools/perf/util/include/asm/unistd_32.h deleted file mode 100644 index 8b137891791f..000000000000 --- a/tools/perf/util/include/asm/unistd_32.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tools/perf/util/include/asm/unistd_64.h b/tools/perf/util/include/asm/unistd_64.h deleted file mode 100644 index 8b137891791f..000000000000 --- a/tools/perf/util/include/asm/unistd_64.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tools/perf/util/include/linux/const.h b/tools/perf/util/include/linux/const.h deleted file mode 100644 index c10a35e1afb8..000000000000 --- a/tools/perf/util/include/linux/const.h +++ /dev/null @@ -1 +0,0 @@ -#include "../../../../include/uapi/linux/const.h" diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index abf1366e2a24..749e6f2e37ca 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -66,6 +66,7 @@ struct intel_bts { u64 branches_id; size_t branches_event_size; bool synth_needs_swap; + unsigned long num_events; }; struct intel_bts_queue { @@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, union perf_event event; struct perf_sample sample = { .ip = 0, }; + if (bts->synth_opts.initial_skip && + bts->num_events++ <= bts->synth_opts.initial_skip) + return 0; + event.sample.header.type = PERF_RECORD_SAMPLE; event.sample.header.misc = PERF_RECORD_MISC_USER; event.sample.header.size = sizeof(struct perf_event_header); @@ -417,7 +422,8 @@ static int intel_bts_get_branch_type(struct intel_bts_queue *btsq, } static int intel_bts_process_buffer(struct intel_bts_queue *btsq, - struct auxtrace_buffer *buffer) + struct auxtrace_buffer *buffer, + struct thread *thread) { struct branch *branch; size_t sz, bsz = sizeof(struct branch); @@ -439,6 +445,12 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq, if (!branch->from && !branch->to) continue; intel_bts_get_branch_type(btsq, branch); + if (btsq->bts->synth_opts.thread_stack) + thread_stack__event(thread, btsq->sample_flags, + le64_to_cpu(branch->from), + le64_to_cpu(branch->to), + btsq->intel_pt_insn.length, + buffer->buffer_nr + 1); if (filter && !(filter & btsq->sample_flags)) continue; err = intel_bts_synth_branch_sample(btsq, branch); @@ -502,12 +514,13 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp) goto out_put; } - if (!btsq->bts->synth_opts.callchain && thread && + if (!btsq->bts->synth_opts.callchain && + !btsq->bts->synth_opts.thread_stack && thread && (!old_buffer || btsq->bts->sampling_mode || (btsq->bts->snapshot_mode && !buffer->consecutive))) thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1); - err = intel_bts_process_buffer(btsq, buffer); + err = intel_bts_process_buffer(btsq, buffer, thread); auxtrace_buffer__drop_data(buffer); @@ -772,7 +785,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, u64 id; int err; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type == bts->pmu_type && evsel->ids) { found = true; break; @@ -900,10 +913,14 @@ int intel_bts_process_auxtrace_info(union perf_event *event, if (dump_trace) return 0; - if (session->itrace_synth_opts && session->itrace_synth_opts->set) + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { bts->synth_opts = *session->itrace_synth_opts; - else + } else { itrace_synth_opts__set_default(&bts->synth_opts); + if (session->itrace_synth_opts) + bts->synth_opts.thread_stack = + session->itrace_synth_opts->thread_stack; + } if (bts->synth_opts.calls) bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 0611d619a42e..9b742ea8bfe8 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -7,8 +7,11 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(call rule_mkdir) @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ +# Busybox's diff doesn't have -I, avoid warning in the case + $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ + @(diff -I 2>&1 | grep -q 'option requires an argument' && \ + test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk index 517567347aac..54e961659514 100644 --- a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk +++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk @@ -72,12 +72,14 @@ BEGIN { lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 - # All opcodes starting with lower-case 'v' or with (v1) superscript + # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript # accepts VEX prefix - vexok_opcode_expr = "^v.*" + vexok_opcode_expr = "^[vk].*" vexok_expr = "\\(v1\\)" # All opcodes with (v) superscript supports *only* VEX prefix vexonly_expr = "\\(v\\)" + # All opcodes with (ev) superscript supports *only* EVEX prefix + evexonly_expr = "\\(ev\\)" prefix_expr = "\\(Prefix\\)" prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" @@ -95,6 +97,7 @@ BEGIN { prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" + prefix_num["EVEX"] = "INAT_PFX_EVEX" clear_vars() } @@ -319,7 +322,9 @@ function convert_operands(count,opnd, i,j,imm,mod) flags = add_flags(flags, "INAT_MODRM") # check VEX codes - if (match(ext, vexonly_expr)) + if (match(ext, evexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") + else if (match(ext, vexonly_expr)) flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) flags = add_flags(flags, "INAT_VEXOK") diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h index 611645e903a8..125ecd2a300d 100644 --- a/tools/perf/util/intel-pt-decoder/inat.h +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -48,6 +48,7 @@ /* AVX VEX prefixes */ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ +#define INAT_PFX_EVEX 15 /* EVEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -89,6 +90,7 @@ #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr) static inline int inat_is_vex_prefix(insn_attr_t attr) { attr &= INAT_PFX_MASK; - return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || + attr == INAT_PFX_EVEX; +} + +static inline int inat_is_evex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; } static inline int inat_is_vex3_prefix(insn_attr_t attr) @@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr) static inline int inat_must_vex(insn_attr_t attr) { - return attr & INAT_VEXONLY; + return attr & (INAT_VEXONLY | INAT_EVEXONLY); +} + +static inline int inat_must_evex(insn_attr_t attr) +{ + return attr & INAT_EVEXONLY; } #endif diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c index 9f26eae6c9f0..ca983e2bea8b 100644 --- a/tools/perf/util/intel-pt-decoder/insn.c +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -155,14 +155,24 @@ found: /* * In 32-bits mode, if the [7:6] bits (mod bits of * ModRM) on the second byte are not 11b, it is - * LDS or LES. + * LDS or LES or BOUND. */ if (X86_MODRM_MOD(b2) != 3) goto vex_end; } insn->vex_prefix.bytes[0] = b; insn->vex_prefix.bytes[1] = b2; - if (inat_is_vex3_prefix(attr)) { + if (inat_is_evex_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + b2 = peek_nbyte_next(insn_byte_t, insn, 3); + insn->vex_prefix.bytes[3] = b2; + insn->vex_prefix.nbytes = 4; + insn->next_byte += 4; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); insn->vex_prefix.bytes[2] = b2; insn->vex_prefix.nbytes = 3; @@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn) m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); - if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || + (!inat_accept_vex(insn->attr) && + !inat_is_group(insn->attr))) insn->attr = 0; /* This instruction is bad */ goto end; /* VEX has only 1 byte for opcode */ } diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h index dd12da0f4593..e23578c7b1be 100644 --- a/tools/perf/util/intel-pt-decoder/insn.h +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -91,6 +91,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ +#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ @@ -133,6 +134,13 @@ static inline int insn_is_avx(struct insn *insn) return (insn->vex_prefix.value != 0); } +static inline int insn_is_evex(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.nbytes == 4); +} + /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { @@ -144,8 +152,10 @@ static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ return X86_VEX2_M; - else + else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ return X86_VEX3_M(insn->vex_prefix.bytes[1]); + else /* EVEX */ + return X86_EVEX_M(insn->vex_prefix.bytes[1]); } static inline insn_byte_t insn_vex_p_bits(struct insn *insn) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 9409d014b46c..8ff6c6a61291 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -123,8 +123,6 @@ struct intel_pt_decoder { bool have_calc_cyc_to_tsc; int exec_mode; unsigned int insn_bytes; - uint64_t sign_bit; - uint64_t sign_bits; uint64_t period; enum intel_pt_period_type period_type; uint64_t tot_insn_cnt; @@ -191,9 +189,6 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->data = params->data; decoder->return_compression = params->return_compression; - decoder->sign_bit = (uint64_t)1 << 47; - decoder->sign_bits = ~(((uint64_t)1 << 48) - 1); - decoder->period = params->period; decoder->period_type = params->period_type; @@ -356,27 +351,36 @@ static const char *intel_pt_err_msgs[] = { int intel_pt__strerror(int code, char *buf, size_t buflen) { - if (code < 1 || code > INTEL_PT_ERR_MAX) + if (code < 1 || code >= INTEL_PT_ERR_MAX) code = INTEL_PT_ERR_UNK; strlcpy(buf, intel_pt_err_msgs[code], buflen); return 0; } -static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, - const struct intel_pt_pkt *packet, +static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet, uint64_t last_ip) { uint64_t ip; switch (packet->count) { - case 2: + case 1: ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | packet->payload; break; - case 4: + case 2: ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | packet->payload; break; + case 3: + ip = packet->payload; + /* Sign-extend 6-byte ip */ + if (ip & (uint64_t)0x800000000000ULL) + ip |= (uint64_t)0xffff000000000000ULL; + break; + case 4: + ip = (last_ip & (uint64_t)0xffff000000000000ULL) | + packet->payload; + break; case 6: ip = packet->payload; break; @@ -384,16 +388,12 @@ static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, return 0; } - if (ip & decoder->sign_bit) - return ip | decoder->sign_bits; - return ip; } static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) { - decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, - decoder->last_ip); + decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); } static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) @@ -1657,6 +1657,12 @@ next: } } +static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) +{ + return decoder->last_ip || decoder->packet.count == 0 || + decoder->packet.count == 3 || decoder->packet.count == 6; +} + /* Walk PSB+ packets to get in sync. */ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) { @@ -1677,8 +1683,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) { + if (intel_pt_have_ip(decoder)) { uint64_t current_ip = decoder->ip; intel_pt_set_ip(decoder); @@ -1767,8 +1772,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) + if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (decoder->ip) return 0; @@ -1776,9 +1780,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: if (decoder->overflow) { - if (decoder->last_ip || - decoder->packet.count == 6 || - decoder->packet.count == 0) + if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (decoder->ip) return 0; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index b1257c816310..4f7b32020487 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -292,36 +292,46 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { - switch (byte >> 5) { + int ip_len; + + packet->count = byte >> 5; + + switch (packet->count) { case 0: - packet->count = 0; + ip_len = 0; break; case 1: if (len < 3) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 2; + ip_len = 2; packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); break; case 2: if (len < 5) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 4; + ip_len = 4; packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1)); break; case 3: - case 6: + case 4: if (len < 7) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 6; + ip_len = 6; memcpy_le64(&packet->payload, buf + 1, 6); break; + case 6: + if (len < 9) + return INTEL_PT_NEED_MORE_BYTES; + ip_len = 8; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1)); + break; default: return INTEL_PT_BAD_PACKET; } packet->type = type; - return packet->count + 1; + return ip_len + 1; } static int intel_pt_get_mode(const unsigned char *buf, size_t len, diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index d388de72eaca..767be7c76034 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -13,12 +13,17 @@ # opcode: escape # escaped-name # EndTable # +# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix +# mnemonics that begin with lowercase 'k' accept a VEX prefix +# #<group maps> # GrpTable: GrpXXX # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # EndTable # # AVX Superscripts +# (ev): this opcode requires EVEX prefix. +# (evo): this opcode is changed by EVEX prefix (EVEX opcode) # (v): this opcode requires VEX prefix. # (v1): this opcode only supports 128bit VEX. # @@ -137,7 +142,7 @@ AVXcode: # 0x60 - 0x6f 60: PUSHA/PUSHAD (i64) 61: POPA/POPAD (i64) -62: BOUND Gv,Ma (i64) +62: BOUND Gv,Ma (i64) | EVEX (Prefix) 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 64: SEG=FS (Prefix) 65: SEG=GS (Prefix) @@ -399,17 +404,17 @@ AVXcode: 1 3f: # 0x0f 0x40-0x4f 40: CMOVO Gv,Ev -41: CMOVNO Gv,Ev -42: CMOVB/C/NAE Gv,Ev +41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66) +42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66) 43: CMOVAE/NB/NC Gv,Ev -44: CMOVE/Z Gv,Ev -45: CMOVNE/NZ Gv,Ev -46: CMOVBE/NA Gv,Ev -47: CMOVA/NBE Gv,Ev +44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66) +45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66) +46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66) +47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66) 48: CMOVS Gv,Ev 49: CMOVNS Gv,Ev -4a: CMOVP/PE Gv,Ev -4b: CMOVNP/PO Gv,Ev +4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66) +4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk 4c: CMOVL/NGE Gv,Ev 4d: CMOVNL/GE Gv,Ev 4e: CMOVLE/NG Gv,Ev @@ -426,7 +431,7 @@ AVXcode: 1 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) -5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) @@ -447,7 +452,7 @@ AVXcode: 1 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) -6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) # 0x0f 0x70-0x7f 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 71: Grp12 (1A) @@ -458,14 +463,14 @@ AVXcode: 1 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 77: emms | vzeroupper | vzeroall -78: VMREAD Ey,Gy -79: VMWRITE Gy,Ey -7a: -7b: +78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) +79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) +7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) +7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) -7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 80: JO Jz (f64) @@ -485,16 +490,16 @@ AVXcode: 1 8e: JLE/JNG Jz (f64) 8f: JNLE/JG Jz (f64) # 0x0f 0x90-0x9f -90: SETO Eb -91: SETNO Eb -92: SETB/C/NAE Eb -93: SETAE/NB/NC Eb +90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) +91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) +92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) +93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) 94: SETE/Z Eb 95: SETNE/NZ Eb 96: SETBE/NA Eb 97: SETA/NBE Eb -98: SETS Eb -99: SETNS Eb +98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) +99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) 9a: SETP/PE Eb 9b: SETNP/PO Eb 9c: SETL/NGE Eb @@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) -db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) -df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) # 0x0f 0xe0-0xef e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) @@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) -e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) -eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) -ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) # 0x0f 0xf0-0xff f0: vlddqu Vx,Mx (F2) f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) @@ -626,81 +631,105 @@ AVXcode: 2 0e: vtestps Vx,Wx (66),(v) 0f: vtestpd Vx,Wx (66),(v) # 0x0f 0x38 0x10-0x1f -10: pblendvb Vdq,Wdq (66) -11: -12: -13: vcvtph2ps Vx,Wx,Ib (66),(v) -14: blendvps Vdq,Wdq (66) -15: blendvpd Vdq,Wdq (66) -16: vpermps Vqq,Hqq,Wqq (66),(v) +10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) +11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) +12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) +13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) +14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) +15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) +16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) 17: vptest Vx,Wx (66) 18: vbroadcastss Vx,Wd (66),(v) -19: vbroadcastsd Vqq,Wq (66),(v) -1a: vbroadcastf128 Vqq,Mdq (66),(v) -1b: +19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) +1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) +1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) -1f: +1f: vpabsq Vx,Wx (66),(ev) # 0x0f 0x38 0x20-0x2f -20: vpmovsxbw Vx,Ux/Mq (66),(v1) -21: vpmovsxbd Vx,Ux/Md (66),(v1) -22: vpmovsxbq Vx,Ux/Mw (66),(v1) -23: vpmovsxwd Vx,Ux/Mq (66),(v1) -24: vpmovsxwq Vx,Ux/Md (66),(v1) -25: vpmovsxdq Vx,Ux/Mq (66),(v1) -26: -27: -28: vpmuldq Vx,Hx,Wx (66),(v1) -29: vpcmpeqq Vx,Hx,Wx (66),(v1) -2a: vmovntdqa Vx,Mx (66),(v1) +20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) +21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) +24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) +26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) +27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) +28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev) +2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) 2b: vpackusdw Vx,Hx,Wx (66),(v1) -2c: vmaskmovps Vx,Hx,Mx (66),(v) -2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) 2e: vmaskmovps Mx,Hx,Vx (66),(v) 2f: vmaskmovpd Mx,Hx,Vx (66),(v) # 0x0f 0x38 0x30-0x3f -30: vpmovzxbw Vx,Ux/Mq (66),(v1) -31: vpmovzxbd Vx,Ux/Md (66),(v1) -32: vpmovzxbq Vx,Ux/Mw (66),(v1) -33: vpmovzxwd Vx,Ux/Mq (66),(v1) -34: vpmovzxwq Vx,Ux/Md (66),(v1) -35: vpmovzxdq Vx,Ux/Mq (66),(v1) -36: vpermd Vqq,Hqq,Wqq (66),(v) +30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) +31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) +34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) +36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) 37: vpcmpgtq Vx,Hx,Wx (66),(v1) -38: vpminsb Vx,Hx,Wx (66),(v1) -39: vpminsd Vx,Hx,Wx (66),(v1) -3a: vpminuw Vx,Hx,Wx (66),(v1) -3b: vpminud Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) +39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) +3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) +3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) 3c: vpmaxsb Vx,Hx,Wx (66),(v1) -3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) 3e: vpmaxuw Vx,Hx,Wx (66),(v1) -3f: vpmaxud Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) # 0x0f 0x38 0x40-0x8f -40: vpmulld Vx,Hx,Wx (66),(v1) +40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) 41: vphminposuw Vdq,Wdq (66),(v1) -42: -43: -44: +42: vgetexpps/d Vx,Wx (66),(ev) +43: vgetexpss/d Vx,Hx,Wx (66),(ev) +44: vplzcntd/q Vx,Wx (66),(ev) 45: vpsrlvd/q Vx,Hx,Wx (66),(v) -46: vpsravd Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x57 +# Skip 0x48-0x4b +4c: vrcp14ps/d Vpd,Wpd (66),(ev) +4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) +4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) +4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) +# Skip 0x50-0x57 58: vpbroadcastd Vx,Wx (66),(v) -59: vpbroadcastq Vx,Wx (66),(v) -5a: vbroadcasti128 Vqq,Mdq (66),(v) -# Skip 0x5b-0x77 +59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) +5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) +5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) +# Skip 0x5c-0x63 +64: vpblendmd/q Vx,Hx,Wx (66),(ev) +65: vblendmps/d Vx,Hx,Wx (66),(ev) +66: vpblendmb/w Vx,Hx,Wx (66),(ev) +# Skip 0x67-0x74 +75: vpermi2b/w Vx,Hx,Wx (66),(ev) +76: vpermi2d/q Vx,Hx,Wx (66),(ev) +77: vpermi2ps/d Vx,Hx,Wx (66),(ev) 78: vpbroadcastb Vx,Wx (66),(v) 79: vpbroadcastw Vx,Wx (66),(v) -# Skip 0x7a-0x7f +7a: vpbroadcastb Vx,Rv (66),(ev) +7b: vpbroadcastw Vx,Rv (66),(ev) +7c: vpbroadcastd/q Vx,Rv (66),(ev) +7d: vpermt2b/w Vx,Hx,Wx (66),(ev) +7e: vpermt2d/q Vx,Hx,Wx (66),(ev) +7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) 81: INVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) +83: vpmultishiftqb Vx,Hx,Wx (66),(ev) +88: vexpandps/d Vpd,Wpd (66),(ev) +89: vpexpandd/q Vx,Wx (66),(ev) +8a: vcompressps/d Wx,Vx (66),(ev) +8b: vpcompressd/q Wx,Vx (66),(ev) 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8d: vpermb/w Vx,Hx,Wx (66),(ev) 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) # 0x0f 0x38 0x90-0xbf (FMA) -90: vgatherdd/q Vx,Hx,Wx (66),(v) -91: vgatherqd/q Vx,Hx,Wx (66),(v) +90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) +91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) 92: vgatherdps/d Vx,Hx,Wx (66),(v) 93: vgatherqps/d Vx,Hx,Wx (66),(v) 94: @@ -715,6 +744,10 @@ AVXcode: 2 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a0: vpscatterdd/q Wx,Vx (66),(ev) +a1: vpscatterqd/q Wx,Vx (66),(ev) +a2: vscatterdps/d Wx,Vx (66),(ev) +a3: vscatterqps/d Wx,Vx (66),(ev) a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) @@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b4: vpmadd52luq Vx,Hx,Wx (66),(ev) +b5: vpmadd52huq Vx,Hx,Wx (66),(ev) b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) @@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff -c8: sha1nexte Vdq,Wdq +c4: vpconflictd/q Vx,Wx (66),(ev) +c6: Grp18 (1A) +c7: Grp19 (1A) +c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) c9: sha1msg1 Vdq,Wdq -ca: sha1msg2 Vdq,Wdq -cb: sha256rnds2 Vdq,Wdq -cc: sha256msg1 Vdq,Wdq -cd: sha256msg2 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) +cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) +cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) +cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) db: VAESIMC Vdq,Wdq (66),(v1) dc: VAESENC Vdq,Hdq,Wdq (66),(v1) dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) @@ -763,15 +801,15 @@ AVXcode: 3 00: vpermq Vqq,Wqq,Ib (66),(v) 01: vpermpd Vqq,Wqq,Ib (66),(v) 02: vpblendd Vx,Hx,Wx,Ib (66),(v) -03: +03: valignd/q Vx,Hx,Wx,Ib (66),(ev) 04: vpermilps Vx,Wx,Ib (66),(v) 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) -09: vroundpd Vx,Wx,Ib (66) -0a: vroundss Vss,Wss,Ib (66),(v1) -0b: vroundsd Vsd,Wsd,Ib (66),(v1) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) @@ -780,26 +818,51 @@ AVXcode: 3 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) 16: vpextrd/q Ey,Vdq,Ib (66),(v1) 17: vextractps Ed,Vdq,Ib (66),(v1) -18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) -19: vextractf128 Wdq,Vqq,Ib (66),(v) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) +1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) 1d: vcvtps2ph Wx,Vx,Ib (66),(v) +1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) +1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) -38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) -39: vextracti128 Wdq,Vqq,Ib (66),(v) +23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) +25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +30: kshiftrb/w Vk,Uk,Ib (66),(v) +31: kshiftrd/q Vk,Uk,Ib (66),(v) +32: kshiftlb/w Vk,Uk,Ib (66),(v) +33: kshiftld/q Vk,Uk,Ib (66),(v) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) +3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) +3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) +3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) 40: vdpps Vx,Hx,Wx,Ib (66) 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) -42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) +43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) +51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) +54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) +55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) cc: sha1rnds4 Vdq,Wdq,Ib df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) @@ -927,8 +990,10 @@ GrpTable: Grp12 EndTable GrpTable: Grp13 +0: vprord/q Hx,Wx,Ib (66),(ev) +1: vprold/q Hx,Wx,Ib (66),(ev) 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) -4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) EndTable @@ -947,7 +1012,7 @@ GrpTable: Grp15 4: XSAVE 5: XRSTOR | lfence (11B) 6: XSAVEOPT | clwb (66) | mfence (11B) -7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) +7: clflush | clflushopt (66) | sfence (11B) EndTable GrpTable: Grp16 @@ -963,6 +1028,20 @@ GrpTable: Grp17 3: BLSI By,Ey (v) EndTable +GrpTable: Grp18 +1: vgatherpf0dps/d Wx (66),(ev) +2: vgatherpf1dps/d Wx (66),(ev) +5: vscatterpf0dps/d Wx (66),(ev) +6: vscatterpf1dps/d Wx (66),(ev) +EndTable + +GrpTable: Grp19 +1: vgatherpf0qps/d Wx (66),(ev) +2: vgatherpf1qps/d Wx (66),(ev) +5: vscatterpf0qps/d Wx (66),(ev) +6: vscatterpf1qps/d Wx (66),(ev) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 617578440989..551ff6f640be 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -39,6 +39,7 @@ #include "auxtrace.h" #include "tsc.h" #include "intel-pt.h" +#include "config.h" #include "intel-pt-decoder/intel-pt-log.h" #include "intel-pt-decoder/intel-pt-decoder.h" @@ -100,6 +101,8 @@ struct intel_pt { u64 cyc_bit; u64 noretcomp_bit; unsigned max_non_turbo_ratio; + + unsigned long num_events; }; enum switch_state { @@ -554,7 +557,7 @@ static bool intel_pt_exclude_kernel(struct intel_pt *pt) { struct perf_evsel *evsel; - evlist__for_each(pt->session->evlist, evsel) { + evlist__for_each_entry(pt->session->evlist, evsel) { if (intel_pt_get_config(pt, &evsel->attr, NULL) && !evsel->attr.exclude_kernel) return false; @@ -570,7 +573,7 @@ static bool intel_pt_return_compression(struct intel_pt *pt) if (!pt->noretcomp_bit) return true; - evlist__for_each(pt->session->evlist, evsel) { + evlist__for_each_entry(pt->session->evlist, evsel) { if (intel_pt_get_config(pt, &evsel->attr, &config) && (config & pt->noretcomp_bit)) return false; @@ -590,7 +593,7 @@ static unsigned int intel_pt_mtc_period(struct intel_pt *pt) for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) config >>= 1; - evlist__for_each(pt->session->evlist, evsel) { + evlist__for_each_entry(pt->session->evlist, evsel) { if (intel_pt_get_config(pt, &evsel->attr, &config)) return (config & pt->mtc_freq_bits) >> shift; } @@ -606,7 +609,7 @@ static bool intel_pt_timeless_decoding(struct intel_pt *pt) if (!pt->tsc_bit || !pt->cap_user_time_zero) return true; - evlist__for_each(pt->session->evlist, evsel) { + evlist__for_each_entry(pt->session->evlist, evsel) { if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) return true; if (intel_pt_get_config(pt, &evsel->attr, &config)) { @@ -623,7 +626,7 @@ static bool intel_pt_tracing_kernel(struct intel_pt *pt) { struct perf_evsel *evsel; - evlist__for_each(pt->session->evlist, evsel) { + evlist__for_each_entry(pt->session->evlist, evsel) { if (intel_pt_get_config(pt, &evsel->attr, NULL) && !evsel->attr.exclude_kernel) return true; @@ -640,7 +643,7 @@ static bool intel_pt_have_tsc(struct intel_pt *pt) if (!pt->tsc_bit) return false; - evlist__for_each(pt->session->evlist, evsel) { + evlist__for_each_entry(pt->session->evlist, evsel) { if (intel_pt_get_config(pt, &evsel->attr, &config)) { if (config & pt->tsc_bit) have_tsc = true; @@ -972,6 +975,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; + if (pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -1029,6 +1036,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + if (pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -1087,6 +1098,10 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + if (pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -1199,14 +1214,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; if (pt->sample_instructions && - (state->type & INTEL_PT_INSTRUCTION)) { + (state->type & INTEL_PT_INSTRUCTION) && + (!pt->synth_opts.initial_skip || + pt->num_events++ >= pt->synth_opts.initial_skip)) { err = intel_pt_synth_instruction_sample(ptq); if (err) return err; } if (pt->sample_transactions && - (state->type & INTEL_PT_TRANSACTION)) { + (state->type & INTEL_PT_TRANSACTION) && + (!pt->synth_opts.initial_skip || + pt->num_events++ >= pt->synth_opts.initial_skip)) { err = intel_pt_synth_transaction_sample(ptq); if (err) return err; @@ -1215,7 +1234,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (!(state->type & INTEL_PT_BRANCH)) return 0; - if (pt->synth_opts.callchain) + if (pt->synth_opts.callchain || pt->synth_opts.thread_stack) thread_stack__event(ptq->thread, ptq->flags, state->from_ip, state->to_ip, ptq->insn_len, state->trace_nr); @@ -1832,7 +1851,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, u64 id; int err; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type == pt->pmu_type && evsel->ids) { found = true; break; @@ -1912,7 +1931,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, pt->sample_transactions = true; pt->transactions_id = id; id += 1; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->id && evsel->id[0] == pt->transactions_id) { if (evsel->name) zfree(&evsel->name); @@ -1950,7 +1969,7 @@ static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) { struct perf_evsel *evsel; - evlist__for_each_reverse(evlist, evsel) { + evlist__for_each_entry_reverse(evlist, evsel) { const char *name = perf_evsel__name(evsel); if (!strcmp(name, "sched:sched_switch")) @@ -1964,7 +1983,7 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.context_switch) return true; } @@ -2118,6 +2137,9 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->synth_opts.branches = false; pt->synth_opts.callchain = true; } + if (session->itrace_synth_opts) + pt->synth_opts.thread_stack = + session->itrace_synth_opts->thread_stack; } if (pt->synth_opts.log) diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h index aa6877d36858..020b9ca1b47e 100644 --- a/tools/perf/util/intlist.h +++ b/tools/perf/util/intlist.h @@ -57,21 +57,21 @@ static inline struct int_node *intlist__next(struct int_node *in) } /** - * intlist_for_each - iterate over a intlist + * intlist__for_each_entry - iterate over a intlist * @pos: the &struct int_node to use as a loop cursor. * @ilist: the &struct intlist for loop. */ -#define intlist__for_each(pos, ilist) \ +#define intlist__for_each_entry(pos, ilist) \ for (pos = intlist__first(ilist); pos; pos = intlist__next(pos)) /** - * intlist_for_each_safe - iterate over a intlist safe against removal of + * intlist__for_each_entry_safe - iterate over a intlist safe against removal of * int_node * @pos: the &struct int_node to use as a loop cursor. * @n: another &struct int_node to use as temporary storage. * @ilist: the &struct intlist for loop. */ -#define intlist__for_each_safe(pos, n, ilist) \ +#define intlist__for_each_entry_safe(pos, n, ilist) \ for (pos = intlist__first(ilist), n = intlist__next(pos); pos;\ pos = n, n = intlist__next(n)) #endif /* __PERF_INTLIST_H */ diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index ad0c0bb1fbc7..95f0884aae02 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -1,3 +1,4 @@ +#include <sys/sysmacros.h> #include <sys/types.h> #include <stdio.h> #include <stdlib.h> @@ -17,6 +18,7 @@ #include "strlist.h" #include <elf.h> +#include "tsc.h" #include "session.h" #include "jit.h" #include "jitdump.h" @@ -33,6 +35,7 @@ struct jit_buf_desc { size_t bufsize; FILE *in; bool needs_bswap; /* handles cross-endianess */ + bool use_arch_timestamp; void *debug_data; size_t nr_debug_entries; uint32_t code_load_count; @@ -106,7 +109,7 @@ jit_validate_events(struct perf_session *session) /* * check that all events use CLOCK_MONOTONIC */ - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC) return -1; } @@ -158,13 +161,16 @@ jit_open(struct jit_buf_desc *jd, const char *name) header.flags = bswap_64(header.flags); } + jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP; + if (verbose > 2) - pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n", + pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n", header.version, header.total_size, (unsigned long long)header.timestamp, header.pid, - header.elf_mach); + header.elf_mach, + jd->use_arch_timestamp); if (header.flags & JITDUMP_FLAGS_RESERVED) { pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", @@ -172,10 +178,15 @@ jit_open(struct jit_buf_desc *jd, const char *name) goto error; } + if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) { + pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n"); + goto error; + } + /* * validate event is using the correct clockid */ - if (jit_validate_events(jd->session)) { + if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) { pr_err("error, jitted code must be sampled with perf record -k 1\n"); goto error; } @@ -329,6 +340,23 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) return 0; } +static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) +{ + struct perf_tsc_conversion tc; + + if (!jd->use_arch_timestamp) + return timestamp; + + tc.time_shift = jd->session->time_conv.time_shift; + tc.time_mult = jd->session->time_conv.time_mult; + tc.time_zero = jd->session->time_conv.time_zero; + + if (!tc.time_mult) + return 0; + + return tsc_to_perf_time(timestamp, &tc); +} + static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) { struct perf_sample sample; @@ -385,7 +413,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) return -1; } if (stat(filename, &st)) - memset(&st, 0, sizeof(stat)); + memset(&st, 0, sizeof(st)); event->mmap2.header.type = PERF_RECORD_MMAP2; event->mmap2.header.misc = PERF_RECORD_MISC_USER; @@ -410,7 +438,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) id->tid = tid; } if (jd->sample_type & PERF_SAMPLE_TIME) - id->time = jr->load.p.timestamp; + id->time = convert_timestamp(jd, jr->load.p.timestamp); /* * create pseudo sample to induce dso hit increment @@ -473,7 +501,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) size++; /* for \0 */ if (stat(filename, &st)) - memset(&st, 0, sizeof(stat)); + memset(&st, 0, sizeof(st)); size = PERF_ALIGN(size, sizeof(u64)); @@ -499,7 +527,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) id->tid = tid; } if (jd->sample_type & PERF_SAMPLE_TIME) - id->time = jr->load.p.timestamp; + id->time = convert_timestamp(jd, jr->load.p.timestamp); /* * create pseudo sample to induce dso hit increment diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h index b66c1f503d9e..bcacd20d0c1c 100644 --- a/tools/perf/util/jitdump.h +++ b/tools/perf/util/jitdump.h @@ -23,9 +23,12 @@ #define JITHEADER_VERSION 1 enum jitdump_flags_bits { + JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT, JITDUMP_FLAGS_MAX_BIT, }; +#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT) + #define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \ (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0) diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c index e521d1516df6..f616e4f65b67 100644 --- a/tools/perf/util/levenshtein.c +++ b/tools/perf/util/levenshtein.c @@ -1,5 +1,7 @@ -#include "cache.h" #include "levenshtein.h" +#include <errno.h> +#include <stdlib.h> +#include <string.h> /* * This function implements the Damerau-Levenshtein algorithm to diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c new file mode 100644 index 000000000000..6559bc586643 --- /dev/null +++ b/tools/perf/util/libunwind/arm64.c @@ -0,0 +1,40 @@ +/* + * This file setups defines to compile arch specific binary from the + * generic one. + * + * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch + * name and the defination of this function is included directly from + * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function + * is defined no matter what arch the host is. + * + * Finally, the arch specific unwind methods are exported which will + * be assigned to each arm64 thread. + */ + +#define REMOTE_UNWIND_LIBUNWIND + +/* Define arch specific functions & regs for libunwind, should be + * defined before including "unwind.h" + */ +#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum) +#define LIBUNWIND__ARCH_REG_IP PERF_REG_ARM64_PC +#define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP + +#include "unwind.h" +#include "debug.h" +#include "libunwind-aarch64.h" +#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> +#include "../../arch/arm64/util/unwind-libunwind.c" + +/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind, + * assign NO_LIBUNWIND_DEBUG_FRAME_AARCH64 to it for compiling arm64 + * unwind methods. + */ +#undef NO_LIBUNWIND_DEBUG_FRAME +#ifdef NO_LIBUNWIND_DEBUG_FRAME_AARCH64 +#define NO_LIBUNWIND_DEBUG_FRAME +#endif +#include "util/unwind-libunwind-local.c" + +struct unwind_libunwind_ops * +arm64_unwind_libunwind_ops = &_unwind_libunwind_ops; diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c new file mode 100644 index 000000000000..957ffff72428 --- /dev/null +++ b/tools/perf/util/libunwind/x86_32.c @@ -0,0 +1,43 @@ +/* + * This file setups defines to compile arch specific binary from the + * generic one. + * + * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch + * name and the defination of this function is included directly from + * 'arch/x86/util/unwind-libunwind.c', to make sure that this function + * is defined no matter what arch the host is. + * + * Finally, the arch specific unwind methods are exported which will + * be assigned to each x86 thread. + */ + +#define REMOTE_UNWIND_LIBUNWIND + +/* Define arch specific functions & regs for libunwind, should be + * defined before including "unwind.h" + */ +#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum) +#define LIBUNWIND__ARCH_REG_IP PERF_REG_X86_IP +#define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP + +#include "unwind.h" +#include "debug.h" +#include "libunwind-x86.h" +#include <../../../../arch/x86/include/uapi/asm/perf_regs.h> + +/* HAVE_ARCH_X86_64_SUPPORT is used in'arch/x86/util/unwind-libunwind.c' + * for x86_32, we undef it to compile code for x86_32 only. + */ +#undef HAVE_ARCH_X86_64_SUPPORT +#include "../../arch/x86/util/unwind-libunwind.c" + +/* Explicitly define NO_LIBUNWIND_DEBUG_FRAME, because non-ARM has no + * dwarf_find_debug_frame() function. + */ +#ifndef NO_LIBUNWIND_DEBUG_FRAME +#define NO_LIBUNWIND_DEBUG_FRAME +#endif +#include "util/unwind-libunwind-local.c" + +struct unwind_libunwind_ops * +x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops; diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 33071d6159bc..bf7216b8731d 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -3,11 +3,14 @@ * Copyright (C) 2015, Huawei Inc. */ +#include <errno.h> #include <limits.h> #include <stdio.h> #include <stdlib.h> #include "debug.h" #include "llvm-utils.h" +#include "config.h" +#include "util.h" #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ @@ -42,6 +45,8 @@ int perf_llvm_config(const char *var, const char *value) llvm_param.kbuild_dir = strdup(value); else if (!strcmp(var, "kbuild-opts")) llvm_param.kbuild_opts = strdup(value); + else if (!strcmp(var, "dump-obj")) + llvm_param.dump_obj = !!perf_config_bool(var, value); else return -1; llvm_param.user_set_param = true; @@ -103,7 +108,7 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) file = popen(cmd, "r"); if (!file) { pr_err("ERROR: unable to popen cmd: %s\n", - strerror_r(errno, serr, sizeof(serr))); + str_error_r(errno, serr, sizeof(serr))); return -EINVAL; } @@ -137,7 +142,7 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) if (ferror(file)) { pr_err("ERROR: error occurred when reading from pipe: %s\n", - strerror_r(errno, serr, sizeof(serr))); + str_error_r(errno, serr, sizeof(serr))); err = -EIO; goto errout; } @@ -326,6 +331,42 @@ get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) pr_debug("include option is set to %s\n", *kbuild_include_opts); } +static void +dump_obj(const char *path, void *obj_buf, size_t size) +{ + char *obj_path = strdup(path); + FILE *fp; + char *p; + + if (!obj_path) { + pr_warning("WARNING: No enough memory, skip object dumping\n"); + return; + } + + p = strrchr(obj_path, '.'); + if (!p || (strcmp(p, ".c") != 0)) { + pr_warning("WARNING: invalid llvm source path: '%s', skip object dumping\n", + obj_path); + goto out; + } + + p[1] = 'o'; + fp = fopen(obj_path, "wb"); + if (!fp) { + pr_warning("WARNING: failed to open '%s': %s, skip object dumping\n", + obj_path, strerror(errno)); + goto out; + } + + pr_info("LLVM: dumping %s\n", obj_path); + if (fwrite(obj_buf, size, 1, fp) != 1) + pr_warning("WARNING: failed to write to file '%s': %s, skip object dumping\n", + obj_path, strerror(errno)); + fclose(fp); +out: + free(obj_path); +} + int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz) { @@ -343,7 +384,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, if (path[0] != '-' && realpath(path, abspath) == NULL) { err = errno; pr_err("ERROR: problems with path %s: %s\n", - path, strerror_r(err, serr, sizeof(serr))); + path, str_error_r(err, serr, sizeof(serr))); return -err; } @@ -371,7 +412,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, if (nr_cpus_avail <= 0) { pr_err( "WARNING:\tunable to get available CPUs in this system: %s\n" -" \tUse 128 instead.\n", strerror_r(errno, serr, sizeof(serr))); +" \tUse 128 instead.\n", str_error_r(errno, serr, sizeof(serr))); nr_cpus_avail = 128; } snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d", @@ -411,6 +452,10 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, free(kbuild_dir); free(kbuild_include_opts); + + if (llvm_param.dump_obj) + dump_obj(path, obj_buf, obj_buf_sz); + if (!p_obj_buf) free(obj_buf); else diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h index 23b9a743fe72..9f501cef06a1 100644 --- a/tools/perf/util/llvm-utils.h +++ b/tools/perf/util/llvm-utils.h @@ -30,6 +30,11 @@ struct llvm_param { */ const char *kbuild_opts; /* + * Default is false. If set to true, write compiling result + * to object file. + */ + bool dump_obj; + /* * Default is false. If one of the above fields is set by user * explicitly then user_set_llvm is set to true. This is used * for perf test. If user doesn't set anything in .perfconfig diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 80b9b6a87990..cb6388dbdd98 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -32,6 +32,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->threads = RB_ROOT; pthread_rwlock_init(&machine->threads_lock, NULL); + machine->nr_threads = 0; INIT_LIST_HEAD(&machine->dead_threads); machine->last_match = NULL; @@ -42,6 +43,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->symbol_filter = NULL; machine->id_hdr_size = 0; + machine->kptr_restrict_warned = false; machine->comm_exec = false; machine->kernel_start = 0; @@ -136,8 +138,10 @@ void machine__exit(struct machine *machine) void machine__delete(struct machine *machine) { - machine__exit(machine); - free(machine); + if (machine) { + machine__exit(machine); + free(machine); + } } void machines__init(struct machines *machines) @@ -361,7 +365,7 @@ out_err: } /* - * Caller must eventually drop thread->refcnt returned with a successfull + * Caller must eventually drop thread->refcnt returned with a successful * lookup/new thread inserted. */ static struct thread *____machine__findnew_thread(struct machine *machine, @@ -430,6 +434,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, */ thread__get(th); machine->last_match = th; + ++machine->nr_threads; } return th; @@ -681,11 +686,13 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) size_t machine__fprintf(struct machine *machine, FILE *fp) { - size_t ret = 0; + size_t ret; struct rb_node *nd; pthread_rwlock_rdlock(&machine->threads_lock); + ret = fprintf(fp, "Threads: %u\n", machine->nr_threads); + for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); @@ -705,7 +712,7 @@ static struct dso *machine__get_kernel(struct machine *machine) if (machine__is_host(machine)) { vmlinux_name = symbol_conf.vmlinux_name; if (!vmlinux_name) - vmlinux_name = "[kernel.kallsyms]"; + vmlinux_name = DSO__NAME_KALLSYMS; kernel = machine__findnew_kernel(machine, vmlinux_name, "[kernel]", DSO_TYPE_KERNEL); @@ -908,11 +915,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) return machine__create_kernel_maps(machine); } -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, symbol_filter_t filter) +int __machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type, bool no_kcore, symbol_filter_t filter) { struct map *map = machine__kernel_map(machine); - int ret = dso__load_kallsyms(map->dso, filename, map, filter); + int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore, filter); if (ret > 0) { dso__set_loaded(map->dso, type); @@ -927,6 +934,12 @@ int machine__load_kallsyms(struct machine *machine, const char *filename, return ret; } +int machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type, symbol_filter_t filter) +{ + return __machine__load_kallsyms(machine, filename, type, false, filter); +} + int machine__load_vmlinux_path(struct machine *machine, enum map_type type, symbol_filter_t filter) { @@ -1080,12 +1093,20 @@ static int machine__set_modules_path(struct machine *machine) return map_groups__set_modules_path_dir(&machine->kmaps, modules_path, 0); } +int __weak arch__fix_module_text_start(u64 *start __maybe_unused, + const char *name __maybe_unused) +{ + return 0; +} static int machine__create_module(void *arg, const char *name, u64 start) { struct machine *machine = arg; struct map *map; + if (arch__fix_module_text_start(&start, name) < 0) + return -1; + map = machine__findnew_module_map(machine, start, name); if (map == NULL) return -1; @@ -1125,10 +1146,10 @@ int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); const char *name; - u64 addr = machine__get_running_kernel_start(machine, &name); + u64 addr; int ret; - if (!addr || kernel == NULL) + if (kernel == NULL) return -1; ret = __machine__create_kernel_maps(machine, kernel); @@ -1150,8 +1171,9 @@ int machine__create_kernel_maps(struct machine *machine) */ map_groups__fixup_end(&machine->kmaps); - if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, - addr)) { + addr = machine__get_running_kernel_start(machine, &name); + if (!addr) { + } else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { machine__destroy_kernel_maps(machine); return -1; } @@ -1341,11 +1363,16 @@ int machine__process_mmap2_event(struct machine *machine, if (map == NULL) goto out_problem_map; - thread__insert_map(thread, map); + ret = thread__insert_map(thread, map); + if (ret) + goto out_problem_insert; + thread__put(thread); map__put(map); return 0; +out_problem_insert: + map__put(map); out_problem_map: thread__put(thread); out_problem: @@ -1391,11 +1418,16 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event if (map == NULL) goto out_problem_map; - thread__insert_map(thread, map); + ret = thread__insert_map(thread, map); + if (ret) + goto out_problem_insert; + thread__put(thread); map__put(map); return 0; +out_problem_insert: + map__put(map); out_problem_map: thread__put(thread); out_problem: @@ -1413,6 +1445,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, pthread_rwlock_wrlock(&machine->threads_lock); rb_erase_init(&th->rb_node, &machine->threads); RB_CLEAR_NODE(&th->rb_node); + --machine->nr_threads; /* * Move it first to the dead_threads list, then drop the reference, * if this is the last reference, then the thread__delete destructor @@ -1599,6 +1632,7 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, } static int add_callchain_ip(struct thread *thread, + struct callchain_cursor *cursor, struct symbol **parent, struct addr_location *root_al, u8 *cpumode, @@ -1630,7 +1664,7 @@ static int add_callchain_ip(struct thread *thread, * It seems the callchain is corrupted. * Discard all. */ - callchain_cursor_reset(&callchain_cursor); + callchain_cursor_reset(cursor); return 1; } return 0; @@ -1640,7 +1674,7 @@ static int add_callchain_ip(struct thread *thread, } if (al.sym != NULL) { - if (sort__has_parent && !*parent && + if (perf_hpp_list.parent && !*parent && symbol__match_regex(al.sym, &parent_regex)) *parent = al.sym; else if (have_ignore_callees && root_al && @@ -1648,13 +1682,13 @@ static int add_callchain_ip(struct thread *thread, /* Treat this symbol as the root, forgetting its callees. */ *root_al = al; - callchain_cursor_reset(&callchain_cursor); + callchain_cursor_reset(cursor); } } if (symbol_conf.hide_unresolved && al.sym == NULL) return 0; - return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym); + return callchain_cursor_append(cursor, al.addr, al.map, al.sym); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -1724,6 +1758,7 @@ static int remove_loops(struct branch_entry *l, int nr) * negative error code on other errors. */ static int resolve_lbr_callchain_sample(struct thread *thread, + struct callchain_cursor *cursor, struct perf_sample *sample, struct symbol **parent, struct addr_location *root_al, @@ -1756,11 +1791,6 @@ static int resolve_lbr_callchain_sample(struct thread *thread, */ int mix_chain_nr = i + 1 + lbr_nr + 1; - if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) { - pr_warning("corrupted callchain. skipping...\n"); - return 0; - } - for (j = 0; j < mix_chain_nr; j++) { if (callchain_param.order == ORDER_CALLEE) { if (j < i + 1) @@ -1778,7 +1808,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ip = lbr_stack->entries[0].to; } - err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); if (err) return (err < 0) ? err : 0; } @@ -1789,6 +1819,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, } static int thread__resolve_callchain_sample(struct thread *thread, + struct callchain_cursor *cursor, struct perf_evsel *evsel, struct perf_sample *sample, struct symbol **parent, @@ -1797,16 +1828,14 @@ static int thread__resolve_callchain_sample(struct thread *thread, { struct branch_stack *branch = sample->branch_stack; struct ip_callchain *chain = sample->callchain; - int chain_nr = min(max_stack, (int)chain->nr); + int chain_nr = chain->nr; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err; + int i, j, err, nr_entries; int skip_idx = -1; int first_call = 0; - callchain_cursor_reset(&callchain_cursor); - - if (has_branch_callstack(evsel)) { - err = resolve_lbr_callchain_sample(thread, sample, parent, + if (perf_evsel__has_branch_callstack(evsel)) { + err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, root_al, max_stack); if (err) return (err < 0) ? err : 0; @@ -1816,8 +1845,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. */ - if (chain->nr < PERF_MAX_STACK_DEPTH) - skip_idx = arch_skip_callchain_idx(thread, chain); + skip_idx = arch_skip_callchain_idx(thread, chain); /* * Add branches to call stack for easier browsing. This gives @@ -1863,10 +1891,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, nr = remove_loops(be, nr); for (i = 0; i < nr; i++) { - err = add_callchain_ip(thread, parent, root_al, + err = add_callchain_ip(thread, cursor, parent, root_al, NULL, be[i].to); if (!err) - err = add_callchain_ip(thread, parent, root_al, + err = add_callchain_ip(thread, cursor, parent, root_al, NULL, be[i].from); if (err == -EINVAL) break; @@ -1877,12 +1905,8 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: - if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) { - pr_warning("corrupted callchain. skipping...\n"); - return 0; - } - - for (i = first_call; i < chain_nr; i++) { + for (i = first_call, nr_entries = 0; + i < chain_nr && nr_entries < max_stack; i++) { u64 ip; if (callchain_param.order == ORDER_CALLEE) @@ -1896,7 +1920,10 @@ check_calls: #endif ip = chain->ips[j]; - err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); + if (ip < PERF_CONTEXT_MAX) + ++nr_entries; + + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); if (err) return (err < 0) ? err : 0; @@ -1915,19 +1942,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) entry->map, entry->sym); } -int thread__resolve_callchain(struct thread *thread, - struct perf_evsel *evsel, - struct perf_sample *sample, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) +static int thread__resolve_callchain_unwind(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_evsel *evsel, + struct perf_sample *sample, + int max_stack) { - int ret = thread__resolve_callchain_sample(thread, evsel, - sample, parent, - root_al, max_stack); - if (ret) - return ret; - /* Can we do dwarf post unwind? */ if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) && (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))) @@ -1938,9 +1958,45 @@ int thread__resolve_callchain(struct thread *thread, (!sample->user_stack.size)) return 0; - return unwind__get_entries(unwind_entry, &callchain_cursor, + return unwind__get_entries(unwind_entry, cursor, thread, sample, max_stack); +} +int thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) +{ + int ret = 0; + + callchain_cursor_reset(&callchain_cursor); + + if (callchain_param.order == ORDER_CALLEE) { + ret = thread__resolve_callchain_sample(thread, cursor, + evsel, sample, + parent, root_al, + max_stack); + if (ret) + return ret; + ret = thread__resolve_callchain_unwind(thread, cursor, + evsel, sample, + max_stack); + } else { + ret = thread__resolve_callchain_unwind(thread, cursor, + evsel, sample, + max_stack); + if (ret) + return ret; + ret = thread__resolve_callchain_sample(thread, cursor, + evsel, sample, + parent, root_al, + max_stack); + } + + return ret; } int machine__for_each_thread(struct machine *machine, diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 8499db281158..20739f746bc4 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -28,9 +28,11 @@ struct machine { pid_t pid; u16 id_hdr_size; bool comm_exec; + bool kptr_restrict_warned; char *root_dir; struct rb_root threads; pthread_rwlock_t threads_lock; + unsigned int nr_threads; struct list_head dead_threads; struct thread *last_match; struct vdso_info *vdso_info; @@ -141,7 +143,11 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, struct addr_location *al); struct mem_info *sample__resolve_mem(struct perf_sample *sample, struct addr_location *al); + +struct callchain_cursor; + int thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, struct perf_evsel *evsel, struct perf_sample *sample, struct symbol **parent, @@ -210,7 +216,10 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine, struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); +int arch__fix_module_text_start(u64 *start, const char *name); +int __machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type, bool no_kcore, symbol_filter_t filter); int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type, symbol_filter_t filter); int machine__load_vmlinux_path(struct machine *machine, enum map_type type, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 171b6d10a04b..728129ac653a 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -15,6 +15,7 @@ #include "debug.h" #include "machine.h" #include <linux/string.h> +#include "unwind.h" static void __maps__insert(struct maps *maps, struct map *map); @@ -289,7 +290,7 @@ int map__load(struct map *map, symbol_filter_t filter) nr = dso__load(map->dso, map, filter); if (nr < 0) { if (map->dso->has_build_id) { - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; build_id__sprintf(map->dso->build_id, sizeof(map->dso->build_id), @@ -311,6 +312,9 @@ int map__load(struct map *map, symbol_filter_t filter) pr_warning("%.*s was updated (is prelink enabled?). " "Restart the long running apps that use it!\n", (int)real_len, name); + } else if (filter) { + pr_warning("no symbols passed the given filter.\n"); + return -2; /* Empty but maybe by the filter */ } else { pr_warning("no symbols found in %s, maybe install " "a debug package?\n", name); @@ -431,6 +435,13 @@ u64 map__rip_2objdump(struct map *map, u64 rip) if (map->dso->rel) return rip - map->pgoff; + /* + * kernel modules also have DSO_TYPE_USER in dso->kernel, + * but all kernel modules are ET_REL, so won't get here. + */ + if (map->dso->kernel == DSO_TYPE_USER) + return rip + map->dso->text_offset; + return map->unmap_ip(map, rip) - map->reloc; } @@ -454,6 +465,13 @@ u64 map__objdump_2mem(struct map *map, u64 ip) if (map->dso->rel) return map->unmap_ip(map, ip + map->pgoff); + /* + * kernel modules also have DSO_TYPE_USER in dso->kernel, + * but all kernel modules are ET_REL, so won't get here. + */ + if (map->dso->kernel == DSO_TYPE_USER) + return map->unmap_ip(map, ip - map->dso->text_offset); + return ip + map->reloc; } @@ -730,9 +748,10 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, /* * XXX This should not really _copy_ te maps, but refcount them. */ -int map_groups__clone(struct map_groups *mg, +int map_groups__clone(struct thread *thread, struct map_groups *parent, enum map_type type) { + struct map_groups *mg = thread->mg; int err = -ENOMEM; struct map *map; struct maps *maps = &parent->maps[type]; @@ -743,6 +762,11 @@ int map_groups__clone(struct map_groups *mg, struct map *new = map__clone(map); if (new == NULL) goto out_unlock; + + err = unwind__prepare_access(thread, new, NULL); + if (err) + goto out_unlock; + map_groups__insert(mg, new); map__put(new); } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 7309d64ce39e..d83396ceecba 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -194,7 +194,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp, symbol_filter_t filter); void map_groups__init(struct map_groups *mg, struct machine *machine); void map_groups__exit(struct map_groups *mg); -int map_groups__clone(struct map_groups *mg, +int map_groups__clone(struct thread *thread, struct map_groups *parent, enum map_type type); size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 75465f89a413..bbc368e7d1e4 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -10,18 +10,33 @@ #include "debug.h" #include "symbol.h" +unsigned int perf_mem_events__loads_ldlat = 30; + #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { - E("ldlat-loads", "cpu/mem-loads,ldlat=30/P", "mem-loads"), + E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"), E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"), }; #undef E #undef E +static char mem_loads_name[100]; +static bool mem_loads_name__init; + char *perf_mem_events__name(int i) { + if (i == PERF_MEM_EVENTS__LOAD) { + if (!mem_loads_name__init) { + mem_loads_name__init = true; + scnprintf(mem_loads_name, sizeof(mem_loads_name), + perf_mem_events[i].name, + perf_mem_events__loads_ldlat); + } + return mem_loads_name; + } + return (char *)perf_mem_events[i].name; } diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 5d6d93066a6e..7f69bf9d789d 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -18,6 +18,7 @@ enum { }; extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; +extern unsigned int perf_mem_events__loads_ldlat; int perf_mem_events__parse(const char *str); int perf_mem_events__init(void); diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index b1b9e2385f4b..fe84df1875aa 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -308,3 +308,12 @@ void ordered_events__free(struct ordered_events *oe) free(event); } } + +void ordered_events__reinit(struct ordered_events *oe) +{ + ordered_events__deliver_t old_deliver = oe->deliver; + + ordered_events__free(oe); + memset(oe, '\0', sizeof(*oe)); + ordered_events__init(oe, old_deliver); +} diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index f403991e3bfd..e11468a9a6e4 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -49,6 +49,7 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); void ordered_events__free(struct ordered_events *oe); +void ordered_events__reinit(struct ordered_events *oe); static inline void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index bcbc983d4b12..6c913c3914fb 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -20,6 +20,7 @@ #include "pmu.h" #include "thread_map.h" #include "cpumap.h" +#include "probe-file.h" #include "asm/bug.h" #define MAX_NAME_LEN 100 @@ -436,7 +437,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, } static void tracepoint_error(struct parse_events_error *e, int err, - char *sys, char *name) + const char *sys, const char *name) { char help[BUFSIZ]; @@ -466,7 +467,7 @@ static void tracepoint_error(struct parse_events_error *e, int err, } static int add_tracepoint(struct list_head *list, int *idx, - char *sys_name, char *evt_name, + const char *sys_name, const char *evt_name, struct parse_events_error *err, struct list_head *head_config) { @@ -491,7 +492,7 @@ static int add_tracepoint(struct list_head *list, int *idx, } static int add_tracepoint_multi_event(struct list_head *list, int *idx, - char *sys_name, char *evt_name, + const char *sys_name, const char *evt_name, struct parse_events_error *err, struct list_head *head_config) { @@ -533,7 +534,7 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, } static int add_tracepoint_event(struct list_head *list, int *idx, - char *sys_name, char *evt_name, + const char *sys_name, const char *evt_name, struct parse_events_error *err, struct list_head *head_config) { @@ -545,7 +546,7 @@ static int add_tracepoint_event(struct list_head *list, int *idx, } static int add_tracepoint_multi_sys(struct list_head *list, int *idx, - char *sys_name, char *evt_name, + const char *sys_name, const char *evt_name, struct parse_events_error *err, struct list_head *head_config) { @@ -584,7 +585,7 @@ struct __add_bpf_event_param { struct list_head *head_config; }; -static int add_bpf_event(struct probe_trace_event *tev, int fd, +static int add_bpf_event(const char *group, const char *event, int fd, void *_param) { LIST_HEAD(new_evsels); @@ -595,27 +596,27 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd, int err; pr_debug("add bpf event %s:%s and attach bpf program %d\n", - tev->group, tev->event, fd); + group, event, fd); - err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group, - tev->event, evlist->error, + err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, group, + event, evlist->error, param->head_config); if (err) { struct perf_evsel *evsel, *tmp; pr_debug("Failed to add BPF event %s:%s\n", - tev->group, tev->event); + group, event); list_for_each_entry_safe(evsel, tmp, &new_evsels, node) { list_del(&evsel->node); perf_evsel__delete(evsel); } return err; } - pr_debug("adding %s:%s\n", tev->group, tev->event); + pr_debug("adding %s:%s\n", group, event); list_for_each_entry(pos, &new_evsels, node) { pr_debug("adding %s:%s to %p\n", - tev->group, tev->event, pos); + group, event, pos); pos->bpf_fd = fd; } list_splice(&new_evsels, list); @@ -661,7 +662,7 @@ int parse_events_load_bpf_obj(struct parse_events_evlist *data, goto errout; } - err = bpf__foreach_tev(obj, add_bpf_event, ¶m); + err = bpf__foreach_event(obj, add_bpf_event, ¶m); if (err) { snprintf(errbuf, sizeof(errbuf), "Attach events in BPF object failed"); @@ -900,6 +901,9 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size", [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", + [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", + [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", + [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", }; static bool config_term_shrinked; @@ -992,9 +996,18 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NOINHERIT: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_OVERWRITE: + CHECK_TYPE_VAL(NUM); + break; + case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: + CHECK_TYPE_VAL(NUM); + break; case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + CHECK_TYPE_VAL(NUM); + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1040,6 +1053,9 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_STACKSIZE: case PARSE_EVENTS__TERM_TYPE_INHERIT: case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + case PARSE_EVENTS__TERM_TYPE_OVERWRITE: + case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: return config_term_common(attr, term, err); default: if (err) { @@ -1109,6 +1125,15 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NOINHERIT: ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1); break; + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); + break; + case PARSE_EVENTS__TERM_TYPE_OVERWRITE: + ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0); + break; + case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: + ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1); + break; default: break; } @@ -1118,7 +1143,7 @@ do { \ } int parse_events_add_tracepoint(struct list_head *list, int *idx, - char *sys, char *event, + const char *sys, const char *event, struct parse_events_error *err, struct list_head *head_config) { @@ -1388,7 +1413,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) if (!add && get_event_modifier(&mod, str, NULL)) return -EINVAL; - __evlist__for_each(list, evsel) { + __evlist__for_each_entry(list, evsel) { if (add && get_event_modifier(&mod, str, evsel)) return -EINVAL; @@ -1414,7 +1439,7 @@ int parse_events_name(struct list_head *list, char *name) { struct perf_evsel *evsel; - __evlist__for_each(list, evsel) { + __evlist__for_each_entry(list, evsel) { if (!evsel->name) evsel->name = strdup(name); } @@ -1649,7 +1674,7 @@ static void parse_events_print_error(struct parse_events_error *err, buf = _buf; - /* We're cutting from the beggining. */ + /* We're cutting from the beginning. */ if (err->idx > max_err_idx) cut = err->idx - max_err_idx; @@ -1976,6 +2001,85 @@ static bool is_event_supported(u8 type, unsigned config) return ret; } +void print_sdt_events(const char *subsys_glob, const char *event_glob, + bool name_only) +{ + struct probe_cache *pcache; + struct probe_cache_entry *ent; + struct strlist *bidlist, *sdtlist; + struct strlist_config cfg = {.dont_dupstr = true}; + struct str_node *nd, *nd2; + char *buf, *path, *ptr = NULL; + bool show_detail = false; + int ret; + + sdtlist = strlist__new(NULL, &cfg); + if (!sdtlist) { + pr_debug("Failed to allocate new strlist for SDT\n"); + return; + } + bidlist = build_id_cache__list_all(true); + if (!bidlist) { + pr_debug("Failed to get buildids: %d\n", errno); + return; + } + strlist__for_each_entry(nd, bidlist) { + pcache = probe_cache__new(nd->s); + if (!pcache) + continue; + list_for_each_entry(ent, &pcache->entries, node) { + if (!ent->sdt) + continue; + if (subsys_glob && + !strglobmatch(ent->pev.group, subsys_glob)) + continue; + if (event_glob && + !strglobmatch(ent->pev.event, event_glob)) + continue; + ret = asprintf(&buf, "%s:%s@%s", ent->pev.group, + ent->pev.event, nd->s); + if (ret > 0) + strlist__add(sdtlist, buf); + } + probe_cache__delete(pcache); + } + strlist__delete(bidlist); + + strlist__for_each_entry(nd, sdtlist) { + buf = strchr(nd->s, '@'); + if (buf) + *(buf++) = '\0'; + if (name_only) { + printf("%s ", nd->s); + continue; + } + nd2 = strlist__next(nd); + if (nd2) { + ptr = strchr(nd2->s, '@'); + if (ptr) + *ptr = '\0'; + if (strcmp(nd->s, nd2->s) == 0) + show_detail = true; + } + if (show_detail) { + path = build_id_cache__origname(buf); + ret = asprintf(&buf, "%s@%s(%.12s)", nd->s, path, buf); + if (ret > 0) { + printf(" %-50s [%s]\n", buf, "SDT event"); + free(buf); + } + } else + printf(" %-50s [%s]\n", nd->s, "SDT event"); + if (nd2) { + if (strcmp(nd->s, nd2->s) != 0) + show_detail = false; + if (ptr) + *ptr = '@'; + } + } + strlist__delete(sdtlist); +} + int print_hwcache_events(const char *event_glob, bool name_only) { unsigned int type, op, i, evt_i = 0, evt_num = 0; @@ -2158,6 +2262,8 @@ void print_events(const char *event_glob, bool name_only) } print_tracepoint_events(NULL, NULL, name_only); + + print_sdt_events(NULL, NULL, name_only); } int parse_events__is_hardcoded_term(struct parse_events_term *term) @@ -2322,9 +2428,9 @@ static void config_terms_list(char *buf, size_t buf_sz) char *parse_events_formats_error_string(char *additional_terms) { char *str; - /* "branch_type" is the longest name */ + /* "no-overwrite" is the longest name */ char static_terms[__PARSE_EVENTS__TERM_TYPE_NR * - (sizeof("branch_type") - 1)]; + (sizeof("no-overwrite") - 1)]; config_terms_list(static_terms, sizeof(static_terms)); /* valid terms */ diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d740c3ca9a1d..d1edbf8cc66a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -68,6 +68,9 @@ enum { PARSE_EVENTS__TERM_TYPE_STACKSIZE, PARSE_EVENTS__TERM_TYPE_NOINHERIT, PARSE_EVENTS__TERM_TYPE_INHERIT, + PARSE_EVENTS__TERM_TYPE_MAX_STACK, + PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, + PARSE_EVENTS__TERM_TYPE_OVERWRITE, __PARSE_EVENTS__TERM_TYPE_NR, }; @@ -133,7 +136,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, - char *sys, char *event, + const char *sys, const char *event, struct parse_events_error *error, struct list_head *head_config); int parse_events_load_bpf(struct parse_events_evlist *data, @@ -182,6 +185,8 @@ void print_symbol_events(const char *event_glob, unsigned type, void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); +void print_sdt_events(const char *subsys_glob, const char *event_glob, + bool name_only); int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 1477fbc78993..7a2519435da0 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -199,8 +199,11 @@ branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } +max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); } inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } +overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } +no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } @@ -259,6 +262,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); } cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } +topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } L1-dcache|l1-d|l1d|L1-data | L1-icache|l1-i|l1i|L1-instruction | diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 3bf6bf82ff2d..7c7630be5a89 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -11,17 +11,13 @@ * which is what it's designed for. */ #include "cache.h" +#include "util.h" +#include <limits.h> static char bad_path[] = "/bad-path/"; /* - * Two hacks: + * One hack: */ - -static const char *get_perf_dir(void) -{ - return "."; -} - static char *get_pathname(void) { static char pathname_array[4][PATH_MAX]; @@ -54,60 +50,3 @@ char *mkpath(const char *fmt, ...) return bad_path; return cleanup_path(pathname); } - -char *perf_path(const char *fmt, ...) -{ - const char *perf_dir = get_perf_dir(); - char *pathname = get_pathname(); - va_list args; - unsigned len; - - len = strlen(perf_dir); - if (len > PATH_MAX-100) - return bad_path; - memcpy(pathname, perf_dir, len); - if (len && perf_dir[len-1] != '/') - pathname[len++] = '/'; - va_start(args, fmt); - len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args); - va_end(args); - if (len >= PATH_MAX) - return bad_path; - return cleanup_path(pathname); -} - -/* strip arbitrary amount of directory separators at end of path */ -static inline int chomp_trailing_dir_sep(const char *path, int len) -{ - while (len && is_dir_sep(path[len - 1])) - len--; - return len; -} - -/* - * If path ends with suffix (complete path components), returns the - * part before suffix (sans trailing directory separators). - * Otherwise returns NULL. - */ -char *strip_path_suffix(const char *path, const char *suffix) -{ - int path_len = strlen(path), suffix_len = strlen(suffix); - - while (suffix_len) { - if (!path_len) - return NULL; - - if (is_dir_sep(path[path_len - 1])) { - if (!is_dir_sep(suffix[suffix_len - 1])) - return NULL; - path_len = chomp_trailing_dir_sep(path, path_len); - suffix_len = chomp_trailing_dir_sep(suffix, suffix_len); - } - else if (path[--path_len] != suffix[--suffix_len]) - return NULL; - } - - if (path_len && !is_dir_sep(path[path_len - 1])) - return NULL; - return strndup(path, chomp_trailing_dir_sep(path, path_len)); -} diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 6b8eb13e14e4..c4023f22f287 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -12,18 +12,18 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) int i, idx = 0; u64 mask = regs->mask; - if (regs->cache_mask & (1 << id)) + if (regs->cache_mask & (1ULL << id)) goto out; - if (!(mask & (1 << id))) + if (!(mask & (1ULL << id))) return -EINVAL; for (i = 0; i < id; i++) { - if (mask & (1 << i)) + if (mask & (1ULL << i)) idx++; } - regs->cache_mask |= (1 << id); + regs->cache_mask |= (1ULL << id); regs->cache_regs[id] = regs->regs[idx]; out: diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index adef23b1352e..ddb0261b2577 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -602,14 +602,13 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - int w; + __u64 w = 0; + int fbit; - w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); - if (!w) - return 0; - if (w < 64) - return (1ULL << w) - 1; - return -1; + for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) + w |= (1ULL << fbit); + + return w; } /* @@ -644,20 +643,20 @@ static int pmu_resolve_param_term(struct parse_events_term *term, static char *pmu_formats_string(struct list_head *formats) { struct perf_pmu_format *format; - char *str; - struct strbuf buf; + char *str = NULL; + struct strbuf buf = STRBUF_INIT; unsigned i = 0; if (!formats) return NULL; - strbuf_init(&buf, 0); /* sysfs exported terms */ list_for_each_entry(format, formats, list) - strbuf_addf(&buf, i++ ? ",%s" : "%s", - format->name); + if (strbuf_addf(&buf, i++ ? ",%s" : "%s", format->name) < 0) + goto error; str = strbuf_detach(&buf, NULL); +error: strbuf_release(&buf); return str; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8319fbb08636..28733962cd80 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -67,7 +67,6 @@ int e_snprintf(char *str, size_t size, const char *format, ...) return ret; } -static char *synthesize_perf_probe_point(struct perf_probe_point *pp); static struct machine *host_machine; /* Initialize symbol maps and path of vmlinux/modules */ @@ -103,10 +102,8 @@ out: void exit_probe_symbol_maps(void) { - if (host_machine) { - machine__delete(host_machine); - host_machine = NULL; - } + machine__delete(host_machine); + host_machine = NULL; symbol__exit(); } @@ -173,15 +170,17 @@ static struct map *kernel_get_module_map(const char *module) module = "kernel"; for (pos = maps__first(maps); pos; pos = map__next(pos)) { + /* short_name is "[module]" */ if (strncmp(pos->dso->short_name + 1, module, - pos->dso->short_name_len - 2) == 0) { + pos->dso->short_name_len - 2) == 0 && + module[pos->dso->short_name_len - 2] == '\0') { return pos; } } return NULL; } -static struct map *get_target_map(const char *target, bool user) +struct map *get_target_map(const char *target, bool user) { /* Init maps of given executable or kernel */ if (user) @@ -265,6 +264,65 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address) return true; } +/* + * NOTE: + * '.gnu.linkonce.this_module' section of kernel module elf directly + * maps to 'struct module' from linux/module.h. This section contains + * actual module name which will be used by kernel after loading it. + * But, we cannot use 'struct module' here since linux/module.h is not + * exposed to user-space. Offset of 'name' has remained same from long + * time, so hardcoding it here. + */ +#ifdef __LP64__ +#define MOD_NAME_OFFSET 24 +#else +#define MOD_NAME_OFFSET 12 +#endif + +/* + * @module can be module name of module file path. In case of path, + * inspect elf and find out what is actual module name. + * Caller has to free mod_name after using it. + */ +static char *find_module_name(const char *module) +{ + int fd; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + char *mod_name = NULL; + + fd = open(module, O_RDONLY); + if (fd < 0) + return NULL; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + goto elf_err; + + if (gelf_getehdr(elf, &ehdr) == NULL) + goto ret_err; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.linkonce.this_module", NULL); + if (!sec) + goto ret_err; + + data = elf_getdata(sec, NULL); + if (!data || !data->d_buf) + goto ret_err; + + mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET); + +ret_err: + elf_end(elf); +elf_err: + close(fd); + return mod_name; +} + #ifdef HAVE_DWARF_SUPPORT static int kernel_get_module_dso(const char *module, struct dso **pdso) @@ -329,7 +387,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, if (uprobes) address = sym->start; else - address = map->unmap_ip(map, sym->start); + address = map->unmap_ip(map, sym->start) - map->reloc; break; } if (!address) { @@ -412,7 +470,7 @@ static struct debuginfo *open_debuginfo(const char *module, bool silent) err = kernel_get_module_dso(module, &dso); if (err < 0) { if (!dso || dso->load_errno == 0) { - if (!strerror_r(-err, reason, STRERR_BUFSIZE)) + if (!str_error_r(-err, reason, STRERR_BUFSIZE)) strcpy(reason, "(unknown)"); } else dso__strerror_load(dso, reason, STRERR_BUFSIZE); @@ -486,8 +544,10 @@ static int get_text_start_address(const char *exec, unsigned long *address) return -errno; elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) - return -EINVAL; + if (elf == NULL) { + ret = -EINVAL; + goto out_close; + } if (gelf_getehdr(elf, &ehdr) == NULL) goto out; @@ -499,6 +559,9 @@ static int get_text_start_address(const char *exec, unsigned long *address) ret = 0; out: elf_end(elf); +out_close: + close(fd); + return ret; } @@ -583,51 +646,34 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *module) { int i, ret = 0; - char *tmp; + char *mod_name = NULL; if (!module) return 0; - tmp = strrchr(module, '/'); - if (tmp) { - /* This is a module path -- get the module name */ - module = strdup(tmp + 1); - if (!module) - return -ENOMEM; - tmp = strchr(module, '.'); - if (tmp) - *tmp = '\0'; - tmp = (char *)module; /* For free() */ - } + mod_name = find_module_name(module); for (i = 0; i < ntevs; i++) { - tevs[i].point.module = strdup(module); + tevs[i].point.module = + strdup(mod_name ? mod_name : module); if (!tevs[i].point.module) { ret = -ENOMEM; break; } } - free(tmp); + free(mod_name); return ret; } -/* Post processing the probe events */ -static int post_process_probe_trace_events(struct probe_trace_event *tevs, - int ntevs, const char *module, - bool uprobe) +static int +post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, + int ntevs) { struct ref_reloc_sym *reloc_sym; char *tmp; int i, skipped = 0; - if (uprobe) - return add_exec_to_probe_trace_events(tevs, ntevs, module); - - /* Note that currently ref_reloc_sym based probe is not for drivers */ - if (module) - return add_module_to_probe_trace_events(tevs, ntevs, module); - reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); @@ -659,6 +705,34 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs, return skipped; } +void __weak +arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused, + int ntevs __maybe_unused) +{ +} + +/* Post processing the probe events */ +static int post_process_probe_trace_events(struct perf_probe_event *pev, + struct probe_trace_event *tevs, + int ntevs, const char *module, + bool uprobe) +{ + int ret; + + if (uprobe) + ret = add_exec_to_probe_trace_events(tevs, ntevs, module); + else if (module) + /* Currently ref_reloc_sym based probe is not for drivers */ + ret = add_module_to_probe_trace_events(tevs, ntevs, module); + else + ret = post_process_kernel_probe_trace_events(tevs, ntevs); + + if (ret >= 0) + arch__post_process_probe_trace_events(pev, ntevs); + + return ret; +} + /* Try to find perf_probe_event with debuginfo */ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event **tevs) @@ -697,7 +771,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, if (ntevs > 0) { /* Succeeded to find trace events */ pr_debug("Found %d probe_trace_events.\n", ntevs); - ret = post_process_probe_trace_events(*tevs, ntevs, + ret = post_process_probe_trace_events(pev, *tevs, ntevs, pev->target, pev->uprobes); if (ret < 0 || ret == ntevs) { clear_probe_trace_events(*tevs, ntevs); @@ -754,7 +828,7 @@ static int __show_one_line(FILE *fp, int l, bool skip, bool show_num) error: if (ferror(fp)) { pr_warning("File read error: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); return -1; } return 0; @@ -834,7 +908,7 @@ static int __show_line_range(struct line_range *lr, const char *module, fp = fopen(lr->path, "r"); if (fp == NULL) { pr_warning("Failed to open %s: %s\n", lr->path, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); return -errno; } /* Skip to starting line number */ @@ -844,7 +918,7 @@ static int __show_line_range(struct line_range *lr, const char *module, goto end; } - intlist__for_each(ln, lr->line_list) { + intlist__for_each_entry(ln, lr->line_list) { for (; ln->i > l; l++) { ret = show_one_line(fp, l - lr->offset); if (ret < 0) @@ -928,7 +1002,7 @@ static int show_available_vars_at(struct debuginfo *dinfo, zfree(&vl->point.symbol); nvars = 0; if (vl->vars) { - strlist__for_each(node, vl->vars) { + strlist__for_each_entry(node, vl->vars) { var = strchr(node->s, '\t') + 1; if (strfilter__compare(_filter, var)) { fprintf(stdout, "\t\t%s\n", node->s); @@ -1145,6 +1219,34 @@ err: return err; } +static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) +{ + char *ptr; + + ptr = strchr(*arg, ':'); + if (ptr) { + *ptr = '\0'; + if (!pev->sdt && !is_c_func_name(*arg)) + goto ng_name; + pev->group = strdup(*arg); + if (!pev->group) + return -ENOMEM; + *arg = ptr + 1; + } else + pev->group = NULL; + if (!pev->sdt && !is_c_func_name(*arg)) { +ng_name: + semantic_error("%s is bad for event name -it must " + "follow C symbol-naming rule.\n", *arg); + return -EINVAL; + } + pev->event = strdup(*arg); + if (pev->event == NULL) + return -ENOMEM; + + return 0; +} + /* Parse probepoint definition. */ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) { @@ -1152,33 +1254,64 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) char *ptr, *tmp; char c, nc = 0; bool file_spec = false; + int ret; + /* * <Syntax> - * perf probe [EVENT=]SRC[:LN|;PTN] - * perf probe [EVENT=]FUNC[@SRC][+OFFS|%return|:LN|;PAT] - * - * TODO:Group name support + * perf probe [GRP:][EVENT=]SRC[:LN|;PTN] + * perf probe [GRP:][EVENT=]FUNC[@SRC][+OFFS|%return|:LN|;PAT] + * perf probe %[GRP:]SDT_EVENT */ if (!arg) return -EINVAL; + /* + * If the probe point starts with '%', + * or starts with "sdt_" and has a ':' but no '=', + * then it should be a SDT/cached probe point. + */ + if (arg[0] == '%' || + (!strncmp(arg, "sdt_", 4) && + !!strchr(arg, ':') && !strchr(arg, '='))) { + pev->sdt = true; + if (arg[0] == '%') + arg++; + } + ptr = strpbrk(arg, ";=@+%"); + if (pev->sdt) { + if (ptr) { + if (*ptr != '@') { + semantic_error("%s must be an SDT name.\n", + arg); + return -EINVAL; + } + /* This must be a target file name or build id */ + tmp = build_id_cache__complement(ptr + 1); + if (tmp) { + pev->target = build_id_cache__origname(tmp); + free(tmp); + } else + pev->target = strdup(ptr + 1); + if (!pev->target) + return -ENOMEM; + *ptr = '\0'; + } + ret = parse_perf_probe_event_name(&arg, pev); + if (ret == 0) { + if (asprintf(&pev->point.function, "%%%s", pev->event) < 0) + ret = -errno; + } + return ret; + } + if (ptr && *ptr == '=') { /* Event name */ *ptr = '\0'; tmp = ptr + 1; - if (strchr(arg, ':')) { - semantic_error("Group name is not supported yet.\n"); - return -ENOTSUP; - } - if (!is_c_func_name(arg)) { - semantic_error("%s is bad for event name -it must " - "follow C symbol-naming rule.\n", arg); - return -EINVAL; - } - pev->event = strdup(arg); - if (pev->event == NULL) - return -ENOMEM; - pev->group = NULL; + ret = parse_perf_probe_event_name(&arg, pev); + if (ret < 0) + return ret; + arg = tmp; } @@ -1490,7 +1623,9 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev) return true; for (i = 0; i < pev->nargs; i++) - if (is_c_varname(pev->args[i].var)) + if (is_c_varname(pev->args[i].var) || + !strcmp(pev->args[i].var, "$params") || + !strcmp(pev->args[i].var, "$vars")) return true; return false; @@ -1548,6 +1683,11 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) p = strchr(argv[1], ':'); if (p) { tp->module = strndup(argv[1], p - argv[1]); + if (!tp->module) { + ret = -ENOMEM; + goto out; + } + tev->uprobes = (tp->module[0] == '/'); p++; } else p = argv[1]; @@ -1618,69 +1758,65 @@ out: } /* Compose only probe arg */ -int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len) +char *synthesize_perf_probe_arg(struct perf_probe_arg *pa) { struct perf_probe_arg_field *field = pa->field; - int ret; - char *tmp = buf; + struct strbuf buf; + char *ret = NULL; + int err; + + if (strbuf_init(&buf, 64) < 0) + return NULL; if (pa->name && pa->var) - ret = e_snprintf(tmp, len, "%s=%s", pa->name, pa->var); + err = strbuf_addf(&buf, "%s=%s", pa->name, pa->var); else - ret = e_snprintf(tmp, len, "%s", pa->name ? pa->name : pa->var); - if (ret <= 0) - goto error; - tmp += ret; - len -= ret; + err = strbuf_addstr(&buf, pa->name ?: pa->var); + if (err) + goto out; while (field) { if (field->name[0] == '[') - ret = e_snprintf(tmp, len, "%s", field->name); + err = strbuf_addstr(&buf, field->name); else - ret = e_snprintf(tmp, len, "%s%s", - field->ref ? "->" : ".", field->name); - if (ret <= 0) - goto error; - tmp += ret; - len -= ret; + err = strbuf_addf(&buf, "%s%s", field->ref ? "->" : ".", + field->name); field = field->next; + if (err) + goto out; } - if (pa->type) { - ret = e_snprintf(tmp, len, ":%s", pa->type); - if (ret <= 0) - goto error; - tmp += ret; - len -= ret; - } + if (pa->type) + if (strbuf_addf(&buf, ":%s", pa->type) < 0) + goto out; - return tmp - buf; -error: - pr_debug("Failed to synthesize perf probe argument: %d\n", ret); + ret = strbuf_detach(&buf, NULL); +out: + strbuf_release(&buf); return ret; } /* Compose only probe point (not argument) */ -static char *synthesize_perf_probe_point(struct perf_probe_point *pp) +char *synthesize_perf_probe_point(struct perf_probe_point *pp) { - char *buf, *tmp; - char offs[32] = "", line[32] = "", file[32] = ""; - int ret, len; + struct strbuf buf; + char *tmp, *ret = NULL; + int len, err = 0; - buf = zalloc(MAX_CMDLEN); - if (buf == NULL) { - ret = -ENOMEM; - goto error; - } - if (pp->offset) { - ret = e_snprintf(offs, 32, "+%lu", pp->offset); - if (ret <= 0) - goto error; - } - if (pp->line) { - ret = e_snprintf(line, 32, ":%d", pp->line); - if (ret <= 0) - goto error; + if (strbuf_init(&buf, 64) < 0) + return NULL; + + if (pp->function) { + if (strbuf_addstr(&buf, pp->function) < 0) + goto out; + if (pp->offset) + err = strbuf_addf(&buf, "+%lu", pp->offset); + else if (pp->line) + err = strbuf_addf(&buf, ":%d", pp->line); + else if (pp->retprobe) + err = strbuf_addstr(&buf, "%return"); + if (err) + goto out; } if (pp->file) { tmp = pp->file; @@ -1689,92 +1825,75 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp) tmp = strchr(pp->file + len - 30, '/'); tmp = tmp ? tmp + 1 : pp->file + len - 30; } - ret = e_snprintf(file, 32, "@%s", tmp); - if (ret <= 0) - goto error; + err = strbuf_addf(&buf, "@%s", tmp); + if (!err && !pp->function && pp->line) + err = strbuf_addf(&buf, ":%d", pp->line); } - - if (pp->function) - ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s%s", pp->function, - offs, pp->retprobe ? "%return" : "", line, - file); - else - ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", file, line); - if (ret <= 0) - goto error; - - return buf; -error: - pr_debug("Failed to synthesize perf probe point: %d\n", ret); - free(buf); - return NULL; + if (!err) + ret = strbuf_detach(&buf, NULL); +out: + strbuf_release(&buf); + return ret; } -#if 0 char *synthesize_perf_probe_command(struct perf_probe_event *pev) { - char *buf; - int i, len, ret; + struct strbuf buf; + char *tmp, *ret = NULL; + int i; - buf = synthesize_perf_probe_point(&pev->point); - if (!buf) + if (strbuf_init(&buf, 64)) return NULL; + if (pev->event) + if (strbuf_addf(&buf, "%s:%s=", pev->group ?: PERFPROBE_GROUP, + pev->event) < 0) + goto out; + + tmp = synthesize_perf_probe_point(&pev->point); + if (!tmp || strbuf_addstr(&buf, tmp) < 0) + goto out; + free(tmp); - len = strlen(buf); for (i = 0; i < pev->nargs; i++) { - ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s", - pev->args[i].name); - if (ret <= 0) { - free(buf); - return NULL; - } - len += ret; + tmp = synthesize_perf_probe_arg(pev->args + i); + if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0) + goto out; + free(tmp); } - return buf; + ret = strbuf_detach(&buf, NULL); +out: + strbuf_release(&buf); + return ret; } -#endif static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref, - char **buf, size_t *buflen, - int depth) + struct strbuf *buf, int depth) { - int ret; + int err; if (ref->next) { depth = __synthesize_probe_trace_arg_ref(ref->next, buf, - buflen, depth + 1); + depth + 1); if (depth < 0) - goto out; - } - - ret = e_snprintf(*buf, *buflen, "%+ld(", ref->offset); - if (ret < 0) - depth = ret; - else { - *buf += ret; - *buflen -= ret; + return depth; } -out: - return depth; - + err = strbuf_addf(buf, "%+ld(", ref->offset); + return (err < 0) ? err : depth; } static int synthesize_probe_trace_arg(struct probe_trace_arg *arg, - char *buf, size_t buflen) + struct strbuf *buf) { struct probe_trace_arg_ref *ref = arg->ref; - int ret, depth = 0; - char *tmp = buf; + int depth = 0, err; /* Argument name or separator */ if (arg->name) - ret = e_snprintf(buf, buflen, " %s=", arg->name); + err = strbuf_addf(buf, " %s=", arg->name); else - ret = e_snprintf(buf, buflen, " "); - if (ret < 0) - return ret; - buf += ret; - buflen -= ret; + err = strbuf_addch(buf, ' '); + if (err) + return err; /* Special case: @XXX */ if (arg->value[0] == '@' && arg->ref) @@ -1782,59 +1901,44 @@ static int synthesize_probe_trace_arg(struct probe_trace_arg *arg, /* Dereferencing arguments */ if (ref) { - depth = __synthesize_probe_trace_arg_ref(ref, &buf, - &buflen, 1); + depth = __synthesize_probe_trace_arg_ref(ref, buf, 1); if (depth < 0) return depth; } /* Print argument value */ if (arg->value[0] == '@' && arg->ref) - ret = e_snprintf(buf, buflen, "%s%+ld", arg->value, - arg->ref->offset); + err = strbuf_addf(buf, "%s%+ld", arg->value, arg->ref->offset); else - ret = e_snprintf(buf, buflen, "%s", arg->value); - if (ret < 0) - return ret; - buf += ret; - buflen -= ret; + err = strbuf_addstr(buf, arg->value); /* Closing */ - while (depth--) { - ret = e_snprintf(buf, buflen, ")"); - if (ret < 0) - return ret; - buf += ret; - buflen -= ret; - } + while (!err && depth--) + err = strbuf_addch(buf, ')'); + /* Print argument type */ - if (arg->type) { - ret = e_snprintf(buf, buflen, ":%s", arg->type); - if (ret <= 0) - return ret; - buf += ret; - } + if (!err && arg->type) + err = strbuf_addf(buf, ":%s", arg->type); - return buf - tmp; + return err; } char *synthesize_probe_trace_command(struct probe_trace_event *tev) { struct probe_trace_point *tp = &tev->point; - char *buf; - int i, len, ret; + struct strbuf buf; + char *ret = NULL; + int i, err; - buf = zalloc(MAX_CMDLEN); - if (buf == NULL) + /* Uprobes must have tp->module */ + if (tev->uprobes && !tp->module) return NULL; - len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s ", tp->retprobe ? 'r' : 'p', - tev->group, tev->event); - if (len <= 0) - goto error; + if (strbuf_init(&buf, 32) < 0) + return NULL; - /* Uprobes must have tp->module */ - if (tev->uprobes && !tp->module) + if (strbuf_addf(&buf, "%c:%s/%s ", tp->retprobe ? 'r' : 'p', + tev->group, tev->event) < 0) goto error; /* * If tp->address == 0, then this point must be a @@ -1849,34 +1953,25 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) /* Use the tp->address for uprobes */ if (tev->uprobes) - ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", - tp->module, tp->address); + err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address); else if (!strncmp(tp->symbol, "0x", 2)) /* Absolute address. See try_to_find_absolute_address() */ - ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx", - tp->module ?: "", tp->module ? ":" : "", - tp->address); + err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "", + tp->module ? ":" : "", tp->address); else - ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", - tp->module ?: "", tp->module ? ":" : "", - tp->symbol, tp->offset); - - if (ret <= 0) + err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "", + tp->module ? ":" : "", tp->symbol, tp->offset); + if (err) goto error; - len += ret; - for (i = 0; i < tev->nargs; i++) { - ret = synthesize_probe_trace_arg(&tev->args[i], buf + len, - MAX_CMDLEN - len); - if (ret <= 0) + for (i = 0; i < tev->nargs; i++) + if (synthesize_probe_trace_arg(&tev->args[i], &buf) < 0) goto error; - len += ret; - } - return buf; + ret = strbuf_detach(&buf, NULL); error: - free(buf); - return NULL; + strbuf_release(&buf); + return ret; } static int find_perf_probe_point_from_map(struct probe_trace_point *tp, @@ -1958,7 +2053,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, static int convert_to_perf_probe_event(struct probe_trace_event *tev, struct perf_probe_event *pev, bool is_kprobe) { - char buf[64] = ""; + struct strbuf buf = STRBUF_INIT; int i, ret; /* Convert event/group name */ @@ -1981,14 +2076,15 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev, if (tev->args[i].name) pev->args[i].name = strdup(tev->args[i].name); else { - ret = synthesize_probe_trace_arg(&tev->args[i], - buf, 64); - pev->args[i].name = strdup(buf); + if ((ret = strbuf_init(&buf, 32)) < 0) + goto error; + ret = synthesize_probe_trace_arg(&tev->args[i], &buf); + pev->args[i].name = strbuf_detach(&buf, NULL); } if (pev->args[i].name == NULL && ret >= 0) ret = -ENOMEM; } - +error: if (ret < 0) clear_perf_probe_event(pev); @@ -2021,6 +2117,79 @@ void clear_perf_probe_event(struct perf_probe_event *pev) memset(pev, 0, sizeof(*pev)); } +#define strdup_or_goto(str, label) \ +({ char *__p = NULL; if (str && !(__p = strdup(str))) goto label; __p; }) + +static int perf_probe_point__copy(struct perf_probe_point *dst, + struct perf_probe_point *src) +{ + dst->file = strdup_or_goto(src->file, out_err); + dst->function = strdup_or_goto(src->function, out_err); + dst->lazy_line = strdup_or_goto(src->lazy_line, out_err); + dst->line = src->line; + dst->retprobe = src->retprobe; + dst->offset = src->offset; + return 0; + +out_err: + clear_perf_probe_point(dst); + return -ENOMEM; +} + +static int perf_probe_arg__copy(struct perf_probe_arg *dst, + struct perf_probe_arg *src) +{ + struct perf_probe_arg_field *field, **ppfield; + + dst->name = strdup_or_goto(src->name, out_err); + dst->var = strdup_or_goto(src->var, out_err); + dst->type = strdup_or_goto(src->type, out_err); + + field = src->field; + ppfield = &(dst->field); + while (field) { + *ppfield = zalloc(sizeof(*field)); + if (!*ppfield) + goto out_err; + (*ppfield)->name = strdup_or_goto(field->name, out_err); + (*ppfield)->index = field->index; + (*ppfield)->ref = field->ref; + field = field->next; + ppfield = &((*ppfield)->next); + } + return 0; +out_err: + return -ENOMEM; +} + +int perf_probe_event__copy(struct perf_probe_event *dst, + struct perf_probe_event *src) +{ + int i; + + dst->event = strdup_or_goto(src->event, out_err); + dst->group = strdup_or_goto(src->group, out_err); + dst->target = strdup_or_goto(src->target, out_err); + dst->uprobes = src->uprobes; + + if (perf_probe_point__copy(&dst->point, &src->point) < 0) + goto out_err; + + dst->args = zalloc(sizeof(struct perf_probe_arg) * src->nargs); + if (!dst->args) + goto out_err; + dst->nargs = src->nargs; + + for (i = 0; i < src->nargs; i++) + if (perf_probe_arg__copy(&dst->args[i], &src->args[i]) < 0) + goto out_err; + return 0; + +out_err: + clear_perf_probe_event(dst); + return -ENOMEM; +} + void clear_probe_trace_event(struct probe_trace_event *tev) { struct probe_trace_arg_ref *ref, *next; @@ -2162,35 +2331,38 @@ static int perf_probe_event__sprintf(const char *group, const char *event, struct strbuf *result) { int i, ret; - char buf[128]; - char *place; + char *buf; - /* Synthesize only event probe point */ - place = synthesize_perf_probe_point(&pev->point); - if (!place) - return -EINVAL; + if (asprintf(&buf, "%s:%s", group, event) < 0) + return -errno; + ret = strbuf_addf(result, " %-20s (on ", buf); + free(buf); + if (ret) + return ret; - ret = e_snprintf(buf, 128, "%s:%s", group, event); - if (ret < 0) - goto out; + /* Synthesize only event probe point */ + buf = synthesize_perf_probe_point(&pev->point); + if (!buf) + return -ENOMEM; + ret = strbuf_addstr(result, buf); + free(buf); - strbuf_addf(result, " %-20s (on %s", buf, place); - if (module) - strbuf_addf(result, " in %s", module); + if (!ret && module) + ret = strbuf_addf(result, " in %s", module); - if (pev->nargs > 0) { - strbuf_add(result, " with", 5); - for (i = 0; i < pev->nargs; i++) { - ret = synthesize_perf_probe_arg(&pev->args[i], - buf, 128); - if (ret < 0) - goto out; - strbuf_addf(result, " %s", buf); + if (!ret && pev->nargs > 0) { + ret = strbuf_add(result, " with", 5); + for (i = 0; !ret && i < pev->nargs; i++) { + buf = synthesize_perf_probe_arg(&pev->args[i]); + if (!buf) + return -ENOMEM; + ret = strbuf_addf(result, " %s", buf); + free(buf); } } - strbuf_addch(result, ')'); -out: - free(place); + if (!ret) + ret = strbuf_addch(result, ')'); + return ret; } @@ -2245,7 +2417,7 @@ static int __show_perf_probe_events(int fd, bool is_kprobe, if (!rawlist) return -ENOMEM; - strlist__for_each(ent, rawlist) { + strlist__for_each_entry(ent, rawlist) { ret = parse_probe_trace_command(ent->s, &tev); if (ret >= 0) { if (!filter_probe_trace_event(&tev, filter)) @@ -2278,6 +2450,9 @@ int show_perf_probe_events(struct strfilter *filter) setup_pager(); + if (probe_conf.cache) + return probe_cache__show_all_caches(filter); + ret = init_probe_symbol_maps(false); if (ret < 0) return ret; @@ -2386,17 +2561,24 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, char buf[64]; int ret; - if (pev->event) + /* If probe_event or trace_event already have the name, reuse it */ + if (pev->event && !pev->sdt) event = pev->event; - else + else if (tev->event) + event = tev->event; + else { + /* Or generate new one from probe point */ if (pev->point.function && (strncmp(pev->point.function, "0x", 2) != 0) && !strisglob(pev->point.function)) event = pev->point.function; else event = tev->point.realname; - if (pev->group) + } + if (pev->group && !pev->sdt) group = pev->group; + else if (tev->group) + group = tev->group; else group = PERFPROBE_GROUP; @@ -2418,40 +2600,60 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, return 0; } -static int __add_probe_trace_events(struct perf_probe_event *pev, - struct probe_trace_event *tevs, - int ntevs, bool allow_suffix) +static int __open_probe_file_and_namelist(bool uprobe, + struct strlist **namelist) { - int i, fd, ret; - struct probe_trace_event *tev = NULL; - struct strlist *namelist; + int fd; - fd = probe_file__open(PF_FL_RW | (pev->uprobes ? PF_FL_UPROBE : 0)); + fd = probe_file__open(PF_FL_RW | (uprobe ? PF_FL_UPROBE : 0)); if (fd < 0) return fd; /* Get current event names */ - namelist = probe_file__get_namelist(fd); - if (!namelist) { + *namelist = probe_file__get_namelist(fd); + if (!(*namelist)) { pr_debug("Failed to get current event list.\n"); - ret = -ENOMEM; - goto close_out; + close(fd); + return -ENOMEM; } + return fd; +} + +static int __add_probe_trace_events(struct perf_probe_event *pev, + struct probe_trace_event *tevs, + int ntevs, bool allow_suffix) +{ + int i, fd[2] = {-1, -1}, up, ret; + struct probe_trace_event *tev = NULL; + struct probe_cache *cache = NULL; + struct strlist *namelist[2] = {NULL, NULL}; + + up = pev->uprobes ? 1 : 0; + fd[up] = __open_probe_file_and_namelist(up, &namelist[up]); + if (fd[up] < 0) + return fd[up]; ret = 0; for (i = 0; i < ntevs; i++) { tev = &tevs[i]; + up = tev->uprobes ? 1 : 0; + if (fd[up] == -1) { /* Open the kprobe/uprobe_events */ + fd[up] = __open_probe_file_and_namelist(up, + &namelist[up]); + if (fd[up] < 0) + goto close_out; + } /* Skip if the symbol is out of .text or blacklisted */ - if (!tev->point.symbol) + if (!tev->point.symbol && !pev->uprobes) continue; /* Set new name for tev (and update namelist) */ - ret = probe_trace_event__set_name(tev, pev, namelist, + ret = probe_trace_event__set_name(tev, pev, namelist[up], allow_suffix); if (ret < 0) break; - ret = probe_file__add_event(fd, tev); + ret = probe_file__add_event(fd[up], tev); if (ret < 0) break; @@ -2465,10 +2667,21 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, } if (ret == -EINVAL && pev->uprobes) warn_uprobe_event_compat(tev); + if (ret == 0 && probe_conf.cache) { + cache = probe_cache__new(pev->target); + if (!cache || + probe_cache__add_entry(cache, pev, tevs, ntevs) < 0 || + probe_cache__commit(cache) < 0) + pr_warning("Failed to add event to probe cache\n"); + probe_cache__delete(cache); + } - strlist__delete(namelist); close_out: - close(fd); + for (up = 0; up < 2; up++) { + strlist__delete(namelist[up]); + if (fd[up] >= 0) + close(fd[up]); + } return ret; } @@ -2493,12 +2706,10 @@ static int find_probe_functions(struct map *map, char *name, return found; } -#define strdup_or_goto(str, label) \ - ({ char *__p = strdup(str); if (!__p) goto label; __p; }) - void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused, struct probe_trace_event *tev __maybe_unused, - struct map *map __maybe_unused) { } + struct map *map __maybe_unused, + struct symbol *sym __maybe_unused) { } /* * Find probe function addresses from map. @@ -2516,6 +2727,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, struct probe_trace_point *tp; int num_matched_functions; int ret, i, j, skipped = 0; + char *mod_name; map = get_target_map(pev->target, pev->uprobes); if (!map) { @@ -2600,9 +2812,19 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, tp->realname = strdup_or_goto(sym->name, nomem_out); tp->retprobe = pp->retprobe; - if (pev->target) - tev->point.module = strdup_or_goto(pev->target, - nomem_out); + if (pev->target) { + if (pev->uprobes) { + tev->point.module = strdup_or_goto(pev->target, + nomem_out); + } else { + mod_name = find_module_name(pev->target); + tev->point.module = + strdup(mod_name ? mod_name : pev->target); + free(mod_name); + if (!tev->point.module) + goto nomem_out; + } + } tev->uprobes = pev->uprobes; tev->nargs = pev->nargs; if (tev->nargs) { @@ -2624,7 +2846,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, strdup_or_goto(pev->args[i].type, nomem_out); } - arch__fix_tev_from_maps(pev, tev, map); + arch__fix_tev_from_maps(pev, tev, map, sym); } if (ret == skipped) { ret = -ENOENT; @@ -2736,16 +2958,211 @@ errout: return err; } -bool __weak arch__prefers_symtab(void) { return false; } +/* Concatinate two arrays */ +static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b) +{ + void *ret; + + ret = malloc(sz_a + sz_b); + if (ret) { + memcpy(ret, a, sz_a); + memcpy(ret + sz_a, b, sz_b); + } + return ret; +} + +static int +concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs, + struct probe_trace_event **tevs2, int ntevs2) +{ + struct probe_trace_event *new_tevs; + int ret = 0; + + if (ntevs == 0) { + *tevs = *tevs2; + *ntevs = ntevs2; + *tevs2 = NULL; + return 0; + } + + if (*ntevs + ntevs2 > probe_conf.max_probes) + ret = -E2BIG; + else { + /* Concatinate the array of probe_trace_event */ + new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs), + *tevs2, ntevs2 * sizeof(**tevs2)); + if (!new_tevs) + ret = -ENOMEM; + else { + free(*tevs); + *tevs = new_tevs; + *ntevs += ntevs2; + } + } + if (ret < 0) + clear_probe_trace_events(*tevs2, ntevs2); + zfree(tevs2); + + return ret; +} + +/* + * Try to find probe_trace_event from given probe caches. Return the number + * of cached events found, if an error occurs return the error. + */ +static int find_cached_events(struct perf_probe_event *pev, + struct probe_trace_event **tevs, + const char *target) +{ + struct probe_cache *cache; + struct probe_cache_entry *entry; + struct probe_trace_event *tmp_tevs = NULL; + int ntevs = 0; + int ret = 0; + + cache = probe_cache__new(target); + /* Return 0 ("not found") if the target has no probe cache. */ + if (!cache) + return 0; + + for_each_probe_cache_entry(entry, cache) { + /* Skip the cache entry which has no name */ + if (!entry->pev.event || !entry->pev.group) + continue; + if ((!pev->group || strglobmatch(entry->pev.group, pev->group)) && + strglobmatch(entry->pev.event, pev->event)) { + ret = probe_cache_entry__get_event(entry, &tmp_tevs); + if (ret > 0) + ret = concat_probe_trace_events(tevs, &ntevs, + &tmp_tevs, ret); + if (ret < 0) + break; + } + } + probe_cache__delete(cache); + if (ret < 0) { + clear_probe_trace_events(*tevs, ntevs); + zfree(tevs); + } else { + ret = ntevs; + if (ntevs > 0 && target && target[0] == '/') + pev->uprobes = true; + } + + return ret; +} + +/* Try to find probe_trace_event from all probe caches */ +static int find_cached_events_all(struct perf_probe_event *pev, + struct probe_trace_event **tevs) +{ + struct probe_trace_event *tmp_tevs = NULL; + struct strlist *bidlist; + struct str_node *nd; + char *pathname; + int ntevs = 0; + int ret; + + /* Get the buildid list of all valid caches */ + bidlist = build_id_cache__list_all(true); + if (!bidlist) { + ret = -errno; + pr_debug("Failed to get buildids: %d\n", ret); + return ret; + } + + ret = 0; + strlist__for_each_entry(nd, bidlist) { + pathname = build_id_cache__origname(nd->s); + ret = find_cached_events(pev, &tmp_tevs, pathname); + /* In the case of cnt == 0, we just skip it */ + if (ret > 0) + ret = concat_probe_trace_events(tevs, &ntevs, + &tmp_tevs, ret); + free(pathname); + if (ret < 0) + break; + } + strlist__delete(bidlist); + + if (ret < 0) { + clear_probe_trace_events(*tevs, ntevs); + zfree(tevs); + } else + ret = ntevs; + + return ret; +} + +static int find_probe_trace_events_from_cache(struct perf_probe_event *pev, + struct probe_trace_event **tevs) +{ + struct probe_cache *cache; + struct probe_cache_entry *entry; + struct probe_trace_event *tev; + struct str_node *node; + int ret, i; + + if (pev->sdt) { + /* For SDT/cached events, we use special search functions */ + if (!pev->target) + return find_cached_events_all(pev, tevs); + else + return find_cached_events(pev, tevs, pev->target); + } + cache = probe_cache__new(pev->target); + if (!cache) + return 0; + + entry = probe_cache__find(cache, pev); + if (!entry) { + /* SDT must be in the cache */ + ret = pev->sdt ? -ENOENT : 0; + goto out; + } + + ret = strlist__nr_entries(entry->tevlist); + if (ret > probe_conf.max_probes) { + pr_debug("Too many entries matched in the cache of %s\n", + pev->target ? : "kernel"); + ret = -E2BIG; + goto out; + } + + *tevs = zalloc(ret * sizeof(*tev)); + if (!*tevs) { + ret = -ENOMEM; + goto out; + } + + i = 0; + strlist__for_each_entry(node, entry->tevlist) { + tev = &(*tevs)[i++]; + ret = parse_probe_trace_command(node->s, tev); + if (ret < 0) + goto out; + /* Set the uprobes attribute as same as original */ + tev->uprobes = pev->uprobes; + } + ret = i; + +out: + probe_cache__delete(cache); + return ret; +} static int convert_to_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event **tevs) { int ret; - if (pev->uprobes && !pev->group) { - /* Replace group name if not given */ - ret = convert_exec_to_group(pev->target, &pev->group); + if (!pev->group && !pev->sdt) { + /* Set group name if not given */ + if (!pev->uprobes) { + pev->group = strdup(PERFPROBE_GROUP); + ret = pev->group ? 0 : -ENOMEM; + } else + ret = convert_exec_to_group(pev->target, &pev->group); if (ret != 0) { pr_warning("Failed to make a group name.\n"); return ret; @@ -2756,11 +3173,10 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, if (ret > 0) return ret; - if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { - ret = find_probe_trace_events_from_map(pev, tevs); - if (ret > 0) - return ret; /* Found in symbol table */ - } + /* At first, we need to lookup cache entry */ + ret = find_probe_trace_events_from_cache(pev, tevs); + if (ret > 0 || pev->sdt) /* SDT can be found only in the cache */ + return ret == 0 ? -ENOENT : ret; /* Found in probe cache */ /* Convert perf_probe_event with debuginfo */ ret = try_to_find_probe_trace_events(pev, tevs); @@ -2910,8 +3326,16 @@ int show_available_funcs(const char *target, struct strfilter *_filter, /* Load symbols with given filter */ available_func_filter = _filter; - if (map__load(map, filter_available_functions)) { - pr_err("Failed to load symbols in %s\n", (target) ? : "kernel"); + ret = map__load(map, filter_available_functions); + if (ret) { + if (ret == -2) { + char *str = strfilter__string(_filter); + pr_err("Failed to find symbols matched to \"%s\"\n", + str); + free(str); + } else + pr_err("Failed to load symbols in %s\n", + (target) ? : "kernel"); goto end; } if (!dso__sorted_by_name(map->dso, map->type)) diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index e54e7b011577..f4f45db77c1c 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -12,6 +12,7 @@ struct probe_conf { bool show_location_range; bool force_add; bool no_inlines; + bool cache; int max_probes; }; extern struct probe_conf probe_conf; @@ -84,6 +85,7 @@ struct perf_probe_event { char *group; /* Group name */ struct perf_probe_point point; /* Probe point */ int nargs; /* Number of arguments */ + bool sdt; /* SDT/cached event flag */ bool uprobes; /* Uprobe event flag */ char *target; /* Target binary */ struct perf_probe_arg *args; /* Arguments */ @@ -120,7 +122,11 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev); /* Events to command string */ char *synthesize_perf_probe_command(struct perf_probe_event *pev); char *synthesize_probe_trace_command(struct probe_trace_event *tev); -int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len); +char *synthesize_perf_probe_arg(struct perf_probe_arg *pa); +char *synthesize_perf_probe_point(struct perf_probe_point *pp); + +int perf_probe_event__copy(struct perf_probe_event *dst, + struct perf_probe_event *src); /* Check the perf_probe_event needs debuginfo */ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); @@ -152,9 +158,9 @@ int show_line_range(struct line_range *lr, const char *module, bool user); int show_available_vars(struct perf_probe_event *pevs, int npevs, struct strfilter *filter); int show_available_funcs(const char *module, struct strfilter *filter, bool user); -bool arch__prefers_symtab(void); void arch__fix_tev_from_maps(struct perf_probe_event *pev, - struct probe_trace_event *tev, struct map *map); + struct probe_trace_event *tev, struct map *map, + struct symbol *sym); /* If there is no space to write, returns -E2BIG. */ int e_snprintf(char *str, size_t size, const char *format, ...) @@ -166,4 +172,9 @@ int e_snprintf(char *str, size_t size, const char *format, ...) int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, struct perf_probe_arg *pvar); +struct map *get_target_map(const char *target, bool user); + +void arch__post_process_probe_trace_events(struct perf_probe_event *pev, + int ntevs); + #endif /*_PROBE_EVENT_H */ diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index e3b3b92e4458..9c3b9ed5b3c3 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -14,6 +14,7 @@ * GNU General Public License for more details. * */ +#include <sys/uio.h> #include "util.h" #include "event.h" #include "strlist.h" @@ -49,7 +50,7 @@ static void print_open_warning(int err, bool uprobe) else pr_warning("Failed to open %cprobe_events: %s\n", uprobe ? 'u' : 'k', - strerror_r(-err, sbuf, sizeof(sbuf))); + str_error_r(-err, sbuf, sizeof(sbuf))); } static void print_both_open_warning(int kerr, int uerr) @@ -63,9 +64,9 @@ static void print_both_open_warning(int kerr, int uerr) else { char sbuf[STRERR_BUFSIZE]; pr_warning("Failed to open kprobe events: %s.\n", - strerror_r(-kerr, sbuf, sizeof(sbuf))); + str_error_r(-kerr, sbuf, sizeof(sbuf))); pr_warning("Failed to open uprobe events: %s.\n", - strerror_r(-uerr, sbuf, sizeof(sbuf))); + str_error_r(-uerr, sbuf, sizeof(sbuf))); } } @@ -132,7 +133,7 @@ int probe_file__open_both(int *kfd, int *ufd, int flag) /* Get raw string list of current kprobe_events or uprobe_events */ struct strlist *probe_file__get_rawlist(int fd) { - int ret, idx; + int ret, idx, fddup; FILE *fp; char buf[MAX_CMDLEN]; char *p; @@ -142,8 +143,17 @@ struct strlist *probe_file__get_rawlist(int fd) return NULL; sl = strlist__new(NULL, NULL); + if (sl == NULL) + return NULL; + + fddup = dup(fd); + if (fddup < 0) + goto out_free_sl; + + fp = fdopen(fddup, "r"); + if (!fp) + goto out_close_fddup; - fp = fdopen(dup(fd), "r"); while (!feof(fp)) { p = fgets(buf, MAX_CMDLEN, fp); if (!p) @@ -155,13 +165,21 @@ struct strlist *probe_file__get_rawlist(int fd) ret = strlist__add(sl, buf); if (ret < 0) { pr_debug("strlist__add failed (%d)\n", ret); - strlist__delete(sl); - return NULL; + goto out_close_fp; } } fclose(fp); return sl; + +out_close_fp: + fclose(fp); + goto out_free_sl; +out_close_fddup: + close(fddup); +out_free_sl: + strlist__delete(sl); + return NULL; } static struct strlist *__probe_file__get_namelist(int fd, bool include_group) @@ -177,7 +195,7 @@ static struct strlist *__probe_file__get_namelist(int fd, bool include_group) if (!rawlist) return NULL; sl = strlist__new(NULL, NULL); - strlist__for_each(ent, rawlist) { + strlist__for_each_entry(ent, rawlist) { ret = parse_probe_trace_command(ent->s, &tev); if (ret < 0) break; @@ -220,11 +238,10 @@ int probe_file__add_event(int fd, struct probe_trace_event *tev) pr_debug("Writing event: %s\n", buf); if (!probe_event_dry_run) { - ret = write(fd, buf, strlen(buf)); - if (ret <= 0) { + if (write(fd, buf, strlen(buf)) < (int)strlen(buf)) { ret = -errno; pr_warning("Failed to write event: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); } } free(buf); @@ -262,7 +279,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent) return 0; error: pr_warning("Failed to delete event: %s\n", - strerror_r(-ret, buf, sizeof(buf))); + str_error_r(-ret, buf, sizeof(buf))); return ret; } @@ -281,7 +298,7 @@ int probe_file__get_events(int fd, struct strfilter *filter, if (!namelist) return -ENOENT; - strlist__for_each(ent, namelist) { + strlist__for_each_entry(ent, namelist) { p = strchr(ent->s, ':'); if ((p && strfilter__compare(filter, p + 1)) || strfilter__compare(filter, ent->s)) { @@ -299,7 +316,7 @@ int probe_file__del_strlist(int fd, struct strlist *namelist) int ret = 0; struct str_node *ent; - strlist__for_each(ent, namelist) { + strlist__for_each_entry(ent, namelist) { ret = __del_trace_probe_event(fd, ent); if (ret < 0) break; @@ -325,3 +342,538 @@ int probe_file__del_events(int fd, struct strfilter *filter) return ret; } + +/* Caller must ensure to remove this entry from list */ +static void probe_cache_entry__delete(struct probe_cache_entry *entry) +{ + if (entry) { + BUG_ON(!list_empty(&entry->node)); + + strlist__delete(entry->tevlist); + clear_perf_probe_event(&entry->pev); + zfree(&entry->spev); + free(entry); + } +} + +static struct probe_cache_entry * +probe_cache_entry__new(struct perf_probe_event *pev) +{ + struct probe_cache_entry *entry = zalloc(sizeof(*entry)); + + if (entry) { + INIT_LIST_HEAD(&entry->node); + entry->tevlist = strlist__new(NULL, NULL); + if (!entry->tevlist) + zfree(&entry); + else if (pev) { + entry->spev = synthesize_perf_probe_command(pev); + if (!entry->spev || + perf_probe_event__copy(&entry->pev, pev) < 0) { + probe_cache_entry__delete(entry); + return NULL; + } + } + } + + return entry; +} + +int probe_cache_entry__get_event(struct probe_cache_entry *entry, + struct probe_trace_event **tevs) +{ + struct probe_trace_event *tev; + struct str_node *node; + int ret, i; + + ret = strlist__nr_entries(entry->tevlist); + if (ret > probe_conf.max_probes) + return -E2BIG; + + *tevs = zalloc(ret * sizeof(*tev)); + if (!*tevs) + return -ENOMEM; + + i = 0; + strlist__for_each_entry(node, entry->tevlist) { + tev = &(*tevs)[i++]; + ret = parse_probe_trace_command(node->s, tev); + if (ret < 0) + break; + } + return i; +} + +/* For the kernel probe caches, pass target = NULL or DSO__NAME_KALLSYMS */ +static int probe_cache__open(struct probe_cache *pcache, const char *target) +{ + char cpath[PATH_MAX]; + char sbuildid[SBUILD_ID_SIZE]; + char *dir_name = NULL; + bool is_kallsyms = false; + int ret, fd; + + if (target && build_id_cache__cached(target)) { + /* This is a cached buildid */ + strncpy(sbuildid, target, SBUILD_ID_SIZE); + dir_name = build_id_cache__linkname(sbuildid, NULL, 0); + goto found; + } + + if (!target || !strcmp(target, DSO__NAME_KALLSYMS)) { + target = DSO__NAME_KALLSYMS; + is_kallsyms = true; + ret = sysfs__sprintf_build_id("/", sbuildid); + } else + ret = filename__sprintf_build_id(target, sbuildid); + + if (ret < 0) { + pr_debug("Failed to get build-id from %s.\n", target); + return ret; + } + + /* If we have no buildid cache, make it */ + if (!build_id_cache__cached(sbuildid)) { + ret = build_id_cache__add_s(sbuildid, target, + is_kallsyms, NULL); + if (ret < 0) { + pr_debug("Failed to add build-id cache: %s\n", target); + return ret; + } + } + + dir_name = build_id_cache__cachedir(sbuildid, target, is_kallsyms, + false); +found: + if (!dir_name) { + pr_debug("Failed to get cache from %s\n", target); + return -ENOMEM; + } + + snprintf(cpath, PATH_MAX, "%s/probes", dir_name); + fd = open(cpath, O_CREAT | O_RDWR, 0644); + if (fd < 0) + pr_debug("Failed to open cache(%d): %s\n", fd, cpath); + free(dir_name); + pcache->fd = fd; + + return fd; +} + +static int probe_cache__load(struct probe_cache *pcache) +{ + struct probe_cache_entry *entry = NULL; + char buf[MAX_CMDLEN], *p; + int ret = 0, fddup; + FILE *fp; + + fddup = dup(pcache->fd); + if (fddup < 0) + return -errno; + fp = fdopen(fddup, "r"); + if (!fp) { + close(fddup); + return -EINVAL; + } + + while (!feof(fp)) { + if (!fgets(buf, MAX_CMDLEN, fp)) + break; + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + /* #perf_probe_event or %sdt_event */ + if (buf[0] == '#' || buf[0] == '%') { + entry = probe_cache_entry__new(NULL); + if (!entry) { + ret = -ENOMEM; + goto out; + } + if (buf[0] == '%') + entry->sdt = true; + entry->spev = strdup(buf + 1); + if (entry->spev) + ret = parse_perf_probe_command(buf + 1, + &entry->pev); + else + ret = -ENOMEM; + if (ret < 0) { + probe_cache_entry__delete(entry); + goto out; + } + list_add_tail(&entry->node, &pcache->entries); + } else { /* trace_probe_event */ + if (!entry) { + ret = -EINVAL; + goto out; + } + strlist__add(entry->tevlist, buf); + } + } +out: + fclose(fp); + return ret; +} + +static struct probe_cache *probe_cache__alloc(void) +{ + struct probe_cache *pcache = zalloc(sizeof(*pcache)); + + if (pcache) { + INIT_LIST_HEAD(&pcache->entries); + pcache->fd = -EINVAL; + } + return pcache; +} + +void probe_cache__purge(struct probe_cache *pcache) +{ + struct probe_cache_entry *entry, *n; + + list_for_each_entry_safe(entry, n, &pcache->entries, node) { + list_del_init(&entry->node); + probe_cache_entry__delete(entry); + } +} + +void probe_cache__delete(struct probe_cache *pcache) +{ + if (!pcache) + return; + + probe_cache__purge(pcache); + if (pcache->fd > 0) + close(pcache->fd); + free(pcache); +} + +struct probe_cache *probe_cache__new(const char *target) +{ + struct probe_cache *pcache = probe_cache__alloc(); + int ret; + + if (!pcache) + return NULL; + + ret = probe_cache__open(pcache, target); + if (ret < 0) { + pr_debug("Cache open error: %d\n", ret); + goto out_err; + } + + ret = probe_cache__load(pcache); + if (ret < 0) { + pr_debug("Cache read error: %d\n", ret); + goto out_err; + } + + return pcache; + +out_err: + probe_cache__delete(pcache); + return NULL; +} + +static bool streql(const char *a, const char *b) +{ + if (a == b) + return true; + + if (!a || !b) + return false; + + return !strcmp(a, b); +} + +struct probe_cache_entry * +probe_cache__find(struct probe_cache *pcache, struct perf_probe_event *pev) +{ + struct probe_cache_entry *entry = NULL; + char *cmd = synthesize_perf_probe_command(pev); + + if (!cmd) + return NULL; + + for_each_probe_cache_entry(entry, pcache) { + if (pev->sdt) { + if (entry->pev.event && + streql(entry->pev.event, pev->event) && + (!pev->group || + streql(entry->pev.group, pev->group))) + goto found; + + continue; + } + /* Hit if same event name or same command-string */ + if ((pev->event && + (streql(entry->pev.group, pev->group) && + streql(entry->pev.event, pev->event))) || + (!strcmp(entry->spev, cmd))) + goto found; + } + entry = NULL; + +found: + free(cmd); + return entry; +} + +struct probe_cache_entry * +probe_cache__find_by_name(struct probe_cache *pcache, + const char *group, const char *event) +{ + struct probe_cache_entry *entry = NULL; + + for_each_probe_cache_entry(entry, pcache) { + /* Hit if same event name or same command-string */ + if (streql(entry->pev.group, group) && + streql(entry->pev.event, event)) + goto found; + } + entry = NULL; + +found: + return entry; +} + +int probe_cache__add_entry(struct probe_cache *pcache, + struct perf_probe_event *pev, + struct probe_trace_event *tevs, int ntevs) +{ + struct probe_cache_entry *entry = NULL; + char *command; + int i, ret = 0; + + if (!pcache || !pev || !tevs || ntevs <= 0) { + ret = -EINVAL; + goto out_err; + } + + /* Remove old cache entry */ + entry = probe_cache__find(pcache, pev); + if (entry) { + list_del_init(&entry->node); + probe_cache_entry__delete(entry); + } + + ret = -ENOMEM; + entry = probe_cache_entry__new(pev); + if (!entry) + goto out_err; + + for (i = 0; i < ntevs; i++) { + if (!tevs[i].point.symbol) + continue; + + command = synthesize_probe_trace_command(&tevs[i]); + if (!command) + goto out_err; + strlist__add(entry->tevlist, command); + free(command); + } + list_add_tail(&entry->node, &pcache->entries); + pr_debug("Added probe cache: %d\n", ntevs); + return 0; + +out_err: + pr_debug("Failed to add probe caches\n"); + probe_cache_entry__delete(entry); + return ret; +} + +#ifdef HAVE_GELF_GETNOTE_SUPPORT +static unsigned long long sdt_note__get_addr(struct sdt_note *note) +{ + return note->bit32 ? (unsigned long long)note->addr.a32[0] + : (unsigned long long)note->addr.a64[0]; +} + +int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) +{ + struct probe_cache_entry *entry = NULL; + struct list_head sdtlist; + struct sdt_note *note; + char *buf; + char sdtgrp[64]; + int ret; + + INIT_LIST_HEAD(&sdtlist); + ret = get_sdt_note_list(&sdtlist, pathname); + if (ret < 0) { + pr_debug("Failed to get sdt note: %d\n", ret); + return ret; + } + list_for_each_entry(note, &sdtlist, note_list) { + ret = snprintf(sdtgrp, 64, "sdt_%s", note->provider); + if (ret < 0) + break; + /* Try to find same-name entry */ + entry = probe_cache__find_by_name(pcache, sdtgrp, note->name); + if (!entry) { + entry = probe_cache_entry__new(NULL); + if (!entry) { + ret = -ENOMEM; + break; + } + entry->sdt = true; + ret = asprintf(&entry->spev, "%s:%s=%s", sdtgrp, + note->name, note->name); + if (ret < 0) + break; + entry->pev.event = strdup(note->name); + entry->pev.group = strdup(sdtgrp); + list_add_tail(&entry->node, &pcache->entries); + } + ret = asprintf(&buf, "p:%s/%s %s:0x%llx", + sdtgrp, note->name, pathname, + sdt_note__get_addr(note)); + if (ret < 0) + break; + strlist__add(entry->tevlist, buf); + free(buf); + entry = NULL; + } + if (entry) { + list_del_init(&entry->node); + probe_cache_entry__delete(entry); + } + cleanup_sdt_note_list(&sdtlist); + return ret; +} +#endif + +static int probe_cache_entry__write(struct probe_cache_entry *entry, int fd) +{ + struct str_node *snode; + struct stat st; + struct iovec iov[3]; + const char *prefix = entry->sdt ? "%" : "#"; + int ret; + /* Save stat for rollback */ + ret = fstat(fd, &st); + if (ret < 0) + return ret; + + pr_debug("Writing cache: %s%s\n", prefix, entry->spev); + iov[0].iov_base = (void *)prefix; iov[0].iov_len = 1; + iov[1].iov_base = entry->spev; iov[1].iov_len = strlen(entry->spev); + iov[2].iov_base = (void *)"\n"; iov[2].iov_len = 1; + ret = writev(fd, iov, 3); + if (ret < (int)iov[1].iov_len + 2) + goto rollback; + + strlist__for_each_entry(snode, entry->tevlist) { + iov[0].iov_base = (void *)snode->s; + iov[0].iov_len = strlen(snode->s); + iov[1].iov_base = (void *)"\n"; iov[1].iov_len = 1; + ret = writev(fd, iov, 2); + if (ret < (int)iov[0].iov_len + 1) + goto rollback; + } + return 0; + +rollback: + /* Rollback to avoid cache file corruption */ + if (ret > 0) + ret = -1; + if (ftruncate(fd, st.st_size) < 0) + ret = -2; + + return ret; +} + +int probe_cache__commit(struct probe_cache *pcache) +{ + struct probe_cache_entry *entry; + int ret = 0; + + /* TBD: if we do not update existing entries, skip it */ + ret = lseek(pcache->fd, 0, SEEK_SET); + if (ret < 0) + goto out; + + ret = ftruncate(pcache->fd, 0); + if (ret < 0) + goto out; + + for_each_probe_cache_entry(entry, pcache) { + ret = probe_cache_entry__write(entry, pcache->fd); + pr_debug("Cache committed: %d\n", ret); + if (ret < 0) + break; + } +out: + return ret; +} + +static bool probe_cache_entry__compare(struct probe_cache_entry *entry, + struct strfilter *filter) +{ + char buf[128], *ptr = entry->spev; + + if (entry->pev.event) { + snprintf(buf, 128, "%s:%s", entry->pev.group, entry->pev.event); + ptr = buf; + } + return strfilter__compare(filter, ptr); +} + +int probe_cache__filter_purge(struct probe_cache *pcache, + struct strfilter *filter) +{ + struct probe_cache_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &pcache->entries, node) { + if (probe_cache_entry__compare(entry, filter)) { + pr_info("Removed cached event: %s\n", entry->spev); + list_del_init(&entry->node); + probe_cache_entry__delete(entry); + } + } + return 0; +} + +static int probe_cache__show_entries(struct probe_cache *pcache, + struct strfilter *filter) +{ + struct probe_cache_entry *entry; + + for_each_probe_cache_entry(entry, pcache) { + if (probe_cache_entry__compare(entry, filter)) + printf("%s\n", entry->spev); + } + return 0; +} + +/* Show all cached probes */ +int probe_cache__show_all_caches(struct strfilter *filter) +{ + struct probe_cache *pcache; + struct strlist *bidlist; + struct str_node *nd; + char *buf = strfilter__string(filter); + + pr_debug("list cache with filter: %s\n", buf); + free(buf); + + bidlist = build_id_cache__list_all(true); + if (!bidlist) { + pr_debug("Failed to get buildids: %d\n", errno); + return -EINVAL; + } + strlist__for_each_entry(nd, bidlist) { + pcache = probe_cache__new(nd->s); + if (!pcache) + continue; + if (!list_empty(&pcache->entries)) { + buf = build_id_cache__origname(nd->s); + printf("%s (%s):\n", buf, nd->s); + free(buf); + probe_cache__show_entries(pcache, filter); + } + probe_cache__delete(pcache); + } + strlist__delete(bidlist); + + return 0; +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 18ac9cf51c34..9577b5c0b487 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -5,9 +5,27 @@ #include "strfilter.h" #include "probe-event.h" +/* Cache of probe definitions */ +struct probe_cache_entry { + struct list_head node; + bool sdt; + struct perf_probe_event pev; + char *spev; + struct strlist *tevlist; +}; + +struct probe_cache { + int fd; + struct list_head entries; +}; + #define PF_FL_UPROBE 1 #define PF_FL_RW 2 +#define for_each_probe_cache_entry(entry, pcache) \ + list_for_each_entry(entry, &pcache->entries, node) +/* probe-file.c depends on libelf */ +#ifdef HAVE_LIBELF_SUPPORT int probe_file__open(int flag); int probe_file__open_both(int *kfd, int *ufd, int flag); struct strlist *probe_file__get_namelist(int fd); @@ -18,5 +36,29 @@ int probe_file__get_events(int fd, struct strfilter *filter, struct strlist *plist); int probe_file__del_strlist(int fd, struct strlist *namelist); +int probe_cache_entry__get_event(struct probe_cache_entry *entry, + struct probe_trace_event **tevs); +struct probe_cache *probe_cache__new(const char *target); +int probe_cache__add_entry(struct probe_cache *pcache, + struct perf_probe_event *pev, + struct probe_trace_event *tevs, int ntevs); +int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname); +int probe_cache__commit(struct probe_cache *pcache); +void probe_cache__purge(struct probe_cache *pcache); +void probe_cache__delete(struct probe_cache *pcache); +int probe_cache__filter_purge(struct probe_cache *pcache, + struct strfilter *filter); +struct probe_cache_entry *probe_cache__find(struct probe_cache *pcache, + struct perf_probe_event *pev); +struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache, + const char *group, const char *event); +int probe_cache__show_all_caches(struct strfilter *filter); +#else /* ! HAVE_LIBELF_SUPPORT */ +static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused) +{ + return NULL; +} +#define probe_cache__delete(pcache) do {} while (0) +#endif #endif diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index b3bd0fba0237..5c290c682afe 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -297,10 +297,13 @@ static int convert_variable_type(Dwarf_Die *vr_die, char sbuf[STRERR_BUFSIZE]; int bsize, boffs, total; int ret; + char sign; /* TODO: check all types */ - if (cast && strcmp(cast, "string") != 0) { + if (cast && strcmp(cast, "string") != 0 && + strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) { /* Non string type is OK */ + /* and respect signedness cast */ tvar->type = strdup(cast); return (tvar->type == NULL) ? -ENOMEM : 0; } @@ -361,6 +364,13 @@ static int convert_variable_type(Dwarf_Die *vr_die, return (tvar->type == NULL) ? -ENOMEM : 0; } + if (cast && (strcmp(cast, "u") == 0)) + sign = 'u'; + else if (cast && (strcmp(cast, "s") == 0)) + sign = 's'; + else + sign = die_is_signed_type(&type) ? 's' : 'u'; + ret = dwarf_bytesize(&type); if (ret <= 0) /* No size ... try to use default type */ @@ -373,15 +383,14 @@ static int convert_variable_type(Dwarf_Die *vr_die, dwarf_diename(&type), MAX_BASIC_TYPE_BITS); ret = MAX_BASIC_TYPE_BITS; } - ret = snprintf(buf, 16, "%c%d", - die_is_signed_type(&type) ? 's' : 'u', ret); + ret = snprintf(buf, 16, "%c%d", sign, ret); formatted: if (ret < 0 || ret >= 16) { if (ret >= 16) ret = -E2BIG; pr_warning("Failed to convert variable type: %s\n", - strerror_r(-ret, sbuf, sizeof(sbuf))); + str_error_r(-ret, sbuf, sizeof(sbuf))); return ret; } tvar->type = strdup(buf); @@ -553,7 +562,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) { Dwarf_Die vr_die; - char buf[32], *ptr; + char *buf, *ptr; int ret = 0; /* Copy raw parameters */ @@ -563,13 +572,13 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) if (pf->pvar->name) pf->tvar->name = strdup(pf->pvar->name); else { - ret = synthesize_perf_probe_arg(pf->pvar, buf, 32); - if (ret < 0) - return ret; + buf = synthesize_perf_probe_arg(pf->pvar); + if (!buf) + return -ENOMEM; ptr = strchr(buf, ':'); /* Change type separator to _ */ if (ptr) *ptr = '_'; - pf->tvar->name = strdup(buf); + pf->tvar->name = buf; } if (pf->tvar->name == NULL) return -ENOMEM; @@ -809,7 +818,7 @@ static int find_lazy_match_lines(struct intlist *list, fp = fopen(fname, "r"); if (!fp) { pr_warning("Failed to open %s: %s\n", fname, - strerror_r(errno, sbuf, sizeof(sbuf))); + str_error_r(errno, sbuf, sizeof(sbuf))); return -errno; } @@ -1294,6 +1303,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) { struct available_var_finder *af = data; struct variable_list *vl; + struct strbuf buf = STRBUF_INIT; int tag, ret; vl = &af->vls[af->nvls - 1]; @@ -1307,25 +1317,26 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) if (ret == 0 || ret == -ERANGE) { int ret2; bool externs = !af->child; - struct strbuf buf; - strbuf_init(&buf, 64); + if (strbuf_init(&buf, 64) < 0) + goto error; if (probe_conf.show_location_range) { - if (!externs) { - if (ret) - strbuf_add(&buf, "[INV]\t", 6); - else - strbuf_add(&buf, "[VAL]\t", 6); - } else - strbuf_add(&buf, "[EXT]\t", 6); + if (!externs) + ret2 = strbuf_add(&buf, + ret ? "[INV]\t" : "[VAL]\t", 6); + else + ret2 = strbuf_add(&buf, "[EXT]\t", 6); + if (ret2) + goto error; } ret2 = die_get_varname(die_mem, &buf); if (!ret2 && probe_conf.show_location_range && !externs) { - strbuf_addch(&buf, '\t'); + if (strbuf_addch(&buf, '\t') < 0) + goto error; ret2 = die_get_var_range(&af->pf.sp_die, die_mem, &buf); } @@ -1343,6 +1354,10 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) return DIE_FIND_CB_CONTINUE; else return DIE_FIND_CB_SIBLING; +error: + strbuf_release(&buf); + pr_debug("Error in strbuf\n"); + return DIE_FIND_CB_END; } /* Add a found vars into available variables list */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 8162ba0e2e57..b7d4f4aeee61 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -13,6 +13,7 @@ util/cpumap.c ../lib/bitmap.c ../lib/find_bit.c ../lib/hweight.c +../lib/vsprintf.c util/thread_map.c util/util.c util/xyarray.c @@ -23,3 +24,4 @@ util/strlist.c util/trace-event.c ../lib/rbtree.c util/string.c +util/symbol_fprintf.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 98f127abfa42..a5fbc012e3df 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -2,6 +2,7 @@ #include <structmember.h> #include <inttypes.h> #include <poll.h> +#include <linux/err.h> #include "evlist.h" #include "evsel.h" #include "event.h" @@ -47,6 +48,7 @@ PyMODINIT_FUNC initperf(void); struct pyrf_event { PyObject_HEAD + struct perf_evsel *evsel; struct perf_sample sample; union perf_event event; }; @@ -288,6 +290,85 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) return ret; } +static bool is_tracepoint(struct pyrf_event *pevent) +{ + return pevent->evsel->attr.type == PERF_TYPE_TRACEPOINT; +} + +static PyObject* +tracepoint_field(struct pyrf_event *pe, struct format_field *field) +{ + struct pevent *pevent = field->event->pevent; + void *data = pe->sample.raw_data; + PyObject *ret = NULL; + unsigned long long val; + unsigned int offset, len; + + if (field->flags & FIELD_IS_ARRAY) { + offset = field->offset; + len = field->size; + if (field->flags & FIELD_IS_DYNAMIC) { + val = pevent_read_number(pevent, data + offset, len); + offset = val; + len = offset >> 16; + offset &= 0xffff; + } + if (field->flags & FIELD_IS_STRING && + is_printable_array(data + offset, len)) { + ret = PyString_FromString((char *)data + offset); + } else { + ret = PyByteArray_FromStringAndSize((const char *) data + offset, len); + field->flags &= ~FIELD_IS_STRING; + } + } else { + val = pevent_read_number(pevent, data + field->offset, + field->size); + if (field->flags & FIELD_IS_POINTER) + ret = PyLong_FromUnsignedLong((unsigned long) val); + else if (field->flags & FIELD_IS_SIGNED) + ret = PyLong_FromLong((long) val); + else + ret = PyLong_FromUnsignedLong((unsigned long) val); + } + + return ret; +} + +static PyObject* +get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) +{ + const char *str = PyString_AsString(PyObject_Str(attr_name)); + struct perf_evsel *evsel = pevent->evsel; + struct format_field *field; + + if (!evsel->tp_format) { + struct event_format *tp_format; + + tp_format = trace_event__tp_format_id(evsel->attr.config); + if (!tp_format) + return NULL; + + evsel->tp_format = tp_format; + } + + field = pevent_find_any_field(evsel->tp_format, str); + if (!field) + return NULL; + + return tracepoint_field(pevent, field); +} + +static PyObject* +pyrf_sample_event__getattro(struct pyrf_event *pevent, PyObject *attr_name) +{ + PyObject *obj = NULL; + + if (is_tracepoint(pevent)) + obj = get_tracepoint_field(pevent, attr_name); + + return obj ?: PyObject_GenericGetAttr((PyObject *) pevent, attr_name); +} + static PyTypeObject pyrf_sample_event__type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "perf.sample_event", @@ -296,6 +377,7 @@ static PyTypeObject pyrf_sample_event__type = { .tp_doc = pyrf_sample_event__doc, .tp_members = pyrf_sample_event__members, .tp_repr = (reprfunc)pyrf_sample_event__repr, + .tp_getattro = (getattrofunc) pyrf_sample_event__getattro, }; static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object."); @@ -653,6 +735,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, attr.precise_ip = precise_ip; attr.mmap_data = mmap_data; attr.sample_id_all = sample_id_all; + attr.size = sizeof(attr); perf_evsel__init(&pevsel->evsel, &attr, idx); return 0; @@ -863,13 +946,22 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (event != NULL) { PyObject *pyevent = pyrf_event__new(event); struct pyrf_event *pevent = (struct pyrf_event *)pyevent; - - perf_evlist__mmap_consume(evlist, cpu); + struct perf_evsel *evsel; if (pyevent == NULL) return PyErr_NoMemory(); - err = perf_evlist__parse_sample(evlist, event, &pevent->sample); + evsel = perf_evlist__event2evsel(evlist, event); + if (!evsel) + return Py_None; + + pevent->evsel = evsel; + + err = perf_evsel__parse_sample(evsel, event, &pevent->sample); + + /* Consume the even only after we parsed it out. */ + perf_evlist__mmap_consume(evlist, cpu); + if (err) return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); @@ -957,7 +1049,7 @@ static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i) if (i >= pevlist->evlist.nr_entries) return NULL; - evlist__for_each(&pevlist->evlist, pos) { + evlist__for_each_entry(&pevlist->evlist, pos) { if (i-- == 0) break; } @@ -1073,7 +1165,32 @@ static struct { { .name = NULL, }, }; +static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, + PyObject *args, PyObject *kwargs) +{ + struct event_format *tp_format; + static char *kwlist[] = { "sys", "name", NULL }; + char *sys = NULL; + char *name = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss", kwlist, + &sys, &name)) + return NULL; + + tp_format = trace_event__tp_format(sys, name); + if (IS_ERR(tp_format)) + return PyInt_FromLong(-1); + + return PyInt_FromLong(tp_format->id); +} + static PyMethodDef perf__methods[] = { + { + .ml_name = "tracepoint", + .ml_meth = (PyCFunction) pyrf__tracepoint, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("Get tracepoint config.") + }, { .ml_name = NULL, } }; @@ -1100,6 +1217,33 @@ PyMODINIT_FUNC initperf(void) Py_INCREF(&pyrf_evsel__type); PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type); + Py_INCREF(&pyrf_mmap_event__type); + PyModule_AddObject(module, "mmap_event", (PyObject *)&pyrf_mmap_event__type); + + Py_INCREF(&pyrf_lost_event__type); + PyModule_AddObject(module, "lost_event", (PyObject *)&pyrf_lost_event__type); + + Py_INCREF(&pyrf_comm_event__type); + PyModule_AddObject(module, "comm_event", (PyObject *)&pyrf_comm_event__type); + + Py_INCREF(&pyrf_task_event__type); + PyModule_AddObject(module, "task_event", (PyObject *)&pyrf_task_event__type); + + Py_INCREF(&pyrf_throttle_event__type); + PyModule_AddObject(module, "throttle_event", (PyObject *)&pyrf_throttle_event__type); + + Py_INCREF(&pyrf_task_event__type); + PyModule_AddObject(module, "task_event", (PyObject *)&pyrf_task_event__type); + + Py_INCREF(&pyrf_read_event__type); + PyModule_AddObject(module, "read_event", (PyObject *)&pyrf_read_event__type); + + Py_INCREF(&pyrf_sample_event__type); + PyModule_AddObject(module, "sample_event", (PyObject *)&pyrf_sample_event__type); + + Py_INCREF(&pyrf_context_switch_event__type); + PyModule_AddObject(module, "switch_event", (PyObject *)&pyrf_context_switch_event__type); + Py_INCREF(&pyrf_thread_map__type); PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type); diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index 01f03242b86a..639d1da2f978 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -1,5 +1,7 @@ -#include "cache.h" +#include <stdlib.h> +#include "strbuf.h" #include "quote.h" +#include "util.h" /* Help to copy the thing properly quoted for the shell safety. * any single quote is replaced with '\'', any exclamation point @@ -17,38 +19,42 @@ static inline int need_bs_quote(char c) return (c == '\'' || c == '!'); } -static void sq_quote_buf(struct strbuf *dst, const char *src) +static int sq_quote_buf(struct strbuf *dst, const char *src) { char *to_free = NULL; + int ret; if (dst->buf == src) to_free = strbuf_detach(dst, NULL); - strbuf_addch(dst, '\''); - while (*src) { + ret = strbuf_addch(dst, '\''); + while (!ret && *src) { size_t len = strcspn(src, "'!"); - strbuf_add(dst, src, len); + ret = strbuf_add(dst, src, len); src += len; - while (need_bs_quote(*src)) { - strbuf_addstr(dst, "'\\"); - strbuf_addch(dst, *src++); - strbuf_addch(dst, '\''); - } + while (!ret && need_bs_quote(*src)) + ret = strbuf_addf(dst, "'\\%c\'", *src++); } - strbuf_addch(dst, '\''); + if (!ret) + ret = strbuf_addch(dst, '\''); free(to_free); + + return ret; } -void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) +int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) { - int i; + int i, ret; /* Copy into destination buffer. */ - strbuf_grow(dst, 255); - for (i = 0; argv[i]; ++i) { - strbuf_addch(dst, ' '); - sq_quote_buf(dst, argv[i]); + ret = strbuf_grow(dst, 255); + for (i = 0; !ret && argv[i]; ++i) { + ret = strbuf_addch(dst, ' '); + if (ret) + break; + ret = sq_quote_buf(dst, argv[i]); if (maxlen && dst->len > maxlen) die("Too many or long arguments"); } + return ret; } diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index 3340c9c4a6ca..055ca45bed99 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -2,7 +2,6 @@ #define __PERF_QUOTE_H #include <stddef.h> -#include <stdio.h> /* Help to copy the thing properly quoted for the shell safety. * any single quote is replaced with '\'', any exclamation point @@ -24,6 +23,8 @@ * sq_quote() in a real application. */ -void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); +struct strbuf; + +int sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); #endif /* __PERF_QUOTE_H */ diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h new file mode 100644 index 000000000000..808cc45611fe --- /dev/null +++ b/tools/perf/util/rb_resort.h @@ -0,0 +1,149 @@ +#ifndef _PERF_RESORT_RB_H_ +#define _PERF_RESORT_RB_H_ +/* + * Template for creating a class to resort an existing rb_tree according to + * a new sort criteria, that must be present in the entries of the source + * rb_tree. + * + * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Quick example, resorting threads by its shortname: + * + * First define the prefix (threads) to be used for the functions and data + * structures created, and provide an expression for the sorting, then the + * fields to be present in each of the entries in the new, sorted, rb_tree. + * + * The body of the init function should collect the fields, maybe + * pre-calculating them from multiple entries in the original 'entry' from + * the rb_tree used as a source for the entries to be sorted: + +DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname, + b->thread->shortname) < 0, + struct thread *thread; +) +{ + entry->thread = rb_entry(nd, struct thread, rb_node); +} + + * After this it is just a matter of instantiating it and iterating it, + * for a few data structures with existing rb_trees, such as 'struct machine', + * helpers are available to get the rb_root and the nr_entries: + + DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr); + + * This will instantiate the new rb_tree and a cursor for it, that can be used as: + + struct rb_node *nd; + + resort_rb__for_each_entry(nd, threads) { + struct thread *t = threads_entry; + printf("%s: %d\n", t->shortname, t->tid); + } + + * Then delete it: + + resort_rb__delete(threads); + + * The name of the data structures and functions will have a _sorted suffix + * right before the method names, i.e. will look like: + * + * struct threads_sorted_entry {} + * threads_sorted__insert() + */ + +#define DEFINE_RESORT_RB(__name, __comp, ...) \ +struct __name##_sorted_entry { \ + struct rb_node rb_node; \ + __VA_ARGS__ \ +}; \ +static void __name##_sorted__init_entry(struct rb_node *nd, \ + struct __name##_sorted_entry *entry); \ + \ +static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \ +{ \ + struct __name##_sorted_entry *a, *b; \ + a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \ + b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \ + return __comp; \ +} \ + \ +struct __name##_sorted { \ + struct rb_root entries; \ + struct __name##_sorted_entry nd[0]; \ +}; \ + \ +static void __name##_sorted__insert(struct __name##_sorted *sorted, \ + struct rb_node *sorted_nd) \ +{ \ + struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \ + while (*p != NULL) { \ + parent = *p; \ + if (__name##_sorted__cmp(sorted_nd, parent)) \ + p = &(*p)->rb_left; \ + else \ + p = &(*p)->rb_right; \ + } \ + rb_link_node(sorted_nd, parent, p); \ + rb_insert_color(sorted_nd, &sorted->entries); \ +} \ + \ +static void __name##_sorted__sort(struct __name##_sorted *sorted, \ + struct rb_root *entries) \ +{ \ + struct rb_node *nd; \ + unsigned int i = 0; \ + for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \ + struct __name##_sorted_entry *snd = &sorted->nd[i++]; \ + __name##_sorted__init_entry(nd, snd); \ + __name##_sorted__insert(sorted, &snd->rb_node); \ + } \ +} \ + \ +static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \ + int nr_entries) \ +{ \ + struct __name##_sorted *sorted; \ + sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \ + if (sorted) { \ + sorted->entries = RB_ROOT; \ + __name##_sorted__sort(sorted, entries); \ + } \ + return sorted; \ +} \ + \ +static void __name##_sorted__delete(struct __name##_sorted *sorted) \ +{ \ + free(sorted); \ +} \ + \ +static void __name##_sorted__init_entry(struct rb_node *nd, \ + struct __name##_sorted_entry *entry) + +#define DECLARE_RESORT_RB(__name) \ +struct __name##_sorted_entry *__name##_entry; \ +struct __name##_sorted *__name = __name##_sorted__new + +#define resort_rb__for_each_entry(__nd, __name) \ + for (__nd = rb_first(&__name->entries); \ + __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \ + rb_node), __nd; \ + __nd = rb_next(__nd)) + +#define resort_rb__delete(__name) \ + __name##_sorted__delete(__name), __name = NULL + +/* + * Helpers for other classes that contains both an rbtree and the + * number of entries in it: + */ + +/* For 'struct intlist' */ +#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ + DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \ + __ilist->rblist.nr_entries) + +/* For 'struct machine->threads' */ +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \ + DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads) + +#endif /* _PERF_RESORT_RB_H_ */ diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 0467367dc315..98bf584853ea 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -129,7 +129,8 @@ bool perf_can_record_cpu_wide(void) return true; } -void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) +void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts, + struct callchain_param *callchain) { struct perf_evsel *evsel; bool use_sample_identifier = false; @@ -147,8 +148,8 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) use_comm_exec = perf_can_comm_exec(); - evlist__for_each(evlist, evsel) { - perf_evsel__config(evsel, opts); + evlist__for_each_entry(evlist, evsel) { + perf_evsel__config(evsel, opts, callchain); if (evsel->tracking && use_comm_exec) evsel->attr.comm_exec = 1; } @@ -160,18 +161,18 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) * match the id. */ use_sample_identifier = perf_can_sample_identifier(); - evlist__for_each(evlist, evsel) + evlist__for_each_entry(evlist, evsel) perf_evsel__set_sample_id(evsel, use_sample_identifier); } else if (evlist->nr_entries > 1) { struct perf_evsel *first = perf_evlist__first(evlist); - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.sample_type == first->attr.sample_type) continue; use_sample_identifier = perf_can_sample_identifier(); break; } - evlist__for_each(evlist, evsel) + evlist__for_each_entry(evlist, evsel) perf_evsel__set_sample_id(evsel, use_sample_identifier); } diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index b3aabc0d4eb0..5d1eb1ccd96c 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -31,6 +31,8 @@ #include <perl.h> #include "../../perf.h" +#include "../callchain.h" +#include "../machine.h" #include "../thread.h" #include "../event.h" #include "../trace-event.h" @@ -248,10 +250,89 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } +static SV *perl_process_callchain(struct perf_sample *sample, + struct perf_evsel *evsel, + struct addr_location *al) +{ + AV *list; + + list = newAV(); + if (!list) + goto exit; + + if (!symbol_conf.use_callchain || !sample->callchain) + goto exit; + + if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel, + sample, NULL, NULL, scripting_max_stack) != 0) { + pr_err("Failed to resolve callchain. Skipping\n"); + goto exit; + } + callchain_cursor_commit(&callchain_cursor); + + + while (1) { + HV *elem; + struct callchain_cursor_node *node; + node = callchain_cursor_current(&callchain_cursor); + if (!node) + break; + + elem = newHV(); + if (!elem) + goto exit; + + if (!hv_stores(elem, "ip", newSVuv(node->ip))) { + hv_undef(elem); + goto exit; + } + + if (node->sym) { + HV *sym = newHV(); + if (!sym) { + hv_undef(elem); + goto exit; + } + if (!hv_stores(sym, "start", newSVuv(node->sym->start)) || + !hv_stores(sym, "end", newSVuv(node->sym->end)) || + !hv_stores(sym, "binding", newSVuv(node->sym->binding)) || + !hv_stores(sym, "name", newSVpvn(node->sym->name, + node->sym->namelen)) || + !hv_stores(elem, "sym", newRV_noinc((SV*)sym))) { + hv_undef(sym); + hv_undef(elem); + goto exit; + } + } + + if (node->map) { + struct map *map = node->map; + const char *dsoname = "[unknown]"; + if (map && map->dso && (map->dso->name || map->dso->long_name)) { + if (symbol_conf.show_kernel_path && map->dso->long_name) + dsoname = map->dso->long_name; + else if (map->dso->name) + dsoname = map->dso->name; + } + if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) { + hv_undef(elem); + goto exit; + } + } + + callchain_cursor_advance(&callchain_cursor); + av_push(list, newRV_noinc((SV*)elem)); + } + +exit: + return newRV_noinc((SV*)list); +} + static void perl_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread) + struct addr_location *al) { + struct thread *thread = al->thread; struct event_format *event = evsel->tp_format; struct format_field *field; static char handler[256]; @@ -295,6 +376,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, XPUSHs(sv_2mortal(newSVuv(ns))); XPUSHs(sv_2mortal(newSViv(pid))); XPUSHs(sv_2mortal(newSVpv(comm, 0))); + XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al))); /* common fields other than pid can be accessed via xsub fns */ @@ -329,6 +411,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, XPUSHs(sv_2mortal(newSVuv(nsecs))); XPUSHs(sv_2mortal(newSViv(pid))); XPUSHs(sv_2mortal(newSVpv(comm, 0))); + XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al))); call_pv("main::trace_unhandled", G_SCALAR); } SPAGAIN; @@ -366,7 +449,7 @@ static void perl_process_event(union perf_event *event, struct perf_evsel *evsel, struct addr_location *al) { - perl_process_tracepoint(sample, evsel, al->thread); + perl_process_tracepoint(sample, evsel, al); perl_process_event_generic(event, sample, evsel); } @@ -490,7 +573,27 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "use Perf::Trace::Util;\n\n"); fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n"); - fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n"); + fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n"); + + + fprintf(ofp, "\n\ +sub print_backtrace\n\ +{\n\ + my $callchain = shift;\n\ + for my $node (@$callchain)\n\ + {\n\ + if(exists $node->{sym})\n\ + {\n\ + printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\ + }\n\ + else\n\ + {\n\ + printf( \"\\t[\\%%x]\\n\", $node{ip});\n\ + }\n\ + }\n\ +}\n\n\ +"); + while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); @@ -502,7 +605,8 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "$common_secs, "); fprintf(ofp, "$common_nsecs,\n"); fprintf(ofp, "\t $common_pid, "); - fprintf(ofp, "$common_comm,\n\t "); + fprintf(ofp, "$common_comm, "); + fprintf(ofp, "$common_callchain,\n\t "); not_first = 0; count = 0; @@ -519,7 +623,7 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "\tprint_header($event_name, $common_cpu, " "$common_secs, $common_nsecs,\n\t " - "$common_pid, $common_comm);\n\n"); + "$common_pid, $common_comm, $common_callchain);\n\n"); fprintf(ofp, "\tprintf(\""); @@ -581,17 +685,22 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "$%s", f->name); } - fprintf(ofp, ");\n"); + fprintf(ofp, ");\n\n"); + + fprintf(ofp, "\tprint_backtrace($common_callchain);\n"); + fprintf(ofp, "}\n\n"); } fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, " "$common_cpu, $common_secs, $common_nsecs,\n\t " - "$common_pid, $common_comm) = @_;\n\n"); + "$common_pid, $common_comm, $common_callchain) = @_;\n\n"); fprintf(ofp, "\tprint_header($event_name, $common_cpu, " "$common_secs, $common_nsecs,\n\t $common_pid, " - "$common_comm);\n}\n\n"); + "$common_comm, $common_callchain);\n"); + fprintf(ofp, "\tprint_backtrace($common_callchain);\n"); + fprintf(ofp, "}\n\n"); fprintf(ofp, "sub print_header\n{\n" "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n" diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index fbd05242b4e5..e0203b979474 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -41,6 +41,7 @@ #include "../thread-stack.h" #include "../trace-event.h" #include "../machine.h" +#include "../call-path.h" #include "thread_map.h" #include "cpumap.h" #include "stat.h" @@ -272,7 +273,7 @@ static PyObject *get_field_numeric_entry(struct event_format *event, struct format_field *field, void *data) { bool is_array = field->flags & FIELD_IS_ARRAY; - PyObject *obj, *list = NULL; + PyObject *obj = NULL, *list = NULL; unsigned long long val; unsigned int item_size, n_items, i; @@ -323,7 +324,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, if (!symbol_conf.use_callchain || !sample->callchain) goto exit; - if (thread__resolve_callchain(al->thread, evsel, + if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel, sample, NULL, NULL, scripting_max_stack) != 0) { pr_err("Failed to resolve callchain. Skipping\n"); @@ -385,13 +386,12 @@ exit: return pylist; } - static void python_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { struct event_format *event = evsel->tp_format; - PyObject *handler, *context, *t, *obj, *callchain; + PyObject *handler, *context, *t, *obj = NULL, *callchain; PyObject *dict = NULL; static char handler_name[256]; struct format_field *field; @@ -407,8 +407,11 @@ static void python_process_tracepoint(struct perf_sample *sample, if (!t) Py_FatalError("couldn't create Python tuple"); - if (!event) - die("ug! no event found for type %d", (int)evsel->attr.config); + if (!event) { + snprintf(handler_name, sizeof(handler_name), + "ug! no event found for type %" PRIu64, (u64)evsel->attr.config); + Py_FatalError(handler_name); + } pid = raw_field_value(event, "common_pid", data); @@ -453,14 +456,26 @@ static void python_process_tracepoint(struct perf_sample *sample, pydict_set_item_string_decref(dict, "common_callchain", callchain); } for (field = event->format.fields; field; field = field->next) { - if (field->flags & FIELD_IS_STRING) { - int offset; + unsigned int offset, len; + unsigned long long val; + + if (field->flags & FIELD_IS_ARRAY) { + offset = field->offset; + len = field->size; if (field->flags & FIELD_IS_DYNAMIC) { - offset = *(int *)(data + field->offset); + val = pevent_read_number(scripting_context->pevent, + data + offset, len); + offset = val; + len = offset >> 16; offset &= 0xffff; - } else - offset = field->offset; - obj = PyString_FromString((char *)data + offset); + } + if (field->flags & FIELD_IS_STRING && + is_printable_array(data + offset, len)) { + obj = PyString_FromString((char *) data + offset); + } else { + obj = PyByteArray_FromStringAndSize((const char *) data + offset, len); + field->flags &= ~FIELD_IS_STRING; + } } else { /* FIELD_IS_NUMERIC */ obj = get_field_numeric_entry(event, field, data); } @@ -614,7 +629,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso, struct machine *machine) { struct tables *tables = container_of(dbe, struct tables, dbe); - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; PyObject *t; build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); @@ -681,7 +696,7 @@ static int python_export_sample(struct db_export *dbe, struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(21); + t = tuple_new(22); tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); @@ -704,6 +719,7 @@ static int python_export_sample(struct db_export *dbe, tuple_set_u64(t, 18, es->sample->data_src); tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK); tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX)); + tuple_set_u64(t, 21, es->call_path_id); call_object(tables->sample_handler, t, "sample_table"); @@ -998,8 +1014,10 @@ static void set_table_handlers(struct tables *tables) { const char *perf_db_export_mode = "perf_db_export_mode"; const char *perf_db_export_calls = "perf_db_export_calls"; - PyObject *db_export_mode, *db_export_calls; + const char *perf_db_export_callchains = "perf_db_export_callchains"; + PyObject *db_export_mode, *db_export_calls, *db_export_callchains; bool export_calls = false; + bool export_callchains = false; int ret; memset(tables, 0, sizeof(struct tables)); @@ -1016,6 +1034,7 @@ static void set_table_handlers(struct tables *tables) if (!ret) return; + /* handle export calls */ tables->dbe.crp = NULL; db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls); if (db_export_calls) { @@ -1033,6 +1052,33 @@ static void set_table_handlers(struct tables *tables) Py_FatalError("failed to create calls processor"); } + /* handle export callchains */ + tables->dbe.cpr = NULL; + db_export_callchains = PyDict_GetItemString(main_dict, + perf_db_export_callchains); + if (db_export_callchains) { + ret = PyObject_IsTrue(db_export_callchains); + if (ret == -1) + handler_call_die(perf_db_export_callchains); + export_callchains = !!ret; + } + + if (export_callchains) { + /* + * Attempt to use the call path root from the call return + * processor, if the call return processor is in use. Otherwise, + * we allocate a new call path root. This prevents exporting + * duplicate call path ids when both are in use simultaniously. + */ + if (tables->dbe.crp) + tables->dbe.cpr = tables->dbe.crp->cpr; + else + tables->dbe.cpr = call_path_root__new(); + + if (!tables->dbe.cpr) + Py_FatalError("failed to create call path root"); + } + tables->db_export_mode = true; /* * Reserve per symbol space for symbol->db_id via symbol__priv() diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4abd85c6346d..5d61242a6e64 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -83,7 +83,7 @@ static bool perf_session__has_comm_exec(struct perf_session *session) { struct perf_evsel *evsel; - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { if (evsel->attr.comm_exec) return true; } @@ -178,6 +178,8 @@ static void perf_session__delete_threads(struct perf_session *session) void perf_session__delete(struct perf_session *session) { + if (session == NULL) + return; auxtrace__free(session); auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); @@ -409,6 +411,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->stat = process_stat_stub; if (tool->stat_round == NULL) tool->stat_round = process_stat_round_stub; + if (tool->time_conv == NULL) + tool->time_conv = process_event_op2_stub; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -555,7 +559,7 @@ static u8 revbyte(u8 b) /* * XXX this is hack in attempt to carry flags bitfield - * throught endian village. ABI says: + * through endian village. ABI says: * * Bit-fields are allocated from right to left (least to most significant) * on little-endian implementations and from left to right (most to least @@ -591,6 +595,7 @@ do { \ if (bswap_safe(f, 0)) \ attr->f = bswap_##sz(attr->f); \ } while(0) +#define bswap_field_16(f) bswap_field(f, 16) #define bswap_field_32(f) bswap_field(f, 32) #define bswap_field_64(f) bswap_field(f, 64) @@ -606,6 +611,7 @@ do { \ bswap_field_64(sample_regs_user); bswap_field_32(sample_stack_user); bswap_field_32(aux_watermark); + bswap_field_16(sample_max_stack); /* * After read_format are bitfields. Check read_format because @@ -794,6 +800,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_STAT] = perf_event__stat_swap, [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap, [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap, + [PERF_RECORD_TIME_CONV] = perf_event__all64_swap, [PERF_RECORD_HEADER_MAX] = NULL, }; @@ -904,7 +911,7 @@ static void callchain__printf(struct perf_evsel *evsel, unsigned int i; struct ip_callchain *callchain = sample->callchain; - if (has_branch_callstack(evsel)) + if (perf_evsel__has_branch_callstack(evsel)) callchain__lbr_callstack_printf(sample); printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr); @@ -1078,7 +1085,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(evsel, sample); - if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel)) + if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel)) branch_stack__printf(sample); if (sample_type & PERF_SAMPLE_REGS_USER) @@ -1341,6 +1348,9 @@ static s64 perf_session__process_user_event(struct perf_session *session, return tool->stat(tool, event, session); case PERF_RECORD_STAT_ROUND: return tool->stat_round(tool, event, session); + case PERF_RECORD_TIME_CONV: + session->time_conv = event->time_conv; + return tool->time_conv(tool, event, session); default: return -EINVAL; } @@ -1489,10 +1499,27 @@ int perf_session__register_idle_thread(struct perf_session *session) return err; } +static void +perf_session__warn_order(const struct perf_session *session) +{ + const struct ordered_events *oe = &session->ordered_events; + struct perf_evsel *evsel; + bool should_warn = true; + + evlist__for_each_entry(session->evlist, evsel) { + if (evsel->attr.write_backward) + should_warn = false; + } + + if (!should_warn) + return; + if (oe->nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); +} + static void perf_session__warn_about_errors(const struct perf_session *session) { const struct events_stats *stats = &session->evlist->stats; - const struct ordered_events *oe = &session->ordered_events; if (session->tool->lost == perf_event__process_lost && stats->nr_events[PERF_RECORD_LOST] != 0) { @@ -1549,8 +1576,7 @@ static void perf_session__warn_about_errors(const struct perf_session *session) stats->nr_unprocessable_samples); } - if (oe->nr_unordered_events != 0) - ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); + perf_session__warn_order(session); events_stats__auxtrace_error_warn(stats); @@ -1830,7 +1856,11 @@ out: out_err: ui_progress__finish(); perf_session__warn_about_errors(session); - ordered_events__free(&session->ordered_events); + /* + * We may switching perf.data output, make ordered_events + * reusable. + */ + ordered_events__reinit(&session->ordered_events); auxtrace__free_events(session); session->one_mmap = false; return err; @@ -1858,7 +1888,7 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg) { struct perf_evsel *evsel; - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { if (evsel->attr.type == PERF_TYPE_TRACEPOINT) return true; } @@ -1940,112 +1970,13 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, { struct perf_evsel *pos; - evlist__for_each(session->evlist, pos) { + evlist__for_each_entry(session->evlist, pos) { if (pos->attr.type == type) return pos; } return NULL; } -void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, - struct addr_location *al, - unsigned int print_opts, unsigned int stack_depth) -{ - struct callchain_cursor_node *node; - int print_ip = print_opts & PRINT_IP_OPT_IP; - int print_sym = print_opts & PRINT_IP_OPT_SYM; - int print_dso = print_opts & PRINT_IP_OPT_DSO; - int print_symoffset = print_opts & PRINT_IP_OPT_SYMOFFSET; - int print_oneline = print_opts & PRINT_IP_OPT_ONELINE; - int print_srcline = print_opts & PRINT_IP_OPT_SRCLINE; - char s = print_oneline ? ' ' : '\t'; - - if (symbol_conf.use_callchain && sample->callchain) { - struct addr_location node_al; - - if (thread__resolve_callchain(al->thread, evsel, - sample, NULL, NULL, - stack_depth) != 0) { - if (verbose) - error("Failed to resolve callchain. Skipping\n"); - return; - } - callchain_cursor_commit(&callchain_cursor); - - if (print_symoffset) - node_al = *al; - - while (stack_depth) { - u64 addr = 0; - - node = callchain_cursor_current(&callchain_cursor); - if (!node) - break; - - if (node->sym && node->sym->ignore) - goto next; - - if (print_ip) - printf("%c%16" PRIx64, s, node->ip); - - if (node->map) - addr = node->map->map_ip(node->map, node->ip); - - if (print_sym) { - printf(" "); - if (print_symoffset) { - node_al.addr = addr; - node_al.map = node->map; - symbol__fprintf_symname_offs(node->sym, &node_al, stdout); - } else - symbol__fprintf_symname(node->sym, stdout); - } - - if (print_dso) { - printf(" ("); - map__fprintf_dsoname(node->map, stdout); - printf(")"); - } - - if (print_srcline) - map__fprintf_srcline(node->map, addr, "\n ", - stdout); - - if (!print_oneline) - printf("\n"); - - stack_depth--; -next: - callchain_cursor_advance(&callchain_cursor); - } - - } else { - if (al->sym && al->sym->ignore) - return; - - if (print_ip) - printf("%16" PRIx64, sample->ip); - - if (print_sym) { - printf(" "); - if (print_symoffset) - symbol__fprintf_symname_offs(al->sym, al, - stdout); - else - symbol__fprintf_symname(al->sym, stdout); - } - - if (print_dso) { - printf(" ("); - map__fprintf_dsoname(al->map, stdout); - printf(")"); - } - - if (print_srcline) - map__fprintf_srcline(al->map, al->addr, "\n ", stdout); - } -} - int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap) { @@ -2194,7 +2125,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, max_nr = (UINT16_MAX - sizeof(struct id_index_event)) / sizeof(struct id_index_entry); - evlist__for_each(evlist, evsel) + evlist__for_each_entry(evlist, evsel) nr += evsel->ids; n = nr > max_nr ? max_nr : nr; @@ -2207,7 +2138,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, ev->id_index.header.size = sz; ev->id_index.nr = n; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { u32 j; for (j = 0; j < evsel->ids; j++) { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 5f792e35d4c1..4bd758553450 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -26,6 +26,7 @@ struct perf_session { struct itrace_synth_opts *itrace_synth_opts; struct list_head auxtrace_index; struct trace_event tevent; + struct time_conv_event time_conv; bool repipe; bool one_mmap; void *one_mmap_addr; @@ -35,13 +36,6 @@ struct perf_session { struct perf_tool *tool; }; -#define PRINT_IP_OPT_IP (1<<0) -#define PRINT_IP_OPT_SYM (1<<1) -#define PRINT_IP_OPT_DSO (1<<2) -#define PRINT_IP_OPT_SYMOFFSET (1<<3) -#define PRINT_IP_OPT_ONELINE (1<<4) -#define PRINT_IP_OPT_SRCLINE (1<<5) - struct perf_tool; struct perf_session *perf_session__new(struct perf_data_file *file, @@ -103,10 +97,6 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, - struct addr_location *al, - unsigned int print_opts, unsigned int stack_depth); - int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index f5ba111cd9fb..3d3cb8392c86 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,13 +21,6 @@ const char *sort_order; const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; -int sort__need_collapse = 0; -int sort__has_parent = 0; -int sort__has_sym = 0; -int sort__has_dso = 0; -int sort__has_socket = 0; -int sort__has_thread = 0; -int sort__has_comm = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; /* @@ -86,8 +79,8 @@ static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf, { const char *comm = thread__comm_str(he->thread); - width = max(7U, width) - 6; - return repsep_snprintf(bf, size, "%5d:%-*.*s", he->thread->tid, + width = max(7U, width) - 8; + return repsep_snprintf(bf, size, "%7d:%-*.*s", he->thread->tid, width, width, comm ?: ""); } @@ -102,7 +95,7 @@ static int hist_entry__thread_filter(struct hist_entry *he, int type, const void } struct sort_entry sort_thread = { - .se_header = " Pid:Command", + .se_header = " Pid:Command", .se_cmp = sort__thread_cmp, .se_snprintf = hist_entry__thread_snprintf, .se_filter = hist_entry__thread_filter, @@ -244,7 +237,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) * comparing symbol address alone is not enough since it's a * relative address within a dso. */ - if (!sort__has_dso) { + if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) { ret = sort__dso_cmp(left, right); if (ret != 0) return ret; @@ -360,6 +353,88 @@ struct sort_entry sort_srcline = { .se_width_idx = HISTC_SRCLINE, }; +/* --sort srcline_from */ + +static int64_t +sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + if (!left->branch_info->srcline_from) { + struct map *map = left->branch_info->from.map; + if (!map) + left->branch_info->srcline_from = SRCLINE_UNKNOWN; + else + left->branch_info->srcline_from = get_srcline(map->dso, + map__rip_2objdump(map, + left->branch_info->from.al_addr), + left->branch_info->from.sym, true); + } + if (!right->branch_info->srcline_from) { + struct map *map = right->branch_info->from.map; + if (!map) + right->branch_info->srcline_from = SRCLINE_UNKNOWN; + else + right->branch_info->srcline_from = get_srcline(map->dso, + map__rip_2objdump(map, + right->branch_info->from.al_addr), + right->branch_info->from.sym, true); + } + return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); +} + +static int hist_entry__srcline_from_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_from); +} + +struct sort_entry sort_srcline_from = { + .se_header = "From Source:Line", + .se_cmp = sort__srcline_from_cmp, + .se_snprintf = hist_entry__srcline_from_snprintf, + .se_width_idx = HISTC_SRCLINE_FROM, +}; + +/* --sort srcline_to */ + +static int64_t +sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + if (!left->branch_info->srcline_to) { + struct map *map = left->branch_info->to.map; + if (!map) + left->branch_info->srcline_to = SRCLINE_UNKNOWN; + else + left->branch_info->srcline_to = get_srcline(map->dso, + map__rip_2objdump(map, + left->branch_info->to.al_addr), + left->branch_info->from.sym, true); + } + if (!right->branch_info->srcline_to) { + struct map *map = right->branch_info->to.map; + if (!map) + right->branch_info->srcline_to = SRCLINE_UNKNOWN; + else + right->branch_info->srcline_to = get_srcline(map->dso, + map__rip_2objdump(map, + right->branch_info->to.al_addr), + right->branch_info->to.sym, true); + } + return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); +} + +static int hist_entry__srcline_to_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_to); +} + +struct sort_entry sort_srcline_to = { + .se_header = "To Source:Line", + .se_cmp = sort__srcline_to_cmp, + .se_snprintf = hist_entry__srcline_to_snprintf, + .se_width_idx = HISTC_SRCLINE_TO, +}; + /* --sort srcfile */ static char no_srcfile[1]; @@ -513,7 +588,11 @@ static char *get_trace_output(struct hist_entry *he) } else { pevent_event_info(&seq, evsel->tp_format, &rec); } - return seq.buffer; + /* + * Trim the buffer, it starts at 4KB and we're not going to + * add anything more to this buffer. + */ + return realloc(seq.buffer, seq.len + 1); } static int64_t @@ -1143,7 +1222,7 @@ struct sort_entry sort_mem_daddr_dso = { .se_header = "Data Object", .se_cmp = sort__dso_daddr_cmp, .se_snprintf = hist_entry__dso_daddr_snprintf, - .se_width_idx = HISTC_MEM_DADDR_SYMBOL, + .se_width_idx = HISTC_MEM_DADDR_DSO, }; struct sort_entry sort_mem_locked = { @@ -1354,6 +1433,8 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_IN_TX, "in_tx", sort_in_tx), DIM(SORT_ABORT, "abort", sort_abort), DIM(SORT_CYCLES, "cycles", sort_cycles), + DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from), + DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to), }; #undef DIM @@ -1411,7 +1492,7 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) } static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct perf_evsel *evsel) + struct hists *hists) { struct hpp_sort_entry *hse; size_t len = fmt->user_len; @@ -1419,14 +1500,14 @@ static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, hse = container_of(fmt, struct hpp_sort_entry, hpp); if (!len) - len = hists__col_len(evsel__hists(evsel), hse->se->se_width_idx); + len = hists__col_len(hists, hse->se->se_width_idx); return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name); } static int __sort__hpp_width(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, - struct perf_evsel *evsel) + struct hists *hists) { struct hpp_sort_entry *hse; size_t len = fmt->user_len; @@ -1434,7 +1515,7 @@ static int __sort__hpp_width(struct perf_hpp_fmt *fmt, hse = container_of(fmt, struct hpp_sort_entry, hpp); if (!len) - len = hists__col_len(evsel__hists(evsel), hse->se->se_width_idx); + len = hists__col_len(hists, hse->se->se_width_idx); return len; } @@ -1716,7 +1797,7 @@ static void update_dynamic_len(struct hpp_dynamic_entry *hde, } static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct perf_evsel *evsel __maybe_unused) + struct hists *hists __maybe_unused) { struct hpp_dynamic_entry *hde; size_t len = fmt->user_len; @@ -1731,7 +1812,7 @@ static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, static int __sort__hde_width(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, - struct perf_evsel *evsel __maybe_unused) + struct hists *hists __maybe_unused) { struct hpp_dynamic_entry *hde; size_t len = fmt->user_len; @@ -1992,7 +2073,7 @@ static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_nam } full_name = !!strchr(event_name, ':'); - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { /* case 2 */ if (full_name && !strcmp(pos->name, event_name)) return pos; @@ -2048,7 +2129,7 @@ static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace, int ret; struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type != PERF_TYPE_TRACEPOINT) continue; @@ -2066,7 +2147,7 @@ static int add_all_matching_fields(struct perf_evlist *evlist, struct perf_evsel *evsel; struct format_field *field; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type != PERF_TYPE_TRACEPOINT) continue; @@ -2163,7 +2244,7 @@ static int __sort_dimension__add(struct sort_dimension *sd, return -1; if (sd->entry->se_collapse) - sort__need_collapse = 1; + list->need_collapse = 1; sd->taken = 1; @@ -2245,9 +2326,9 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, pr_err("Invalid regex: %s\n%s", parent_pattern, err); return -EINVAL; } - sort__has_parent = 1; + list->parent = 1; } else if (sd->entry == &sort_sym) { - sort__has_sym = 1; + list->sym = 1; /* * perf diff displays the performance difference amongst * two or more perf.data files. Those files could come @@ -2258,13 +2339,13 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, sd->entry->se_collapse = sort__sym_sort; } else if (sd->entry == &sort_dso) { - sort__has_dso = 1; + list->dso = 1; } else if (sd->entry == &sort_socket) { - sort__has_socket = 1; + list->socket = 1; } else if (sd->entry == &sort_thread) { - sort__has_thread = 1; + list->thread = 1; } else if (sd->entry == &sort_comm) { - sort__has_comm = 1; + list->comm = 1; } return __sort_dimension__add(sd, list, level); @@ -2289,7 +2370,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return -EINVAL; if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) - sort__has_sym = 1; + list->sym = 1; __sort_dimension__add(sd, list, level); return 0; @@ -2304,8 +2385,11 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; + if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0) + return -EINVAL; + if (sd->entry == &sort_mem_daddr_sym) - sort__has_sym = 1; + list->sym = 1; __sort_dimension__add(sd, list, level); return 0; @@ -2347,7 +2431,10 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, if (*tok) { ret = sort_dimension__add(list, tok, evlist, level); if (ret == -EINVAL) { - error("Invalid --sort key: `%s'", tok); + if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok))) + error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); + else + error("Invalid --sort key: `%s'", tok); break; } else if (ret == -ESRCH) { error("Unknown --sort key: `%s'", tok); @@ -2379,7 +2466,7 @@ static const char *get_default_sort_order(struct perf_evlist *evlist) if (evlist == NULL) goto out_no_evlist; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { use_trace = false; break; @@ -2749,10 +2836,10 @@ int setup_sorting(struct perf_evlist *evlist) void reset_output_field(void) { - sort__need_collapse = 0; - sort__has_parent = 0; - sort__has_sym = 0; - sort__has_dso = 0; + perf_hpp_list.need_collapse = 0; + perf_hpp_list.parent = 0; + perf_hpp_list.sym = 0; + perf_hpp_list.dso = 0; field_order = NULL; sort_order = NULL; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f4e35998119..7ca37ea17395 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,13 +31,6 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; -extern int sort__need_collapse; -extern int sort__has_dso; -extern int sort__has_parent; -extern int sort__has_sym; -extern int sort__has_socket; -extern int sort__has_thread; -extern int sort__has_comm; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; @@ -74,6 +67,11 @@ struct hist_entry_diff { }; }; +struct hist_entry_ops { + void *(*new)(size_t size); + void (*free)(void *ptr); +}; + /** * struct hist_entry - histogram entry * @@ -132,6 +130,7 @@ struct hist_entry { void *trace_output; struct perf_hpp_list *hpp_list; struct hist_entry *parent_he; + struct hist_entry_ops *ops; union { /* this is for hierarchical entry structure */ struct { @@ -222,6 +221,8 @@ enum sort_type { SORT_ABORT, SORT_IN_TX, SORT_CYCLES, + SORT_SRCLINE_FROM, + SORT_SRCLINE_TO, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index fdb71961143e..8a2bbd2a4d82 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -36,6 +36,11 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; static bool have_frontend_stalled; struct stats walltime_nsecs_stats; @@ -82,6 +87,11 @@ void perf_stat__reset_shadow_stats(void) sizeof(runtime_transaction_stats)); memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); + memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); + memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); + memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); + memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); + memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); } /* @@ -94,7 +104,8 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, { int ctx = evsel_context(counter); - if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) + if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || + perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) update_stats(&runtime_nsecs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); @@ -104,6 +115,16 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); else if (perf_stat_evsel__is(counter, ELISION_START)) update_stats(&runtime_elision_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) + update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) + update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) + update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) + update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) + update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) @@ -188,7 +209,7 @@ static void print_stalled_cycles_backend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio); + out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); } static void print_branch_misses(int cpu, @@ -301,6 +322,107 @@ static void print_ll_cache_misses(int cpu, out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } +/* + * High level "TopDown" CPU core pipe line bottleneck break down. + * + * Basic concept following + * Yasin, A Top Down Method for Performance analysis and Counter architecture + * ISPASS14 + * + * The CPU pipeline is divided into 4 areas that can be bottlenecks: + * + * Frontend -> Backend -> Retiring + * BadSpeculation in addition means out of order execution that is thrown away + * (for example branch mispredictions) + * Frontend is instruction decoding. + * Backend is execution, like computation and accessing data in memory + * Retiring is good execution that is not directly bottlenecked + * + * The formulas are computed in slots. + * A slot is an entry in the pipeline each for the pipeline width + * (for example a 4-wide pipeline has 4 slots for each cycle) + * + * Formulas: + * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / + * TotalSlots + * Retiring = SlotsRetired / TotalSlots + * FrontendBound = FetchBubbles / TotalSlots + * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound + * + * The kernel provides the mapping to the low level CPU events and any scaling + * needed for the CPU pipeline width, for example: + * + * TotalSlots = Cycles * 4 + * + * The scaling factor is communicated in the sysfs unit. + * + * In some cases the CPU may not be able to measure all the formulas due to + * missing events. In this case multiple formulas are combined, as possible. + * + * Full TopDown supports more levels to sub-divide each area: for example + * BackendBound into computing bound and memory bound. For now we only + * support Level 1 TopDown. + */ + +static double sanitize_val(double x) +{ + if (x < 0 && x >= -0.02) + return 0.0; + return x; +} + +static double td_total_slots(int ctx, int cpu) +{ + return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); +} + +static double td_bad_spec(int ctx, int cpu) +{ + double bad_spec = 0; + double total_slots; + double total; + + total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - + avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + + avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); + total_slots = td_total_slots(ctx, cpu); + if (total_slots) + bad_spec = total / total_slots; + return sanitize_val(bad_spec); +} + +static double td_retiring(int ctx, int cpu) +{ + double retiring = 0; + double total_slots = td_total_slots(ctx, cpu); + double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); + + if (total_slots) + retiring = ret_slots / total_slots; + return retiring; +} + +static double td_fe_bound(int ctx, int cpu) +{ + double fe_bound = 0; + double total_slots = td_total_slots(ctx, cpu); + double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); + + if (total_slots) + fe_bound = fetch_bub / total_slots; + return fe_bound; +} + +static double td_be_bound(int ctx, int cpu) +{ + double sum = (td_fe_bound(ctx, cpu) + + td_bad_spec(ctx, cpu) + + td_retiring(ctx, cpu)); + if (sum == 0) + return 0; + return sanitize_val(1.0 - sum); +} + void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out) @@ -308,6 +430,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; + const char *color = NULL; int ctx = evsel_context(evsel); if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { @@ -444,12 +567,53 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ratio = total / avg; print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); - } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) { + } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) || + perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", avg / ratio); else print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { + double fe_bound = td_fe_bound(ctx, cpu); + + if (fe_bound > 0.2) + color = PERF_COLOR_RED; + print_metric(ctxp, color, "%8.1f%%", "frontend bound", + fe_bound * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { + double retiring = td_retiring(ctx, cpu); + + if (retiring > 0.7) + color = PERF_COLOR_GREEN; + print_metric(ctxp, color, "%8.1f%%", "retiring", + retiring * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { + double bad_spec = td_bad_spec(ctx, cpu); + + if (bad_spec > 0.1) + color = PERF_COLOR_RED; + print_metric(ctxp, color, "%8.1f%%", "bad speculation", + bad_spec * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { + double be_bound = td_be_bound(ctx, cpu); + const char *name = "backend bound"; + static int have_recovery_bubbles = -1; + + /* In case the CPU does not support topdown-recovery-bubbles */ + if (have_recovery_bubbles < 0) + have_recovery_bubbles = pmu_have_event("cpu", + "topdown-recovery-bubbles"); + if (!have_recovery_bubbles) + name = "backend bound/bad spec"; + + if (be_bound > 0.2) + color = PERF_COLOR_RED; + if (td_total_slots(ctx, cpu) > 0) + print_metric(ctxp, color, "%8.1f%%", name, + be_bound * 100.); + else + print_metric(ctxp, NULL, NULL, name, 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; char unit_buf[10]; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 4d9b481cf3b6..39345c2ddfc2 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -79,6 +79,11 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { ID(TRANSACTION_START, cpu/tx-start/), ID(ELISION_START, cpu/el-start/), ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), + ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots), + ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued), + ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), + ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), + ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), }; #undef ID @@ -157,7 +162,7 @@ int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { if (perf_evsel__alloc_stats(evsel, alloc_raw)) goto out_free; } @@ -173,7 +178,7 @@ void perf_evlist__free_stats(struct perf_evlist *evlist) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { perf_evsel__free_stat_priv(evsel); perf_evsel__free_counts(evsel); perf_evsel__free_prev_raw_counts(evsel); @@ -184,7 +189,7 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) { struct perf_evsel *evsel; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { perf_evsel__reset_stat_priv(evsel); perf_evsel__reset_counts(evsel); } @@ -307,6 +312,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat_evsel *ps = counter->priv; u64 *count = counter->counts->aggr.values; + u64 val; int i, ret; aggr->val = aggr->ena = aggr->run = 0; @@ -346,7 +352,8 @@ int perf_stat_process_counter(struct perf_stat_config *config, /* * Save the full runtime - to allow normalization during printout: */ - perf_stat__update_shadow_stats(counter, count, 0); + val = counter->scale * *count; + perf_stat__update_shadow_stats(counter, &val, 0); return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0150e786ccc7..c29bb94c48a4 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -17,6 +17,11 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__TRANSACTION_START, PERF_STAT_EVSEL_ID__ELISION_START, PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP, + PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS, + PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED, + PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, + PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, + PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, PERF_STAT_EVSEL_ID__MAX, }; diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 8fb73295ec34..817593908d47 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -1,4 +1,5 @@ -#include "cache.h" +#include "debug.h" +#include "util.h" #include <linux/kernel.h> int prefixcmp(const char *str, const char *prefix) @@ -17,12 +18,13 @@ int prefixcmp(const char *str, const char *prefix) */ char strbuf_slopbuf[1]; -void strbuf_init(struct strbuf *sb, ssize_t hint) +int strbuf_init(struct strbuf *sb, ssize_t hint) { sb->alloc = sb->len = 0; sb->buf = strbuf_slopbuf; if (hint) - strbuf_grow(sb, hint); + return strbuf_grow(sb, hint); + return 0; } void strbuf_release(struct strbuf *sb) @@ -42,67 +44,104 @@ char *strbuf_detach(struct strbuf *sb, size_t *sz) return res; } -void strbuf_grow(struct strbuf *sb, size_t extra) +int strbuf_grow(struct strbuf *sb, size_t extra) { - if (sb->len + extra + 1 <= sb->len) - die("you want to use way too much memory"); - if (!sb->alloc) - sb->buf = NULL; - ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); + char *buf; + size_t nr = sb->len + extra + 1; + + if (nr < sb->alloc) + return 0; + + if (nr <= sb->len) + return -E2BIG; + + if (alloc_nr(sb->alloc) > nr) + nr = alloc_nr(sb->alloc); + + /* + * Note that sb->buf == strbuf_slopbuf if sb->alloc == 0, and it is + * a static variable. Thus we have to avoid passing it to realloc. + */ + buf = realloc(sb->alloc ? sb->buf : NULL, nr * sizeof(*buf)); + if (!buf) + return -ENOMEM; + + sb->buf = buf; + sb->alloc = nr; + return 0; } -void strbuf_addch(struct strbuf *sb, int c) +int strbuf_addch(struct strbuf *sb, int c) { - strbuf_grow(sb, 1); + int ret = strbuf_grow(sb, 1); + if (ret) + return ret; + sb->buf[sb->len++] = c; sb->buf[sb->len] = '\0'; + return 0; } -void strbuf_add(struct strbuf *sb, const void *data, size_t len) +int strbuf_add(struct strbuf *sb, const void *data, size_t len) { - strbuf_grow(sb, len); + int ret = strbuf_grow(sb, len); + if (ret) + return ret; + memcpy(sb->buf + sb->len, data, len); - strbuf_setlen(sb, sb->len + len); + return strbuf_setlen(sb, sb->len + len); } -static void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) +static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) { - int len; + int len, ret; va_list ap_saved; - if (!strbuf_avail(sb)) - strbuf_grow(sb, 64); + if (!strbuf_avail(sb)) { + ret = strbuf_grow(sb, 64); + if (ret) + return ret; + } va_copy(ap_saved, ap); len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); if (len < 0) - die("your vsnprintf is broken"); + return len; if (len > strbuf_avail(sb)) { - strbuf_grow(sb, len); + ret = strbuf_grow(sb, len); + if (ret) + return ret; len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); va_end(ap_saved); if (len > strbuf_avail(sb)) { - die("this should not happen, your vsnprintf is broken"); + pr_debug("this should not happen, your vsnprintf is broken"); + return -EINVAL; } } - strbuf_setlen(sb, sb->len + len); + return strbuf_setlen(sb, sb->len + len); } -void strbuf_addf(struct strbuf *sb, const char *fmt, ...) +int strbuf_addf(struct strbuf *sb, const char *fmt, ...) { va_list ap; + int ret; va_start(ap, fmt); - strbuf_addv(sb, fmt, ap); + ret = strbuf_addv(sb, fmt, ap); va_end(ap); + return ret; } ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint) { size_t oldlen = sb->len; size_t oldalloc = sb->alloc; + int ret; + + ret = strbuf_grow(sb, hint ? hint : 8192); + if (ret) + return ret; - strbuf_grow(sb, hint ? hint : 8192); for (;;) { ssize_t cnt; @@ -112,12 +151,14 @@ ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint) strbuf_release(sb); else strbuf_setlen(sb, oldlen); - return -1; + return cnt; } if (!cnt) break; sb->len += cnt; - strbuf_grow(sb, 8192); + ret = strbuf_grow(sb, 8192); + if (ret) + return ret; } sb->buf[sb->len] = '\0'; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index ab9be0fbbd40..b268a6648a5d 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -40,6 +40,9 @@ #include <assert.h> #include <stdarg.h> +#include <stddef.h> +#include <string.h> +#include <sys/types.h> extern char strbuf_slopbuf[]; struct strbuf { @@ -51,7 +54,7 @@ struct strbuf { #define STRBUF_INIT { 0, 0, strbuf_slopbuf } /*----- strbuf life cycle -----*/ -void strbuf_init(struct strbuf *buf, ssize_t hint); +int strbuf_init(struct strbuf *buf, ssize_t hint); void strbuf_release(struct strbuf *buf); char *strbuf_detach(struct strbuf *buf, size_t *); @@ -60,26 +63,31 @@ static inline ssize_t strbuf_avail(const struct strbuf *sb) { return sb->alloc ? sb->alloc - sb->len - 1 : 0; } -void strbuf_grow(struct strbuf *buf, size_t); +int strbuf_grow(struct strbuf *buf, size_t); -static inline void strbuf_setlen(struct strbuf *sb, size_t len) { - if (!sb->alloc) - strbuf_grow(sb, 0); +static inline int strbuf_setlen(struct strbuf *sb, size_t len) { + int ret; + if (!sb->alloc) { + ret = strbuf_grow(sb, 0); + if (ret) + return ret; + } assert(len < sb->alloc); sb->len = len; sb->buf[len] = '\0'; + return 0; } /*----- add data in your buffer -----*/ -void strbuf_addch(struct strbuf *sb, int c); +int strbuf_addch(struct strbuf *sb, int c); -void strbuf_add(struct strbuf *buf, const void *, size_t); -static inline void strbuf_addstr(struct strbuf *sb, const char *s) { - strbuf_add(sb, s, strlen(s)); +int strbuf_add(struct strbuf *buf, const void *, size_t); +static inline int strbuf_addstr(struct strbuf *sb, const char *s) { + return strbuf_add(sb, s, strlen(s)); } __attribute__((format(printf,2,3))) -void strbuf_addf(struct strbuf *sb, const char *fmt, ...); +int strbuf_addf(struct strbuf *sb, const char *fmt, ...); /* XXX: if read fails, any partial read is undone */ ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index ca990029e243..19207e50fce5 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -73,7 +73,7 @@ static inline struct str_node *strlist__next(struct str_node *sn) * @pos: the &struct str_node to use as a loop cursor. * @slist: the &struct strlist for loop. */ -#define strlist__for_each(pos, slist) \ +#define strlist__for_each_entry(pos, slist) \ for (pos = strlist__first(slist); pos; pos = strlist__next(pos)) /** @@ -83,7 +83,7 @@ static inline struct str_node *strlist__next(struct str_node *sn) * @n: another &struct str_node to use as temporary storage. * @slist: the &struct strlist for loop. */ -#define strlist__for_each_safe(pos, n, slist) \ +#define strlist__for_each_entry_safe(pos, n, slist) \ for (pos = strlist__first(slist), n = strlist__next(pos); pos;\ pos = n, n = strlist__next(n)) #endif /* __PERF_STRLIST_H */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index bc229a74c6a9..a811c13a74d6 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -7,6 +7,7 @@ #include "symbol.h" #include "demangle-java.h" +#include "demangle-rust.h" #include "machine.h" #include "vdso.h" #include <symbol/kallsyms.h> @@ -16,6 +17,7 @@ #define EM_AARCH64 183 /* ARM 64 bit */ #endif +typedef Elf64_Nhdr GElf_Nhdr; #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT extern char *cplus_demangle(const char *, int); @@ -54,6 +56,14 @@ static int elf_getphdrnum(Elf *elf, size_t *dst) } #endif +#ifndef HAVE_ELF_GETSHDRSTRNDX_SUPPORT +static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused) +{ + pr_err("%s: update your libelf to > 0.140, this one lacks elf_getshdrstrndx().\n", __func__); + return -1; +} +#endif + #ifndef NT_GNU_BUILD_ID #define NT_GNU_BUILD_ID 3 #endif @@ -709,17 +719,10 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, if (ss->opdshdr.sh_type != SHT_PROGBITS) ss->opdsec = NULL; - if (dso->kernel == DSO_TYPE_USER) { - GElf_Shdr shdr; - ss->adjust_symbols = (ehdr.e_type == ET_EXEC || - ehdr.e_type == ET_REL || - dso__is_vdso(dso) || - elf_section_by_name(elf, &ehdr, &shdr, - ".gnu.prelink_undo", - NULL) != NULL); - } else { + if (dso->kernel == DSO_TYPE_USER) + ss->adjust_symbols = true; + else ss->adjust_symbols = elf__needs_adjust_symbols(ehdr); - } ss->name = strdup(name); if (!ss->name) { @@ -777,7 +780,8 @@ static bool want_demangle(bool is_kernel_sym) return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; } -void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { } +void __weak arch__sym_update(struct symbol *s __maybe_unused, + GElf_Sym *sym __maybe_unused) { } int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, @@ -833,7 +837,8 @@ int dso__load_sym(struct dso *dso, struct map *map, sec = syms_ss->symtab; shdr = syms_ss->symshdr; - if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) + if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr, + ".text", NULL)) dso->text_offset = tshdr.sh_addr - tshdr.sh_offset; if (runtime_ss->opdsec) @@ -954,8 +959,6 @@ int dso__load_sym(struct dso *dso, struct map *map, (sym.st_value & 1)) --sym.st_value; - arch__elf_sym_adjust(&sym); - if (dso->kernel || kmodule) { char dso_name[PATH_MAX]; @@ -1080,6 +1083,13 @@ new_symbol: demangled = bfd_demangle(NULL, elf_name, demangle_flags); if (demangled == NULL) demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); + else if (rust_is_mangled(demangled)) + /* + * Input to Rust demangling is the BFD-demangled + * name which it Rust-demangles in place. + */ + rust_demangle_sym(demangled); + if (demangled != NULL) elf_name = demangled; } @@ -1089,6 +1099,8 @@ new_symbol: if (!f) goto out_elf_end; + arch__sym_update(f, &sym); + if (filter && filter(curr_map, f)) symbol__delete(f); else { @@ -1787,6 +1799,260 @@ void kcore_extract__delete(struct kcore_extract *kce) unlink(kce->extract_filename); } +#ifdef HAVE_GELF_GETNOTE_SUPPORT +/** + * populate_sdt_note : Parse raw data and identify SDT note + * @elf: elf of the opened file + * @data: raw data of a section with description offset applied + * @len: note description size + * @type: type of the note + * @sdt_notes: List to add the SDT note + * + * Responsible for parsing the @data in section .note.stapsdt in @elf and + * if its an SDT note, it appends to @sdt_notes list. + */ +static int populate_sdt_note(Elf **elf, const char *data, size_t len, + struct list_head *sdt_notes) +{ + const char *provider, *name; + struct sdt_note *tmp = NULL; + GElf_Ehdr ehdr; + GElf_Addr base_off = 0; + GElf_Shdr shdr; + int ret = -EINVAL; + + union { + Elf64_Addr a64[NR_ADDR]; + Elf32_Addr a32[NR_ADDR]; + } buf; + + Elf_Data dst = { + .d_buf = &buf, .d_type = ELF_T_ADDR, .d_version = EV_CURRENT, + .d_size = gelf_fsize((*elf), ELF_T_ADDR, NR_ADDR, EV_CURRENT), + .d_off = 0, .d_align = 0 + }; + Elf_Data src = { + .d_buf = (void *) data, .d_type = ELF_T_ADDR, + .d_version = EV_CURRENT, .d_size = dst.d_size, .d_off = 0, + .d_align = 0 + }; + + tmp = (struct sdt_note *)calloc(1, sizeof(struct sdt_note)); + if (!tmp) { + ret = -ENOMEM; + goto out_err; + } + + INIT_LIST_HEAD(&tmp->note_list); + + if (len < dst.d_size + 3) + goto out_free_note; + + /* Translation from file representation to memory representation */ + if (gelf_xlatetom(*elf, &dst, &src, + elf_getident(*elf, NULL)[EI_DATA]) == NULL) { + pr_err("gelf_xlatetom : %s\n", elf_errmsg(-1)); + goto out_free_note; + } + + /* Populate the fields of sdt_note */ + provider = data + dst.d_size; + + name = (const char *)memchr(provider, '\0', data + len - provider); + if (name++ == NULL) + goto out_free_note; + + tmp->provider = strdup(provider); + if (!tmp->provider) { + ret = -ENOMEM; + goto out_free_note; + } + tmp->name = strdup(name); + if (!tmp->name) { + ret = -ENOMEM; + goto out_free_prov; + } + + if (gelf_getclass(*elf) == ELFCLASS32) { + memcpy(&tmp->addr, &buf, 3 * sizeof(Elf32_Addr)); + tmp->bit32 = true; + } else { + memcpy(&tmp->addr, &buf, 3 * sizeof(Elf64_Addr)); + tmp->bit32 = false; + } + + if (!gelf_getehdr(*elf, &ehdr)) { + pr_debug("%s : cannot get elf header.\n", __func__); + ret = -EBADF; + goto out_free_name; + } + + /* Adjust the prelink effect : + * Find out the .stapsdt.base section. + * This scn will help us to handle prelinking (if present). + * Compare the retrieved file offset of the base section with the + * base address in the description of the SDT note. If its different, + * then accordingly, adjust the note location. + */ + if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) { + base_off = shdr.sh_offset; + if (base_off) { + if (tmp->bit32) + tmp->addr.a32[0] = tmp->addr.a32[0] + base_off - + tmp->addr.a32[1]; + else + tmp->addr.a64[0] = tmp->addr.a64[0] + base_off - + tmp->addr.a64[1]; + } + } + + list_add_tail(&tmp->note_list, sdt_notes); + return 0; + +out_free_name: + free(tmp->name); +out_free_prov: + free(tmp->provider); +out_free_note: + free(tmp); +out_err: + return ret; +} + +/** + * construct_sdt_notes_list : constructs a list of SDT notes + * @elf : elf to look into + * @sdt_notes : empty list_head + * + * Scans the sections in 'elf' for the section + * .note.stapsdt. It, then calls populate_sdt_note to find + * out the SDT events and populates the 'sdt_notes'. + */ +static int construct_sdt_notes_list(Elf *elf, struct list_head *sdt_notes) +{ + GElf_Ehdr ehdr; + Elf_Scn *scn = NULL; + Elf_Data *data; + GElf_Shdr shdr; + size_t shstrndx, next; + GElf_Nhdr nhdr; + size_t name_off, desc_off, offset; + int ret = 0; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + ret = -EBADF; + goto out_ret; + } + if (elf_getshdrstrndx(elf, &shstrndx) != 0) { + ret = -EBADF; + goto out_ret; + } + + /* Look for the required section */ + scn = elf_section_by_name(elf, &ehdr, &shdr, SDT_NOTE_SCN, NULL); + if (!scn) { + ret = -ENOENT; + goto out_ret; + } + + if ((shdr.sh_type != SHT_NOTE) || (shdr.sh_flags & SHF_ALLOC)) { + ret = -ENOENT; + goto out_ret; + } + + data = elf_getdata(scn, NULL); + + /* Get the SDT notes */ + for (offset = 0; (next = gelf_getnote(data, offset, &nhdr, &name_off, + &desc_off)) > 0; offset = next) { + if (nhdr.n_namesz == sizeof(SDT_NOTE_NAME) && + !memcmp(data->d_buf + name_off, SDT_NOTE_NAME, + sizeof(SDT_NOTE_NAME))) { + /* Check the type of the note */ + if (nhdr.n_type != SDT_NOTE_TYPE) + goto out_ret; + + ret = populate_sdt_note(&elf, ((data->d_buf) + desc_off), + nhdr.n_descsz, sdt_notes); + if (ret < 0) + goto out_ret; + } + } + if (list_empty(sdt_notes)) + ret = -ENOENT; + +out_ret: + return ret; +} + +/** + * get_sdt_note_list : Wrapper to construct a list of sdt notes + * @head : empty list_head + * @target : file to find SDT notes from + * + * This opens the file, initializes + * the ELF and then calls construct_sdt_notes_list. + */ +int get_sdt_note_list(struct list_head *head, const char *target) +{ + Elf *elf; + int fd, ret; + + fd = open(target, O_RDONLY); + if (fd < 0) + return -EBADF; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (!elf) { + ret = -EBADF; + goto out_close; + } + ret = construct_sdt_notes_list(elf, head); + elf_end(elf); +out_close: + close(fd); + return ret; +} + +/** + * cleanup_sdt_note_list : free the sdt notes' list + * @sdt_notes: sdt notes' list + * + * Free up the SDT notes in @sdt_notes. + * Returns the number of SDT notes free'd. + */ +int cleanup_sdt_note_list(struct list_head *sdt_notes) +{ + struct sdt_note *tmp, *pos; + int nr_free = 0; + + list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) { + list_del(&pos->note_list); + free(pos->name); + free(pos->provider); + free(pos); + nr_free++; + } + return nr_free; +} + +/** + * sdt_notes__get_count: Counts the number of sdt events + * @start: list_head to sdt_notes list + * + * Returns the number of SDT notes in a list + */ +int sdt_notes__get_count(struct list_head *start) +{ + struct sdt_note *sdt_ptr; + int count = 0; + + list_for_each_entry(sdt_ptr, start, note_list) + count++; + return count; +} +#endif + void symbol__elf_init(void) { elf_version(EV_CURRENT); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e7588dc91518..37e8d20ae03e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -255,40 +255,6 @@ void symbol__delete(struct symbol *sym) free(((void *)sym) - symbol_conf.priv_size); } -size_t symbol__fprintf(struct symbol *sym, FILE *fp) -{ - return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n", - sym->start, sym->end, - sym->binding == STB_GLOBAL ? 'g' : - sym->binding == STB_LOCAL ? 'l' : 'w', - sym->name); -} - -size_t symbol__fprintf_symname_offs(const struct symbol *sym, - const struct addr_location *al, FILE *fp) -{ - unsigned long offset; - size_t length; - - if (sym && sym->name) { - length = fprintf(fp, "%s", sym->name); - if (al) { - if (al->addr < sym->end) - offset = al->addr - sym->start; - else - offset = al->addr - al->map->start - sym->start; - length += fprintf(fp, "+0x%lx", offset); - } - return length; - } else - return fprintf(fp, "[unknown]"); -} - -size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) -{ - return symbol__fprintf_symname_offs(sym, NULL, fp); -} - void symbols__delete(struct rb_root *symbols) { struct symbol *pos; @@ -335,7 +301,7 @@ static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) if (ip < s->start) n = n->rb_left; - else if (ip >= s->end) + else if (ip > s->end || (ip == s->end && ip != s->start)) n = n->rb_right; else return s; @@ -364,11 +330,6 @@ static struct symbol *symbols__next(struct symbol *sym) return NULL; } -struct symbol_name_rb_node { - struct rb_node rb_node; - struct symbol sym; -}; - static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym) { struct rb_node **p = &symbols->rb_node; @@ -452,6 +413,18 @@ void dso__reset_find_symbol_cache(struct dso *dso) } } +void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym) +{ + symbols__insert(&dso->symbols[type], sym); + + /* update the symbol cache if necessary */ + if (dso->last_find_result[type].addr >= sym->start && + (dso->last_find_result[type].addr < sym->end || + sym->start == sym->end)) { + dso->last_find_result[type].symbol = sym; + } +} + struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr) { @@ -497,21 +470,6 @@ void dso__sort_by_name(struct dso *dso, enum map_type type) &dso->symbols[type]); } -size_t dso__fprintf_symbols_by_name(struct dso *dso, - enum map_type type, FILE *fp) -{ - size_t ret = 0; - struct rb_node *nd; - struct symbol_name_rb_node *pos; - - for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) { - pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); - fprintf(fp, "%s\n", pos->sym.name); - } - - return ret; -} - int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, u64 start)) @@ -1262,8 +1220,8 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) return 0; } -int dso__load_kallsyms(struct dso *dso, const char *filename, - struct map *map, symbol_filter_t filter) +int __dso__load_kallsyms(struct dso *dso, const char *filename, + struct map *map, bool no_kcore, symbol_filter_t filter) { u64 delta = 0; @@ -1284,12 +1242,18 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, else dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; - if (!dso__load_kcore(dso, map, filename)) + if (!no_kcore && !dso__load_kcore(dso, map, filename)) return dso__split_kallsyms_for_kcore(dso, map, filter); else return dso__split_kallsyms(dso, map, delta, filter); } +int dso__load_kallsyms(struct dso *dso, const char *filename, + struct map *map, symbol_filter_t filter) +{ + return __dso__load_kallsyms(dso, filename, map, false, filter); +} + static int dso__load_perf_map(struct dso *dso, struct map *map, symbol_filter_t filter) { @@ -1466,7 +1430,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (is_regular_file(name) && + if (is_regular_file(dso->long_name) && filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); @@ -1644,25 +1608,27 @@ out: return err; } +static bool visible_dir_filter(const char *name, struct dirent *d) +{ + if (d->d_type != DT_DIR) + return false; + return lsdir_no_dot_filter(name, d); +} + static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) { char kallsyms_filename[PATH_MAX]; - struct dirent *dent; int ret = -1; - DIR *d; + struct strlist *dirs; + struct str_node *nd; - d = opendir(dir); - if (!d) + dirs = lsdir(dir, visible_dir_filter); + if (!dirs) return -1; - while (1) { - dent = readdir(d); - if (!dent) - break; - if (dent->d_type != DT_DIR) - continue; + strlist__for_each_entry(nd, dirs) { scnprintf(kallsyms_filename, sizeof(kallsyms_filename), - "%s/%s/kallsyms", dir, dent->d_name); + "%s/%s/kallsyms", dir, nd->s); if (!validate_kcore_addresses(kallsyms_filename, map)) { strlcpy(dir, kallsyms_filename, dir_sz); ret = 0; @@ -1670,15 +1636,29 @@ static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) } } - closedir(d); + strlist__delete(dirs); return ret; } +/* + * Use open(O_RDONLY) to check readability directly instead of access(R_OK) + * since access(R_OK) only checks with real UID/GID but open() use effective + * UID/GID and actual capabilities (e.g. /proc/kcore requires CAP_SYS_RAWIO). + */ +static bool filename__readable(const char *file) +{ + int fd = open(file, O_RDONLY); + if (fd < 0) + return false; + close(fd); + return true; +} + static char *dso__find_kallsyms(struct dso *dso, struct map *map) { u8 host_build_id[BUILD_ID_SIZE]; - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + char sbuild_id[SBUILD_ID_SIZE]; bool is_host = false; char path[PATH_MAX]; @@ -1694,58 +1674,43 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) sizeof(host_build_id)) == 0) is_host = dso__build_id_equal(dso, host_build_id); - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); - - scnprintf(path, sizeof(path), "%s/[kernel.kcore]/%s", buildid_dir, - sbuild_id); - - /* Use /proc/kallsyms if possible */ + /* Try a fast path for /proc/kallsyms if possible */ if (is_host) { - DIR *d; - int fd; - - /* If no cached kcore go with /proc/kallsyms */ - d = opendir(path); - if (!d) - goto proc_kallsyms; - closedir(d); - /* - * Do not check the build-id cache, until we know we cannot use - * /proc/kcore. + * Do not check the build-id cache, unless we know we cannot use + * /proc/kcore or module maps don't match to /proc/kallsyms. + * To check readability of /proc/kcore, do not use access(R_OK) + * since /proc/kcore requires CAP_SYS_RAWIO to read and access + * can't check it. */ - fd = open("/proc/kcore", O_RDONLY); - if (fd != -1) { - close(fd); - /* If module maps match go with /proc/kallsyms */ - if (!validate_kcore_addresses("/proc/kallsyms", map)) - goto proc_kallsyms; - } - - /* Find kallsyms in build-id cache with kcore */ - if (!find_matching_kcore(map, path, sizeof(path))) - return strdup(path); - - goto proc_kallsyms; + if (filename__readable("/proc/kcore") && + !validate_kcore_addresses("/proc/kallsyms", map)) + goto proc_kallsyms; } + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + /* Find kallsyms in build-id cache with kcore */ + scnprintf(path, sizeof(path), "%s/%s/%s", + buildid_dir, DSO__NAME_KCORE, sbuild_id); + if (!find_matching_kcore(map, path, sizeof(path))) return strdup(path); - scnprintf(path, sizeof(path), "%s/[kernel.kallsyms]/%s", - buildid_dir, sbuild_id); + /* Use current /proc/kallsyms if possible */ + if (is_host) { +proc_kallsyms: + return strdup("/proc/kallsyms"); + } - if (access(path, F_OK)) { + /* Finally, find a cache of kallsyms */ + if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) { pr_err("No kallsyms or vmlinux with build-id %s was found\n", sbuild_id); return NULL; } return strdup(path); - -proc_kallsyms: - return strdup("/proc/kallsyms"); } static int dso__load_kernel_sym(struct dso *dso, struct map *map, @@ -1803,7 +1768,7 @@ do_kallsyms: if (err > 0 && !dso__is_kcore(dso)) { dso->binary_type = DSO_BINARY_TYPE__KALLSYMS; - dso__set_long_name(dso, "[kernel.kallsyms]", false); + dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); map__fixup_start(map); map__fixup_end(map); } @@ -1967,17 +1932,17 @@ int setup_intlist(struct intlist **list, const char *list_str, static bool symbol__read_kptr_restrict(void) { bool value = false; + FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (geteuid() != 0) { - FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (fp != NULL) { - char line[8]; + if (fp != NULL) { + char line[8]; - if (fgets(line, sizeof(line), fp) != NULL) - value = atoi(line) != 0; + if (fgets(line, sizeof(line), fp) != NULL) + value = (geteuid() != 0) ? + (atoi(line) != 0) : + (atoi(line) == 2); - fclose(fp); - } + fclose(fp); } return value; @@ -2067,3 +2032,26 @@ void symbol__exit(void) symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL; symbol_conf.initialized = false; } + +int symbol__config_symfs(const struct option *opt __maybe_unused, + const char *dir, int unset __maybe_unused) +{ + char *bf = NULL; + int ret; + + symbol_conf.symfs = strdup(dir); + if (symbol_conf.symfs == NULL) + return -ENOMEM; + + /* skip the locally configured cache if a symfs is given, and + * config buildid dir to symfs/.debug + */ + ret = asprintf(&bf, "%s/%s", dir, ".debug"); + if (ret < 0) + return -ENOMEM; + + set_buildid_dir(bf); + + free(bf); + return 0; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index c8b7544d9267..699f7cbcfe72 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -44,6 +44,9 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ #endif +#define DSO__NAME_KALLSYMS "[kernel.kallsyms]" +#define DSO__NAME_KCORE "[kernel.kcore]" + /** struct symbol - symtab entry * * @ignore - resolvable but tools ignore it (e.g. idle routines) @@ -55,6 +58,7 @@ struct symbol { u16 namelen; u8 binding; bool ignore; + u8 arch_sym; char name[0]; }; @@ -140,6 +144,11 @@ struct symbol_conf { extern struct symbol_conf symbol_conf; +struct symbol_name_rb_node { + struct rb_node rb_node; + struct symbol sym; +}; + static inline int __symbol__join_symfs(char *bf, size_t size, const char *path) { return path__join(bf, size, symbol_conf.symfs, path); @@ -177,6 +186,8 @@ struct branch_info { struct addr_map_symbol from; struct addr_map_symbol to; struct branch_flags flags; + char *srcline_from; + char *srcline_to; }; struct mem_info { @@ -235,9 +246,14 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, symbol_filter_t filter); int dso__load_vmlinux_path(struct dso *dso, struct map *map, symbol_filter_t filter); +int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, + bool no_kcore, symbol_filter_t filter); int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, symbol_filter_t filter); +void dso__insert_symbol(struct dso *dso, enum map_type type, + struct symbol *sym); + struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, @@ -262,14 +278,22 @@ int symbol__init(struct perf_env *env); void symbol__exit(void); void symbol__elf_init(void); struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); +size_t __symbol__fprintf_symname_offs(const struct symbol *sym, + const struct addr_location *al, + bool unknown_as_addr, FILE *fp); size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp); +size_t __symbol__fprintf_symname(const struct symbol *sym, + const struct addr_location *al, + bool unknown_as_addr, FILE *fp); size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); size_t symbol__fprintf(struct symbol *sym, FILE *fp); bool symbol_type__is_a(char symbol_type, enum map_type map_type); bool symbol__restricted_filename(const char *filename, const char *restricted_filename); bool symbol__is_idle(struct symbol *sym); +int symbol__config_symfs(const struct option *opt __maybe_unused, + const char *dir, int unset __maybe_unused); int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, symbol_filter_t filter, @@ -310,7 +334,7 @@ int setup_intlist(struct intlist **list, const char *list_str, #ifdef HAVE_LIBELF_SUPPORT bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); -void arch__elf_sym_adjust(GElf_Sym *sym); +void arch__sym_update(struct symbol *s, GElf_Sym *sym); #endif #define SYMBOL_A 0 @@ -318,4 +342,26 @@ void arch__elf_sym_adjust(GElf_Sym *sym); int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb); +/* structure containing an SDT note's info */ +struct sdt_note { + char *name; /* name of the note*/ + char *provider; /* provider name */ + bool bit32; /* whether the location is 32 bits? */ + union { /* location, base and semaphore addrs */ + Elf64_Addr a64[3]; + Elf32_Addr a32[3]; + } addr; + struct list_head note_list; /* SDT notes' list */ +}; + +int get_sdt_note_list(struct list_head *head, const char *target); +int cleanup_sdt_note_list(struct list_head *sdt_notes); +int sdt_notes__get_count(struct list_head *start); + +#define SDT_BASE_SCN ".stapsdt.base" +#define SDT_NOTE_SCN ".note.stapsdt" +#define SDT_NOTE_TYPE 3 +#define SDT_NOTE_NAME "stapsdt" +#define NR_ADDR 3 + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c new file mode 100644 index 000000000000..a680bdaa65dc --- /dev/null +++ b/tools/perf/util/symbol_fprintf.c @@ -0,0 +1,71 @@ +#include <elf.h> +#include <inttypes.h> +#include <stdio.h> + +#include "symbol.h" + +size_t symbol__fprintf(struct symbol *sym, FILE *fp) +{ + return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n", + sym->start, sym->end, + sym->binding == STB_GLOBAL ? 'g' : + sym->binding == STB_LOCAL ? 'l' : 'w', + sym->name); +} + +size_t __symbol__fprintf_symname_offs(const struct symbol *sym, + const struct addr_location *al, + bool unknown_as_addr, FILE *fp) +{ + unsigned long offset; + size_t length; + + if (sym && sym->name) { + length = fprintf(fp, "%s", sym->name); + if (al) { + if (al->addr < sym->end) + offset = al->addr - sym->start; + else + offset = al->addr - al->map->start - sym->start; + length += fprintf(fp, "+0x%lx", offset); + } + return length; + } else if (al && unknown_as_addr) + return fprintf(fp, "[%#" PRIx64 "]", al->addr); + else + return fprintf(fp, "[unknown]"); +} + +size_t symbol__fprintf_symname_offs(const struct symbol *sym, + const struct addr_location *al, + FILE *fp) +{ + return __symbol__fprintf_symname_offs(sym, al, false, fp); +} + +size_t __symbol__fprintf_symname(const struct symbol *sym, + const struct addr_location *al, + bool unknown_as_addr, FILE *fp) +{ + return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp); +} + +size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) +{ + return __symbol__fprintf_symname_offs(sym, NULL, false, fp); +} + +size_t dso__fprintf_symbols_by_name(struct dso *dso, + enum map_type type, FILE *fp) +{ + size_t ret = 0; + struct rb_node *nd; + struct symbol_name_rb_node *pos; + + for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) { + pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); + fprintf(fp, "%s\n", pos->sym.name); + } + + return ret; +} diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c new file mode 100644 index 000000000000..bbb4c1957578 --- /dev/null +++ b/tools/perf/util/syscalltbl.c @@ -0,0 +1,134 @@ +/* + * System call table mapper + * + * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include "syscalltbl.h" +#include <stdlib.h> + +#ifdef HAVE_SYSCALL_TABLE +#include <linux/compiler.h> +#include <string.h> +#include "util.h" + +#if defined(__x86_64__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_x86_64; +#endif + +struct syscall { + int id; + const char *name; +}; + +static int syscallcmpname(const void *vkey, const void *ventry) +{ + const char *key = vkey; + const struct syscall *entry = ventry; + + return strcmp(key, entry->name); +} + +static int syscallcmp(const void *va, const void *vb) +{ + const struct syscall *a = va, *b = vb; + + return strcmp(a->name, b->name); +} + +static int syscalltbl__init_native(struct syscalltbl *tbl) +{ + int nr_entries = 0, i, j; + struct syscall *entries; + + for (i = 0; i <= syscalltbl_native_max_id; ++i) + if (syscalltbl_native[i]) + ++nr_entries; + + entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries); + if (tbl->syscalls.entries == NULL) + return -1; + + for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) { + if (syscalltbl_native[i]) { + entries[j].name = syscalltbl_native[i]; + entries[j].id = i; + ++j; + } + } + + qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp); + tbl->syscalls.nr_entries = nr_entries; + return 0; +} + +struct syscalltbl *syscalltbl__new(void) +{ + struct syscalltbl *tbl = malloc(sizeof(*tbl)); + if (tbl) { + if (syscalltbl__init_native(tbl)) { + free(tbl); + return NULL; + } + } + return tbl; +} + +void syscalltbl__delete(struct syscalltbl *tbl) +{ + zfree(&tbl->syscalls.entries); + free(tbl); +} + +const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id) +{ + return id <= syscalltbl_native_max_id ? syscalltbl_native[id]: NULL; +} + +int syscalltbl__id(struct syscalltbl *tbl, const char *name) +{ + struct syscall *sc = bsearch(name, tbl->syscalls.entries, + tbl->syscalls.nr_entries, sizeof(*sc), + syscallcmpname); + + return sc ? sc->id : -1; +} + +#else /* HAVE_SYSCALL_TABLE */ + +#include <libaudit.h> + +struct syscalltbl *syscalltbl__new(void) +{ + struct syscalltbl *tbl = malloc(sizeof(*tbl)); + if (tbl) + tbl->audit_machine = audit_detect_machine(); + return tbl; +} + +void syscalltbl__delete(struct syscalltbl *tbl) +{ + free(tbl); +} + +const char *syscalltbl__name(const struct syscalltbl *tbl, int id) +{ + return audit_syscall_to_name(id, tbl->audit_machine); +} + +int syscalltbl__id(struct syscalltbl *tbl, const char *name) +{ + return audit_name_to_syscall(name, tbl->audit_machine); +} +#endif /* HAVE_SYSCALL_TABLE */ diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h new file mode 100644 index 000000000000..e2951510484f --- /dev/null +++ b/tools/perf/util/syscalltbl.h @@ -0,0 +1,20 @@ +#ifndef __PERF_SYSCALLTBL_H +#define __PERF_SYSCALLTBL_H + +struct syscalltbl { + union { + int audit_machine; + struct { + int nr_entries; + void *entries; + } syscalls; + }; +}; + +struct syscalltbl *syscalltbl__new(void); +void syscalltbl__delete(struct syscalltbl *tbl); + +const char *syscalltbl__name(const struct syscalltbl *tbl, int id); +int syscalltbl__id(struct syscalltbl *tbl, const char *name); + +#endif /* __PERF_SYSCALLTBL_H */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index a53603b27e52..21c4d9b23c24 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -7,6 +7,7 @@ */ #include "target.h" +#include "util.h" #include "debug.h" #include <pwd.h> @@ -121,11 +122,7 @@ int target__strerror(struct target *target, int errnum, BUG_ON(buflen == 0); if (errnum >= 0) { - const char *err = strerror_r(errnum, buf, buflen); - - if (err != buf) - scnprintf(buf, buflen, "%s", err); - + str_error_r(errnum, buf, buflen); return 0; } diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 679688e70ae7..d3301529f6a7 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -22,44 +22,9 @@ #include "debug.h" #include "symbol.h" #include "comm.h" +#include "call-path.h" #include "thread-stack.h" -#define CALL_PATH_BLOCK_SHIFT 8 -#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT) -#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1) - -struct call_path_block { - struct call_path cp[CALL_PATH_BLOCK_SIZE]; - struct list_head node; -}; - -/** - * struct call_path_root - root of all call paths. - * @call_path: root call path - * @blocks: list of blocks to store call paths - * @next: next free space - * @sz: number of spaces - */ -struct call_path_root { - struct call_path call_path; - struct list_head blocks; - size_t next; - size_t sz; -}; - -/** - * struct call_return_processor - provides a call-back to consume call-return - * information. - * @cpr: call path root - * @process: call-back that accepts call/return information - * @data: anonymous data for call-back - */ -struct call_return_processor { - struct call_path_root *cpr; - int (*process)(struct call_return *cr, void *data); - void *data; -}; - #define STACK_GROWTH 2048 /** @@ -335,108 +300,6 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; } -static void call_path__init(struct call_path *cp, struct call_path *parent, - struct symbol *sym, u64 ip, bool in_kernel) -{ - cp->parent = parent; - cp->sym = sym; - cp->ip = sym ? 0 : ip; - cp->db_id = 0; - cp->in_kernel = in_kernel; - RB_CLEAR_NODE(&cp->rb_node); - cp->children = RB_ROOT; -} - -static struct call_path_root *call_path_root__new(void) -{ - struct call_path_root *cpr; - - cpr = zalloc(sizeof(struct call_path_root)); - if (!cpr) - return NULL; - call_path__init(&cpr->call_path, NULL, NULL, 0, false); - INIT_LIST_HEAD(&cpr->blocks); - return cpr; -} - -static void call_path_root__free(struct call_path_root *cpr) -{ - struct call_path_block *pos, *n; - - list_for_each_entry_safe(pos, n, &cpr->blocks, node) { - list_del(&pos->node); - free(pos); - } - free(cpr); -} - -static struct call_path *call_path__new(struct call_path_root *cpr, - struct call_path *parent, - struct symbol *sym, u64 ip, - bool in_kernel) -{ - struct call_path_block *cpb; - struct call_path *cp; - size_t n; - - if (cpr->next < cpr->sz) { - cpb = list_last_entry(&cpr->blocks, struct call_path_block, - node); - } else { - cpb = zalloc(sizeof(struct call_path_block)); - if (!cpb) - return NULL; - list_add_tail(&cpb->node, &cpr->blocks); - cpr->sz += CALL_PATH_BLOCK_SIZE; - } - - n = cpr->next++ & CALL_PATH_BLOCK_MASK; - cp = &cpb->cp[n]; - - call_path__init(cp, parent, sym, ip, in_kernel); - - return cp; -} - -static struct call_path *call_path__findnew(struct call_path_root *cpr, - struct call_path *parent, - struct symbol *sym, u64 ip, u64 ks) -{ - struct rb_node **p; - struct rb_node *node_parent = NULL; - struct call_path *cp; - bool in_kernel = ip >= ks; - - if (sym) - ip = 0; - - if (!parent) - return call_path__new(cpr, parent, sym, ip, in_kernel); - - p = &parent->children.rb_node; - while (*p != NULL) { - node_parent = *p; - cp = rb_entry(node_parent, struct call_path, rb_node); - - if (cp->sym == sym && cp->ip == ip) - return cp; - - if (sym < cp->sym || (sym == cp->sym && ip < cp->ip)) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - cp = call_path__new(cpr, parent, sym, ip, in_kernel); - if (!cp) - return NULL; - - rb_link_node(&cp->rb_node, node_parent, p); - rb_insert_color(&cp->rb_node, &parent->children); - - return cp; -} - struct call_return_processor * call_return_processor__new(int (*process)(struct call_return *cr, void *data), void *data) @@ -753,3 +616,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm, return err; } + +size_t thread_stack__depth(struct thread *thread) +{ + if (!thread->ts) + return 0; + return thread->ts->cnt; +} diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index e1528f1374c3..b7e41c4ebfdd 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -19,17 +19,16 @@ #include <sys/types.h> #include <linux/types.h> -#include <linux/rbtree.h> struct thread; struct comm; struct ip_callchain; struct symbol; struct dso; -struct call_return_processor; struct comm; struct perf_sample; struct addr_location; +struct call_path; /* * Call/Return flags. @@ -69,26 +68,16 @@ struct call_return { }; /** - * struct call_path - node in list of calls leading to a function call. - * @parent: call path to the parent function call - * @sym: symbol of function called - * @ip: only if sym is null, the ip of the function - * @db_id: id used for db-export - * @in_kernel: whether function is a in the kernel - * @rb_node: node in parent's tree of called functions - * @children: tree of call paths of functions called - * - * In combination with the call_return structure, the call_path structure - * defines a context-sensitve call-graph. + * struct call_return_processor - provides a call-back to consume call-return + * information. + * @cpr: call path root + * @process: call-back that accepts call/return information + * @data: anonymous data for call-back */ -struct call_path { - struct call_path *parent; - struct symbol *sym; - u64 ip; - u64 db_id; - bool in_kernel; - struct rb_node rb_node; - struct rb_root children; +struct call_return_processor { + struct call_path_root *cpr; + int (*process)(struct call_return *cr, void *data); + void *data; }; int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, @@ -98,6 +87,7 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, size_t sz, u64 ip); int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); +size_t thread_stack__depth(struct thread *thread); struct call_return_processor * call_return_processor__new(int (*process)(struct call_return *cr, void *data), diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index dfd00c6dad6e..8b10a55410a2 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -10,6 +10,8 @@ #include "comm.h" #include "unwind.h" +#include <api/fs/fs.h> + int thread__init_map_groups(struct thread *thread, struct machine *machine) { struct thread *leader; @@ -41,9 +43,6 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->cpu = -1; INIT_LIST_HEAD(&thread->comm_list); - if (unwind__prepare_access(thread) < 0) - goto err_thread; - comm_str = malloc(32); if (!comm_str) goto err_thread; @@ -153,6 +152,23 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, return 0; } +int thread__set_comm_from_proc(struct thread *thread) +{ + char path[64]; + char *comm = NULL; + size_t sz; + int err = -1; + + if (!(snprintf(path, sizeof(path), "%d/task/%d/comm", + thread->pid_, thread->tid) >= (int)sizeof(path)) && + procfs__read_str(path, &comm, &sz) == 0) { + comm[sz - 1] = '\0'; + err = thread__set_comm(thread, comm, 0); + } + + return err; +} + const char *thread__comm_str(const struct thread *thread) { const struct comm *comm = thread__comm(thread); @@ -182,10 +198,51 @@ size_t thread__fprintf(struct thread *thread, FILE *fp) map_groups__fprintf(thread->mg, fp); } -void thread__insert_map(struct thread *thread, struct map *map) +int thread__insert_map(struct thread *thread, struct map *map) { + int ret; + + ret = unwind__prepare_access(thread, map, NULL); + if (ret) + return ret; + map_groups__fixup_overlappings(thread->mg, map, stderr); map_groups__insert(thread->mg, map); + + return 0; +} + +static int __thread__prepare_access(struct thread *thread) +{ + bool initialized = false; + int i, err = 0; + + for (i = 0; i < MAP__NR_TYPES; ++i) { + struct maps *maps = &thread->mg->maps[i]; + struct map *map; + + pthread_rwlock_rdlock(&maps->lock); + + for (map = maps__first(maps); map; map = map__next(map)) { + err = unwind__prepare_access(thread, map, &initialized); + if (err || initialized) + break; + } + + pthread_rwlock_unlock(&maps->lock); + } + + return err; +} + +static int thread__prepare_access(struct thread *thread) +{ + int err = 0; + + if (symbol_conf.use_callchain) + err = __thread__prepare_access(thread); + + return err; } static int thread__clone_map_groups(struct thread *thread, @@ -195,7 +252,7 @@ static int thread__clone_map_groups(struct thread *thread, /* This is new thread, we share map groups for process. */ if (thread->pid_ == parent->pid_) - return 0; + return thread__prepare_access(thread); if (thread->mg == parent->mg) { pr_debug("broken map groups on thread %d/%d parent %d/%d\n", @@ -205,7 +262,7 @@ static int thread__clone_map_groups(struct thread *thread, /* But this one is new process, copy maps. */ for (i = 0; i < MAP__NR_TYPES; ++i) - if (map_groups__clone(thread->mg, parent->mg, i) < 0) + if (map_groups__clone(thread, parent->mg, i) < 0) return -ENOMEM; return 0; @@ -233,7 +290,7 @@ void thread__find_cpumode_addr_location(struct thread *thread, struct addr_location *al) { size_t i; - const u8 const cpumodes[] = { + const u8 cpumodes[] = { PERF_RECORD_MISC_USER, PERF_RECORD_MISC_KERNEL, PERF_RECORD_MISC_GUEST_USER, @@ -246,3 +303,14 @@ void thread__find_cpumode_addr_location(struct thread *thread, break; } } + +struct thread *thread__main_thread(struct machine *machine, struct thread *thread) +{ + if (thread->pid_ == thread->tid) + return thread__get(thread); + + if (thread->pid_ == -1) + return NULL; + + return machine__find_thread(machine, thread->pid_, thread->pid_); +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index a0ac0317affb..99263cb6e6b6 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -11,6 +11,7 @@ #include <intlist.h> struct thread_stack; +struct unwind_libunwind_ops; struct thread { union { @@ -32,6 +33,10 @@ struct thread { void *priv; struct thread_stack *ts; +#ifdef HAVE_LIBUNWIND_SUPPORT + void *addr_space; + struct unwind_libunwind_ops *unwind_libunwind_ops; +#endif }; struct machine; @@ -65,14 +70,18 @@ static inline int thread__set_comm(struct thread *thread, const char *comm, return __thread__set_comm(thread, comm, timestamp, false); } +int thread__set_comm_from_proc(struct thread *thread); + int thread__comm_len(struct thread *thread); struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); -void thread__insert_map(struct thread *thread, struct map *map); +int thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); size_t thread__fprintf(struct thread *thread, FILE *fp); +struct thread *thread__main_thread(struct machine *machine, struct thread *thread); + void thread__find_addr_map(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 267112b4e3db..40585f5b7027 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -202,7 +202,7 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) if (!slist) return NULL; - strlist__for_each(pos, slist) { + strlist__for_each_entry(pos, slist) { pid = strtol(pos->s, &end_ptr, 10); if (pid == INT_MIN || pid == INT_MAX || @@ -260,7 +260,7 @@ struct thread_map *thread_map__new_dummy(void) return threads; } -static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) +struct thread_map *thread_map__new_by_tid_str(const char *tid_str) { struct thread_map *threads = NULL, *nt; int ntasks = 0; @@ -278,7 +278,7 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) if (!slist) return NULL; - strlist__for_each(pos, slist) { + strlist__for_each_entry(pos, slist) { tid = strtol(pos->s, &end_ptr, 10); if (tid == INT_MIN || tid == INT_MAX || @@ -436,3 +436,15 @@ struct thread_map *thread_map__new_event(struct thread_map_event *event) return threads; } + +bool thread_map__has(struct thread_map *threads, pid_t pid) +{ + int i; + + for (i = 0; i < threads->nr; ++i) { + if (threads->map[i].pid == pid) + return true; + } + + return false; +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 85e4c7c4fbde..bd3b971588da 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -31,6 +31,8 @@ void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, const char *tid, uid_t uid); +struct thread_map *thread_map__new_by_tid_str(const char *tid_str); + size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); static inline int thread_map__nr(struct thread_map *threads) @@ -55,4 +57,5 @@ static inline char *thread_map__comm(struct thread_map *map, int thread) } void thread_map__read_comms(struct thread_map *threads); +bool thread_map__has(struct thread_map *threads, pid_t pid); #endif /* __PERF_THREAD_MAP_H */ diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 55de4cffcd4e..ac2590a3de2d 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -57,6 +57,7 @@ struct perf_tool { id_index, auxtrace_info, auxtrace_error, + time_conv, thread_map, cpu_map, stat_config, diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index f92c37abb0a8..b2940c88734a 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -27,7 +27,6 @@ struct perf_top { int max_stack; bool hide_kernel_symbols, hide_user_symbols, zero; bool use_tui, use_stdio; - bool kptr_restrict_warned; bool vmlinux_warned; bool dump_symtab; struct hist_entry *sym_filter_entry; diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 8ae051e0ec79..c330780674fc 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -105,3 +105,11 @@ trace_event__tp_format(const char *sys, const char *name) return tp_format(sys, name); } + +struct event_format *trace_event__tp_format_id(int id) +{ + if (!tevent_initialized && trace_event__init2()) + return ERR_PTR(-ENOMEM); + + return pevent_find_event(tevent.pevent, id); +} diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index bce5b1dac268..b0af9c81bb0d 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -23,6 +23,8 @@ int trace_event__register_resolver(struct machine *machine, struct event_format* trace_event__tp_format(const char *sys, const char *name); +struct event_format *trace_event__tp_format_id(int id); + int bigendian(void); void event_format__fprintf(struct event_format *event, diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h new file mode 100644 index 000000000000..e97d7016d771 --- /dev/null +++ b/tools/perf/util/trigger.h @@ -0,0 +1,94 @@ +#ifndef __TRIGGER_H_ +#define __TRIGGER_H_ 1 + +#include "util/debug.h" +#include "asm/bug.h" + +/* + * Use trigger to model operations which need to be executed when + * an event (a signal, for example) is observed. + * + * States and transits: + * + * + * OFF--(on)--> READY --(hit)--> HIT + * ^ | + * | (ready) + * | | + * \_____________/ + * + * is_hit and is_ready are two key functions to query the state of + * a trigger. is_hit means the event already happen; is_ready means the + * trigger is waiting for the event. + */ + +struct trigger { + volatile enum { + TRIGGER_ERROR = -2, + TRIGGER_OFF = -1, + TRIGGER_READY = 0, + TRIGGER_HIT = 1, + } state; + const char *name; +}; + +#define TRIGGER_WARN_ONCE(t, exp) \ + WARN_ONCE(t->state != exp, "trigger '%s' state transist error: %d in %s()\n", \ + t->name, t->state, __func__) + +static inline bool trigger_is_available(struct trigger *t) +{ + return t->state >= 0; +} + +static inline bool trigger_is_error(struct trigger *t) +{ + return t->state <= TRIGGER_ERROR; +} + +static inline void trigger_on(struct trigger *t) +{ + TRIGGER_WARN_ONCE(t, TRIGGER_OFF); + t->state = TRIGGER_READY; +} + +static inline void trigger_ready(struct trigger *t) +{ + if (!trigger_is_available(t)) + return; + t->state = TRIGGER_READY; +} + +static inline void trigger_hit(struct trigger *t) +{ + if (!trigger_is_available(t)) + return; + TRIGGER_WARN_ONCE(t, TRIGGER_READY); + t->state = TRIGGER_HIT; +} + +static inline void trigger_off(struct trigger *t) +{ + if (!trigger_is_available(t)) + return; + t->state = TRIGGER_OFF; +} + +static inline void trigger_error(struct trigger *t) +{ + t->state = TRIGGER_ERROR; +} + +static inline bool trigger_is_ready(struct trigger *t) +{ + return t->state == TRIGGER_READY; +} + +static inline bool trigger_is_hit(struct trigger *t) +{ + return t->state == TRIGGER_HIT; +} + +#define DEFINE_TRIGGER(n) \ +struct trigger n = {.state = TRIGGER_OFF, .name = #n} +#endif diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index a8b78f1b3243..d5b11e2b85e0 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -3,10 +3,29 @@ #include <linux/types.h> -#include "../arch/x86/util/tsc.h" +#include "event.h" + +struct perf_tsc_conversion { + u16 time_shift; + u32 time_mult; + u64 time_zero; +}; +struct perf_event_mmap_page; + +int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, + struct perf_tsc_conversion *tc); u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); +struct perf_event_mmap_page; +struct perf_tool; +struct machine; + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine); + #endif diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index cf5e250bc78e..783a53fb7a4e 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -66,7 +66,7 @@ static int entry(u64 ip, struct unwind_info *ui) if (__report_module(&al, ip, ui)) return -1; - e->ip = ip; + e->ip = al.addr; e->map = al.map; e->sym = al.sym; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c new file mode 100644 index 000000000000..20c2e5743903 --- /dev/null +++ b/tools/perf/util/unwind-libunwind-local.c @@ -0,0 +1,699 @@ +/* + * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. + * + * Lots of this code have been borrowed or heavily inspired from parts of + * the libunwind 0.99 code which are (amongst other contributors I may have + * forgotten): + * + * Copyright (C) 2002-2007 Hewlett-Packard Co + * Contributed by David Mosberger-Tang <davidm@hpl.hp.com> + * + * And the bugs have been added by: + * + * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com> + * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com> + * + */ + +#include <elf.h> +#include <gelf.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/list.h> +#ifndef REMOTE_UNWIND_LIBUNWIND +#include <libunwind.h> +#include <libunwind-ptrace.h> +#endif +#include "callchain.h" +#include "thread.h" +#include "session.h" +#include "perf_regs.h" +#include "unwind.h" +#include "symbol.h" +#include "util.h" +#include "debug.h" +#include "asm/bug.h" + +extern int +UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +extern int +UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, + unw_word_t ip, + unw_word_t segbase, + const char *obj_name, unw_word_t start, + unw_word_t end); + +#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) + +#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ +#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ + +/* Pointer-encoding formats: */ +#define DW_EH_PE_omit 0xff +#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */ +#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */ +#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */ +#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */ +#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */ + +/* Pointer-encoding application: */ +#define DW_EH_PE_absptr 0x00 /* absolute value */ +#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */ + +/* + * The following are not documented by LSB v1.3, yet they are used by + * GCC, presumably they aren't documented by LSB since they aren't + * used on Linux: + */ +#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ +#define DW_EH_PE_aligned 0x50 /* aligned pointer */ + +/* Flags intentionaly not handled, since they're not needed: + * #define DW_EH_PE_indirect 0x80 + * #define DW_EH_PE_uleb128 0x01 + * #define DW_EH_PE_udata2 0x02 + * #define DW_EH_PE_sleb128 0x09 + * #define DW_EH_PE_sdata2 0x0a + * #define DW_EH_PE_textrel 0x20 + * #define DW_EH_PE_datarel 0x30 + */ + +struct unwind_info { + struct perf_sample *sample; + struct machine *machine; + struct thread *thread; +}; + +#define dw_read(ptr, type, end) ({ \ + type *__p = (type *) ptr; \ + type __v; \ + if ((__p + 1) > (type *) end) \ + return -EINVAL; \ + __v = *__p++; \ + ptr = (typeof(ptr)) __p; \ + __v; \ + }) + +static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, + u8 encoding) +{ + u8 *cur = *p; + *val = 0; + + switch (encoding) { + case DW_EH_PE_omit: + *val = 0; + goto out; + case DW_EH_PE_ptr: + *val = dw_read(cur, unsigned long, end); + goto out; + default: + break; + } + + switch (encoding & DW_EH_PE_APPL_MASK) { + case DW_EH_PE_absptr: + break; + case DW_EH_PE_pcrel: + *val = (unsigned long) cur; + break; + default: + return -EINVAL; + } + + if ((encoding & 0x07) == 0x00) + encoding |= DW_EH_PE_udata4; + + switch (encoding & DW_EH_PE_FORMAT_MASK) { + case DW_EH_PE_sdata4: + *val += dw_read(cur, s32, end); + break; + case DW_EH_PE_udata4: + *val += dw_read(cur, u32, end); + break; + case DW_EH_PE_sdata8: + *val += dw_read(cur, s64, end); + break; + case DW_EH_PE_udata8: + *val += dw_read(cur, u64, end); + break; + default: + return -EINVAL; + } + + out: + *p = cur; + return 0; +} + +#define dw_read_encoded_value(ptr, end, enc) ({ \ + u64 __v; \ + if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \ + return -EINVAL; \ + } \ + __v; \ + }) + +static u64 elf_section_offset(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + u64 offset = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + + do { + if (gelf_getehdr(elf, &ehdr) == NULL) + break; + + if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL)) + break; + + offset = shdr.sh_offset; + } while (0); + + elf_end(elf); + return offset; +} + +#ifndef NO_LIBUNWIND_DEBUG_FRAME +static int elf_is_exec(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + int retval = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + if (gelf_getehdr(elf, &ehdr) == NULL) + goto out; + + retval = (ehdr.e_type == ET_EXEC); + +out: + elf_end(elf); + pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval); + return retval; +} +#endif + +struct table_entry { + u32 start_ip_offset; + u32 fde_offset; +}; + +struct eh_frame_hdr { + unsigned char version; + unsigned char eh_frame_ptr_enc; + unsigned char fde_count_enc; + unsigned char table_enc; + + /* + * The rest of the header is variable-length and consists of the + * following members: + * + * encoded_t eh_frame_ptr; + * encoded_t fde_count; + */ + + /* A single encoded pointer should not be more than 8 bytes. */ + u64 enc[2]; + + /* + * struct { + * encoded_t start_ip; + * encoded_t fde_addr; + * } binary_search_table[fde_count]; + */ + char data[0]; +} __packed; + +static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, + u64 offset, u64 *table_data, u64 *segbase, + u64 *fde_count) +{ + struct eh_frame_hdr hdr; + u8 *enc = (u8 *) &hdr.enc; + u8 *end = (u8 *) &hdr.data; + ssize_t r; + + r = dso__data_read_offset(dso, machine, offset, + (u8 *) &hdr, sizeof(hdr)); + if (r != sizeof(hdr)) + return -EINVAL; + + /* We dont need eh_frame_ptr, just skip it. */ + dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); + + *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); + *segbase = offset; + *table_data = (enc - (u8 *) &hdr) + offset; + return 0; +} + +static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, + u64 *table_data, u64 *segbase, + u64 *fde_count) +{ + int ret = -EINVAL, fd; + u64 offset = dso->data.eh_frame_hdr_offset; + + if (offset == 0) { + fd = dso__data_get_fd(dso, machine); + if (fd < 0) + return -EINVAL; + + /* Check the .eh_frame section for unwinding info */ + offset = elf_section_offset(fd, ".eh_frame_hdr"); + dso->data.eh_frame_hdr_offset = offset; + dso__data_put_fd(dso); + } + + if (offset) + ret = unwind_spec_ehframe(dso, machine, offset, + table_data, segbase, + fde_count); + + return ret; +} + +#ifndef NO_LIBUNWIND_DEBUG_FRAME +static int read_unwind_spec_debug_frame(struct dso *dso, + struct machine *machine, u64 *offset) +{ + int fd; + u64 ofs = dso->data.debug_frame_offset; + + if (ofs == 0) { + fd = dso__data_get_fd(dso, machine); + if (fd < 0) + return -EINVAL; + + /* Check the .debug_frame section for unwinding info */ + ofs = elf_section_offset(fd, ".debug_frame"); + dso->data.debug_frame_offset = ofs; + dso__data_put_fd(dso); + } + + *offset = ofs; + if (*offset) + return 0; + + return -EINVAL; +} +#endif + +static struct map *find_map(unw_word_t ip, struct unwind_info *ui) +{ + struct addr_location al; + + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + if (!al.map) { + /* + * We've seen cases (softice) where DWARF unwinder went + * through non executable mmaps, which we need to lookup + * in MAP__VARIABLE tree. + */ + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, + MAP__VARIABLE, ip, &al); + } + return al.map; +} + +static int +find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, + int need_unwind_info, void *arg) +{ + struct unwind_info *ui = arg; + struct map *map; + unw_dyn_info_t di; + u64 table_data, segbase, fde_count; + int ret = -EINVAL; + + map = find_map(ip, ui); + if (!map || !map->dso) + return -EINVAL; + + pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); + + /* Check the .eh_frame section for unwinding info */ + if (!read_unwind_spec_eh_frame(map->dso, ui->machine, + &table_data, &segbase, &fde_count)) { + memset(&di, 0, sizeof(di)); + di.format = UNW_INFO_FORMAT_REMOTE_TABLE; + di.start_ip = map->start; + di.end_ip = map->end; + di.u.rti.segbase = map->start + segbase; + di.u.rti.table_data = map->start + table_data; + di.u.rti.table_len = fde_count * sizeof(struct table_entry) + / sizeof(unw_word_t); + ret = dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); + } + +#ifndef NO_LIBUNWIND_DEBUG_FRAME + /* Check the .debug_frame section for unwinding info */ + if (ret < 0 && + !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { + int fd = dso__data_get_fd(map->dso, ui->machine); + int is_exec = elf_is_exec(fd, map->dso->name); + unw_word_t base = is_exec ? 0 : map->start; + const char *symfile; + + if (fd >= 0) + dso__data_put_fd(map->dso); + + symfile = map->dso->symsrc_filename ?: map->dso->name; + + memset(&di, 0, sizeof(di)); + if (dwarf_find_debug_frame(0, &di, ip, base, symfile, + map->start, map->end)) + return dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); + } +#endif + + return ret; +} + +static int access_fpreg(unw_addr_space_t __maybe_unused as, + unw_regnum_t __maybe_unused num, + unw_fpreg_t __maybe_unused *val, + int __maybe_unused __write, + void __maybe_unused *arg) +{ + pr_err("unwind: access_fpreg unsupported\n"); + return -UNW_EINVAL; +} + +static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused *dil_addr, + void __maybe_unused *arg) +{ + return -UNW_ENOINFO; +} + +static int resume(unw_addr_space_t __maybe_unused as, + unw_cursor_t __maybe_unused *cu, + void __maybe_unused *arg) +{ + pr_err("unwind: resume unsupported\n"); + return -UNW_EINVAL; +} + +static int +get_proc_name(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused addr, + char __maybe_unused *bufp, size_t __maybe_unused buf_len, + unw_word_t __maybe_unused *offp, void __maybe_unused *arg) +{ + pr_err("unwind: get_proc_name unsupported\n"); + return -UNW_EINVAL; +} + +static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, + unw_word_t *data) +{ + struct map *map; + ssize_t size; + + map = find_map(addr, ui); + if (!map) { + pr_debug("unwind: no map for %lx\n", (unsigned long)addr); + return -1; + } + + if (!map->dso) + return -1; + + size = dso__data_read_addr(map->dso, map, ui->machine, + addr, (u8 *) data, sizeof(*data)); + + return !(size == sizeof(*data)); +} + +static int access_mem(unw_addr_space_t __maybe_unused as, + unw_word_t addr, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + struct stack_dump *stack = &ui->sample->user_stack; + u64 start, end; + int offset; + int ret; + + /* Don't support write, probably not needed. */ + if (__write || !stack || !ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + ret = perf_reg_value(&start, &ui->sample->user_regs, + LIBUNWIND__ARCH_REG_SP); + if (ret) + return ret; + + end = start + stack->size; + + /* Check overflow. */ + if (addr + sizeof(unw_word_t) < addr) + return -EINVAL; + + if (addr < start || addr + sizeof(unw_word_t) >= end) { + ret = access_dso_mem(ui, addr, valp); + if (ret) { + pr_debug("unwind: access_mem %p not inside range" + " 0x%" PRIx64 "-0x%" PRIx64 "\n", + (void *) (uintptr_t) addr, start, end); + *valp = 0; + return ret; + } + return 0; + } + + offset = addr - start; + *valp = *(unw_word_t *)&stack->data[offset]; + pr_debug("unwind: access_mem addr %p val %lx, offset %d\n", + (void *) (uintptr_t) addr, (unsigned long)*valp, offset); + return 0; +} + +static int access_reg(unw_addr_space_t __maybe_unused as, + unw_regnum_t regnum, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + int id, ret; + u64 val; + + /* Don't support write, I suspect we don't need it. */ + if (__write) { + pr_err("unwind: access_reg w %d\n", regnum); + return 0; + } + + if (!ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + id = LIBUNWIND__ARCH_REG_ID(regnum); + if (id < 0) + return -EINVAL; + + ret = perf_reg_value(&val, &ui->sample->user_regs, id); + if (ret) { + pr_err("unwind: can't read reg %d\n", regnum); + return ret; + } + + *valp = (unw_word_t) val; + pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp); + return 0; +} + +static void put_unwind_info(unw_addr_space_t __maybe_unused as, + unw_proc_info_t *pi __maybe_unused, + void *arg __maybe_unused) +{ + pr_debug("unwind: put_unwind_info called\n"); +} + +static int entry(u64 ip, struct thread *thread, + unwind_entry_cb_t cb, void *arg) +{ + struct unwind_entry e; + struct addr_location al; + + thread__find_addr_location(thread, PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + + e.ip = al.addr; + e.map = al.map; + e.sym = al.sym; + + pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", + al.sym ? al.sym->name : "''", + ip, + al.map ? al.map->map_ip(al.map, ip) : (u64) 0); + + return cb(&e, arg); +} + +static void display_error(int err) +{ + switch (err) { + case UNW_EINVAL: + pr_err("unwind: Only supports local.\n"); + break; + case UNW_EUNSPEC: + pr_err("unwind: Unspecified error.\n"); + break; + case UNW_EBADREG: + pr_err("unwind: Register unavailable.\n"); + break; + default: + break; + } +} + +static unw_accessors_t accessors = { + .find_proc_info = find_proc_info, + .put_unwind_info = put_unwind_info, + .get_dyn_info_list_addr = get_dyn_info_list_addr, + .access_mem = access_mem, + .access_reg = access_reg, + .access_fpreg = access_fpreg, + .resume = resume, + .get_proc_name = get_proc_name, +}; + +static int _unwind__prepare_access(struct thread *thread) +{ + if (callchain_param.record_mode != CALLCHAIN_DWARF) + return 0; + + thread->addr_space = unw_create_addr_space(&accessors, 0); + if (!thread->addr_space) { + pr_err("unwind: Can't create unwind address space.\n"); + return -ENOMEM; + } + + unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL); + return 0; +} + +static void _unwind__flush_access(struct thread *thread) +{ + if (callchain_param.record_mode != CALLCHAIN_DWARF) + return; + + unw_flush_cache(thread->addr_space, 0, 0); +} + +static void _unwind__finish_access(struct thread *thread) +{ + if (callchain_param.record_mode != CALLCHAIN_DWARF) + return; + + unw_destroy_addr_space(thread->addr_space); +} + +static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, + void *arg, int max_stack) +{ + u64 val; + unw_word_t ips[max_stack]; + unw_addr_space_t addr_space; + unw_cursor_t c; + int ret, i = 0; + + ret = perf_reg_value(&val, &ui->sample->user_regs, + LIBUNWIND__ARCH_REG_IP); + if (ret) + return ret; + + ips[i++] = (unw_word_t) val; + + /* + * If we need more than one entry, do the DWARF + * unwind itself. + */ + if (max_stack - 1 > 0) { + WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL"); + addr_space = ui->thread->addr_space; + + if (addr_space == NULL) + return -1; + + ret = unw_init_remote(&c, addr_space, ui); + if (ret) + display_error(ret); + + while (!ret && (unw_step(&c) > 0) && i < max_stack) { + unw_get_reg(&c, UNW_REG_IP, &ips[i]); + ++i; + } + + max_stack = i; + } + + /* + * Display what we got based on the order setup. + */ + for (i = 0; i < max_stack && !ret; i++) { + int j = i; + + if (callchain_param.order == ORDER_CALLER) + j = max_stack - i - 1; + ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0; + } + + return ret; +} + +static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg, + struct thread *thread, + struct perf_sample *data, int max_stack) +{ + struct unwind_info ui = { + .sample = data, + .thread = thread, + .machine = thread->mg->machine, + }; + + if (!data->user_regs.regs) + return -EINVAL; + + if (max_stack <= 0) + return -EINVAL; + + return get_entries(&ui, cb, arg, max_stack); +} + +static struct unwind_libunwind_ops +_unwind_libunwind_ops = { + .prepare_access = _unwind__prepare_access, + .flush_access = _unwind__flush_access, + .finish_access = _unwind__finish_access, + .get_entries = _unwind__get_entries, +}; + +#ifndef REMOTE_UNWIND_LIBUNWIND +struct unwind_libunwind_ops * +local_unwind_libunwind_ops = &_unwind_libunwind_ops; +#endif diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index ee7e372297e5..6d542a4e0648 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -1,689 +1,83 @@ -/* - * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. - * - * Lots of this code have been borrowed or heavily inspired from parts of - * the libunwind 0.99 code which are (amongst other contributors I may have - * forgotten): - * - * Copyright (C) 2002-2007 Hewlett-Packard Co - * Contributed by David Mosberger-Tang <davidm@hpl.hp.com> - * - * And the bugs have been added by: - * - * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com> - * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com> - * - */ - -#include <elf.h> -#include <gelf.h> -#include <fcntl.h> -#include <string.h> -#include <unistd.h> -#include <sys/mman.h> -#include <linux/list.h> -#include <libunwind.h> -#include <libunwind-ptrace.h> -#include "callchain.h" +#include "unwind.h" #include "thread.h" #include "session.h" -#include "perf_regs.h" -#include "unwind.h" -#include "symbol.h" -#include "util.h" #include "debug.h" +#include "arch/common.h" -extern int -UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, - unw_word_t ip, - unw_dyn_info_t *di, - unw_proc_info_t *pi, - int need_unwind_info, void *arg); - -#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) - -extern int -UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, - unw_word_t ip, - unw_word_t segbase, - const char *obj_name, unw_word_t start, - unw_word_t end); - -#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) - -#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ -#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ - -/* Pointer-encoding formats: */ -#define DW_EH_PE_omit 0xff -#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */ -#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */ -#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */ -#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */ -#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */ - -/* Pointer-encoding application: */ -#define DW_EH_PE_absptr 0x00 /* absolute value */ -#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */ - -/* - * The following are not documented by LSB v1.3, yet they are used by - * GCC, presumably they aren't documented by LSB since they aren't - * used on Linux: - */ -#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ -#define DW_EH_PE_aligned 0x50 /* aligned pointer */ - -/* Flags intentionaly not handled, since they're not needed: - * #define DW_EH_PE_indirect 0x80 - * #define DW_EH_PE_uleb128 0x01 - * #define DW_EH_PE_udata2 0x02 - * #define DW_EH_PE_sleb128 0x09 - * #define DW_EH_PE_sdata2 0x0a - * #define DW_EH_PE_textrel 0x20 - * #define DW_EH_PE_datarel 0x30 - */ - -struct unwind_info { - struct perf_sample *sample; - struct machine *machine; - struct thread *thread; -}; - -#define dw_read(ptr, type, end) ({ \ - type *__p = (type *) ptr; \ - type __v; \ - if ((__p + 1) > (type *) end) \ - return -EINVAL; \ - __v = *__p++; \ - ptr = (typeof(ptr)) __p; \ - __v; \ - }) - -static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, - u8 encoding) -{ - u8 *cur = *p; - *val = 0; - - switch (encoding) { - case DW_EH_PE_omit: - *val = 0; - goto out; - case DW_EH_PE_ptr: - *val = dw_read(cur, unsigned long, end); - goto out; - default: - break; - } - - switch (encoding & DW_EH_PE_APPL_MASK) { - case DW_EH_PE_absptr: - break; - case DW_EH_PE_pcrel: - *val = (unsigned long) cur; - break; - default: - return -EINVAL; - } - - if ((encoding & 0x07) == 0x00) - encoding |= DW_EH_PE_udata4; - - switch (encoding & DW_EH_PE_FORMAT_MASK) { - case DW_EH_PE_sdata4: - *val += dw_read(cur, s32, end); - break; - case DW_EH_PE_udata4: - *val += dw_read(cur, u32, end); - break; - case DW_EH_PE_sdata8: - *val += dw_read(cur, s64, end); - break; - case DW_EH_PE_udata8: - *val += dw_read(cur, u64, end); - break; - default: - return -EINVAL; - } - - out: - *p = cur; - return 0; -} - -#define dw_read_encoded_value(ptr, end, enc) ({ \ - u64 __v; \ - if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \ - return -EINVAL; \ - } \ - __v; \ - }) +struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; +struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; +struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops; -static u64 elf_section_offset(int fd, const char *name) +static void unwind__register_ops(struct thread *thread, + struct unwind_libunwind_ops *ops) { - Elf *elf; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - u64 offset = 0; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) - return 0; - - do { - if (gelf_getehdr(elf, &ehdr) == NULL) - break; - - if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL)) - break; - - offset = shdr.sh_offset; - } while (0); - - elf_end(elf); - return offset; + thread->unwind_libunwind_ops = ops; } -#ifndef NO_LIBUNWIND_DEBUG_FRAME -static int elf_is_exec(int fd, const char *name) +int unwind__prepare_access(struct thread *thread, struct map *map, + bool *initialized) { - Elf *elf; - GElf_Ehdr ehdr; - int retval = 0; + const char *arch; + enum dso_type dso_type; + struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops; + int err; - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) + if (thread->addr_space) { + pr_debug("unwind: thread map already set, dso=%s\n", + map->dso->name); + if (initialized) + *initialized = true; return 0; - if (gelf_getehdr(elf, &ehdr) == NULL) - goto out; - - retval = (ehdr.e_type == ET_EXEC); - -out: - elf_end(elf); - pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval); - return retval; -} -#endif - -struct table_entry { - u32 start_ip_offset; - u32 fde_offset; -}; - -struct eh_frame_hdr { - unsigned char version; - unsigned char eh_frame_ptr_enc; - unsigned char fde_count_enc; - unsigned char table_enc; - - /* - * The rest of the header is variable-length and consists of the - * following members: - * - * encoded_t eh_frame_ptr; - * encoded_t fde_count; - */ - - /* A single encoded pointer should not be more than 8 bytes. */ - u64 enc[2]; - - /* - * struct { - * encoded_t start_ip; - * encoded_t fde_addr; - * } binary_search_table[fde_count]; - */ - char data[0]; -} __packed; - -static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, - u64 offset, u64 *table_data, u64 *segbase, - u64 *fde_count) -{ - struct eh_frame_hdr hdr; - u8 *enc = (u8 *) &hdr.enc; - u8 *end = (u8 *) &hdr.data; - ssize_t r; - - r = dso__data_read_offset(dso, machine, offset, - (u8 *) &hdr, sizeof(hdr)); - if (r != sizeof(hdr)) - return -EINVAL; - - /* We dont need eh_frame_ptr, just skip it. */ - dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); - - *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); - *segbase = offset; - *table_data = (enc - (u8 *) &hdr) + offset; - return 0; -} - -static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, - u64 *table_data, u64 *segbase, - u64 *fde_count) -{ - int ret = -EINVAL, fd; - u64 offset = dso->data.eh_frame_hdr_offset; - - if (offset == 0) { - fd = dso__data_get_fd(dso, machine); - if (fd < 0) - return -EINVAL; - - /* Check the .eh_frame section for unwinding info */ - offset = elf_section_offset(fd, ".eh_frame_hdr"); - dso->data.eh_frame_hdr_offset = offset; - dso__data_put_fd(dso); } - if (offset) - ret = unwind_spec_ehframe(dso, machine, offset, - table_data, segbase, - fde_count); - - return ret; -} + /* env->arch is NULL for live-mode (i.e. perf top) */ + if (!thread->mg->machine->env || !thread->mg->machine->env->arch) + goto out_register; -#ifndef NO_LIBUNWIND_DEBUG_FRAME -static int read_unwind_spec_debug_frame(struct dso *dso, - struct machine *machine, u64 *offset) -{ - int fd; - u64 ofs = dso->data.debug_frame_offset; - - if (ofs == 0) { - fd = dso__data_get_fd(dso, machine); - if (fd < 0) - return -EINVAL; - - /* Check the .debug_frame section for unwinding info */ - ofs = elf_section_offset(fd, ".debug_frame"); - dso->data.debug_frame_offset = ofs; - dso__data_put_fd(dso); - } - - *offset = ofs; - if (*offset) + dso_type = dso__type(map->dso, thread->mg->machine); + if (dso_type == DSO__TYPE_UNKNOWN) return 0; - return -EINVAL; -} -#endif - -static struct map *find_map(unw_word_t ip, struct unwind_info *ui) -{ - struct addr_location al; - - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - if (!al.map) { - /* - * We've seen cases (softice) where DWARF unwinder went - * through non executable mmaps, which we need to lookup - * in MAP__VARIABLE tree. - */ - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__VARIABLE, ip, &al); - } - return al.map; -} - -static int -find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, - int need_unwind_info, void *arg) -{ - struct unwind_info *ui = arg; - struct map *map; - unw_dyn_info_t di; - u64 table_data, segbase, fde_count; - int ret = -EINVAL; - - map = find_map(ip, ui); - if (!map || !map->dso) - return -EINVAL; - - pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); + arch = normalize_arch(thread->mg->machine->env->arch); - /* Check the .eh_frame section for unwinding info */ - if (!read_unwind_spec_eh_frame(map->dso, ui->machine, - &table_data, &segbase, &fde_count)) { - memset(&di, 0, sizeof(di)); - di.format = UNW_INFO_FORMAT_REMOTE_TABLE; - di.start_ip = map->start; - di.end_ip = map->end; - di.u.rti.segbase = map->start + segbase; - di.u.rti.table_data = map->start + table_data; - di.u.rti.table_len = fde_count * sizeof(struct table_entry) - / sizeof(unw_word_t); - ret = dwarf_search_unwind_table(as, ip, &di, pi, - need_unwind_info, arg); + if (!strcmp(arch, "x86")) { + if (dso_type != DSO__TYPE_64BIT) + ops = x86_32_unwind_libunwind_ops; + } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) { + if (dso_type == DSO__TYPE_64BIT) + ops = arm64_unwind_libunwind_ops; } -#ifndef NO_LIBUNWIND_DEBUG_FRAME - /* Check the .debug_frame section for unwinding info */ - if (ret < 0 && - !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { - int fd = dso__data_get_fd(map->dso, ui->machine); - int is_exec = elf_is_exec(fd, map->dso->name); - unw_word_t base = is_exec ? 0 : map->start; - const char *symfile; - - if (fd >= 0) - dso__data_put_fd(map->dso); - - symfile = map->dso->symsrc_filename ?: map->dso->name; - - memset(&di, 0, sizeof(di)); - if (dwarf_find_debug_frame(0, &di, ip, base, symfile, - map->start, map->end)) - return dwarf_search_unwind_table(as, ip, &di, pi, - need_unwind_info, arg); - } -#endif - - return ret; -} - -static int access_fpreg(unw_addr_space_t __maybe_unused as, - unw_regnum_t __maybe_unused num, - unw_fpreg_t __maybe_unused *val, - int __maybe_unused __write, - void __maybe_unused *arg) -{ - pr_err("unwind: access_fpreg unsupported\n"); - return -UNW_EINVAL; -} - -static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as, - unw_word_t __maybe_unused *dil_addr, - void __maybe_unused *arg) -{ - return -UNW_ENOINFO; -} - -static int resume(unw_addr_space_t __maybe_unused as, - unw_cursor_t __maybe_unused *cu, - void __maybe_unused *arg) -{ - pr_err("unwind: resume unsupported\n"); - return -UNW_EINVAL; -} - -static int -get_proc_name(unw_addr_space_t __maybe_unused as, - unw_word_t __maybe_unused addr, - char __maybe_unused *bufp, size_t __maybe_unused buf_len, - unw_word_t __maybe_unused *offp, void __maybe_unused *arg) -{ - pr_err("unwind: get_proc_name unsupported\n"); - return -UNW_EINVAL; -} - -static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, - unw_word_t *data) -{ - struct map *map; - ssize_t size; - - map = find_map(addr, ui); - if (!map) { - pr_debug("unwind: no map for %lx\n", (unsigned long)addr); + if (!ops) { + pr_err("unwind: target platform=%s is not supported\n", arch); return -1; } +out_register: + unwind__register_ops(thread, ops); - if (!map->dso) - return -1; - - size = dso__data_read_addr(map->dso, map, ui->machine, - addr, (u8 *) data, sizeof(*data)); - - return !(size == sizeof(*data)); -} - -static int access_mem(unw_addr_space_t __maybe_unused as, - unw_word_t addr, unw_word_t *valp, - int __write, void *arg) -{ - struct unwind_info *ui = arg; - struct stack_dump *stack = &ui->sample->user_stack; - u64 start, end; - int offset; - int ret; - - /* Don't support write, probably not needed. */ - if (__write || !stack || !ui->sample->user_regs.regs) { - *valp = 0; - return 0; - } - - ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP); - if (ret) - return ret; - - end = start + stack->size; - - /* Check overflow. */ - if (addr + sizeof(unw_word_t) < addr) - return -EINVAL; - - if (addr < start || addr + sizeof(unw_word_t) >= end) { - ret = access_dso_mem(ui, addr, valp); - if (ret) { - pr_debug("unwind: access_mem %p not inside range" - " 0x%" PRIx64 "-0x%" PRIx64 "\n", - (void *) (uintptr_t) addr, start, end); - *valp = 0; - return ret; - } - return 0; - } - - offset = addr - start; - *valp = *(unw_word_t *)&stack->data[offset]; - pr_debug("unwind: access_mem addr %p val %lx, offset %d\n", - (void *) (uintptr_t) addr, (unsigned long)*valp, offset); - return 0; -} - -static int access_reg(unw_addr_space_t __maybe_unused as, - unw_regnum_t regnum, unw_word_t *valp, - int __write, void *arg) -{ - struct unwind_info *ui = arg; - int id, ret; - u64 val; - - /* Don't support write, I suspect we don't need it. */ - if (__write) { - pr_err("unwind: access_reg w %d\n", regnum); - return 0; - } - - if (!ui->sample->user_regs.regs) { - *valp = 0; - return 0; - } - - id = libunwind__arch_reg_id(regnum); - if (id < 0) - return -EINVAL; - - ret = perf_reg_value(&val, &ui->sample->user_regs, id); - if (ret) { - pr_err("unwind: can't read reg %d\n", regnum); - return ret; - } - - *valp = (unw_word_t) val; - pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp); - return 0; -} - -static void put_unwind_info(unw_addr_space_t __maybe_unused as, - unw_proc_info_t *pi __maybe_unused, - void *arg __maybe_unused) -{ - pr_debug("unwind: put_unwind_info called\n"); -} - -static int entry(u64 ip, struct thread *thread, - unwind_entry_cb_t cb, void *arg) -{ - struct unwind_entry e; - struct addr_location al; - - thread__find_addr_location(thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - - e.ip = ip; - e.map = al.map; - e.sym = al.sym; - - pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", - al.sym ? al.sym->name : "''", - ip, - al.map ? al.map->map_ip(al.map, ip) : (u64) 0); - - return cb(&e, arg); -} - -static void display_error(int err) -{ - switch (err) { - case UNW_EINVAL: - pr_err("unwind: Only supports local.\n"); - break; - case UNW_EUNSPEC: - pr_err("unwind: Unspecified error.\n"); - break; - case UNW_EBADREG: - pr_err("unwind: Register unavailable.\n"); - break; - default: - break; - } -} - -static unw_accessors_t accessors = { - .find_proc_info = find_proc_info, - .put_unwind_info = put_unwind_info, - .get_dyn_info_list_addr = get_dyn_info_list_addr, - .access_mem = access_mem, - .access_reg = access_reg, - .access_fpreg = access_fpreg, - .resume = resume, - .get_proc_name = get_proc_name, -}; - -int unwind__prepare_access(struct thread *thread) -{ - unw_addr_space_t addr_space; - - if (callchain_param.record_mode != CALLCHAIN_DWARF) - return 0; - - addr_space = unw_create_addr_space(&accessors, 0); - if (!addr_space) { - pr_err("unwind: Can't create unwind address space.\n"); - return -ENOMEM; - } - - unw_set_caching_policy(addr_space, UNW_CACHE_GLOBAL); - thread__set_priv(thread, addr_space); - - return 0; + err = thread->unwind_libunwind_ops->prepare_access(thread); + if (initialized) + *initialized = err ? false : true; + return err; } void unwind__flush_access(struct thread *thread) { - unw_addr_space_t addr_space; - - if (callchain_param.record_mode != CALLCHAIN_DWARF) - return; - - addr_space = thread__priv(thread); - unw_flush_cache(addr_space, 0, 0); + if (thread->unwind_libunwind_ops) + thread->unwind_libunwind_ops->flush_access(thread); } void unwind__finish_access(struct thread *thread) { - unw_addr_space_t addr_space; - - if (callchain_param.record_mode != CALLCHAIN_DWARF) - return; - - addr_space = thread__priv(thread); - unw_destroy_addr_space(addr_space); -} - -static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, - void *arg, int max_stack) -{ - u64 val; - unw_word_t ips[max_stack]; - unw_addr_space_t addr_space; - unw_cursor_t c; - int ret, i = 0; - - ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP); - if (ret) - return ret; - - ips[i++] = (unw_word_t) val; - - /* - * If we need more than one entry, do the DWARF - * unwind itself. - */ - if (max_stack - 1 > 0) { - addr_space = thread__priv(ui->thread); - if (addr_space == NULL) - return -1; - - ret = unw_init_remote(&c, addr_space, ui); - if (ret) - display_error(ret); - - while (!ret && (unw_step(&c) > 0) && i < max_stack) { - unw_get_reg(&c, UNW_REG_IP, &ips[i]); - ++i; - } - - max_stack = i; - } - - /* - * Display what we got based on the order setup. - */ - for (i = 0; i < max_stack && !ret; i++) { - int j = i; - - if (callchain_param.order == ORDER_CALLER) - j = max_stack - i - 1; - ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0; - } - - return ret; + if (thread->unwind_libunwind_ops) + thread->unwind_libunwind_ops->finish_access(thread); } int unwind__get_entries(unwind_entry_cb_t cb, void *arg, - struct thread *thread, - struct perf_sample *data, int max_stack) + struct thread *thread, + struct perf_sample *data, int max_stack) { - struct unwind_info ui = { - .sample = data, - .thread = thread, - .machine = thread->mg->machine, - }; - - if (!data->user_regs.regs) - return -EINVAL; - - if (max_stack <= 0) - return -EINVAL; - - return get_entries(&ui, cb, arg, max_stack); + if (thread->unwind_libunwind_ops) + return thread->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack); + return 0; } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index 12790cf94618..61fb1e90ff51 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -14,18 +14,42 @@ struct unwind_entry { typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); +struct unwind_libunwind_ops { + int (*prepare_access)(struct thread *thread); + void (*flush_access)(struct thread *thread); + void (*finish_access)(struct thread *thread); + int (*get_entries)(unwind_entry_cb_t cb, void *arg, + struct thread *thread, + struct perf_sample *data, int max_stack); +}; + #ifdef HAVE_DWARF_UNWIND_SUPPORT int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, int max_stack); /* libunwind specific */ #ifdef HAVE_LIBUNWIND_SUPPORT -int libunwind__arch_reg_id(int regnum); -int unwind__prepare_access(struct thread *thread); +#ifndef LIBUNWIND__ARCH_REG_ID +#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum) +#endif + +#ifndef LIBUNWIND__ARCH_REG_SP +#define LIBUNWIND__ARCH_REG_SP PERF_REG_SP +#endif + +#ifndef LIBUNWIND__ARCH_REG_IP +#define LIBUNWIND__ARCH_REG_IP PERF_REG_IP +#endif + +int LIBUNWIND__ARCH_REG_ID(int regnum); +int unwind__prepare_access(struct thread *thread, struct map *map, + bool *initialized); void unwind__flush_access(struct thread *thread); void unwind__finish_access(struct thread *thread); #else -static inline int unwind__prepare_access(struct thread *thread __maybe_unused) +static inline int unwind__prepare_access(struct thread *thread __maybe_unused, + struct map *map __maybe_unused, + bool *initialized __maybe_unused) { return 0; } @@ -44,7 +68,9 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, return 0; } -static inline int unwind__prepare_access(struct thread *thread __maybe_unused) +static inline int unwind__prepare_access(struct thread *thread __maybe_unused, + struct map *map __maybe_unused, + bool *initialized __maybe_unused) { return 0; } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b7766c577b01..cee559d8c9e8 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -19,12 +19,19 @@ #include "callchain.h" #include "strlist.h" -struct callchain_param callchain_param = { - .mode = CHAIN_GRAPH_ABS, - .min_percent = 0.5, - .order = ORDER_CALLEE, - .key = CCKEY_FUNCTION, - .value = CCVAL_PERCENT, +#define CALLCHAIN_PARAM_DEFAULT \ + .mode = CHAIN_GRAPH_ABS, \ + .min_percent = 0.5, \ + .order = ORDER_CALLEE, \ + .key = CCKEY_FUNCTION, \ + .value = CCVAL_PERCENT, \ + +struct callchain_param callchain_param = { + CALLCHAIN_PARAM_DEFAULT +}; + +struct callchain_param callchain_param_default = { + CALLCHAIN_PARAM_DEFAULT }; /* @@ -33,6 +40,9 @@ struct callchain_param callchain_param = { unsigned int page_size; int cacheline_size; +int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; +int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK; + bool test_attr__enabled; bool perf_host = true; @@ -94,20 +104,17 @@ int rm_rf(char *path) scnprintf(namebuf, sizeof(namebuf), "%s/%s", path, d->d_name); - ret = stat(namebuf, &statbuf); + /* We have to check symbolic link itself */ + ret = lstat(namebuf, &statbuf); if (ret < 0) { pr_debug("stat failed: %s\n", namebuf); break; } - if (S_ISREG(statbuf.st_mode)) - ret = unlink(namebuf); - else if (S_ISDIR(statbuf.st_mode)) + if (S_ISDIR(statbuf.st_mode)) ret = rm_rf(namebuf); - else { - pr_debug("unknown file: %s\n", namebuf); - ret = -1; - } + else + ret = unlink(namebuf); } closedir(dir); @@ -117,6 +124,40 @@ int rm_rf(char *path) return rmdir(path); } +/* A filter which removes dot files */ +bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d) +{ + return d->d_name[0] != '.'; +} + +/* lsdir reads a directory and store it in strlist */ +struct strlist *lsdir(const char *name, + bool (*filter)(const char *, struct dirent *)) +{ + struct strlist *list = NULL; + DIR *dir; + struct dirent *d; + + dir = opendir(name); + if (!dir) + return NULL; + + list = strlist__new(NULL, NULL); + if (!list) { + errno = ENOMEM; + goto out; + } + + while ((d = readdir(dir)) != NULL) { + if (!filter || filter(name, d)) + strlist__add(list, d->d_name); + } + +out: + closedir(dir); + return list; +} + static int slow_copyfile(const char *from, const char *to) { int err = -1; @@ -471,7 +512,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) "needed for --call-graph fp\n"); break; -#ifdef HAVE_DWARF_UNWIND_SUPPORT /* Dwarf style */ } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { const unsigned long default_stack_dump_size = 8192; @@ -487,7 +527,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) ret = get_stack_size(tok, &size); param->dump_size = size; } -#endif /* HAVE_DWARF_UNWIND_SUPPORT */ } else if (!strncmp(name, "lbr", sizeof("lbr"))) { if (!strtok_r(NULL, ",", &saveptr)) { param->record_mode = CALLCHAIN_LBR; @@ -707,3 +746,19 @@ void print_binary(unsigned char *data, size_t len, } printer(BINARY_PRINT_DATA_END, -1, extra); } + +int is_printable_array(char *p, unsigned int len) +{ + unsigned int i; + + if (!p || !len || p[len - 1] != 0) + return 0; + + len--; + + for (i = 0; i < len; i++) { + if (!isprint(p[i]) && !isspace(p[i])) + return 0; + } + return 1; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 8298d607c738..e5f55477491d 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -72,13 +72,13 @@ #include <sys/ioctl.h> #include <inttypes.h> #include <linux/kernel.h> -#include <linux/magic.h> #include <linux/types.h> #include <sys/ttydefaults.h> #include <api/fs/tracing_path.h> #include <termios.h> #include <linux/bitops.h> #include <termios.h> +#include "strlist.h" extern const char *graph_line; extern const char *graph_dotted_line; @@ -159,12 +159,6 @@ static inline char *gitstrchrnul(const char *s, int c) } #endif -/* - * Wrappers: - */ -void *xrealloc(void *ptr, size_t size) __attribute__((weak)); - - static inline void *zalloc(size_t size) { return calloc(1, size); @@ -222,6 +216,8 @@ static inline int sane_case(int x, int high) int mkdir_p(char *path, mode_t mode); int rm_rf(char *path); +struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); +bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); @@ -254,11 +250,18 @@ int hex2u64(const char *ptr, u64 *val); char *ltrim(char *s); char *rtrim(char *s); +static inline char *trim(char *s) +{ + return ltrim(rtrim(s)); +} + void dump_stack(void); void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; +extern int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_contexts_per_stack; struct parse_tag { char tag; @@ -356,4 +359,10 @@ typedef void (*print_binary_t)(enum binary_printer_ops, void print_binary(unsigned char *data, size_t len, size_t bytes_per_line, print_binary_t printer, void *extra); + +#if !defined(__GLIBC__) && !defined(__ANDROID__) +extern int sched_getcpu(void); +#endif + +int is_printable_array(char *p, unsigned int len); #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 44d440da15dc..7bdcad484225 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -134,8 +134,6 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s return dso; } -#if BITS_PER_LONG == 64 - static enum dso_type machine__thread_dso_type(struct machine *machine, struct thread *thread) { @@ -156,6 +154,8 @@ static enum dso_type machine__thread_dso_type(struct machine *machine, return dso_type; } +#if BITS_PER_LONG == 64 + static int vdso__do_copy_compat(FILE *f, int fd) { char buf[4096]; @@ -283,8 +283,38 @@ static int __machine__findnew_vdso_compat(struct machine *machine, #endif +static struct dso *machine__find_vdso(struct machine *machine, + struct thread *thread) +{ + struct dso *dso = NULL; + enum dso_type dso_type; + + dso_type = machine__thread_dso_type(machine, thread); + switch (dso_type) { + case DSO__TYPE_32BIT: + dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO32, true); + if (!dso) { + dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, + true); + if (dso && dso_type != dso__type(dso, machine)) + dso = NULL; + } + break; + case DSO__TYPE_X32BIT: + dso = __dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true); + break; + case DSO__TYPE_64BIT: + case DSO__TYPE_UNKNOWN: + default: + dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true); + break; + } + + return dso; +} + struct dso *machine__findnew_vdso(struct machine *machine, - struct thread *thread __maybe_unused) + struct thread *thread) { struct vdso_info *vdso_info; struct dso *dso = NULL; @@ -297,6 +327,10 @@ struct dso *machine__findnew_vdso(struct machine *machine, if (!vdso_info) goto out_unlock; + dso = machine__find_vdso(machine, thread); + if (dso) + goto out_unlock; + #if BITS_PER_LONG == 64 if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso)) goto out_unlock; diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c deleted file mode 100644 index 5f1a07c4b87b..000000000000 --- a/tools/perf/util/wrapper.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Various trivial helper wrappers around standard functions - */ -#include "cache.h" - -/* - * There's no pack memory to release - but stay close to the Git - * version so wrap this away: - */ -static inline void release_pack_memory(size_t size __maybe_unused, - int flag __maybe_unused) -{ -} - -void *xrealloc(void *ptr, size_t size) -{ - void *ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) { - release_pack_memory(size, -1); - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } - return ret; -} |