diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-14 16:31:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-14 16:31:23 -0700 |
commit | 1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa (patch) | |
tree | a391eed8ae206613b48e02e56e6ad5c4432d8767 /tools/perf/arch/arm64/util | |
parent | 63bd30f249dcf0a7ce16967935cecee8feec24bb (diff) | |
parent | 0f66dfe7b91d2743cc71dfff37af503215b204ef (diff) |
Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim:
"perf stat:
- Support new 'cluster' aggregation mode for shared resources
depending on the hardware configuration:
$ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1
Performance counter stats for 'system wide':
S0-D0-CLS0 2 85,051,822 cycles
S0-D0-CLS0 2 73,909,908 instructions # 0.87 insn per cycle
S0-D0-CLS2 2 93,365,918 cycles
S0-D0-CLS2 2 83,006,158 instructions # 0.89 insn per cycle
S0-D0-CLS4 2 104,157,523 cycles
S0-D0-CLS4 2 53,234,396 instructions # 0.51 insn per cycle
S0-D0-CLS6 2 65,891,079 cycles
S0-D0-CLS6 2 41,478,273 instructions # 0.63 insn per cycle
1.002407989 seconds time elapsed
- Various fixes and cleanups for event metrics including NaN handling
perf script:
- Use libcapstone if available to disassemble the instructions. This
enables 'perf script -F disasm' and 'perf script --insn-trace=disasm'
(for Intel-PT):
$ perf script -F event,ip,disasm
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffa9839d25 movq %rax, %r14
cycles:P: ffffffffa9cdcaf0 endbr64
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffaa401f86 iretq
cycles:P: ffffffffa99c4de5 movq 0x30(%rcx), %r8
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffaa401f86 iretq
cycles:P: ffffffffa9907983 movl 0x68(%rbx), %eax
cycles:P: ffffffffa988d428 wrmsr
- Expose sample ID / stream ID to python scripts
perf test:
- Add more perf test cases from Redhat internal test suites. This
time it adds the base infra and a few perf probe tests. More to
come. :)
- Add 'perf test -p' for parallel execution and fix some issues found
by the parallel test
- Support symbol test to print symbols in given (active) module:
$ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko
--- start ---
Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko
Overlapping symbols:
7a990-7a9a0 l __pfx_ext4_exit_fs
7a990-7a9a0 g __pfx_cleanup_module
Overlapping symbols:
7a9a0-7aa1c l ext4_exit_fs
7a9a0-7aa1c g cleanup_module
...
JSON metric updates:
- A new round of Intel metric updates
- Support Power11 PVR (compatible to Power10)
- Fix cache latency events on Zen 4 to set SliceId properly
Internal:
- Fix reference counting for 'map' data structure, tireless work from
Ian!
- More memory optimization for struct thread and annotate histogram.
Now, 'perf report' (TUI) and 'perf annotate' should be much
lighter-weight in terms of memory footprint
- Support cross-arch perf register access. Clean up the build
configuration so that it can detect arch-register support at
runtime. This can allow to parse register data in sample which was
recorded in a different arch
Others:
- Sync task state in 'perf sched' to kernel using trace event fields.
The task states have been changed so tools cannot assume a fixed
encoding
- Clean up 'perf mem' to generalize the arch-specific events
- Add support for local and global variables to data type profiling.
This would increase the success rate of type resolution with DWARF
- Add short option -H for --hierarchy in 'perf report' and 'perf top'"
* tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits)
perf annotate: Add comments in the data structures
perf annotate: Remove sym_hist.addr[] array
perf annotate: Calculate instruction overhead using hashmap
perf annotate: Add a hashmap for symbol histogram
perf threads: Reduce table size from 256 to 8
perf threads: Switch from rbtree to hashmap
perf threads: Move threads to its own files
perf machine: Move machine's threads into its own abstraction
perf machine: Move fprintf to for_each loop and a callback
perf trace: Ignore thread hashing in summary
perf report: Sort child tasks by tid
perf vendor events amd: Fix Zen 4 cache latency events
perf version: Display availability of OpenCSD support
perf vendor events intel: Add umasks/occ_sel to PCU events.
perf map: Fix map reference count issues
libperf evlist: Avoid out-of-bounds access
perf lock contention: Account contending locks too
perf metrics: Fix segv for metrics with no events
perf metrics: Fix metric matching
perf pmu: Fix a potential memory leak in perf_pmu__lookup()
...
Diffstat (limited to 'tools/perf/arch/arm64/util')
-rw-r--r-- | tools/perf/arch/arm64/util/machine.c | 2 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/mem-events.c | 39 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/mem-events.h | 7 | ||||
-rw-r--r-- | tools/perf/arch/arm64/util/perf_regs.c | 7 |
4 files changed, 22 insertions, 33 deletions
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index ba1144366e85..aab1cc2bc283 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -12,5 +12,7 @@ void arch__add_leaf_frame_record_opts(struct record_opts *opts) { + const struct sample_reg *sample_reg_masks = arch__sample_reg_masks(); + opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask; } diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c index 3bcc5c7035c2..9f8da7937255 100644 --- a/tools/perf/arch/arm64/util/mem-events.c +++ b/tools/perf/arch/arm64/util/mem-events.c @@ -1,37 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 -#include "map_symbol.h" +#include "util/map_symbol.h" +#include "util/mem-events.h" #include "mem-events.h" -#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } +#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a } -static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { - E("spe-load", "arm_spe_0/ts_enable=1,pa_enable=1,load_filter=1,store_filter=0,min_latency=%u/", "arm_spe_0"), - E("spe-store", "arm_spe_0/ts_enable=1,pa_enable=1,load_filter=0,store_filter=1/", "arm_spe_0"), - E("spe-ldst", "arm_spe_0/ts_enable=1,pa_enable=1,load_filter=1,store_filter=1,min_latency=%u/", "arm_spe_0"), +struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX] = { + E("spe-load", "%s/ts_enable=1,pa_enable=1,load_filter=1,store_filter=0,min_latency=%u/", NULL, true, 0), + E("spe-store", "%s/ts_enable=1,pa_enable=1,load_filter=0,store_filter=1/", NULL, false, 0), + E("spe-ldst", "%s/ts_enable=1,pa_enable=1,load_filter=1,store_filter=1,min_latency=%u/", NULL, true, 0), }; - -static char mem_ev_name[100]; - -struct perf_mem_event *perf_mem_events__ptr(int i) -{ - if (i >= PERF_MEM_EVENTS__MAX) - return NULL; - - return &perf_mem_events[i]; -} - -const char *perf_mem_events__name(int i, const char *pmu_name __maybe_unused) -{ - struct perf_mem_event *e = perf_mem_events__ptr(i); - - if (i >= PERF_MEM_EVENTS__MAX) - return NULL; - - if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) - scnprintf(mem_ev_name, sizeof(mem_ev_name), - e->name, perf_mem_events__loads_ldlat); - else /* PERF_MEM_EVENTS__STORE */ - scnprintf(mem_ev_name, sizeof(mem_ev_name), e->name); - - return mem_ev_name; -} diff --git a/tools/perf/arch/arm64/util/mem-events.h b/tools/perf/arch/arm64/util/mem-events.h new file mode 100644 index 000000000000..5fc50be4be38 --- /dev/null +++ b/tools/perf/arch/arm64/util/mem-events.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARM64_MEM_EVENTS_H +#define _ARM64_MEM_EVENTS_H + +extern struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX]; + +#endif /* _ARM64_MEM_EVENTS_H */ diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 1b79d8eab22f..09308665e28a 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -16,7 +16,7 @@ #define HWCAP_SVE (1 << 22) #endif -const struct sample_reg sample_reg_masks[] = { +static const struct sample_reg sample_reg_masks[] = { SMPL_REG(x0, PERF_REG_ARM64_X0), SMPL_REG(x1, PERF_REG_ARM64_X1), SMPL_REG(x2, PERF_REG_ARM64_X2), @@ -175,3 +175,8 @@ uint64_t arch__user_reg_mask(void) } return PERF_REGS_MASK; } + +const struct sample_reg *arch__sample_reg_masks(void) +{ + return sample_reg_masks; +} |