summaryrefslogtreecommitdiff
path: root/tools/perf/util/threads.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-14 16:31:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-14 16:31:23 -0700
commit1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa (patch)
treea391eed8ae206613b48e02e56e6ad5c4432d8767 /tools/perf/util/threads.c
parent63bd30f249dcf0a7ce16967935cecee8feec24bb (diff)
parent0f66dfe7b91d2743cc71dfff37af503215b204ef (diff)
Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim: "perf stat: - Support new 'cluster' aggregation mode for shared resources depending on the hardware configuration: $ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1 Performance counter stats for 'system wide': S0-D0-CLS0 2 85,051,822 cycles S0-D0-CLS0 2 73,909,908 instructions # 0.87 insn per cycle S0-D0-CLS2 2 93,365,918 cycles S0-D0-CLS2 2 83,006,158 instructions # 0.89 insn per cycle S0-D0-CLS4 2 104,157,523 cycles S0-D0-CLS4 2 53,234,396 instructions # 0.51 insn per cycle S0-D0-CLS6 2 65,891,079 cycles S0-D0-CLS6 2 41,478,273 instructions # 0.63 insn per cycle 1.002407989 seconds time elapsed - Various fixes and cleanups for event metrics including NaN handling perf script: - Use libcapstone if available to disassemble the instructions. This enables 'perf script -F disasm' and 'perf script --insn-trace=disasm' (for Intel-PT): $ perf script -F event,ip,disasm cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa9839d25 movq %rax, %r14 cycles:P: ffffffffa9cdcaf0 endbr64 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa99c4de5 movq 0x30(%rcx), %r8 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa9907983 movl 0x68(%rbx), %eax cycles:P: ffffffffa988d428 wrmsr - Expose sample ID / stream ID to python scripts perf test: - Add more perf test cases from Redhat internal test suites. This time it adds the base infra and a few perf probe tests. More to come. :) - Add 'perf test -p' for parallel execution and fix some issues found by the parallel test - Support symbol test to print symbols in given (active) module: $ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko --- start --- Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko Overlapping symbols: 7a990-7a9a0 l __pfx_ext4_exit_fs 7a990-7a9a0 g __pfx_cleanup_module Overlapping symbols: 7a9a0-7aa1c l ext4_exit_fs 7a9a0-7aa1c g cleanup_module ... JSON metric updates: - A new round of Intel metric updates - Support Power11 PVR (compatible to Power10) - Fix cache latency events on Zen 4 to set SliceId properly Internal: - Fix reference counting for 'map' data structure, tireless work from Ian! - More memory optimization for struct thread and annotate histogram. Now, 'perf report' (TUI) and 'perf annotate' should be much lighter-weight in terms of memory footprint - Support cross-arch perf register access. Clean up the build configuration so that it can detect arch-register support at runtime. This can allow to parse register data in sample which was recorded in a different arch Others: - Sync task state in 'perf sched' to kernel using trace event fields. The task states have been changed so tools cannot assume a fixed encoding - Clean up 'perf mem' to generalize the arch-specific events - Add support for local and global variables to data type profiling. This would increase the success rate of type resolution with DWARF - Add short option -H for --hierarchy in 'perf report' and 'perf top'" * tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits) perf annotate: Add comments in the data structures perf annotate: Remove sym_hist.addr[] array perf annotate: Calculate instruction overhead using hashmap perf annotate: Add a hashmap for symbol histogram perf threads: Reduce table size from 256 to 8 perf threads: Switch from rbtree to hashmap perf threads: Move threads to its own files perf machine: Move machine's threads into its own abstraction perf machine: Move fprintf to for_each loop and a callback perf trace: Ignore thread hashing in summary perf report: Sort child tasks by tid perf vendor events amd: Fix Zen 4 cache latency events perf version: Display availability of OpenCSD support perf vendor events intel: Add umasks/occ_sel to PCU events. perf map: Fix map reference count issues libperf evlist: Avoid out-of-bounds access perf lock contention: Account contending locks too perf metrics: Fix segv for metrics with no events perf metrics: Fix metric matching perf pmu: Fix a potential memory leak in perf_pmu__lookup() ...
Diffstat (limited to 'tools/perf/util/threads.c')
-rw-r--r--tools/perf/util/threads.c190
1 files changed, 190 insertions, 0 deletions
diff --git a/tools/perf/util/threads.c b/tools/perf/util/threads.c
new file mode 100644
index 000000000000..ff2b169e0085
--- /dev/null
+++ b/tools/perf/util/threads.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "threads.h"
+#include "machine.h"
+#include "thread.h"
+
+static struct threads_table_entry *threads__table(struct threads *threads, pid_t tid)
+{
+ /* Cast it to handle tid == -1 */
+ return &threads->table[(unsigned int)tid % THREADS__TABLE_SIZE];
+}
+
+static size_t key_hash(long key, void *ctx __maybe_unused)
+{
+ /* The table lookup removes low bit entropy, but this is just ignored here. */
+ return key;
+}
+
+static bool key_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return key1 == key2;
+}
+
+void threads__init(struct threads *threads)
+{
+ for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads_table_entry *table = &threads->table[i];
+
+ hashmap__init(&table->shard, key_hash, key_equal, NULL);
+ init_rwsem(&table->lock);
+ table->last_match = NULL;
+ }
+}
+
+void threads__exit(struct threads *threads)
+{
+ threads__remove_all_threads(threads);
+ for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads_table_entry *table = &threads->table[i];
+
+ hashmap__clear(&table->shard);
+ exit_rwsem(&table->lock);
+ }
+}
+
+size_t threads__nr(struct threads *threads)
+{
+ size_t nr = 0;
+
+ for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads_table_entry *table = &threads->table[i];
+
+ down_read(&table->lock);
+ nr += hashmap__size(&table->shard);
+ up_read(&table->lock);
+ }
+ return nr;
+}
+
+/*
+ * Front-end cache - TID lookups come in blocks,
+ * so most of the time we dont have to look up
+ * the full rbtree:
+ */
+static struct thread *__threads_table_entry__get_last_match(struct threads_table_entry *table,
+ pid_t tid)
+{
+ struct thread *th, *res = NULL;
+
+ th = table->last_match;
+ if (th != NULL) {
+ if (thread__tid(th) == tid)
+ res = thread__get(th);
+ }
+ return res;
+}
+
+static void __threads_table_entry__set_last_match(struct threads_table_entry *table,
+ struct thread *th)
+{
+ thread__put(table->last_match);
+ table->last_match = thread__get(th);
+}
+
+static void threads_table_entry__set_last_match(struct threads_table_entry *table,
+ struct thread *th)
+{
+ down_write(&table->lock);
+ __threads_table_entry__set_last_match(table, th);
+ up_write(&table->lock);
+}
+
+struct thread *threads__find(struct threads *threads, pid_t tid)
+{
+ struct threads_table_entry *table = threads__table(threads, tid);
+ struct thread *res;
+
+ down_read(&table->lock);
+ res = __threads_table_entry__get_last_match(table, tid);
+ if (!res) {
+ if (hashmap__find(&table->shard, tid, &res))
+ res = thread__get(res);
+ }
+ up_read(&table->lock);
+ if (res)
+ threads_table_entry__set_last_match(table, res);
+ return res;
+}
+
+struct thread *threads__findnew(struct threads *threads, pid_t pid, pid_t tid, bool *created)
+{
+ struct threads_table_entry *table = threads__table(threads, tid);
+ struct thread *res = NULL;
+
+ *created = false;
+ down_write(&table->lock);
+ res = thread__new(pid, tid);
+ if (res) {
+ if (hashmap__add(&table->shard, tid, res)) {
+ /* Add failed. Assume a race so find other entry. */
+ thread__put(res);
+ res = NULL;
+ if (hashmap__find(&table->shard, tid, &res))
+ res = thread__get(res);
+ } else {
+ res = thread__get(res);
+ *created = true;
+ }
+ if (res)
+ __threads_table_entry__set_last_match(table, res);
+ }
+ up_write(&table->lock);
+ return res;
+}
+
+void threads__remove_all_threads(struct threads *threads)
+{
+ for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads_table_entry *table = &threads->table[i];
+ struct hashmap_entry *cur, *tmp;
+ size_t bkt;
+
+ down_write(&table->lock);
+ __threads_table_entry__set_last_match(table, NULL);
+ hashmap__for_each_entry_safe((&table->shard), cur, tmp, bkt) {
+ struct thread *old_value;
+
+ hashmap__delete(&table->shard, cur->key, /*old_key=*/NULL, &old_value);
+ thread__put(old_value);
+ }
+ up_write(&table->lock);
+ }
+}
+
+void threads__remove(struct threads *threads, struct thread *thread)
+{
+ struct threads_table_entry *table = threads__table(threads, thread__tid(thread));
+ struct thread *old_value;
+
+ down_write(&table->lock);
+ if (table->last_match && RC_CHK_EQUAL(table->last_match, thread))
+ __threads_table_entry__set_last_match(table, NULL);
+
+ hashmap__delete(&table->shard, thread__tid(thread), /*old_key=*/NULL, &old_value);
+ thread__put(old_value);
+ up_write(&table->lock);
+}
+
+int threads__for_each_thread(struct threads *threads,
+ int (*fn)(struct thread *thread, void *data),
+ void *data)
+{
+ for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads_table_entry *table = &threads->table[i];
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ down_read(&table->lock);
+ hashmap__for_each_entry((&table->shard), cur, bkt) {
+ int rc = fn((struct thread *)cur->pvalue, data);
+
+ if (rc != 0) {
+ up_read(&table->lock);
+ return rc;
+ }
+ }
+ up_read(&table->lock);
+ }
+ return 0;
+
+}