Diffstat (limited to 'tools/perf/arch/x86/util')
-rw-r--r-- | tools/perf/arch/x86/util/Build | 40
-rw-r--r-- | tools/perf/arch/x86/util/auxtrace.c | 3
-rw-r--r-- | tools/perf/arch/x86/util/dwarf-regs.c | 153
-rw-r--r-- | tools/perf/arch/x86/util/env.c | 19
-rw-r--r-- | tools/perf/arch/x86/util/env.h | 7
-rw-r--r-- | tools/perf/arch/x86/util/event.c | 4
-rw-r--r-- | tools/perf/arch/x86/util/evlist.c | 145
-rw-r--r-- | tools/perf/arch/x86/util/evsel.c | 38
-rw-r--r-- | tools/perf/arch/x86/util/header.c | 5
-rw-r--r-- | tools/perf/arch/x86/util/intel-bts.c | 5
-rw-r--r-- | tools/perf/arch/x86/util/intel-pt.c | 45
-rw-r--r-- | tools/perf/arch/x86/util/iostat.c | 8
-rw-r--r-- | tools/perf/arch/x86/util/mem-events.c | 6
-rw-r--r-- | tools/perf/arch/x86/util/mem-events.h | 1
-rw-r--r-- | tools/perf/arch/x86/util/pmu.c | 290
-rw-r--r-- | tools/perf/arch/x86/util/topdown.c | 43
-rw-r--r-- | tools/perf/arch/x86/util/topdown.h | 2
-rw-r--r-- | tools/perf/arch/x86/util/tsc.c | 18
-rw-r--r-- | tools/perf/arch/x86/util/unwind-libdw.c | 2
19 files changed, 507 insertions, 327 deletions
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 005907cb97d8..06d7c0205b3d 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -1,24 +1,20 @@
-perf-y += header.o
-perf-y += tsc.o
-perf-y += pmu.o
-perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
-perf-y += perf_regs.o
-perf-y += topdown.o
-perf-y += machine.o
-perf-y += event.o
-perf-y += evlist.o
-perf-y += mem-events.o
-perf-y += evsel.o
-perf-y += iostat.o
-perf-y += env.o
+perf-util-y += header.o
+perf-util-y += tsc.o
+perf-util-y += pmu.o
+perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
+perf-util-y += perf_regs.o
+perf-util-y += topdown.o
+perf-util-y += machine.o
+perf-util-y += event.o
+perf-util-y += evlist.o
+perf-util-y += mem-events.o
+perf-util-y += evsel.o
+perf-util-y += iostat.o
 
-perf-$(CONFIG_DWARF) += dwarf-regs.o
-perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
+perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 
-perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
-perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-
-perf-$(CONFIG_AUXTRACE) += auxtrace.o
-perf-$(CONFIG_AUXTRACE) += archinsn.o
-perf-$(CONFIG_AUXTRACE) += intel-pt.o
-perf-$(CONFIG_AUXTRACE) += intel-bts.o
+perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-util-y += archinsn.o
+perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
+perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
index 354780ff1605..ecbf61a7eb3a 100644
--- a/tools/perf/arch/x86/util/auxtrace.c
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -55,11 +55,12 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
 					      int *err)
 {
 	char buffer[64];
+	struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus);
 	int ret;
 
 	*err = 0;
 
-	ret = get_cpuid(buffer, sizeof(buffer));
+	ret = get_cpuid(buffer, sizeof(buffer), cpu);
 	if (ret) {
 		*err = ret;
 		return NULL;
diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c
deleted file mode 100644
index 399c4a0a29d8..000000000000
--- a/tools/perf/arch/x86/util/dwarf-regs.c
+++ /dev/null
@@ -1,153 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
- * Extracted from probe-finder.c
- *
- * Written by Masami Hiramatsu <mhiramat@redhat.com>
- */
-
-#include <stddef.h>
-#include <errno.h> /* for EINVAL */
-#include <string.h> /* for strcmp */
-#include <linux/ptrace.h> /* for struct pt_regs */
-#include <linux/kernel.h> /* for offsetof */
-#include <dwarf-regs.h>
-
-/*
- * See arch/x86/kernel/ptrace.c.
- * Different from it:
- *
- *  - Since struct pt_regs is defined differently for user and kernel,
- *    but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct
- *    field name of user's pt_regs), we make REG_OFFSET_NAME to accept
- *    both string name and reg field name.
- *
- *  - Since accessing x86_32's pt_regs from x86_64 building is difficult
- *    and vise versa, we simply fill offset with -1, so
- *    get_arch_regstr() still works but regs_query_register_offset()
- *    returns error.
- *    The only inconvenience caused by it now is that we are not allowed
- *    to generate BPF prologue for a x86_64 kernel if perf is built for
- *    x86_32. This is really a rare usecase.
- *
- *  - Order is different from kernel's ptrace.c for get_arch_regstr(). Use
- *    the order defined by dwarf.
- */
-
-struct pt_regs_offset {
-	const char *name;
-	int offset;
-};
-
-#define REG_OFFSET_END {.name = NULL, .offset = 0}
-
-#ifdef __x86_64__
-# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
-# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1}
-#else
-# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1}
-# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
-#endif
-
-/* TODO: switching by dwarf address size */
-#ifndef __x86_64__
-static const struct pt_regs_offset x86_32_regoffset_table[] = {
-	REG_OFFSET_NAME_32("%ax",	eax),
-	REG_OFFSET_NAME_32("%cx",	ecx),
-	REG_OFFSET_NAME_32("%dx",	edx),
-	REG_OFFSET_NAME_32("%bx",	ebx),
-	REG_OFFSET_NAME_32("$stack",	esp),	/* Stack address instead of %sp */
-	REG_OFFSET_NAME_32("%bp",	ebp),
-	REG_OFFSET_NAME_32("%si",	esi),
-	REG_OFFSET_NAME_32("%di",	edi),
-	REG_OFFSET_END,
-};
-
-#define regoffset_table x86_32_regoffset_table
-#else
-static const struct pt_regs_offset x86_64_regoffset_table[] = {
-	REG_OFFSET_NAME_64("%ax",	rax),
-	REG_OFFSET_NAME_64("%dx",	rdx),
-	REG_OFFSET_NAME_64("%cx",	rcx),
-	REG_OFFSET_NAME_64("%bx",	rbx),
-	REG_OFFSET_NAME_64("%si",	rsi),
-	REG_OFFSET_NAME_64("%di",	rdi),
-	REG_OFFSET_NAME_64("%bp",	rbp),
-	REG_OFFSET_NAME_64("%sp",	rsp),
-	REG_OFFSET_NAME_64("%r8",	r8),
-	REG_OFFSET_NAME_64("%r9",	r9),
-	REG_OFFSET_NAME_64("%r10",	r10),
-	REG_OFFSET_NAME_64("%r11",	r11),
-	REG_OFFSET_NAME_64("%r12",	r12),
-	REG_OFFSET_NAME_64("%r13",	r13),
-	REG_OFFSET_NAME_64("%r14",	r14),
-	REG_OFFSET_NAME_64("%r15",	r15),
-	REG_OFFSET_END,
-};
-
-#define regoffset_table x86_64_regoffset_table
-#endif
-
-/* Minus 1 for the ending REG_OFFSET_END */
-#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1)
-
-/* Return architecture dependent register string (for kprobe-tracer) */
-const char *get_arch_regstr(unsigned int n)
-{
-	return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL;
-}
-
-/* Reuse code from arch/x86/kernel/ptrace.c */
-/**
- * regs_query_register_offset() - query register offset from its name
- * @name:	the name of a register
- *
- * regs_query_register_offset() returns the offset of a register in struct
- * pt_regs from its name. If the name is invalid, this returns -EINVAL;
- */
-int regs_query_register_offset(const char *name)
-{
-	const struct pt_regs_offset *roff;
-	for (roff = regoffset_table; roff->name != NULL; roff++)
-		if (!strcmp(roff->name, name))
-			return roff->offset;
-	return -EINVAL;
-}
-
-struct dwarf_regs_idx {
-	const char *name;
-	int idx;
-};
-
-static const struct dwarf_regs_idx x86_regidx_table[] = {
-	{ "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 },
-	{ "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 },
-	{ "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 },
-	{ "rbx", 3 }, { "edx", 3 }, { "bx", 3 }, { "bl", 3 },
-	{ "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 },
-	{ "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 },
-	{ "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 },
-	{ "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 },
-	{ "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 },
-	{ "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 },
-	{ "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 },
-	{ "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 },
-	{ "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 },
-	{ "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 },
-	{ "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 },
-	{ "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 },
-	{ "rip", DWARF_REG_PC },
-};
-
-int get_arch_regnum(const char *name)
-{
-	unsigned int i;
-
-	if (*name != '%')
-		return -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++)
-		if (!strcmp(x86_regidx_table[i].name, name + 1))
-			return x86_regidx_table[i].idx;
-	return -ENOENT;
-}
diff --git a/tools/perf/arch/x86/util/env.c b/tools/perf/arch/x86/util/env.c
deleted file mode 100644
index 3e537ffb1353..000000000000
--- a/tools/perf/arch/x86/util/env.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "linux/string.h"
-#include "util/env.h"
-#include "env.h"
-
-bool x86__is_amd_cpu(void)
-{
-	struct perf_env env = { .total_mem = 0, };
-	static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */
-
-	if (is_amd)
-		goto ret;
-
-	perf_env__cpuid(&env);
-	is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD") ? 1 : -1;
-	perf_env__exit(&env);
-ret:
-	return is_amd >= 1 ? true : false;
-}
diff --git a/tools/perf/arch/x86/util/env.h b/tools/perf/arch/x86/util/env.h
deleted file mode 100644
index d78f080b6b3f..000000000000
--- a/tools/perf/arch/x86/util/env.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _X86_ENV_H
-#define _X86_ENV_H
-
-bool x86__is_amd_cpu(void);
-
-#endif /* _X86_ENV_H */
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index e65b7dbe27fb..a0400707180c 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -15,7 +15,7 @@
 #if defined(__x86_64__)
 
 struct perf_event__synthesize_extra_kmaps_cb_args {
-	struct perf_tool *tool;
+	const struct perf_tool *tool;
 	perf_event__handler_t process;
 	struct machine *machine;
 	union perf_event *event;
@@ -65,7 +65,7 @@ static int perf_event__synthesize_extra_kmaps_cb(struct map *map, void *data)
 	return 0;
 }
 
-int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool,
 				       perf_event__handler_t process,
 				       struct machine *machine)
 {
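The deleted env.c above memoized an "is this an AMD CPU?" answer derived from the cpuid vendor string. As a rough standalone illustration of the same cache-on-first-use pattern, here is a sketch using the compiler's <cpuid.h> rather than perf's perf_env helpers; the cpuid plumbing below is an assumption of this sketch, not what perf itself calls:

// Standalone sketch of the memoized vendor check from the deleted env.c.
#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool is_amd_cpu(void)
{
	static int cached; /* 0: uninitialized, 1: AMD, -1: not AMD */

	if (!cached) {
		unsigned int eax, ebx, ecx, edx;
		char vendor[13] = {0};

		if (__get_cpuid(0, &eax, &ebx, &ecx, &edx)) {
			/* The vendor string is returned in EBX, EDX, ECX order. */
			memcpy(vendor, &ebx, 4);
			memcpy(vendor + 4, &edx, 4);
			memcpy(vendor + 8, &ecx, 4);
		}
		cached = !strcmp(vendor, "AuthenticAMD") ? 1 : -1;
	}
	return cached == 1;
}

int main(void)
{
	printf("AMD CPU: %s\n", is_amd_cpu() ? "yes" : "no");
	return 0;
}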
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index b1ce0c52d88d..1969758cc8c1 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -1,94 +1,83 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include "util/pmu.h"
-#include "util/pmus.h"
-#include "util/evlist.h"
-#include "util/parse-events.h"
-#include "util/event.h"
+#include <string.h>
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
 #include "topdown.h"
 #include "evsel.h"
 
-static int ___evlist__add_default_attrs(struct evlist *evlist,
-					struct perf_event_attr *attrs,
-					size_t nr_attrs)
-{
-	LIST_HEAD(head);
-	size_t i = 0;
-
-	for (i = 0; i < nr_attrs; i++)
-		event_attr_init(attrs + i);
-
-	if (perf_pmus__num_core_pmus() == 1)
-		return evlist__add_attrs(evlist, attrs, nr_attrs);
-
-	for (i = 0; i < nr_attrs; i++) {
-		struct perf_pmu *pmu = NULL;
-
-		if (attrs[i].type == PERF_TYPE_SOFTWARE) {
-			struct evsel *evsel = evsel__new(attrs + i);
-
-			if (evsel == NULL)
-				goto out_delete_partial_list;
-			list_add_tail(&evsel->core.node, &head);
-			continue;
-		}
-
-		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
-			struct perf_cpu_map *cpus;
-			struct evsel *evsel;
-
-			evsel = evsel__new(attrs + i);
-			if (evsel == NULL)
-				goto out_delete_partial_list;
-			evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
-			cpus = perf_cpu_map__get(pmu->cpus);
-			evsel->core.cpus = cpus;
-			evsel->core.own_cpus = perf_cpu_map__get(cpus);
-			evsel->pmu_name = strdup(pmu->name);
-			list_add_tail(&evsel->core.node, &head);
-		}
-	}
-
-	evlist__splice_list_tail(evlist, &head);
-
-	return 0;
-
-out_delete_partial_list:
-	{
-		struct evsel *evsel, *n;
-
-		__evlist__for_each_entry_safe(&head, n, evsel)
-			evsel__delete(evsel);
-	}
-	return -1;
-}
-
-int arch_evlist__add_default_attrs(struct evlist *evlist,
-				   struct perf_event_attr *attrs,
-				   size_t nr_attrs)
-{
-	if (!nr_attrs)
-		return 0;
-
-	return ___evlist__add_default_attrs(evlist, attrs, nr_attrs);
-}
-
 int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
 {
+	/*
+	 * Currently the following topdown events sequence are supported to
+	 * move and regroup correctly.
+	 *
+	 * a. all events in a group
+	 *    perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1
+	 *    WARNING: events were regrouped to match PMUs
+	 *    Performance counter stats for 'CPU(s) 0':
+	 *    15,066,240     slots
+	 *    1,899,760      instructions
+	 *    2,126,998      topdown-retiring
+	 * b. all events not in a group
+	 *    perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1
+	 *    WARNING: events were regrouped to match PMUs
+	 *    Performance counter stats for 'CPU(s) 0':
+	 *    2,045,561      instructions
+	 *    17,108,370     slots
+	 *    2,281,116      topdown-retiring
+	 * c. slots event in a group but topdown metrics events outside the group
+	 *    perf stat -e "{instructions,slots},topdown-retiring" -C0 sleep 1
+	 *    WARNING: events were regrouped to match PMUs
+	 *    Performance counter stats for 'CPU(s) 0':
+	 *    20,323,878     slots
+	 *    2,634,884      instructions
+	 *    3,028,656      topdown-retiring
+	 * d. slots event and topdown metrics events in two groups
+	 *    perf stat -e "{instructions,slots},{topdown-retiring}" -C0 sleep 1
+	 *    WARNING: events were regrouped to match PMUs
+	 *    Performance counter stats for 'CPU(s) 0':
+	 *    26,319,024     slots
+	 *    2,427,791      instructions
+	 *    2,683,508      topdown-retiring
+	 * e. slots event and metrics event are not in a group and not adjacent
+	 *    perf stat -e "{instructions,slots},cycles,topdown-retiring" -C0 sleep 1
+	 *    WARNING: events were regrouped to match PMUs
+	 *    68,433,522     slots
+	 *    8,856,102      topdown-retiring
+	 *    7,791,494      instructions
+	 *    11,469,513     cycles
+	 */
 	if (topdown_sys_has_perf_metrics() &&
 	    (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) {
 		/* Ensure the topdown slots comes first. */
-		if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots"))
-			return -1;
-		if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots"))
-			return 1;
-		/* Followed by topdown events. */
-		if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown"))
+		if (arch_is_topdown_slots(lhs))
 			return -1;
-		if (!strcasestr(lhs->name, "topdown") && strcasestr(rhs->name, "topdown"))
+		if (arch_is_topdown_slots(rhs))
 			return 1;
+
+		/*
+		 * Move topdown metrics events forward only when topdown metrics
+		 * events are not in same group with previous slots event. If
+		 * topdown metrics events are already in same group with slots
+		 * event, do nothing.
+		 */
+		if (lhs->core.leader != rhs->core.leader) {
+			bool lhs_topdown = arch_is_topdown_metrics(lhs);
+			bool rhs_topdown = arch_is_topdown_metrics(rhs);
+
+			if (lhs_topdown && !rhs_topdown)
+				return -1;
+			if (!lhs_topdown && rhs_topdown)
+				return 1;
+		}
 	}
 
+	/* Retire latency event should not be group leader*/
+	if (lhs->retire_lat && !rhs->retire_lat)
+		return 1;
+	if (!lhs->retire_lat && rhs->retire_lat)
+		return -1;
+
 	/* Default ordering by insertion index. */
 	return lhs->core.idx - rhs->core.idx;
 }
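The new arch_evlist__cmp() above is a two-tier comparator: the slots event always sorts first, topdown metrics events are pulled forward only across group boundaries, and insertion order breaks ties. A minimal toy model of that ordering logic, using hypothetical event names and a simplified group-leader index in place of perf's evsel machinery (perf itself sorts with a stable list sort, not qsort):

// Toy model of the slots-first / topdown-forward ordering rules.
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_evsel {
	const char *name;
	int group_leader; /* index of the group leader, standing in for core.leader */
	int idx;          /* insertion index */
};

static bool is_slots(const struct toy_evsel *e)
{
	return !strcmp(e->name, "slots");
}

static bool is_topdown_metric(const struct toy_evsel *e)
{
	return !strncmp(e->name, "topdown-", 8);
}

static int cmp(const void *a, const void *b)
{
	const struct toy_evsel *lhs = a, *rhs = b;

	/* Slots always sorts first. */
	if (is_slots(lhs))
		return -1;
	if (is_slots(rhs))
		return 1;
	/* Pull topdown metrics forward only across group boundaries. */
	if (lhs->group_leader != rhs->group_leader) {
		if (is_topdown_metric(lhs) && !is_topdown_metric(rhs))
			return -1;
		if (!is_topdown_metric(lhs) && is_topdown_metric(rhs))
			return 1;
	}
	/* Otherwise keep insertion order. */
	return lhs->idx - rhs->idx;
}

int main(void)
{
	/* Case (c): slots grouped with instructions, topdown-retiring outside. */
	struct toy_evsel evs[] = {
		{ "instructions",     0, 0 },
		{ "slots",            0, 1 },
		{ "topdown-retiring", 2, 2 },
	};

	qsort(evs, 3, sizeof(evs[0]), cmp);
	for (int i = 0; i < 3; i++)
		printf("%s\n", evs[i].name);
	return 0;
}

Sorting the case (c) input yields slots first with topdown-retiring pulled ahead of instructions, which is what allows the regrouping pass to place the metric event next to (and then inside) the slots group.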
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index 090d0f371891..3dd29ba2c23b 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -6,6 +6,7 @@
 #include "util/pmu.h"
 #include "util/pmus.h"
 #include "linux/string.h"
+#include "topdown.h"
 #include "evsel.h"
 #include "util/debug.h"
 #include "env.h"
@@ -21,7 +22,8 @@ void arch_evsel__set_sample_weight(struct evsel *evsel)
 /* Check whether the evsel's PMU supports the perf metrics */
 bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
 {
-	const char *pmu_name = evsel->pmu_name ? evsel->pmu_name : "cpu";
+	struct perf_pmu *pmu;
+	u32 type = evsel->core.attr.type;
 
 	/*
 	 * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU
@@ -31,11 +33,31 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
 	 * Checking both the PERF_TYPE_RAW type and the slots event
 	 * should be good enough to detect the perf metrics feature.
 	 */
-	if ((evsel->core.attr.type == PERF_TYPE_RAW) &&
-	    perf_pmus__have_event(pmu_name, "slots"))
-		return true;
+again:
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
+		if (type)
+			goto again;
+		break;
+	case PERF_TYPE_RAW:
+		break;
+	default:
+		return false;
+	}
+
+	pmu = evsel->pmu;
+	if (pmu && perf_pmu__is_fake(pmu))
+		pmu = NULL;
 
-	return false;
+	if (!pmu) {
+		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+			if (pmu->type == PERF_TYPE_RAW)
+				break;
+		}
+	}
+	return pmu && perf_pmu__have_event(pmu, "slots");
 }
 
 bool arch_evsel__must_be_in_group(const struct evsel *evsel)
@@ -44,7 +66,7 @@ bool arch_evsel__must_be_in_group(const struct evsel *evsel)
 	    strcasestr(evsel->name, "uops_retired.slots"))
 		return false;
 
-	return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots");
+	return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel);
 }
 
 int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
@@ -63,7 +85,7 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
 		return scnprintf(bf, size, "%s", event_name);
 
 	return scnprintf(bf, size, "%s/%s/",
-			 evsel->pmu_name ? evsel->pmu_name : "cpu",
+			 evsel->pmu ? evsel->pmu->name : "cpu",
			 event_name);
 }
 
@@ -108,7 +130,7 @@ int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
 		return 0;
 
 	if (!evsel->core.attr.precise_ip &&
-	    !(evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3)))
+	    !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3)))
 		return 0;
 
 	/* More verbose IBS errors. */
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index a51444a77a5f..412977f8aa83 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -58,13 +58,12 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt)
 }
 
 int
-get_cpuid(char *buffer, size_t sz)
+get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused)
 {
 	return __get_cpuid(buffer, sz, "%s,%u,%u,%u$");
 }
 
-char *
-get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+char *get_cpuid_str(struct perf_cpu cpu __maybe_unused)
 {
 	char *buf = malloc(128);
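The reworked evsel__sys_has_perf_metrics() resolves legacy hardware and hw-cache events to their real core PMU by reading the extended PMU type from the high bits of attr.config, then falls back to PERF_TYPE_RAW. A hedged sketch of just that decoding step; PERF_PMU_TYPE_SHIFT is assumed to be the uapi value 32, and the PMU type 4 in the example is hypothetical:

// Sketch of extended-type decoding for legacy events on hybrid systems.
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

#ifndef PERF_PMU_TYPE_SHIFT
#define PERF_PMU_TYPE_SHIFT 32 /* assumption: matches uapi linux/perf_event.h */
#endif

static uint32_t effective_pmu_type(uint32_t type, uint64_t config)
{
	/* Legacy hardware/hw-cache events may encode the real PMU in config. */
	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) {
		uint32_t ext = (uint32_t)(config >> PERF_PMU_TYPE_SHIFT);

		if (ext)
			return ext; /* the extended type names the core PMU */
	}
	return type;
}

int main(void)
{
	/* "cycles" pinned to hypothetical PMU type 4 via the extended type bits. */
	uint64_t config = ((uint64_t)4 << PERF_PMU_TYPE_SHIFT) | PERF_COUNT_HW_CPU_CYCLES;

	printf("pmu type = %u\n", effective_pmu_type(PERF_TYPE_HARDWARE, config));
	return 0;
}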
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index af8ae4647585..85c8186300c8 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -143,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
 	if (!opts->full_auxtrace)
 		return 0;
 
-	if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+	if (opts->full_auxtrace && !perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
 		pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
 		return -EINVAL;
 	}
@@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
 		 * In the case of per-cpu mmaps, we need the CPU on the
 		 * AUX event.
 		 */
-		if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+		if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
 			evsel__set_sample_bit(intel_bts_evsel, CPU);
 	}
 
@@ -434,7 +434,6 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
 	}
 
 	btsr->intel_bts_pmu = intel_bts_pmu;
-	btsr->itr.pmu = intel_bts_pmu;
 	btsr->itr.recording_options = intel_bts_recording_options;
 	btsr->itr.info_priv_size = intel_bts_info_priv_size;
 	btsr->itr.info_fill = intel_bts_info_fill;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index d199619df3ab..add33cb5d1da 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -19,6 +19,7 @@
 #include "../../../util/evlist.h"
 #include "../../../util/evsel.h"
 #include "../../../util/evsel_config.h"
+#include "../../../util/config.h"
 #include "../../../util/cpumap.h"
 #include "../../../util/mmap.h"
 #include <subcmd/parse-options.h>
@@ -32,6 +33,7 @@
 #include "../../../util/tsc.h"
 #include <internal/lib.h> // page_size
 #include "../../../util/intel-pt.h"
+#include <api/fs/fs.h>
 
 #define KiB(x) ((x) * 1024)
 #define MiB(x) ((x) * 1024 * 1024)
@@ -51,6 +53,7 @@ struct intel_pt_recording {
 	struct perf_pmu *intel_pt_pmu;
 	int have_sched_switch;
 	struct evlist *evlist;
+	bool all_switch_events;
 	bool snapshot_mode;
 	bool snapshot_init_done;
 	size_t snapshot_size;
@@ -74,7 +77,8 @@ static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu,
 		goto out_free;
 
 	attr.config = *config;
-	err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*err=*/NULL);
+	err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*apply_hardcoded=*/false,
+				     /*err=*/NULL);
 	if (err)
 		goto out_free;
 
@@ -369,7 +373,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
 			ui__warning("Intel Processor Trace: TSC not available\n");
 	}
 
-	per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
+	per_cpu_mmaps = !perf_cpu_map__is_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
 
 	auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
 	auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
@@ -428,6 +432,16 @@ static int intel_pt_track_switches(struct evlist *evlist)
 }
 #endif
 
+static bool intel_pt_exclude_guest(void)
+{
+	int pt_mode;
+
+	if (sysfs__read_int("module/kvm_intel/parameters/pt_mode", &pt_mode))
+		pt_mode = 0;
+
+	return pt_mode == 1;
+}
+
 static void intel_pt_valid_str(char *str, size_t len, u64 valid)
 {
 	unsigned int val, last = 0, state = 1;
@@ -620,6 +634,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		}
 		evsel->core.attr.freq = 0;
 		evsel->core.attr.sample_period = 1;
+		evsel->core.attr.exclude_guest = intel_pt_exclude_guest();
 		evsel->no_aux_samples = true;
 		evsel->needs_auxtrace_mmap = true;
 		intel_pt_evsel = evsel;
@@ -758,7 +773,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 	}
 
 	if (!opts->auxtrace_snapshot_mode && !opts->auxtrace_sample_mode) {
-		u32 aux_watermark = opts->auxtrace_mmap_pages * page_size / 4;
+		size_t aw = opts->auxtrace_mmap_pages * (size_t)page_size / 4;
+		u32 aux_watermark = aw > UINT_MAX ? UINT_MAX : aw;
 
 		intel_pt_evsel->core.attr.aux_watermark = aux_watermark;
 	}
@@ -774,13 +790,13 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 	 * Per-cpu recording needs sched_switch events to distinguish different
 	 * threads.
 	 */
-	if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+	if (have_timing_info && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
 	    !record_opts__no_switch_events(opts)) {
 		if (perf_can_record_switch_events()) {
 			bool cpu_wide = !target__none(&opts->target) &&
 					!target__has_task(&opts->target);
 
-			if (!cpu_wide && perf_can_record_cpu_wide()) {
+			if (ptr->all_switch_events && !cpu_wide && perf_can_record_cpu_wide()) {
 				struct evsel *switch_evsel;
 
 				switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
@@ -832,7 +848,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		 * In the case of per-cpu mmaps, we need the CPU on the
 		 * AUX event.
 		 */
-		if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+		if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
 			evsel__set_sample_bit(intel_pt_evsel, CPU);
 	}
 
@@ -858,7 +874,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		tracking_evsel->immediate = true;
 
 		/* In per-cpu case, always need the time of mmap events etc */
-		if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+		if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
 			evsel__set_sample_bit(tracking_evsel, TIME);
 			/* And the CPU for switch events */
 			evsel__set_sample_bit(tracking_evsel, CPU);
@@ -870,7 +886,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 	 * Warn the user when we do not have enough information to decode i.e.
 	 * per-cpu with no sched_switch (except workload-only).
 	 */
-	if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+	if (!ptr->have_sched_switch && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
 	    !target__none(&opts->target) &&
 	    !intel_pt_evsel->core.attr.exclude_user)
 		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
@@ -1164,6 +1180,16 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
 	return rdtsc();
 }
 
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+	struct intel_pt_recording *ptr = data;
+
+	if (!strcmp(var, "intel-pt.all-switch-events"))
+		ptr->all_switch_events = perf_config_bool(var, value);
+
+	return 0;
+}
+
 struct auxtrace_record *intel_pt_recording_init(int *err)
 {
 	struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
@@ -1183,8 +1209,9 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
 		return NULL;
 	}
 
+	perf_config(intel_pt_perf_config, ptr);
+
 	ptr->intel_pt_pmu = intel_pt_pmu;
-	ptr->itr.pmu = intel_pt_pmu;
 	ptr->itr.recording_options = intel_pt_recording_options;
 	ptr->itr.info_priv_size = intel_pt_info_priv_size;
 	ptr->itr.info_fill = intel_pt_info_fill;
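The aux_watermark change above avoids a 32-bit overflow: the quarter-of-buffer size is now computed in size_t and clamped to UINT_MAX before being stored in the u32 attribute field. A standalone sketch of the same arithmetic (the second example assumes a 64-bit size_t):

// Overflow-safe quarter-buffer watermark, clamped to the u32 attribute range.
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t aux_watermark(size_t mmap_pages, size_t page_size)
{
	/* A quarter of the AUX buffer, computed without 32-bit overflow. */
	size_t aw = mmap_pages * page_size / 4;

	return aw > UINT_MAX ? UINT_MAX : (uint32_t)aw;
}

int main(void)
{
	/* 4 MiB buffer -> 1 MiB watermark; a huge buffer saturates at UINT_MAX. */
	printf("%u\n", aux_watermark(1024, 4096));
	printf("%u\n", aux_watermark((size_t)16 << 30, 4096));
	return 0;
}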
diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c
index df7b5dfcc26a..7442a2cd87ed 100644
--- a/tools/perf/arch/x86/util/iostat.c
+++ b/tools/perf/arch/x86/util/iostat.c
@@ -32,7 +32,7 @@
 #define MAX_PATH 1024
 #endif
 
-#define UNCORE_IIO_PMU_PATH	"devices/uncore_iio_%d"
+#define UNCORE_IIO_PMU_PATH	"bus/event_source/devices/uncore_iio_%d"
 #define SYSFS_UNCORE_PMU_PATH	"%s/"UNCORE_IIO_PMU_PATH
 #define PLATFORM_MAPPING_PATH	UNCORE_IIO_PMU_PATH"/die%d"
 
@@ -403,6 +403,10 @@ void iostat_prefix(struct evlist *evlist,
 	struct iio_root_port *rp = evlist->selected->priv;
 
 	if (rp) {
+		/*
+		 * TODO: This is the incorrect format in JSON mode.
+		 * See prepare_timestamp()
+		 */
 		if (ts)
 			sprintf(prefix, "%6lu.%09lu%s%04x:%02x%s",
 				ts->tv_sec, ts->tv_nsec,
@@ -444,7 +448,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
 		iostat_value = (count->val - prev_count_val) /
 			       ((double) count->run / count->ena);
 	}
-	out->print_metric(config, out->ctx, NULL, "%8.0f", iostat_metric,
+	out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.0f", iostat_metric,
 			  iostat_value / (256 * 1024));
 }
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index 62df03e91c7e..b38f519020ff 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -26,3 +26,9 @@ struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
 	E(NULL,		NULL,		NULL,	false,	0),
 	E("mem-ldst",	"%s//",		NULL,	false,	0),
 };
+
+struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = {
+	E(NULL,		NULL,		NULL,	false,	0),
+	E(NULL,		NULL,		NULL,	false,	0),
+	E("mem-ldst",	"%s/ldlat=%u/",	NULL,	true,	0),
+};
diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h
index f55c8d3b7d59..11e09a256f5b 100644
--- a/tools/perf/arch/x86/util/mem-events.h
+++ b/tools/perf/arch/x86/util/mem-events.h
@@ -6,5 +6,6 @@
 extern struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX];
 extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX];
 extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX];
 
 #endif /* _X86_MEM_EVENTS_H */
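The new perf_mem_events_amd_ldlat table supplies a "%s/ldlat=%u/" event-string template into which the PMU name and latency threshold get substituted. A minimal sketch of that expansion; the PMU name and threshold here are illustrative assumptions, not values read from a real system:

// Expanding a perf_mem_event-style "%s/ldlat=%u/" template into an event string.
#include <stdio.h>

int main(void)
{
	char ev[64];
	const char *pmu = "ibs_op"; /* assumption: AMD IBS op PMU name */
	unsigned int ldlat = 30;    /* hypothetical latency threshold */

	snprintf(ev, sizeof(ev), "%s/ldlat=%u/", pmu, ldlat);
	printf("-e %s\n", ev); /* e.g. perf mem record -e ibs_op/ldlat=30/ */
	return 0;
}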
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index c3d89d6ba1bf..58113482654b 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -8,6 +8,8 @@
 #include <linux/perf_event.h>
 #include <linux/zalloc.h>
 #include <api/fs/fs.h>
+#include <api/io_dir.h>
+#include <internal/cpumap.h>
 #include <errno.h>
 
 #include "../../../util/intel-pt.h"
@@ -16,10 +18,261 @@
 #include "../../../util/fncache.h"
 #include "../../../util/pmus.h"
 #include "mem-events.h"
-#include "env.h"
+#include "util/debug.h"
+#include "util/env.h"
+#include "util/header.h"
 
-void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
+static bool x86__is_intel_graniterapids(void)
 {
+	static bool checked_if_graniterapids;
+	static bool is_graniterapids;
+
+	if (!checked_if_graniterapids) {
+		const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
+		char *cpuid = get_cpuid_str((struct perf_cpu){0});
+
+		is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
+		free(cpuid);
+		checked_if_graniterapids = true;
+	}
+	return is_graniterapids;
+}
+
+static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
+{
+	struct perf_cpu_map *cpus;
+	char *buf = NULL;
+	size_t buf_len;
+
+	if (sysfs__read_str(sysfs_path, &buf, &buf_len) < 0)
+		return NULL;
+
+	cpus = perf_cpu_map__new(buf);
+	free(buf);
+	return cpus;
+}
+
+static int snc_nodes_per_l3_cache(void)
+{
+	static bool checked_snc;
+	static int snc_nodes;
+
+	if (!checked_snc) {
+		struct perf_cpu_map *node_cpus =
+			read_sysfs_cpu_map("devices/system/node/node0/cpulist");
+		struct perf_cpu_map *cache_cpus =
+			read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
+
+		snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
+		perf_cpu_map__put(cache_cpus);
+		perf_cpu_map__put(node_cpus);
+		checked_snc = true;
+	}
+	return snc_nodes;
+}
+
+static bool starts_with(const char *str, const char *prefix)
+{
+	return !strncmp(prefix, str, strlen(prefix));
+}
+
+static int num_chas(void)
+{
+	static bool checked_chas;
+	static int num_chas;
+
+	if (!checked_chas) {
+		int fd = perf_pmu__event_source_devices_fd();
+		struct io_dir dir;
+		struct io_dirent64 *dent;
+
+		if (fd < 0)
+			return -1;
+
+		io_dir__init(&dir, fd);
+
+		while ((dent = io_dir__readdir(&dir)) != NULL) {
+			/* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
+			if (starts_with(dent->d_name, "uncore_cha_"))
+				num_chas++;
+		}
+		close(fd);
+		checked_chas = true;
+	}
+	return num_chas;
+}
+
+#define MAX_SNCS 6
+
+static int uncore_cha_snc(struct perf_pmu *pmu)
+{
+	// CHA SNC numbers are ordered correspond to the CHAs number.
+	unsigned int cha_num;
+	int num_cha, chas_per_node, cha_snc;
+	int snc_nodes = snc_nodes_per_l3_cache();
+
+	if (snc_nodes <= 1)
+		return 0;
+
+	num_cha = num_chas();
+	if (num_cha <= 0) {
+		pr_warning("Unexpected: no CHAs found\n");
+		return 0;
+	}
+
+	/* Compute SNC for PMU. */
+	if (sscanf(pmu->name, "uncore_cha_%u", &cha_num) != 1) {
+		pr_warning("Unexpected: unable to compute CHA number '%s'\n", pmu->name);
+		return 0;
+	}
+	chas_per_node = num_cha / snc_nodes;
+	cha_snc = cha_num / chas_per_node;
+
+	/* Range check cha_snc. for unexpected out of bounds. */
+	return cha_snc >= MAX_SNCS ? 0 : cha_snc;
+}
+
+static int uncore_imc_snc(struct perf_pmu *pmu)
+{
+	// Compute the IMC SNC using lookup tables.
+	unsigned int imc_num;
+	int snc_nodes = snc_nodes_per_l3_cache();
+	const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
+	const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
+	const u8 *snc_map;
+	size_t snc_map_len;
+
+	switch (snc_nodes) {
+	case 2:
+		snc_map = snc2_map;
+		snc_map_len = ARRAY_SIZE(snc2_map);
+		break;
+	case 3:
+		snc_map = snc3_map;
+		snc_map_len = ARRAY_SIZE(snc3_map);
+		break;
+	default:
+		/* Error or no lookup support for SNC with >3 nodes. */
+		return 0;
+	}
+
+	/* Compute SNC for PMU. */
+	if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) {
+		pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
+		return 0;
+	}
+	if (imc_num >= snc_map_len) {
+		pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
+		return 0;
+	}
+	return snc_map[imc_num];
+}
+
+static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
+{
+	static bool checked_cpu_adjust[MAX_SNCS];
+	static int cpu_adjust[MAX_SNCS];
+	struct perf_cpu_map *node_cpus;
+	char node_path[] = "devices/system/node/node0/cpulist";
+
+	/* Was adjust already computed? */
+	if (checked_cpu_adjust[pmu_snc])
+		return cpu_adjust[pmu_snc];
+
+	/* SNC0 doesn't need an adjust. */
+	if (pmu_snc == 0) {
+		cpu_adjust[0] = 0;
+		checked_cpu_adjust[0] = true;
+		return 0;
+	}
+
+	/*
+	 * Use NUMA topology to compute first CPU of the NUMA node, we want to
+	 * adjust CPU 0 to be this and similarly for other CPUs if there is >1
+	 * socket.
+	 */
+	assert(pmu_snc >= 0 && pmu_snc <= 9);
+	node_path[24] += pmu_snc; // Shift node0 to be node<pmu_snc>.
+	node_cpus = read_sysfs_cpu_map(node_path);
+	cpu_adjust[pmu_snc] = perf_cpu_map__cpu(node_cpus, 0).cpu;
+	if (cpu_adjust[pmu_snc] < 0) {
+		pr_debug("Failed to read valid CPU list from <sysfs>/%s\n", node_path);
+		cpu_adjust[pmu_snc] = 0;
+	} else {
+		checked_cpu_adjust[pmu_snc] = true;
+	}
+	perf_cpu_map__put(node_cpus);
+	return cpu_adjust[pmu_snc];
+}
+
+static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
+{
+	// With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
+	// topology. For example, a two socket graniterapids machine may be set
+	// up with 3-way SNC meaning there are 6 NUMA nodes that should be
+	// displayed with --per-node. The cpumask of the CHA and IMC PMUs
+	// reflects per-socket information meaning, for example, uncore_cha_60
+	// on a two socket graniterapids machine with 120 cores per socket will
+	// have a cpumask of "0,120". This cpumask needs adjusting to "40,160"
+	// to reflect that uncore_cha_60 is used for the 2nd SNC of each
+	// socket. Without the adjustment events on uncore_cha_60 will appear in
+	// node 0 and node 3 (in our example 2 socket 3-way set up), but with
+	// the adjustment they will appear in node 1 and node 4. The number of
+	// CHAs is typically larger than the number of cores. The CHA numbers
+	// are assumed to split evenly and inorder wrt core numbers. There are
+	// fewer memory IMC PMUs than cores and mapping is handled using lookup
+	// tables.
+	static struct perf_cpu_map *cha_adjusted[MAX_SNCS];
+	static struct perf_cpu_map *imc_adjusted[MAX_SNCS];
+	struct perf_cpu_map **adjusted = cha ? cha_adjusted : imc_adjusted;
+	int idx, pmu_snc, cpu_adjust;
+	struct perf_cpu cpu;
+	bool alloc;
+
+	// Cpus from the kernel holds first CPU of each socket. e.g. 0,120.
+	if (perf_cpu_map__cpu(pmu->cpus, 0).cpu != 0) {
+		pr_debug("Ignoring cpumask adjust for %s as unexpected first CPU\n", pmu->name);
+		return;
+	}
+
+	pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
+	if (pmu_snc == 0) {
+		// No adjustment necessary for the first SNC.
+		return;
+	}
+
+	alloc = adjusted[pmu_snc] == NULL;
+	if (alloc) {
+		// Hold onto the perf_cpu_map globally to avoid recomputation.
+		cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
+		adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
+		if (!adjusted[pmu_snc])
+			return;
+	}
+
+	perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
+		// Compute the new cpu map values or if not allocating, assert
+		// that they match expectations. asserts will be removed to
+		// avoid overhead in NDEBUG builds.
+		if (alloc) {
+			RC_CHK_ACCESS(adjusted[pmu_snc])->map[idx].cpu = cpu.cpu + cpu_adjust;
+		} else if (idx == 0) {
+			cpu_adjust = perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu - cpu.cpu;
+			assert(uncore_cha_imc_compute_cpu_adjust(pmu_snc) == cpu_adjust);
+		} else {
+			assert(perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu ==
+			       cpu.cpu + cpu_adjust);
+		}
+	}
+
+	perf_cpu_map__put(pmu->cpus);
+	pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]);
+}
+
+void perf_pmu__arch_init(struct perf_pmu *pmu)
+{
+	struct perf_pmu_caps *ldlat_cap;
+
 #ifdef HAVE_AUXTRACE_SUPPORT
 	if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
 		pmu->auxtrace = true;
@@ -33,12 +286,31 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
 #endif
 
 	if (x86__is_amd_cpu()) {
-		if (!strcmp(pmu->name, "ibs_op"))
-			pmu->mem_events = perf_mem_events_amd;
-	} else if (pmu->is_core) {
-		if (perf_pmu__have_event(pmu, "mem-loads-aux"))
-			pmu->mem_events = perf_mem_events_intel_aux;
-		else
-			pmu->mem_events = perf_mem_events_intel;
+		if (strcmp(pmu->name, "ibs_op"))
+			return;
+
+		pmu->mem_events = perf_mem_events_amd;
+
+		if (!perf_pmu__caps_parse(pmu))
+			return;
+
+		ldlat_cap = perf_pmu__get_cap(pmu, "ldlat");
+		if (!ldlat_cap || strcmp(ldlat_cap->value, "1"))
+			return;
+
+		perf_mem_events__loads_ldlat = 0;
+		pmu->mem_events = perf_mem_events_amd_ldlat;
+	} else {
+		if (pmu->is_core) {
+			if (perf_pmu__have_event(pmu, "mem-loads-aux"))
+				pmu->mem_events = perf_mem_events_intel_aux;
+			else
+				pmu->mem_events = perf_mem_events_intel;
+		} else if (x86__is_intel_graniterapids()) {
+			if (starts_with(pmu->name, "uncore_cha_"))
+				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
+			else if (starts_with(pmu->name, "uncore_imc_"))
+				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+		}
 	}
 }
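The long comment in gnr_uncore_cha_imc_adjust_cpumask_for_snc() reasons that CHA numbers split evenly and in order across SNC nodes, so the SNC index of a CHA PMU is cha_num / (num_cha / snc_nodes). A worked example of that math; the per-socket CHA count of 126 is a hypothetical figure chosen to reproduce the uncore_cha_60 example from the comment:

// Worked example of the CHA-number -> SNC-node mapping.
#include <stdio.h>

#define MAX_SNCS 6

static int cha_snc(unsigned int cha_num, int num_cha, int snc_nodes)
{
	int chas_per_node = num_cha / snc_nodes;
	int snc = cha_num / chas_per_node;

	/* Range check for unexpected out-of-bounds values, as in the patch. */
	return snc >= MAX_SNCS ? 0 : snc;
}

int main(void)
{
	/* uncore_cha_60 with 126 CHAs per socket and 3-way SNC: 60 / 42 = SNC 1, */
	/* so its per-socket cpumask "0,120" gets shifted to "40,160". */
	printf("uncore_cha_60 -> SNC%d\n", cha_snc(60, 126, 3));
	return 0;
}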
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index 3f9a267d4501..d1c654839049 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "api/fs/fs.h"
 #include "util/evsel.h"
+#include "util/evlist.h"
 #include "util/pmu.h"
 #include "util/pmus.h"
 #include "util/topdown.h"
@@ -32,6 +33,31 @@ bool topdown_sys_has_perf_metrics(void)
 }
 
 #define TOPDOWN_SLOTS		0x0400
+bool arch_is_topdown_slots(const struct evsel *evsel)
+{
+	if (evsel->core.attr.config == TOPDOWN_SLOTS)
+		return true;
+
+	return false;
+}
+
+bool arch_is_topdown_metrics(const struct evsel *evsel)
+{
+	int config = evsel->core.attr.config;
+	const char *name_from_config;
+	struct perf_pmu *pmu;
+
+	/* All topdown events have an event code of 0. */
+	if ((config & 0xFF) != 0)
+		return false;
+
+	pmu = evsel__find_pmu(evsel);
+	if (!pmu || !pmu->is_core)
+		return false;
+
+	name_from_config = perf_pmu__name_from_config(pmu, config);
+	return name_from_config && strcasestr(name_from_config, "topdown");
+}
 
 /*
  * Check whether a topdown group supports sample-read.
@@ -41,11 +67,24 @@ bool topdown_sys_has_perf_metrics(void)
  */
 bool arch_topdown_sample_read(struct evsel *leader)
 {
+	struct evsel *evsel;
+
 	if (!evsel__sys_has_perf_metrics(leader))
 		return false;
 
-	if (leader->core.attr.config == TOPDOWN_SLOTS)
-		return true;
+	if (!arch_is_topdown_slots(leader))
+		return false;
+
+	/*
+	 * If slots event as leader event but no topdown metric events
+	 * in group, slots event should still sample as leader.
+	 */
+	evlist__for_each_entry(leader->evlist, evsel) {
+		if (evsel->core.leader != leader->core.leader)
+			continue;
+		if (evsel != leader && arch_is_topdown_metrics(evsel))
+			return true;
+	}
 
 	return false;
 }
diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h
index 46bf9273e572..1bae9b1822d7 100644
--- a/tools/perf/arch/x86/util/topdown.h
+++ b/tools/perf/arch/x86/util/topdown.h
@@ -3,5 +3,7 @@
 #define _TOPDOWN_H 1
 
 bool topdown_sys_has_perf_metrics(void);
+bool arch_is_topdown_slots(const struct evsel *evsel);
+bool arch_is_topdown_metrics(const struct evsel *evsel);
 
 #endif
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index e2d6cfe21057..3a439e4b12d2 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -24,9 +24,9 @@ u64 rdtsc(void)
  * ...
  * will return 3000000000.
  */
-static double cpuinfo_tsc_freq(void)
+static u64 cpuinfo_tsc_freq(void)
 {
-	double result = 0;
+	u64 result = 0;
 	FILE *cpuinfo;
 	char *line = NULL;
 	size_t len = 0;
@@ -34,20 +34,22 @@ static double cpuinfo_tsc_freq(void)
 	cpuinfo = fopen("/proc/cpuinfo", "r");
 	if (!cpuinfo) {
 		pr_err("Failed to read /proc/cpuinfo for TSC frequency\n");
-		return NAN;
+		return 0;
 	}
 	while (getline(&line, &len, cpuinfo) > 0) {
 		if (!strncmp(line, "model name", 10)) {
 			char *pos = strstr(line + 11, " @ ");
+			double float_result;
 
-			if (pos && sscanf(pos, " @ %lfGHz", &result) == 1) {
-				result *= 1000000000;
+			if (pos && sscanf(pos, " @ %lfGHz", &float_result) == 1) {
+				float_result *= 1000000000;
+				result = (u64)float_result;
 				goto out;
 			}
 		}
 	}
 out:
-	if (fpclassify(result) == FP_ZERO)
+	if (result == 0)
 		pr_err("Failed to find TSC frequency in /proc/cpuinfo\n");
 
 	free(line);
@@ -55,7 +57,7 @@ out:
 	return result;
 }
 
-double arch_get_tsc_freq(void)
+u64 arch_get_tsc_freq(void)
 {
 	unsigned int a, b, c, d, lvl;
 	static bool cached;
@@ -86,6 +88,6 @@ double arch_get_tsc_freq(void)
 		return tsc;
 	}
 
-	tsc = (double)c * (double)b / (double)a;
+	tsc = (u64)c * (u64)b / (u64)a;
 	return tsc;
 }
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
index edb77e20e083..798493e887d7 100644
--- a/tools/perf/arch/x86/util/unwind-libdw.c
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -8,7 +8,7 @@
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {
 	struct unwind_info *ui = arg;
-	struct regs_dump *user_regs = &ui->sample->user_regs;
+	struct regs_dump *user_regs = perf_sample__user_regs(ui->sample);
 	Dwarf_Word dwarf_regs[17];
 	unsigned nregs;
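The tsc.c change converts the /proc/cpuinfo fallback from double to u64: the frequency is still parsed as a double from the "@ N.NNGHz" suffix of the model name, then scaled and truncated. A self-contained sketch of just that parsing step (the model-name line below is an example string, not read from a live system):

// Parsing a nominal TSC frequency out of a cpuinfo "model name" line.
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t model_name_tsc_freq(const char *line)
{
	const char *pos = strstr(line, " @ ");
	double ghz;

	/* "model name : ... @ 2.20GHz" -> 2200000000, returned as u64. */
	if (pos && sscanf(pos, " @ %lfGHz", &ghz) == 1)
		return (uint64_t)(ghz * 1000000000);
	return 0;
}

int main(void)
{
	const char *line = "model name\t: Intel(R) Xeon(R) Gold 5120 CPU @ 2.20GHz";

	printf("%llu Hz\n", (unsigned long long)model_name_tsc_freq(line));
	return 0;
}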