summaryrefslogtreecommitdiff
path: root/tools/perf/arch/x86/util/pmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/arch/x86/util/pmu.c')
-rw-r--r--tools/perf/arch/x86/util/pmu.c368
1 files changed, 250 insertions, 118 deletions
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 65d8cdff4d5f..a3f96221758d 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -8,6 +8,8 @@
#include <linux/perf_event.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>
+#include <api/io_dir.h>
+#include <internal/cpumap.h>
#include <errno.h>
#include "../../../util/intel-pt.h"
@@ -15,168 +17,298 @@
#include "../../../util/pmu.h"
#include "../../../util/fncache.h"
#include "../../../util/pmus.h"
-#include "env.h"
+#include "mem-events.h"
+#include "util/debug.h"
+#include "util/env.h"
+#include "util/header.h"
-struct pmu_alias {
- char *name;
- char *alias;
- struct list_head list;
-};
+/*
+ * Report whether the cpuid string returned by get_cpuid_str() for a
+ * zero-initialized struct perf_cpu matches the "GenuineIntel-6-A[DE]"
+ * pattern. The comparison is performed on the first call only; the result is
+ * kept in function-local statics and returned directly afterwards.
+ */
+static bool x86__is_intel_graniterapids(void)
+{
+ static bool checked_if_graniterapids;
+ static bool is_graniterapids;
-static LIST_HEAD(pmu_alias_name_list);
-static bool cached_list;
+ if (!checked_if_graniterapids) {
+ const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
+ char *cpuid = get_cpuid_str((struct perf_cpu){0});
-struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
-{
-#ifdef HAVE_AUXTRACE_SUPPORT
- if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
- pmu->auxtrace = true;
- return intel_pt_pmu_default_config(pmu);
- }
- if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
- pmu->auxtrace = true;
- pmu->selectable = true;
+ /* A NULL cpuid string yields false; the string is freed either way. */
+ is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
+ free(cpuid);
+ checked_if_graniterapids = true;
}
-#endif
- return NULL;
+ return is_graniterapids;
}
-static void pmu_alias__delete(struct pmu_alias *pmu_alias)
+/*
+ * Read the file at sysfs_path via sysfs__read_str() and build a perf_cpu_map
+ * from its contents with perf_cpu_map__new().
+ *
+ * Returns NULL when the read fails; otherwise returns whatever
+ * perf_cpu_map__new() produced. The temporary read buffer is freed here; the
+ * callers in this file release the returned map with perf_cpu_map__put().
+ */
+static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
{
- if (!pmu_alias)
- return;
+ struct perf_cpu_map *cpus;
+ char *buf = NULL;
+ size_t buf_len;
- zfree(&pmu_alias->name);
- zfree(&pmu_alias->alias);
- free(pmu_alias);
+ if (sysfs__read_str(sysfs_path, &buf, &buf_len) < 0)
+ return NULL;
+
+ cpus = perf_cpu_map__new(buf);
+ free(buf);
+ return cpus;
}
-static struct pmu_alias *pmu_alias__new(char *name, char *alias)
+/*
+ * Compute how many NUMA nodes share one L3 cache (the sub-NUMA clustering,
+ * SNC, factor) as the number of CPUs sharing CPU 0's L3 cache divided by the
+ * number of CPUs in NUMA node 0. The value is computed on the first call and
+ * cached in function-local statics.
+ *
+ * Returns the ratio, or 0 when either sysfs CPU list could not be read or
+ * node 0 reports no CPUs. Callers in this file treat any value other than
+ * 2 or more as "no SNC adjustment".
+ */
+static int snc_nodes_per_l3_cache(void)
{
- struct pmu_alias *pmu_alias = zalloc(sizeof(*pmu_alias));
-
- if (pmu_alias) {
- pmu_alias->name = strdup(name);
- if (!pmu_alias->name)
- goto out_delete;
-
- pmu_alias->alias = strdup(alias);
- if (!pmu_alias->alias)
- goto out_delete;
+	static bool checked_snc;
+	static int snc_nodes;
+
+	if (!checked_snc) {
+		struct perf_cpu_map *node_cpus =
+			read_sysfs_cpu_map("devices/system/node/node0/cpulist");
+		struct perf_cpu_map *cache_cpus =
+			read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
+
+		/*
+		 * Only trust the ratio when both lists were actually read and
+		 * the divisor is non-zero. Otherwise leave snc_nodes at 0 so
+		 * that a missing sysfs file (e.g. no NUMA node directory)
+		 * cannot produce a bogus SNC count for later divisions.
+		 */
+		if (node_cpus && cache_cpus) {
+			int node_nr = perf_cpu_map__nr(node_cpus);
+
+			if (node_nr > 0)
+				snc_nodes = perf_cpu_map__nr(cache_cpus) / node_nr;
+		}
+		perf_cpu_map__put(cache_cpus);
+		perf_cpu_map__put(node_cpus);
+		checked_snc = true;
}
- return pmu_alias;
+	return snc_nodes;
+}
-out_delete:
- pmu_alias__delete(pmu_alias);
- return NULL;
+/*
+ * Return true when the first strlen(prefix) bytes of str equal prefix. An
+ * empty prefix therefore matches every string.
+ */
+static bool starts_with(const char *str, const char *prefix)
+{
+ return !strncmp(prefix, str, strlen(prefix));
}
-static int setup_pmu_alias_list(void)
+/*
+ * Count the entries of the PMU event source devices directory whose name
+ * starts with "uncore_cha_". The count is computed on the first successful
+ * call and cached in function-local statics.
+ *
+ * Returns the count, or -1 when the directory file descriptor could not be
+ * obtained. In that case nothing is cached, so a later call tries again.
+ */
+static int num_chas(void)
{
- int fd, dirfd;
- DIR *dir;
- struct dirent *dent;
- struct pmu_alias *pmu_alias;
- char buf[MAX_PMU_NAME_LEN];
- FILE *file;
- int ret = -ENOMEM;
-
- dirfd = perf_pmu__event_source_devices_fd();
- if (dirfd < 0)
- return -1;
-
- dir = fdopendir(dirfd);
- if (!dir)
- return -errno;
-
- while ((dent = readdir(dir))) {
- if (!strcmp(dent->d_name, ".") ||
- !strcmp(dent->d_name, ".."))
- continue;
-
- fd = perf_pmu__pathname_fd(dirfd, dent->d_name, "alias", O_RDONLY);
- if (fd < 0)
- continue;
+ /* Note: the local counter below shadows this function's own name. */
+ static bool checked_chas;
+ static int num_chas;
- file = fdopen(fd, "r");
- if (!file)
- continue;
+ if (!checked_chas) {
+ int fd = perf_pmu__event_source_devices_fd();
+ struct io_dir dir;
+ struct io_dirent64 *dent;
- if (!fgets(buf, sizeof(buf), file)) {
- fclose(file);
- continue;
- }
+ if (fd < 0)
+ return -1;
- fclose(file);
+ io_dir__init(&dir, fd);
- /* Remove the last '\n' */
- buf[strlen(buf) - 1] = 0;
+ while ((dent = io_dir__readdir(&dir)) != NULL) {
+ /* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
+ if (starts_with(dent->d_name, "uncore_cha_"))
+ num_chas++;
+ }
+ /* The descriptor obtained above is closed here once the scan is done. */
+ close(fd);
+ checked_chas = true;
+ }
+ return num_chas;
+}
- pmu_alias = pmu_alias__new(dent->d_name, buf);
- if (!pmu_alias)
- goto close_dir;
+#define MAX_SNCS 6
- list_add_tail(&pmu_alias->list, &pmu_alias_name_list);
+/*
+ * Compute the sub-NUMA cluster (SNC) index of an "uncore_cha_<N>" PMU,
+ * assuming the CHAs are split evenly and in order across the SNC nodes that
+ * share an L3 cache: index = N / (number of CHAs / SNC nodes).
+ *
+ * Returns a value in [0, MAX_SNCS). 0 is also returned when SNC is not in use
+ * (one node or fewer per L3), when no CHAs were found, when the PMU name does
+ * not carry a CHA number, when there are fewer CHAs than SNC nodes, or when
+ * the computed index is out of range.
+ */
+static int uncore_cha_snc(struct perf_pmu *pmu)
+{
+	// CHA SNC numbers are ordered to correspond to the CHA numbers.
+	unsigned int cha_num, cha_snc;
+	int num_cha, chas_per_node;
+	int snc_nodes = snc_nodes_per_l3_cache();
+
+	if (snc_nodes <= 1)
+		return 0;
+
+	num_cha = num_chas();
+	if (num_cha <= 0) {
+		pr_warning("Unexpected: no CHAs found\n");
+		return 0;
}
- ret = 0;
+	/* Compute SNC for PMU. */
+	if (sscanf(pmu->name, "uncore_cha_%u", &cha_num) != 1) {
+		pr_warning("Unexpected: unable to compute CHA number '%s'\n", pmu->name);
+		return 0;
+	}
+	chas_per_node = num_cha / snc_nodes;
+	if (chas_per_node <= 0) {
+		/* Fewer CHAs than SNC nodes: dividing below would trap. */
+		pr_warning("Unexpected: %d CHAs for %d SNC nodes\n", num_cha, snc_nodes);
+		return 0;
+	}
+	/* Kept unsigned so that a huge CHA number cannot become a negative index. */
+	cha_snc = cha_num / chas_per_node;
-close_dir:
- closedir(dir);
- return ret;
+	/* Range check cha_snc for unexpected out-of-bounds values. */
+	return cha_snc >= MAX_SNCS ? 0 : (int)cha_snc;
}
-static char *__pmu_find_real_name(const char *name)
+/*
+ * Compute the sub-NUMA cluster (SNC) index of an "uncore_imc_<N>" PMU using
+ * fixed lookup tables indexed by the IMC number N. Tables exist for 2 and 3
+ * SNC nodes per L3 cache only.
+ *
+ * Returns the table entry for N, or 0 when the SNC node count has no table,
+ * when the PMU name does not carry an IMC number, or when N is beyond the end
+ * of the selected table.
+ */
+static int uncore_imc_snc(struct perf_pmu *pmu)
{
- struct pmu_alias *pmu_alias;
-
- list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
- if (!strcmp(name, pmu_alias->alias))
- return pmu_alias->name;
+	// Compute the IMC SNC using lookup tables.
+	unsigned int imc_num;
+	int snc_nodes = snc_nodes_per_l3_cache();
+	/* Read-only tables: static so they are not rebuilt on every call. */
+	static const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
+	static const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
+	const u8 *snc_map;
+	size_t snc_map_len;
+
+	switch (snc_nodes) {
+	case 2:
+		snc_map = snc2_map;
+		snc_map_len = ARRAY_SIZE(snc2_map);
+		break;
+	case 3:
+		snc_map = snc3_map;
+		snc_map_len = ARRAY_SIZE(snc3_map);
+		break;
+	default:
+		/* Error or no lookup support for SNC with >3 nodes. */
+		return 0;
}
- return (char *)name;
+	/* Compute SNC for PMU. */
+	if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) {
+		pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
+		return 0;
+	}
+	if (imc_num >= snc_map_len) {
+		/* imc_num is unsigned, so it is printed with %u. */
+		pr_warning("Unexpected IMC %u for SNC%d mapping\n", imc_num, snc_nodes);
+		return 0;
+	}
+	return snc_map[imc_num];
}
-char *pmu_find_real_name(const char *name)
+/*
+ * Return the CPU number offset to add to a CHA/IMC PMU's cpumask entries so
+ * that they land in sub-NUMA cluster pmu_snc: the first CPU listed in
+ * <sysfs>/devices/system/node/node<pmu_snc>/cpulist. SNC 0 needs no offset.
+ *
+ * Successful results are cached per SNC index. When the node's CPU list
+ * cannot be read, 0 is returned and nothing is cached, so a later call tries
+ * again. An out-of-range pmu_snc also yields 0.
+ */
+static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
{
- if (cached_list)
- return __pmu_find_real_name(name);
-
- setup_pmu_alias_list();
- cached_list = true;
+	static bool checked_cpu_adjust[MAX_SNCS];
+	static int cpu_adjust[MAX_SNCS];
+	struct perf_cpu_map *node_cpus;
+	char node_path[] = "devices/system/node/node0/cpulist";
+
+	/*
+	 * Validate before the arrays are indexed. An assert alone is not
+	 * enough as it is compiled out in NDEBUG builds.
+	 */
+	if (pmu_snc < 0 || pmu_snc >= MAX_SNCS) {
+		pr_debug("Unexpected SNC %d, not adjusting CPUs\n", pmu_snc);
+		return 0;
+	}
+
+	/* Was adjust already computed? */
+	if (checked_cpu_adjust[pmu_snc])
+		return cpu_adjust[pmu_snc];
+
+	/* SNC0 doesn't need an adjust. */
+	if (pmu_snc == 0) {
+		cpu_adjust[0] = 0;
+		checked_cpu_adjust[0] = true;
+		return 0;
+	}
- return __pmu_find_real_name(name);
+	/*
+	 * Use NUMA topology to compute first CPU of the NUMA node, we want to
+	 * adjust CPU 0 to be this and similarly for other CPUs if there is >1
+	 * socket.
+	 */
+	/* The in-place path edit below only works for a single decimal digit. */
+	assert(pmu_snc >= 0 && pmu_snc <= 9);
+	/* Index 24 is the '0' of "node0" in node_path. */
+	node_path[24] += pmu_snc; // Shift node0 to be node<pmu_snc>.
+	node_cpus = read_sysfs_cpu_map(node_path);
+	cpu_adjust[pmu_snc] = perf_cpu_map__cpu(node_cpus, 0).cpu;
+	if (cpu_adjust[pmu_snc] < 0) {
+		pr_debug("Failed to read valid CPU list from <sysfs>/%s\n", node_path);
+		cpu_adjust[pmu_snc] = 0;
+	} else {
+		checked_cpu_adjust[pmu_snc] = true;
+	}
+	perf_cpu_map__put(node_cpus);
+	return cpu_adjust[pmu_snc];
}
-static char *__pmu_find_alias_name(const char *name)
+/*
+ * Replace pmu->cpus with a copy in which every CPU number is shifted by the
+ * offset for the PMU's sub-NUMA cluster (SNC).
+ *
+ * @pmu: an "uncore_cha_<N>" or "uncore_imc_<N>" PMU.
+ * @cha: true to treat @pmu as a CHA PMU, false to treat it as an IMC PMU; this
+ *       selects both the SNC computation and which cache of adjusted maps is
+ *       used.
+ *
+ * Nothing is changed when the first CPU of pmu->cpus is not 0, when the PMU
+ * belongs to SNC 0, or when allocating the adjusted map fails.
+ */
+static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
{
- struct pmu_alias *pmu_alias;
+ // With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
+ // topology. For example, a two socket graniterapids machine may be set
+ // up with 3-way SNC meaning there are 6 NUMA nodes that should be
+ // displayed with --per-node. The cpumask of the CHA and IMC PMUs
+ // reflects per-socket information meaning, for example, uncore_cha_60
+ // on a two socket graniterapids machine with 120 cores per socket will
+ // have a cpumask of "0,120". This cpumask needs adjusting to "40,160"
+ // to reflect that uncore_cha_60 is used for the 2nd SNC of each
+ // socket. Without the adjustment events on uncore_cha_60 will appear in
+ // node 0 and node 3 (in our example 2 socket 3-way set up), but with
+ // the adjustment they will appear in node 1 and node 4. The number of
+ // CHAs is typically larger than the number of cores. The CHA numbers
+ // are assumed to split evenly and in order with respect to core
+ // numbers. There are fewer memory IMC PMUs than cores and mapping is
+ // handled using lookup tables.
+ static struct perf_cpu_map *cha_adjusted[MAX_SNCS];
+ static struct perf_cpu_map *imc_adjusted[MAX_SNCS];
+ struct perf_cpu_map **adjusted = cha ? cha_adjusted : imc_adjusted;
+ int idx, pmu_snc, cpu_adjust;
+ struct perf_cpu cpu;
+ bool alloc;
+
+ // Cpus from the kernel holds first CPU of each socket. e.g. 0,120.
+ if (perf_cpu_map__cpu(pmu->cpus, 0).cpu != 0) {
+ pr_debug("Ignoring cpumask adjust for %s as unexpected first CPU\n", pmu->name);
+ return;
+ }
- list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) {
- if (!strcmp(name, pmu_alias->name))
- return pmu_alias->alias;
+ pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
+ if (pmu_snc == 0) {
+ // No adjustment necessary for the first SNC.
+ return;
}
- return NULL;
-}
-char *pmu_find_alias_name(const char *name)
-{
- if (cached_list)
- return __pmu_find_alias_name(name);
+ // The first PMU seen for an SNC allocates and fills the shared map;
+ // later PMUs of the same SNC and kind reuse it.
+ alloc = adjusted[pmu_snc] == NULL;
+ if (alloc) {
+ // Hold onto the perf_cpu_map globally to avoid recomputation.
+ cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
+ adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
+ if (!adjusted[pmu_snc])
+ return;
+ }
- setup_pmu_alias_list();
- cached_list = true;
+ perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
+ // Compute the new cpu map values or if not allocating, assert
+ // that they match expectations. asserts will be removed to
+ // avoid overhead in NDEBUG builds.
+ if (alloc) {
+ RC_CHK_ACCESS(adjusted[pmu_snc])->map[idx].cpu = cpu.cpu + cpu_adjust;
+ } else if (idx == 0) {
+ // Recover the offset from the cached map so the remaining
+ // entries can be checked against it.
+ cpu_adjust = perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu - cpu.cpu;
+ assert(uncore_cha_imc_compute_cpu_adjust(pmu_snc) == cpu_adjust);
+ } else {
+ assert(perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu ==
+ cpu.cpu + cpu_adjust);
+ }
+ }
- return __pmu_find_alias_name(name);
+ // Drop the PMU's reference to its old map and take a reference on the
+ // shared adjusted map, which stays referenced from the static array.
+ perf_cpu_map__put(pmu->cpus);
+ pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]);
}
-int perf_pmus__num_mem_pmus(void)
+/*
+ * x86-specific PMU initialization for @pmu:
+ *  - marks the Intel PT and Intel BTS PMUs as auxtrace and selectable, and
+ *    installs the default perf_event_attr initializer for Intel PT;
+ *  - on AMD, selects the memory event table for the "ibs_op" PMU only,
+ *    switching to the ldlat variant when the PMU's "ldlat" cap has value "1";
+ *  - otherwise selects the Intel memory event table for core PMUs and, on
+ *    graniterapids, adjusts the cpumask of "uncore_cha_*" and "uncore_imc_*"
+ *    PMUs for sub-NUMA clustering.
+ */
+void perf_pmu__arch_init(struct perf_pmu *pmu)
{
- /* AMD uses IBS OP pmu and not a core PMU for perf mem/c2c */
- if (x86__is_amd_cpu())
- return 1;
+ struct perf_pmu_caps *ldlat_cap;
- /* Intel uses core pmus for perf mem/c2c */
- return perf_pmus__num_core_pmus();
+ if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
+ pmu->auxtrace = true;
+ pmu->selectable = true;
+ pmu->perf_event_attr_init_default = intel_pt_pmu_default_config;
+ }
+ if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
+ pmu->auxtrace = true;
+ pmu->selectable = true;
+ }
+
+ if (x86__is_amd_cpu()) {
+ /* On AMD only the "ibs_op" PMU gets memory events. */
+ if (strcmp(pmu->name, "ibs_op"))
+ return;
+
+ pmu->mem_events = perf_mem_events_amd;
+
+ /* NOTE(review): presumably a zero return means no caps were parsed - confirm. */
+ if (!perf_pmu__caps_parse(pmu))
+ return;
+
+ /* Keep the plain AMD table unless the "ldlat" cap exists with value "1". */
+ ldlat_cap = perf_pmu__get_cap(pmu, "ldlat");
+ if (!ldlat_cap || strcmp(ldlat_cap->value, "1"))
+ return;
+
+ perf_mem_events__loads_ldlat = 0;
+ pmu->mem_events = perf_mem_events_amd_ldlat;
+ } else {
+ if (pmu->is_core) {
+ /* Prefer the aux table when the PMU exposes a "mem-loads-aux" event. */
+ if (perf_pmu__have_event(pmu, "mem-loads-aux"))
+ pmu->mem_events = perf_mem_events_intel_aux;
+ else
+ pmu->mem_events = perf_mem_events_intel;
+ } else if (x86__is_intel_graniterapids()) {
+ if (starts_with(pmu->name, "uncore_cha_"))
+ gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
+ else if (starts_with(pmu->name, "uncore_imc_"))
+ gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+ }
+ }
}