summaryrefslogtreecommitdiff
path: root/tools/power/x86
diff options
context:
space:
mode:
Diffstat (limited to 'tools/power/x86')
-rw-r--r--tools/power/x86/intel-speed-select/Makefile2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c35
-rw-r--r--tools/power/x86/intel-speed-select/isst-core-tpmi.c12
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c31
-rw-r--r--tools/power/x86/intel-speed-select/isst.h3
-rw-r--r--tools/power/x86/turbostat/turbostat.818
-rw-r--r--tools/power/x86/turbostat/turbostat.c610
7 files changed, 559 insertions, 152 deletions
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 7221f2f55e8b..8d3a02a20f3d 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -13,7 +13,7 @@ endif
# Do not use make's built-in rules
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(shell $(CC) -print-sysroot)/usr/include/libnl3
override LDFLAGS += -lnl-genl-3 -lnl-3
ALL_TARGETS := intel-speed-select
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index fadfb02b8611..0ce251b8d466 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -16,7 +16,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.21";
+static const char *version_str = "v1.23";
static const int supported_api_ver = 3;
static struct isst_if_platform_info isst_platform_info;
@@ -26,6 +26,7 @@ static FILE *outf;
static int cpu_model;
static int cpu_stepping;
+static int extended_family;
#define MAX_CPUS_IN_ONE_REQ 512
static short max_target_cpus;
@@ -46,8 +47,9 @@ static int force_online_offline;
static int auto_mode;
static int fact_enable_fail;
static int cgroupv2;
+static int max_pkg_id;
static int max_die_id;
-static int max_punit_id;
+static int max_die_id_package_0;
/* clos related */
static int current_clos = -1;
@@ -142,6 +144,14 @@ int is_icx_platform(void)
return 0;
}
+static int is_dmr_plus_platform(void)
+{
+ if (extended_family == 0x04)
+ return 1;
+
+ return 0;
+}
+
static int update_cpu_model(void)
{
unsigned int ebx, ecx, edx;
@@ -149,6 +159,7 @@ static int update_cpu_model(void)
__cpuid(1, fms, ebx, ecx, edx);
family = (fms >> 8) & 0xf;
+ extended_family = (fms >> 20) & 0x0f;
cpu_model = (fms >> 4) & 0xf;
if (family == 6 || family == 0xf)
cpu_model += ((fms >> 16) & 0xf) << 4;
@@ -557,6 +568,8 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
if (id.pkg < 0 || id.die < 0 || id.punit < 0)
continue;
+ id.die = id.die % (max_die_id_package_0 + 1);
+
valid_mask[id.pkg][id.die] = 1;
if (cpus[id.pkg][id.die][id.punit] == -1)
@@ -564,11 +577,11 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
}
for (i = 0; i < MAX_PACKAGE_COUNT; i++) {
- if (max_die_id == max_punit_id) {
+ if (max_die_id > max_pkg_id) {
for (k = 0; k < MAX_PUNIT_PER_DIE && k < MAX_DIE_PER_PACKAGE; k++) {
id.cpu = cpus[i][k][k];
id.pkg = i;
- id.die = k;
+ id.die = get_physical_die_id(id.cpu);
id.punit = k;
if (isst_is_punit_valid(&id))
callback(&id, arg1, arg2, arg3, arg4);
@@ -586,7 +599,10 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
for (k = 0; k < MAX_PUNIT_PER_DIE; k++) {
id.cpu = cpus[i][j][k];
id.pkg = i;
- id.die = j;
+ if (id.cpu >= 0)
+ id.die = get_physical_die_id(id.cpu);
+ else
+ id.die = id.pkg;
id.punit = k;
if (isst_is_punit_valid(&id))
callback(&id, arg1, arg2, arg3, arg4);
@@ -788,6 +804,8 @@ static void create_cpu_map(void)
cpu_map[i].die_id = die_id;
cpu_map[i].core_id = core_id;
+ if (max_pkg_id < pkg_id)
+ max_pkg_id = pkg_id;
punit_id = 0;
@@ -812,8 +830,8 @@ static void create_cpu_map(void)
if (max_die_id < die_id)
max_die_id = die_id;
- if (max_punit_id < cpu_map[i].punit_id)
- max_punit_id = cpu_map[i].punit_id;
+ if (!pkg_id && max_die_id_package_0 < die_id)
+ max_die_id_package_0 = die_id;
debug_printf(
"map logical_cpu:%d core: %d die:%d pkg:%d punit:%d punit_cpu:%d punit_core:%d\n",
@@ -1509,7 +1527,8 @@ display_result:
usleep(2000);
/* Adjusting uncore freq */
- isst_adjust_uncore_freq(id, tdp_level, &ctdp_level);
+ if (!is_dmr_plus_platform())
+ isst_adjust_uncore_freq(id, tdp_level, &ctdp_level);
fprintf(stderr, "Option is set to online/offline\n");
ctdp_level.core_cpumask_size =
diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
index da53aaa27fc9..4f389e1c0525 100644
--- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c
+++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
@@ -227,6 +227,7 @@ static int tpmi_get_ctdp_control(struct isst_id *id, int config_index,
static int tpmi_get_tdp_info(struct isst_id *id, int config_index,
struct isst_pkg_ctdp_level_info *ctdp_level)
{
+ struct isst_perf_level_fabric_info fabric_info;
struct isst_perf_level_data_info info;
int ret;
@@ -253,6 +254,17 @@ static int tpmi_get_tdp_info(struct isst_id *id, int config_index,
ctdp_level->uncore_p1 = info.p1_fabric_freq_mhz;
ctdp_level->uncore_pm = info.pm_fabric_freq_mhz;
+ fabric_info.socket_id = id->pkg;
+ fabric_info.power_domain_id = id->punit;
+ fabric_info.level = config_index;
+
+ ret = tpmi_process_ioctl(ISST_IF_GET_PERF_LEVEL_FABRIC_INFO, &fabric_info);
+ if (ret != -1) {
+ ctdp_level->uncore1_p0 = fabric_info.p0_fabric_freq_mhz[1];
+ ctdp_level->uncore1_p1 = fabric_info.p1_fabric_freq_mhz[1];
+ ctdp_level->uncore1_pm = fabric_info.pm_fabric_freq_mhz[1];
+ }
+
debug_printf
("cpu:%d ctdp:%d CONFIG_TDP_GET_TDP_INFO tdp_ratio:%d pkg_tdp:%d ctdp_level->t_proc_hot:%d\n",
id->cpu, config_index, ctdp_level->tdp_ratio, ctdp_level->pkg_tdp,
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 07ebd08f3202..e4884eb02837 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -173,7 +173,11 @@ static int print_package_info(struct isst_id *id, FILE *outf)
if (out_format_is_json()) {
if (api_version() > 1) {
- if (id->cpu < 0)
+ if (id->die < 0 && id->cpu < 0)
+ snprintf(header, sizeof(header),
+ "package-%d:die-IO:powerdomain-%d:cpu-None",
+ id->pkg, id->punit);
+ else if (id->cpu < 0)
snprintf(header, sizeof(header),
"package-%d:die-%d:powerdomain-%d:cpu-None",
id->pkg, id->die, id->punit);
@@ -190,7 +194,10 @@ static int print_package_info(struct isst_id *id, FILE *outf)
}
snprintf(header, sizeof(header), "package-%d", id->pkg);
format_and_print(outf, level++, header, NULL);
- snprintf(header, sizeof(header), "die-%d", id->die);
+ if (id->die < 0)
+ snprintf(header, sizeof(header), "die-IO");
+ else
+ snprintf(header, sizeof(header), "die-%d", id->die);
format_and_print(outf, level++, header, NULL);
if (api_version() > 1) {
snprintf(header, sizeof(header), "powerdomain-%d", id->punit);
@@ -453,6 +460,26 @@ void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level
format_and_print(outf, level + 2, header, value);
}
+ if (ctdp_level->uncore1_p1) {
+ snprintf(header, sizeof(header), "uncore-1-frequency-base(MHz)");
+ snprintf(value, sizeof(value), "%d",
+ ctdp_level->uncore1_p1 * isst_get_disp_freq_multiplier());
+ format_and_print(outf, level + 2, header, value);
+ }
+ if (ctdp_level->uncore1_pm) {
+ snprintf(header, sizeof(header), "uncore-1-frequency-min(MHz)");
+ snprintf(value, sizeof(value), "%d",
+ ctdp_level->uncore1_pm * isst_get_disp_freq_multiplier());
+ format_and_print(outf, level + 2, header, value);
+ }
+
+ if (ctdp_level->uncore1_p0) {
+ snprintf(header, sizeof(header), "uncore-1-frequency-max(MHz)");
+ snprintf(value, sizeof(value), "%d",
+ ctdp_level->uncore1_p0 * isst_get_disp_freq_multiplier());
+ format_and_print(outf, level + 2, header, value);
+ }
+
if (ctdp_level->mem_freq) {
snprintf(header, sizeof(header), "max-mem-frequency(MHz)");
snprintf(value, sizeof(value), "%d",
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 39ee75677c2c..960f647cfc2d 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -147,6 +147,9 @@ struct isst_pkg_ctdp_level_info {
int uncore_p0;
int uncore_p1;
int uncore_pm;
+ int uncore1_p0;
+ int uncore1_p1;
+ int uncore1_pm;
int sse_p1;
int avx2_p1;
int avx512_p1;
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 99bf905ade81..fb11108aaf42 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names.
.PP
-\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other".
+\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "pct_idle". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "pct_idle". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle".
.PP
\fB--Dump\fP displays the raw counter values.
.PP
@@ -158,16 +158,22 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
.PP
-\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system.
+\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
-\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved.
+\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
-\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters.
+\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default.
+.PP
+\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default.
+.PP
+\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default.
.PP
\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
.PP
+\fBCoreThr\fP Core Thermal Throttling events during the measurement interval. Note that events since boot can be find in /sys/devices/system/cpu/cpu*/thermal_throttle/*
+.PP
\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
.PP
\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
@@ -198,7 +204,9 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBUncMHz\fP per-package uncore MHz, instantaneous sample.
.PP
-\fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages.
+\fBUMHz1.0\fP per-package uncore MHz for pm_domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages.
+Intel Granite Rapids systems use pm_domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0.
+For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group.
.SH TOO MUCH INFORMATION EXAMPLE
By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters.
This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 8d5011a0bf60..5230e072e414 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -153,7 +153,7 @@ struct msr_counter bic[] = {
{ 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 },
- { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 },
@@ -206,6 +206,7 @@ struct msr_counter bic[] = {
{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -219,7 +220,7 @@ struct msr_counter bic[] = {
#define BIC_TSC_MHz (1ULL << 7)
#define BIC_IRQ (1ULL << 8)
#define BIC_SMI (1ULL << 9)
-#define BIC_sysfs (1ULL << 10)
+#define BIC_cpuidle (1ULL << 10)
#define BIC_CPU_c1 (1ULL << 11)
#define BIC_CPU_c3 (1ULL << 12)
#define BIC_CPU_c6 (1ULL << 13)
@@ -272,17 +273,20 @@ struct msr_counter bic[] = {
#define BIC_Sys_J (1ULL << 60)
#define BIC_NMI (1ULL << 61)
#define BIC_CPU_c1e (1ULL << 62)
-
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
-#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
-#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_pct_idle (1ULL << 63)
+
+#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
+#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
+#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle )
+#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle)
#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
-#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
+#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle)
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC;
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
@@ -535,7 +539,7 @@ enum rapl_msrs {
#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
-#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY)
+#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLICY)
#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
@@ -835,6 +839,23 @@ static const struct platform_features spr_features = {
.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
+static const struct platform_features dmr_features = {
+ .has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control,
+ .has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt,
+ .has_nhm_msrs = spr_features.has_nhm_msrs,
+ .has_config_tdp = spr_features.has_config_tdp,
+ .bclk_freq = spr_features.bclk_freq,
+ .supported_cstates = spr_features.supported_cstates,
+ .cst_limit = spr_features.cst_limit,
+ .has_msr_core_c1_res = spr_features.has_msr_core_c1_res,
+ .has_msr_module_c6_res_ms = 1, /* DMR has Dual Core Module and MC6 MSR */
+ .has_irtl_msrs = spr_features.has_irtl_msrs,
+ .has_cst_prewake_bit = spr_features.has_cst_prewake_bit,
+ .has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit,
+ .trl_msrs = spr_features.trl_msrs,
+ .rapl_msrs = 0, /* DMR does not have RAPL MSRs */
+};
+
static const struct platform_features srf_features = {
.has_msr_misc_feature_control = 1,
.has_msr_misc_pwr_mgmt = 1,
@@ -1024,12 +1045,14 @@ static const struct platform_data turbostat_pdata[] = {
{ INTEL_EMERALDRAPIDS_X, &spr_features },
{ INTEL_GRANITERAPIDS_X, &spr_features },
{ INTEL_GRANITERAPIDS_D, &spr_features },
+ { INTEL_PANTHERCOVE_X, &dmr_features },
{ INTEL_LAKEFIELD, &cnl_features },
{ INTEL_ALDERLAKE, &adl_features },
{ INTEL_ALDERLAKE_L, &adl_features },
{ INTEL_RAPTORLAKE, &adl_features },
{ INTEL_RAPTORLAKE_P, &adl_features },
{ INTEL_RAPTORLAKE_S, &adl_features },
+ { INTEL_BARTLETTLAKE, &adl_features },
{ INTEL_METEORLAKE, &adl_features },
{ INTEL_METEORLAKE_L, &adl_features },
{ INTEL_ARROWLAKE_H, &adl_features },
@@ -1056,7 +1079,7 @@ static const struct platform_data turbostat_pdata[] = {
* Missing support for
* INTEL_ICELAKE
* INTEL_ATOM_SILVERMONT_MID
- * INTEL_ATOM_AIRMONT_MID
+ * INTEL_ATOM_SILVERMONT_MID2
* INTEL_ATOM_AIRMONT_NP
*/
{ 0, NULL },
@@ -1068,7 +1091,6 @@ void probe_platform_features(unsigned int family, unsigned int model)
{
int i;
-
if (authentic_amd || hygon_genuine) {
/* fallback to default features on unsupported models */
force_load++;
@@ -1102,8 +1124,7 @@ end:
if (platform)
return;
- fprintf(stderr, "Unsupported platform detected.\n"
- "\tSee RUN THE LATEST VERSION on turbostat(8)\n");
+ fprintf(stderr, "Unsupported platform detected.\n\tSee RUN THE LATEST VERSION on turbostat(8)\n");
exit(1);
}
@@ -1121,9 +1142,10 @@ end:
int backwards_count;
char *progname;
-#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
+#define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
+size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize,
+ cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
@@ -2136,13 +2158,20 @@ int get_msr_fd(int cpu)
if (fd)
return fd;
-
+#if defined(ANDROID)
+ sprintf(pathname, "/dev/msr%d", cpu);
+#else
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+#endif
fd = open(pathname, O_RDONLY);
if (fd < 0)
+#if defined(ANDROID)
+ err(-1, "%s open failed, try chown or chmod +r /dev/msr*, "
+ "or run with --no-msr, or run as root", pathname);
+#else
err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
"or run with --no-msr, or run as root", pathname);
-
+#endif
fd_percpu[cpu] = fd;
return fd;
@@ -2211,23 +2240,52 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
-int probe_msr(int cpu, off_t offset)
+int add_msr_counter(int cpu, off_t offset)
{
ssize_t retval;
unsigned long long value;
- assert(!no_msr);
+ if (no_msr)
+ return -1;
+
+ if (!offset)
+ return -1;
retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
- /*
- * Expect MSRs to accumulate some non-zero value since the system was powered on.
- * Treat zero as a read failure.
- */
- if (retval != sizeof(value) || value == 0)
- return 1;
+ /* if the read failed, the probe fails */
+ if (retval != sizeof(value))
+ return -1;
- return 0;
+ if (value == 0)
+ return 0;
+
+ return 1;
+}
+
+int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai)
+{
+ int ret;
+
+ if (!(platform->rapl_msrs & cai->feature_mask))
+ return -1;
+
+ ret = add_msr_counter(cpu, cai->msr);
+ if (ret < 0)
+ return -1;
+
+ switch (cai->rci_index) {
+ case RAPL_RCI_INDEX_ENERGY_PKG:
+ case RAPL_RCI_INDEX_ENERGY_CORES:
+ case RAPL_RCI_INDEX_DRAM:
+ case RAPL_RCI_INDEX_GFX:
+ case RAPL_RCI_INDEX_ENERGY_PLATFORM:
+ if (ret == 0)
+ return 1;
+ }
+
+ /* PKG,DRAM_PERF_STATUS MSRs, can return any value */
+ return 1;
}
/* Convert CPU ID to domain ID for given added perf counter. */
@@ -2314,8 +2372,7 @@ void help(void)
" degrees Celsius\n"
" -h, --help\n"
" print this help message\n"
- " -v, --version\n"
- " print version information\n\nFor more help, run \"man turbostat\"\n");
+ " -v, --version\n\t\tprint version information\n\nFor more help, run \"man turbostat\"\n");
}
/*
@@ -2345,16 +2402,25 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
retval |= ~0;
break;
} else if (!strcmp(name_list, "topology")) {
- retval |= BIC_TOPOLOGY;
+ retval |= BIC_GROUP_TOPOLOGY;
break;
} else if (!strcmp(name_list, "power")) {
- retval |= BIC_THERMAL_PWR;
+ retval |= BIC_GROUP_THERMAL_PWR;
break;
} else if (!strcmp(name_list, "idle")) {
- retval |= BIC_IDLE;
+ retval |= BIC_GROUP_IDLE;
+ break;
+ } else if (!strcmp(name_list, "swidle")) {
+ retval |= BIC_GROUP_SW_IDLE;
+ break;
+ } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */
+ retval |= BIC_GROUP_SW_IDLE;
+ break;
+ } else if (!strcmp(name_list, "hwidle")) {
+ retval |= BIC_GROUP_HW_IDLE;
break;
} else if (!strcmp(name_list, "frequency")) {
- retval |= BIC_FREQUENCY;
+ retval |= BIC_GROUP_FREQUENCY;
break;
} else if (!strcmp(name_list, "other")) {
retval |= BIC_OTHER;
@@ -2363,6 +2429,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
}
if (i == MAX_BIC) {
+ fprintf(stderr, "deferred %s\n", name_list);
if (mode == SHOW_LIST) {
deferred_add_names[deferred_add_index++] = name_list;
if (deferred_add_index >= MAX_DEFERRED) {
@@ -2621,7 +2688,7 @@ void print_header(char *delim)
if (DO_BIC(BIC_SYS_LPI))
outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
- if (platform->rapl_msrs && !rapl_joules) {
+ if (!rapl_joules) {
if (DO_BIC(BIC_PkgWatt))
outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
@@ -2634,7 +2701,7 @@ void print_header(char *delim)
outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
if (DO_BIC(BIC_RAM__))
outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
- } else if (platform->rapl_msrs && rapl_joules) {
+ } else {
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
@@ -3476,7 +3543,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->c6 = new->c6 - old->c6;
old->c7 = new->c7 - old->c7;
old->core_temp_c = new->core_temp_c;
- old->core_throt_cnt = new->core_throt_cnt;
+ old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
@@ -3920,7 +3987,6 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
if (average.threads.nmi_count > 9999999)
sums_need_wide_columns = 1;
-
average.cores.c3 /= topo.allowed_cores;
average.cores.c6 /= topo.allowed_cores;
average.cores.c7 /= topo.allowed_cores;
@@ -4743,6 +4809,37 @@ unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
return (value & value_mask) >> value_shift;
}
+/* Rapl domain enumeration helpers */
+static inline int get_rapl_num_domains(void)
+{
+ int num_packages = topo.max_package_id + 1;
+ int num_cores_per_package;
+ int num_cores;
+
+ if (!platform->has_per_core_rapl)
+ return num_packages;
+
+ num_cores_per_package = topo.max_core_id + 1;
+ num_cores = num_cores_per_package * num_packages;
+
+ return num_cores;
+}
+
+static inline int get_rapl_domain_id(int cpu)
+{
+ int nr_cores_per_package = topo.max_core_id + 1;
+ int rapl_core_id;
+
+ if (!platform->has_per_core_rapl)
+ return cpus[cpu].physical_package_id;
+
+ /* Compute the system-wide unique core-id for @cpu */
+ rapl_core_id = cpus[cpu].physical_core_id;
+ rapl_core_id += cpus[cpu].physical_package_id * nr_cores_per_package;
+
+ return rapl_core_id;
+}
+
/*
* get_counters(...)
* migrate to cpu
@@ -4798,7 +4895,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
goto done;
if (platform->has_per_core_rapl) {
- status = get_rapl_counters(cpu, c->core_id, c, p);
+ status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p);
if (status != 0)
return status;
}
@@ -4864,7 +4961,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
p->sys_lpi = cpuidle_cur_sys_lpi_us;
if (!platform->has_per_core_rapl) {
- status = get_rapl_counters(cpu, p->package_id, c, p);
+ status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p);
if (status != 0)
return status;
}
@@ -6030,6 +6127,7 @@ int snapshot_graphics(int idx)
int retval;
rewind(gfx_info[idx].fp);
+ fflush(gfx_info[idx].fp);
switch (idx) {
case GFX_rc6:
@@ -6452,8 +6550,11 @@ void check_dev_msr()
if (no_msr)
return;
-
+#if defined(ANDROID)
+ sprintf(pathname, "/dev/msr%d", base_cpu);
+#else
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+#endif
if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
no_msr = 1;
@@ -6503,7 +6604,11 @@ void check_msr_permission(void)
failed += check_for_cap_sys_rawio();
/* test file permissions */
+#if defined(ANDROID)
+ sprintf(pathname, "/dev/msr%d", base_cpu);
+#else
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+#endif
if (euidaccess(pathname, R_OK)) {
failed++;
}
@@ -6703,7 +6808,20 @@ static void probe_intel_uncore_frequency_cluster(void)
sprintf(path, "%s/current_freq_khz", path_base);
sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);
- add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
+ /*
+ * Once add_couter() is called, that counter is always read
+ * and reported -- So it is effectively (enabled & present).
+ * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz)
+ * is (enabled). Since we are in this routine, we
+ * know we will not probe and set (present) the legacy counter.
+ *
+ * This allows "--show/--hide UncMHz" to be effective for
+ * the clustered MHz counters, as a group.
+ */
+ if BIC_IS_ENABLED
+ (BIC_UNCORE_MHZ)
+ add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0,
+ package_id);
if (quiet)
continue;
@@ -6775,17 +6893,21 @@ static void probe_graphics(void)
else
goto next;
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms",
+ gt0_is_gt ? GFX_rc6 : SAM_mc6);
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz);
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq",
+ gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz);
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms",
+ gt0_is_gt ? SAM_mc6 : GFX_rc6);
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz);
- set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz);
+ set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq",
+ gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz);
goto end;
}
@@ -7221,6 +7343,9 @@ void rapl_probe_intel(void)
else
bic_enabled &= ~bic_joules_bits;
+ if (!platform->rapl_msrs || no_msr)
+ return;
+
if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
bic_enabled &= ~BIC_PKG__;
if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
@@ -7271,6 +7396,9 @@ void rapl_probe_amd(void)
else
bic_enabled &= ~bic_joules_bits;
+ if (!platform->rapl_msrs || no_msr)
+ return;
+
if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
return;
@@ -7297,6 +7425,158 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
return;
}
+static int fread_int(char *path, int *val)
+{
+ FILE *filep;
+ int ret;
+
+ filep = fopen(path, "r");
+ if (!filep)
+ return -1;
+
+ ret = fscanf(filep, "%d", val);
+ fclose(filep);
+ return ret;
+}
+
+static int fread_ull(char *path, unsigned long long *val)
+{
+ FILE *filep;
+ int ret;
+
+ filep = fopen(path, "r");
+ if (!filep)
+ return -1;
+
+ ret = fscanf(filep, "%llu", val);
+ fclose(filep);
+ return ret;
+}
+
+static int fread_str(char *path, char *buf, int size)
+{
+ FILE *filep;
+ int ret;
+ char *cp;
+
+ filep = fopen(path, "r");
+ if (!filep)
+ return -1;
+
+ ret = fread(buf, 1, size, filep);
+ fclose(filep);
+
+ /* replace '\n' with '\0' */
+ cp = strchr(buf, '\n');
+ if (cp != NULL)
+ *cp = '\0';
+
+ return ret;
+}
+
+#define PATH_RAPL_SYSFS "/sys/class/powercap"
+
+static int dump_one_domain(char *domain_path)
+{
+ char path[PATH_MAX];
+ char str[PATH_MAX];
+ unsigned long long val;
+ int constraint;
+ int enable;
+ int ret;
+
+ snprintf(path, PATH_MAX, "%s/name", domain_path);
+ ret = fread_str(path, str, PATH_MAX);
+ if (ret <= 0)
+ return -1;
+
+ fprintf(outf, "%s: %s", domain_path + strlen(PATH_RAPL_SYSFS) + 1, str);
+
+ snprintf(path, PATH_MAX, "%s/enabled", domain_path);
+ ret = fread_int(path, &enable);
+ if (ret <= 0)
+ return -1;
+
+ if (!enable) {
+ fputs(" disabled\n", outf);
+ return 0;
+ }
+
+ for (constraint = 0;; constraint++) {
+ snprintf(path, PATH_MAX, "%s/constraint_%d_time_window_us", domain_path, constraint);
+ ret = fread_ull(path, &val);
+ if (ret <= 0)
+ break;
+
+ if (val > 1000000)
+ fprintf(outf, " %0.1fs", (double)val / 1000000);
+ else if (val > 1000)
+ fprintf(outf, " %0.1fms", (double)val / 1000);
+ else
+ fprintf(outf, " %0.1fus", (double)val);
+
+ snprintf(path, PATH_MAX, "%s/constraint_%d_power_limit_uw", domain_path, constraint);
+ ret = fread_ull(path, &val);
+ if (ret > 0 && val)
+ fprintf(outf, ":%lluW", val / 1000000);
+
+ snprintf(path, PATH_MAX, "%s/constraint_%d_max_power_uw", domain_path, constraint);
+ ret = fread_ull(path, &val);
+ if (ret > 0 && val)
+ fprintf(outf, ",max:%lluW", val / 1000000);
+ }
+ fputc('\n', outf);
+
+ return 0;
+}
+
+static int print_rapl_sysfs(void)
+{
+ DIR *dir, *cdir;
+ struct dirent *entry, *centry;
+ char path[PATH_MAX];
+ char str[PATH_MAX];
+
+ if ((dir = opendir(PATH_RAPL_SYSFS)) == NULL) {
+ warn("open %s failed", PATH_RAPL_SYSFS);
+ return 1;
+ }
+
+ while ((entry = readdir(dir)) != NULL) {
+ if (strlen(entry->d_name) > 100)
+ continue;
+
+ if (strncmp(entry->d_name, "intel-rapl", strlen("intel-rapl")))
+ continue;
+
+ snprintf(path, PATH_MAX, "%s/%s/name", PATH_RAPL_SYSFS, entry->d_name);
+
+ /* Parse top level domains first, including package and psys */
+ fread_str(path, str, PATH_MAX);
+ if (strncmp(str, "package", strlen("package")) && strncmp(str, "psys", strlen("psys")))
+ continue;
+
+ snprintf(path, PATH_MAX, "%s/%s", PATH_RAPL_SYSFS, entry->d_name);
+ if ((cdir = opendir(path)) == NULL) {
+ perror("opendir() error");
+ return 1;
+ }
+
+ dump_one_domain(path);
+
+ while ((centry = readdir(cdir)) != NULL) {
+ if (strncmp(centry->d_name, "intel-rapl", strlen("intel-rapl")))
+ continue;
+ snprintf(path, PATH_MAX, "%s/%s/%s", PATH_RAPL_SYSFS, entry->d_name, centry->d_name);
+ dump_one_domain(path);
+ }
+ closedir(cdir);
+ }
+
+ closedir(dir);
+ return 0;
+}
+
int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
@@ -7423,9 +7703,6 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
*/
void probe_rapl(void)
{
- if (!platform->rapl_msrs || no_msr)
- return;
-
if (genuine_intel)
rapl_probe_intel();
if (authentic_amd || hygon_genuine)
@@ -7434,6 +7711,11 @@ void probe_rapl(void)
if (quiet)
return;
+ print_rapl_sysfs();
+
+ if (!platform->rapl_msrs || no_msr)
+ return;
+
for_all_cpus(print_rapl, ODD_COUNTERS);
}
@@ -7766,44 +8048,42 @@ static int has_instr_count_access(void)
return has_access;
}
-int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
- double *scale_, enum rapl_unit *unit_)
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale_, enum rapl_unit *unit_)
{
+ int ret = -1;
+
if (no_perf)
return -1;
+ if (!cai->perf_name)
+ return -1;
+
const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name);
if (scale == 0.0)
- return -1;
+ goto end;
const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
if (unit == RAPL_UNIT_INVALID)
- return -1;
+ goto end;
const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name);
- const int fd_counter =
- open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
- if (fd_counter == -1)
- return -1;
+ ret = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
+ if (ret == -1)
+ goto end;
/* If it's the first counter opened, make it a group descriptor */
if (rci->fd_perf == -1)
- rci->fd_perf = fd_counter;
+ rci->fd_perf = ret;
*scale_ = scale;
*unit_ = unit;
- return fd_counter;
-}
-
-int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
- double *scale, enum rapl_unit *unit)
-{
- int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
+end:
if (debug >= 2)
fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
@@ -7828,7 +8108,7 @@ void linux_perf_init(void)
void rapl_perf_init(void)
{
- const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1;
+ const unsigned int num_domains = get_rapl_num_domains();
bool *domain_visited = calloc(num_domains, sizeof(bool));
rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
@@ -7861,6 +8141,9 @@ void rapl_perf_init(void)
enum rapl_unit unit;
unsigned int next_domain;
+ if (!BIC_IS_ENABLED(cai->bic))
+ continue;
+
memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
@@ -7869,8 +8152,7 @@ void rapl_perf_init(void)
continue;
/* Skip already seen and handled RAPL domains */
- next_domain =
- platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
+ next_domain = get_rapl_domain_id(cpu);
assert(next_domain < num_domains);
@@ -7884,27 +8166,37 @@ void rapl_perf_init(void)
struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
- /* Check if the counter is enabled and accessible */
- if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
+ /*
+ * rapl_counter_arch_infos[] can have multiple entries describing the same
+ * counter, due to the difference from different platforms/Vendors.
+ * E.g. rapl_counter_arch_infos[0] and rapl_counter_arch_infos[1] share the
+ * same perf_subsys and perf_name, but with different MSR address.
+ * rapl_counter_arch_infos[0] is for Intel and rapl_counter_arch_infos[1]
+ * is for AMD.
+ * In this case, it is possible that multiple rapl_counter_arch_infos[]
+ * entries are probed just because their perf/msr is duplicate and valid.
+ *
+ * Thus need a check to avoid re-probe the same counters.
+ */
+ if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE)
+ break;
- /* Use perf API for this counter */
- if (!no_perf && cai->perf_name
- && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
- rci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
- rci->scale[cai->rci_index] = scale * cai->compat_scale;
- rci->unit[cai->rci_index] = unit;
- rci->flags[cai->rci_index] = cai->flags;
-
- /* Use MSR for this counter */
- } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
- rci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
- rci->msr[cai->rci_index] = cai->msr;
- rci->msr_mask[cai->rci_index] = cai->msr_mask;
- rci->msr_shift[cai->rci_index] = cai->msr_shift;
- rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
- rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
- rci->flags[cai->rci_index] = cai->flags;
- }
+ /* Use perf API for this counter */
+ if (add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
+ rci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
+ rci->scale[cai->rci_index] = scale * cai->compat_scale;
+ rci->unit[cai->rci_index] = unit;
+ rci->flags[cai->rci_index] = cai->flags;
+
+ /* Use MSR for this counter */
+ } else if (add_rapl_msr_counter(cpu, cai) >= 0) {
+ rci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
+ rci->msr[cai->rci_index] = cai->msr;
+ rci->msr_mask[cai->rci_index] = cai->msr_mask;
+ rci->msr_shift[cai->rci_index] = cai->msr_shift;
+ rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
+ rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
+ rci->flags[cai->rci_index] = cai->flags;
}
if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE)
@@ -7937,65 +8229,63 @@ int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *gro
return NULL;
}
-int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
+int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
{
+ int ret = -1;
+
if (no_perf)
return -1;
+ if (!cai->perf_name)
+ return -1;
+
int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys);
if (pfd_group == NULL)
- return -1;
+ goto end;
const unsigned int type = read_perf_type(cai->perf_subsys);
const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);
- const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP);
+ ret = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP);
- if (fd_counter == -1)
- return -1;
+ if (ret == -1)
+ goto end;
/* If it's the first counter opened, make it a group descriptor */
if (*pfd_group == -1)
- *pfd_group = fd_counter;
-
- return fd_counter;
-}
-
-int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
-{
- int ret = add_cstate_perf_counter_(cpu, cci, cai);
+ *pfd_group = ret;
+end:
if (debug >= 2)
fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
return ret;
}
-int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
+int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
{
+ int ret = -1;
+
if (no_perf)
return -1;
+ if (!cai->perf_name)
+ return -1;
+
const unsigned int type = read_perf_type(cai->perf_subsys);
const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);
- const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP);
+ ret = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP);
- if (fd_counter == -1)
- return -1;
+ if (ret == -1)
+ goto end;
/* If it's the first counter opened, make it a group descriptor */
if (cci->fd_perf == -1)
- cci->fd_perf = fd_counter;
-
- return fd_counter;
-}
-
-int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
-{
- int ret = add_msr_perf_counter_(cpu, cci, cai);
+ cci->fd_perf = ret;
+end:
if (debug)
fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu);
@@ -8029,12 +8319,12 @@ void msr_perf_init_(void)
if (cai->needed) {
/* Use perf API for this counter */
- if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) {
+ if (add_msr_perf_counter(cpu, cci, cai) != -1) {
cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
cai->present = true;
/* User MSR for this counter */
- } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ } else if (add_msr_counter(cpu, cai->msr) >= 0) {
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
cci->msr[cai->rci_index] = cai->msr;
cci->msr_mask[cai->rci_index] = cai->msr_mask;
@@ -8142,13 +8432,13 @@ void cstate_perf_init_(bool soft_c1)
if (counter_needed && counter_supported) {
/* Use perf API for this counter */
- if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) {
+ if (add_cstate_perf_counter(cpu, cci, cai) != -1) {
cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
/* User MSR for this counter */
- } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit
- && probe_msr(cpu, cai->msr) == 0) {
+ } else if (pkg_cstate_limit >= cai->pkg_cstate_limit
+ && add_msr_counter(cpu, cai->msr) >= 0) {
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
cci->msr[cai->rci_index] = cai->msr;
}
@@ -9009,15 +9299,14 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo)
perf_device = "cpu_atom";
break;
- default: /* Don't change, we will probably fail and report a problem soon. */
+ default: /* Don't change, we will probably fail and report a problem soon. */
break;
}
}
perf_type = read_perf_type(perf_device);
if (perf_type == (unsigned int)-1) {
- warnx("%s: perf/%s/%s: failed to read %s",
- __func__, perf_device, pinfo->event, "type");
+ warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "type");
continue;
}
@@ -9119,7 +9408,7 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
return NULL;
}
- for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
+ for (; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1)
break;
@@ -9559,7 +9848,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2025.06.08 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -9592,7 +9881,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name)
for (mp = head; mp; mp = mp->next) {
if (debug)
fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name);
- if (!strncmp(name, mp->name, strlen(mp->name)))
+ if (!strcmp(name, mp->name))
return mp;
}
return NULL;
@@ -10015,7 +10304,7 @@ void parse_add_command_pmt(char *add_command)
unsigned int lsb;
unsigned int msb;
unsigned int guid;
- unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */
+ unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */
unsigned int domain_id;
enum counter_scope scope = 0;
enum pmt_datatype type = PMT_TYPE_RAW;
@@ -10239,14 +10528,18 @@ int is_deferred_skip(char *name)
return 0;
}
-void probe_sysfs(void)
+void probe_cpuidle_residency(void)
{
char path[64];
char name_buf[16];
FILE *input;
int state;
+ int min_state = 1024, max_state = 0;
char *sp;
+ if (!DO_BIC(BIC_pct_idle))
+ return;
+
for (state = 10; state >= 0; --state) {
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
@@ -10269,14 +10562,32 @@ void probe_sysfs(void)
sprintf(path, "cpuidle/state%d/time", state);
- if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf))
continue;
if (is_deferred_skip(name_buf))
continue;
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
+
+ if (state > max_state)
+ max_state = state;
+ if (state < min_state)
+ min_state = state;
}
+}
+
+void probe_cpuidle_counts(void)
+{
+ char path[64];
+ char name_buf[16];
+ FILE *input;
+ int state;
+ int min_state = 1024, max_state = 0;
+ char *sp;
+
+ if (!DO_BIC(BIC_cpuidle))
+ return;
for (state = 10; state >= 0; --state) {
@@ -10286,26 +10597,52 @@ void probe_sysfs(void)
continue;
if (!fgets(name_buf, sizeof(name_buf), input))
err(1, "%s: failed to read file", path);
- /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
- sp = strchr(name_buf, '-');
- if (!sp)
- sp = strchrnul(name_buf, '\n');
- *sp = '\0';
fclose(input);
remove_underbar(name_buf);
- sprintf(path, "cpuidle/state%d/usage", state);
-
- if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf))
continue;
if (is_deferred_skip(name_buf))
continue;
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+
+ /*
+ * The 'below' sysfs file always contains 0 for the deepest state (largest index),
+ * do not add it.
+ */
+ if (state != max_state) {
+ /*
+ * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for
+ * the last state, so do not add it.
+ */
+
+ *sp = '+';
+ *(sp + 1) = '\0';
+ sprintf(path, "cpuidle/state%d/below", state);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
+ }
+
+ *sp = '\0';
+ sprintf(path, "cpuidle/state%d/usage", state);
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
- }
+ /*
+ * The 'above' sysfs file always contains 0 for the shallowest state (smallest
+ * index), do not add it.
+ */
+ if (state != min_state) {
+ *sp = '-';
+ *(sp + 1) = '\0';
+ sprintf(path, "cpuidle/state%d/above", state);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
+ }
+ }
}
/*
@@ -10549,7 +10886,8 @@ skip_cgroup_setting:
print_bootcmd();
}
- probe_sysfs();
+ probe_cpuidle_residency();
+ probe_cpuidle_counts();
if (!getuid())
set_rlimit();