diff options
Diffstat (limited to 'tools/power')
39 files changed, 1986 insertions, 1019 deletions
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index 68b9ea86b86c..af0e558f231c 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -3,7 +3,7 @@ * * Module Name: cmfsize - Common get file size function * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 6a0cdba6fdfd..3d63626d80e7 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -3,7 +3,7 @@ * * Module Name: getopt * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 9d70d8c945af..de93067a5da3 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -3,7 +3,7 @@ * * Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ @@ -19,7 +19,7 @@ ACPI_MODULE_NAME("oslinuxtbl") typedef struct osl_table_info { struct osl_table_info *next; u32 instance; - char signature[ACPI_NAMESEG_SIZE]; + char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; } osl_table_info; @@ -995,7 +995,7 @@ static acpi_status osl_list_customized_tables(char *directory) { void *table_dir; u32 instance; - char temp_name[ACPI_NAMESEG_SIZE]; + char temp_name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; char *filename; acpi_status status = AE_OK; @@ -1312,7 +1312,7 @@ osl_get_customized_table(char *pathname, { void *table_dir; u32 current_instance = 0; - char temp_name[ACPI_NAMESEG_SIZE]; + char temp_name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; char table_filename[PATH_MAX]; char *filename; acpi_status status; diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index 39f3bffd9355..b9bb83116549 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -3,7 +3,7 @@ * * Module Name: osunixdir - Unix directory access interfaces * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 2b7d56252684..b93ebc9371a5 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -3,7 +3,7 @@ * * Module Name: osunixmap - Unix OSL for file mappings * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 46429417c71a..36f27491713c 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -3,7 +3,7 @@ * * Module Name: osunixxf - UNIX OSL interfaces * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index 643e3e722340..fe0d23c4f2ed 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -3,7 +3,7 @@ * * Module Name: acpidump.h - Include file for acpi_dump utility * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 0742b00b61a1..7a6223aa703c 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -3,7 +3,7 @@ * * Module Name: apdump - Dump routines for ACPI tables (acpidump) * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ @@ -86,9 +86,10 @@ u8 ap_is_valid_checksum(struct acpi_table_header *table) if (ACPI_FAILURE(status)) { fprintf(stderr, "%4.4s: Warning: wrong checksum in table\n", table->signature); + return (FALSE); } - return (AE_OK); + return (TRUE); } /****************************************************************************** diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 13817f9112c0..d6b8a201480b 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -3,7 +3,7 @@ * * Module Name: apfiles - File-related functions for acpidump utility * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 666a9675e743..9f3850e3af5b 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -3,7 +3,7 @@ * * Module Name: apmain - Main module for the acpidump utility * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c index 44a9ecbd91e8..4d9b0177c312 100644 --- a/tools/power/acpi/tools/pfrut/pfrut.c +++ b/tools/power/acpi/tools/pfrut/pfrut.c @@ -222,6 +222,7 @@ int main(int argc, char *argv[]) fd_update_log = open("/dev/acpi_pfr_telemetry0", O_RDWR); if (fd_update_log < 0) { printf("PFRT device not supported - Quit...\n"); + close(fd_update); return 1; } @@ -265,7 +266,8 @@ int main(int argc, char *argv[]) printf("chunk2_size:%d\n", data_info.chunk2_size); printf("rollover_cnt:%d\n", data_info.rollover_cnt); printf("reset_cnt:%d\n", data_info.reset_cnt); - + close(fd_update); + close(fd_update_log); return 0; } @@ -358,6 +360,7 @@ int main(int argc, char *argv[]) if (ret == -1) { perror("Failed to load capsule file"); + munmap(addr_map_capsule, st.st_size); close(fd_capsule); close(fd_update); close(fd_update_log); @@ -420,7 +423,7 @@ int main(int argc, char *argv[]) if (p_mmap == MAP_FAILED) { perror("mmap error."); close(fd_update_log); - + free(log_buf); return 1; } diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore index 5113d5a7aee0..7677329c42a6 100644 --- a/tools/power/cpupower/.gitignore +++ b/tools/power/cpupower/.gitignore @@ -27,6 +27,3 @@ debug/i386/intel_gsic debug/i386/powernow-k8-decode debug/x86_64/centrino-decode debug/x86_64/powernow-k8-decode - -# Clang's compilation database file -compile_commands.json diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 835123add0ed..a1df9196dc45 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -2,6 +2,7 @@ # Makefile for cpupower # # Copyright (C) 2005,2006 Dominik Brodowski <linux@dominikbrodowski.net> +# Copyright (C) 2025 Francesco Poli <invernomuto@paranoici.org> # # Based largely on the Makefile for udev by: # @@ -36,9 +37,7 @@ NLS ?= true # cpufreq-bench benchmarking tool CPUFREQ_BENCH ?= true -# Do not build libraries, but build the code in statically -# Libraries are still built, otherwise the Makefile code would -# be rather ugly. +# Build the code, including libraries, statically. export STATIC ?= false # Prefix to the directories we're installing to @@ -71,6 +70,8 @@ bindir ?= /usr/bin sbindir ?= /usr/sbin mandir ?= /usr/man libdir ?= /usr/lib +libexecdir ?= /usr/libexec +unitdir ?= /usr/lib/systemd/system includedir ?= /usr/include localedir ?= /usr/share/locale docdir ?= /usr/share/doc/packages/cpupower @@ -83,6 +84,7 @@ CP = cp -fpR INSTALL = /usr/bin/install -c INSTALL_PROGRAM = ${INSTALL} INSTALL_DATA = ${INSTALL} -m 644 +SETPERM_DATA = chmod 644 #bash completion scripts get sourced and so they should be rw only. INSTALL_SCRIPT = ${INSTALL} -m 644 @@ -203,14 +205,25 @@ $(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS) $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c -$(OUTPUT)libcpupower.so.$(LIB_VER): $(LIB_OBJS) +ifeq ($(strip $(STATIC)),true) +LIBCPUPOWER := libcpupower.a +else +LIBCPUPOWER := libcpupower.so.$(LIB_VER) +endif + +$(OUTPUT)$(LIBCPUPOWER): $(LIB_OBJS) +ifeq ($(strip $(STATIC)),true) + $(ECHO) " AR " $@ + $(QUIET) $(AR) rcs $@ $(LIB_OBJS) +else $(ECHO) " LD " $@ $(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \ -Wl,-soname,libcpupower.so.$(LIB_MAJ) $(LIB_OBJS) @ln -sf $(@F) $(OUTPUT)libcpupower.so @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MAJ) +endif -libcpupower: $(OUTPUT)libcpupower.so.$(LIB_VER) +libcpupower: $(OUTPUT)$(LIBCPUPOWER) # Let all .o files depend on its .c file and all headers # Might be worth to put this into utils/Makefile at some point of time @@ -220,7 +233,7 @@ $(OUTPUT)%.o: %.c $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c -$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_VER) +$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)$(LIBCPUPOWER) $(ECHO) " CC " $@ ifeq ($(strip $(STATIC)),true) $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lrt -lpci -L$(OUTPUT) -o $@ @@ -265,7 +278,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot done; endif -compile-bench: $(OUTPUT)libcpupower.so.$(LIB_VER) +compile-bench: $(OUTPUT)$(LIBCPUPOWER) @V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) # we compile into subdirectories. if the target directory is not the @@ -283,6 +296,7 @@ clean: -find $(OUTPUT) \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \ | xargs rm -f -rm -f $(OUTPUT)cpupower + -rm -f $(OUTPUT)libcpupower.a -rm -f $(OUTPUT)libcpupower.so* -rm -rf $(OUTPUT)po/*.gmo -rm -rf $(OUTPUT)po/*.pot @@ -291,7 +305,11 @@ clean: install-lib: libcpupower $(INSTALL) -d $(DESTDIR)${libdir} +ifeq ($(strip $(STATIC)),true) + $(CP) $(OUTPUT)libcpupower.a $(DESTDIR)${libdir}/ +else $(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/ +endif $(INSTALL) -d $(DESTDIR)${includedir} $(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h $(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h @@ -302,6 +320,13 @@ install-tools: $(OUTPUT)cpupower $(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir} $(INSTALL) -d $(DESTDIR)${bash_completion_dir} $(INSTALL_SCRIPT) cpupower-completion.sh '$(DESTDIR)${bash_completion_dir}/cpupower' + $(INSTALL) -d $(DESTDIR)${confdir} + $(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}' + $(INSTALL) -d $(DESTDIR)${libexecdir} + $(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower' + $(INSTALL) -d $(DESTDIR)${unitdir} + sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${unitdir}/cpupower.service' + $(SETPERM_DATA) '$(DESTDIR)${unitdir}/cpupower.service' install-man: $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1 @@ -325,17 +350,16 @@ install-bench: compile-bench @#DESTDIR must be set from outside to survive @sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) install -ifeq ($(strip $(STATIC)),true) -install: all install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH) -else install: all install-lib install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH) -endif uninstall: - rm -f $(DESTDIR)${libdir}/libcpupower.* - rm -f $(DESTDIR)${includedir}/cpufreq.h - rm -f $(DESTDIR)${includedir}/cpuidle.h - rm -f $(DESTDIR)${bindir}/utils/cpupower + - rm -f $(DESTDIR)${confdir}cpupower-service.conf + - rm -f $(DESTDIR)${libexecdir}/cpupower + - rm -f $(DESTDIR)${unitdir}/cpupower.service - rm -f $(DESTDIR)${mandir}/man1/cpupower.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1 diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README index 2678ed81d311..9de449469568 100644 --- a/tools/power/cpupower/README +++ b/tools/power/cpupower/README @@ -59,6 +59,10 @@ $ sudo make install ----------------------------------------------------------------------- | man pages | /usr/man | ----------------------------------------------------------------------- +| systemd service | /usr/lib/systemd/system | +----------------------------------------------------------------------- +| systemd support script | /usr/libexec | +----------------------------------------------------------------------- To put it in other words it makes build results available system-wide, enabling any user to simply start using it without any additional steps @@ -109,6 +113,10 @@ The files will be installed to the following dirs: ----------------------------------------------------------------------- | man pages | ${DESTDIR}/usr/man | ----------------------------------------------------------------------- +| systemd service | ${DESTDIR}/usr/lib/systemd/system | +----------------------------------------------------------------------- +| systemd support script | ${DESTDIR}/usr/libexec | +----------------------------------------------------------------------- If you look at the table for the default 'make' output dirs you will notice that the only difference with the non-default case is the @@ -173,6 +181,26 @@ The issue is that binary cannot find the 'libcpupower' library. So, we shall point to the lib dir: sudo LD_LIBRARY_PATH=lib64/ ./bin/cpupower +systemd service +--------------- + +A systemd service is also provided to run the cpupower utility at boot with +settings read from a configuration file. + +If you want systemd to find the new service after the installation, the service +unit must have been installed in one of the system unit search path directories +(such as '/usr/lib/systemd/system/', which is the default location) and (unless +you are willing to wait for the next reboot) you need to issue the following +command: + +$ sudo systemctl daemon-reload + +If you want to enable this systemd service, edit '/etc/cpupower-service.conf' +(uncommenting at least one of the options, depending on your preferences) +and then issue the following command: + +$ sudo systemctl enable --now cpupower.service + THANKS ------ diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile index 741f21477432..4527cd732b42 100644 --- a/tools/power/cpupower/bindings/python/Makefile +++ b/tools/power/cpupower/bindings/python/Makefile @@ -1,25 +1,25 @@ # SPDX-License-Identifier: GPL-2.0-only # Makefile for libcpupower's Python bindings # -# This Makefile expects you have already run the makefile for cpupower to build -# the .o files in the lib directory for the bindings to be created. +# This Makefile expects you have already run `make install-lib` in the lib +# directory for the bindings to be created. -CC := gcc +CC ?= gcc +# CFLAGS ?= +LDFLAGS ?= -lcpupower HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi) HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi) -LIB_DIR := ../../lib -PY_INCLUDE = $(firstword $(shell python-config --includes)) -OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o) -INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])") +PY_INCLUDE ?= $(firstword $(shell python-config --includes)) +INSTALL_DIR ?= $(shell python3 -c "import site; print(site.getsitepackages()[0])") all: _raw_pylibcpupower.so _raw_pylibcpupower.so: raw_pylibcpupower_wrap.o - $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so + $(CC) -shared $(LDFLAGS) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c - $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE) + $(CC) $(CFLAGS) $(PY_INCLUDE) -fPIC -c raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.c: raw_pylibcpupower.swg ifeq ($(HAVE_SWIG),0) diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README index 952e2e02fd32..2a4896b648b7 100644 --- a/tools/power/cpupower/bindings/python/README +++ b/tools/power/cpupower/bindings/python/README @@ -5,18 +5,21 @@ libcpupower (aside from the libcpupower object files). requirements ------------ -* You need the object files in the libcpupower directory compiled by -cpupower's makefile. +* If you are building completely from upstream; please install libcpupower by +running `make install-lib` within the cpupower directory. This installs the +libcpupower.so file and symlinks needed. Otherwise, please make sure a symlink +to libcpupower.so exists in your library path from your distribution's +packages. * The SWIG program must be installed. -* The Python's development libraries installed. +* The Python's development libraries must be installed. Please check that your version of SWIG is compatible with the version of Python installed on your machine by checking the SWIG changelog on their website. https://swig.org/ Note that while SWIG itself is GPL v3+ licensed; the resulting output, -the bindings code: is permissively licensed + the license of libcpupower's .o -files. For these bindings that means GPL v2. +the bindings code: is permissively licensed + the license of libcpupower's +library files. For these bindings that means GPL v2. Please see https://swig.org/legal.html and the discussion [1] for more details. diff --git a/tools/power/cpupower/cpupower-service.conf b/tools/power/cpupower/cpupower-service.conf new file mode 100644 index 000000000000..02eabe8e3614 --- /dev/null +++ b/tools/power/cpupower/cpupower-service.conf @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2012, Sébastien Luttringer +# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org> + +# Configuration file for cpupower.service systemd service unit +# +# Edit this file (uncommenting at least one of the options, depending on +# your preferences) and then enable cpupower.service, if you want cpupower +# to run at boot with these settings. + +# --- CPU clock frequency --- + +# Define CPU governor +# Valid governors: ondemand, performance, powersave, conservative, userspace +#GOVERNOR='ondemand' + +# Limit frequency range +# Valid suffixes: Hz, kHz (default), MHz, GHz, THz +#MIN_FREQ="2.25GHz" +#MAX_FREQ="3GHz" + +# Set a specific frequency +# Requires userspace governor to be available. +# If this option is set, all the previous frequency options are ignored +#FREQ= + +# --- CPU policy --- + +# Set a register on supported Intel processore which allows software to convey +# its policy for the relative importance of performance versus energy savings to +# the processor. See man CPUPOWER-SET(1) for additional details +#PERF_BIAS= diff --git a/tools/power/cpupower/cpupower.service.in b/tools/power/cpupower/cpupower.service.in new file mode 100644 index 000000000000..fbd5b8c14270 --- /dev/null +++ b/tools/power/cpupower/cpupower.service.in @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2012-2020, Sébastien Luttringer +# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org> + +[Unit] +Description=Apply cpupower configuration +ConditionVirtualization=!container + +[Service] +Type=oneshot +EnvironmentFile=-___CDIR___cpupower-service.conf +ExecStart=___LDIR___/cpupower +RemainAfterExit=yes + +[Install] +WantedBy=multi-user.target diff --git a/tools/power/cpupower/cpupower.sh b/tools/power/cpupower/cpupower.sh new file mode 100644 index 000000000000..a37dd4cfdb2b --- /dev/null +++ b/tools/power/cpupower/cpupower.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2012, Sébastien Luttringer +# Copyright (C) 2024, Francesco Poli <invernomuto@paranoici.org> + +ESTATUS=0 + +# apply CPU clock frequency options +if test -n "$FREQ" +then + cpupower frequency-set -f "$FREQ" > /dev/null || ESTATUS=1 +elif test -n "${GOVERNOR}${MIN_FREQ}${MAX_FREQ}" +then + cpupower frequency-set \ + ${GOVERNOR:+ -g "$GOVERNOR"} \ + ${MIN_FREQ:+ -d "$MIN_FREQ"} ${MAX_FREQ:+ -u "$MAX_FREQ"} \ + > /dev/null || ESTATUS=1 +fi + +# apply CPU policy options +if test -n "$PERF_BIAS" +then + cpupower set -b "$PERF_BIAS" > /dev/null || ESTATUS=1 +fi + +exit $ESTATUS diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c index 0ecac009273c..f2c1139adf71 100644 --- a/tools/power/cpupower/lib/cpuidle.c +++ b/tools/power/cpupower/lib/cpuidle.c @@ -233,6 +233,7 @@ int cpuidle_state_disable(unsigned int cpu, { char value[SYSFS_PATH_MAX]; int bytes_written; + int len; if (cpuidle_state_count(cpu) <= idlestate) return -1; @@ -241,10 +242,10 @@ int cpuidle_state_disable(unsigned int cpu, idlestate_value_files[IDLESTATE_DISABLE])) return -2; - snprintf(value, SYSFS_PATH_MAX, "%u", disable); + len = snprintf(value, SYSFS_PATH_MAX, "%u", disable); bytes_written = cpuidle_state_write_file(cpu, idlestate, "disable", - value, sizeof(disable)); + value, len); if (bytes_written) return 0; return -3; diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index ce8dfb8e46ab..d7f7ec6f151c 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -56,7 +56,7 @@ unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen) if (numwritten < 1) { perror(path); close(fd); - return -1; + return 0; } close(fd); diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1 index 500653ef98c7..8ac82b6f9189 100644 --- a/tools/power/cpupower/man/cpupower-set.1 +++ b/tools/power/cpupower/man/cpupower-set.1 @@ -81,10 +81,11 @@ Refer to the AMD P-State kernel documentation for further information. .RE .PP -\-\-turbo\-boost, \-t +\-\-turbo\-boost, \-\-boost, \-t .RS 4 -This option is used to enable or disable the turbo boost feature on -supported Intel and AMD processors. +This option is used to enable or disable the boost feature on +supported Intel and AMD processors, and other boost supported systems. +(The --boost option is an alias for the --turbo-boost option) This option takes as parameter either \fB1\fP to enable, or \fB0\fP to disable the feature. diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index fc750e127404..7d3732f5f2f6 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -128,7 +128,7 @@ static int get_boost_mode_x86(unsigned int cpu) /* ToDo: Make this more global */ unsigned long pstates[MAX_HW_PSTATES] = {0,}; - ret = cpufreq_has_boost_support(cpu, &support, &active, &b_states); + ret = cpufreq_has_x86_boost_support(cpu, &support, &active, &b_states); if (ret) { printf(_("Error while evaluating Boost Capabilities" " on CPU %d -- are you root?\n"), cpu); @@ -204,6 +204,18 @@ static int get_boost_mode_x86(unsigned int cpu) return 0; } +static int get_boost_mode_generic(unsigned int cpu) +{ + bool active; + + if (!cpufreq_has_generic_boost_support(&active)) { + printf(_(" boost state support:\n")); + printf(_(" Active: %s\n"), active ? _("yes") : _("no")); + } + + return 0; +} + /* --boost / -b */ static int get_boost_mode(unsigned int cpu) @@ -214,6 +226,8 @@ static int get_boost_mode(unsigned int cpu) cpupower_cpu_info.vendor == X86_VENDOR_HYGON || cpupower_cpu_info.vendor == X86_VENDOR_INTEL) return get_boost_mode_x86(cpu); + else + get_boost_mode_generic(cpu); freqs = cpufreq_get_boost_frequencies(cpu); if (freqs) { diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 0677b58374ab..c2117e5650dd 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -21,6 +21,7 @@ static struct option set_opts[] = { {"epp", required_argument, NULL, 'e'}, {"amd-pstate-mode", required_argument, NULL, 'm'}, {"turbo-boost", required_argument, NULL, 't'}, + {"boost", required_argument, NULL, 't'}, { }, }; @@ -62,8 +63,8 @@ int cmd_set(int argc, char **argv) params.params = 0; /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "b:e:m:", - set_opts, NULL)) != -1) { + while ((ret = getopt_long(argc, argv, "b:e:m:t:", + set_opts, NULL)) != -1) { switch (ret) { case 'b': if (params.perf_bias) diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 95749b8ee475..82ea62bdf5a2 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -103,6 +103,9 @@ extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ +int cpufreq_has_generic_boost_support(bool *active); +int cpupower_set_turbo_boost(int turbo_boost); + /* X86 ONLY ****************************************/ #if defined(__i386__) || defined(__x86_64__) @@ -118,7 +121,6 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); extern int cpupower_set_epp(unsigned int cpu, char *epp); extern int cpupower_set_amd_pstate_mode(char *mode); -extern int cpupower_set_turbo_boost(int turbo_boost); /* Read/Write msr ****************************/ @@ -139,8 +141,8 @@ extern int decode_pstates(unsigned int cpu, int boost_states, /* AMD HW pstate decoding **************************/ -extern int cpufreq_has_boost_support(unsigned int cpu, int *support, - int *active, int * states); +int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, + int *active, int *states); /* AMD P-State stuff **************************/ bool cpupower_amd_pstate_enabled(void); @@ -181,13 +183,11 @@ static inline int cpupower_set_epp(unsigned int cpu, char *epp) { return -1; }; static inline int cpupower_set_amd_pstate_mode(char *mode) { return -1; }; -static inline int cpupower_set_turbo_boost(int turbo_boost) -{ return -1; }; /* Read/Write msr ****************************/ -static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, - int *active, int * states) +static inline int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, + int *active, int *states) { return -1; } static inline bool cpupower_amd_pstate_enabled(void) diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 76e461ff4f74..166dc1e470ea 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -8,15 +8,14 @@ #include "helpers/helpers.h" #include "helpers/sysfs.h" #include "cpufreq.h" +#include "cpupower_intern.h" #if defined(__i386__) || defined(__x86_64__) -#include "cpupower_intern.h" - #define MSR_AMD_HWCR 0xc0010015 -int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, - int *states) +int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, + int *states) { int ret; unsigned long long val; @@ -124,24 +123,6 @@ int cpupower_set_amd_pstate_mode(char *mode) return 0; } -int cpupower_set_turbo_boost(int turbo_boost) -{ - char path[SYSFS_PATH_MAX]; - char linebuf[2] = {}; - - snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); - - if (!is_valid_path(path)) - return -1; - - snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost); - - if (cpupower_write_sysfs(path, linebuf, 2) <= 0) - return -1; - - return 0; -} - bool cpupower_amd_pstate_enabled(void) { char *driver = cpufreq_get_driver(0); @@ -160,6 +141,39 @@ bool cpupower_amd_pstate_enabled(void) #endif /* #if defined(__i386__) || defined(__x86_64__) */ +int cpufreq_has_generic_boost_support(bool *active) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + unsigned long val; + char *endp; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); + + if (!is_valid_path(path)) + return -EACCES; + + if (cpupower_read_sysfs(path, linebuf, 2) <= 0) + return -EINVAL; + + val = strtoul(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -EINVAL; + + switch (val) { + case 0: + *active = false; + break; + case 1: + *active = true; + break; + default: + return -EINVAL; + } + + return 0; +} + /* get_cpustate * * Gather the information of all online CPUs into bitmask struct @@ -259,3 +273,21 @@ void print_speed(unsigned long speed, int no_rounding) } } } + +int cpupower_set_turbo_boost(int turbo_boost) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); + + if (!is_valid_path(path)) + return -1; + + snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost); + + if (cpupower_write_sysfs(path, linebuf, 2) <= 0) + return -1; + + return 0; +} diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index ad493157f826..e8b3841d5c0f 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -121,10 +121,8 @@ void print_header(int topology_depth) switch (topology_depth) { case TOPOLOGY_DEPTH_PKG: printf(" PKG|"); - break; case TOPOLOGY_DEPTH_CORE: printf("CORE|"); - break; case TOPOLOGY_DEPTH_CPU: printf(" CPU|"); break; @@ -167,10 +165,8 @@ void print_results(int topology_depth, int cpu) switch (topology_depth) { case TOPOLOGY_DEPTH_PKG: printf("%4d|", cpu_top.core_info[cpu].pkg); - break; case TOPOLOGY_DEPTH_CORE: printf("%4d|", cpu_top.core_info[cpu].core); - break; case TOPOLOGY_DEPTH_CPU: printf("%4d|", cpu_top.core_info[cpu].cpu); break; diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 73b6b10cbdd2..5ae02c3d5b64 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -240,9 +240,9 @@ static int mperf_stop(void) int cpu; for (cpu = 0; cpu < cpu_count; cpu++) { - mperf_measure_stats(cpu); - mperf_get_tsc(&tsc_at_measure_end[cpu]); clock_gettime(CLOCK_REALTIME, &time_end[cpu]); + mperf_get_tsc(&tsc_at_measure_end[cpu]); + mperf_measure_stats(cpu); } return 0; diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index e2261f33a082..1555b51a7d55 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -4017,7 +4017,8 @@ def parseKernelLog(data): 'PM: early restore of devices complete after.*'], 'resume_complete': ['PM: resume of devices complete after.*', 'PM: restore of devices complete after.*'], - 'post_resume': [r'.*Restarting tasks \.\.\..*'], + 'post_resume': [r'.*Restarting tasks \.\.\..*', + 'Done restarting tasks.*'], } # action table (expected events that occur and show up in dmesg) diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py index feb9f9421c7b..875b086550d1 100755 --- a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py +++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py @@ -11,7 +11,7 @@ Prerequisites: gnuplot 5.0 or higher gnuplot-py 1.8 or higher (Most of the distributions have these required packages. They may be called - gnuplot-py, phython-gnuplot or phython3-gnuplot, gnuplot-nox, ... ) + gnuplot-py, python-gnuplot or python3-gnuplot, gnuplot-nox, ... ) Kernel config for Linux trace is enabled diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index eaa420ac848d..558138eea75e 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -16,7 +16,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.22"; +static const char *version_str = "v1.24"; static const int supported_api_ver = 3; static struct isst_if_platform_info isst_platform_info; @@ -26,6 +26,7 @@ static FILE *outf; static int cpu_model; static int cpu_stepping; +static int extended_family; #define MAX_CPUS_IN_ONE_REQ 512 static short max_target_cpus; @@ -143,6 +144,14 @@ int is_icx_platform(void) return 0; } +static int is_dmr_plus_platform(void) +{ + if (extended_family == 0x04) + return 1; + + return 0; +} + static int update_cpu_model(void) { unsigned int ebx, ecx, edx; @@ -150,6 +159,7 @@ static int update_cpu_model(void) __cpuid(1, fms, ebx, ecx, edx); family = (fms >> 8) & 0xf; + extended_family = (fms >> 20) & 0x0f; cpu_model = (fms >> 4) & 0xf; if (family == 6 || family == 0xf) cpu_model += ((fms >> 16) & 0xf) << 4; @@ -1517,7 +1527,8 @@ display_result: usleep(2000); /* Adjusting uncore freq */ - isst_adjust_uncore_freq(id, tdp_level, &ctdp_level); + if (!is_dmr_plus_platform()) + isst_adjust_uncore_freq(id, tdp_level, &ctdp_level); fprintf(stderr, "Option is set to online/offline\n"); ctdp_level.core_cpumask_size = diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c index da53aaa27fc9..ebaad0dc8ca6 100644 --- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c +++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c @@ -227,6 +227,7 @@ static int tpmi_get_ctdp_control(struct isst_id *id, int config_index, static int tpmi_get_tdp_info(struct isst_id *id, int config_index, struct isst_pkg_ctdp_level_info *ctdp_level) { + struct isst_perf_level_fabric_info fabric_info; struct isst_perf_level_data_info info; int ret; @@ -253,6 +254,17 @@ static int tpmi_get_tdp_info(struct isst_id *id, int config_index, ctdp_level->uncore_p1 = info.p1_fabric_freq_mhz; ctdp_level->uncore_pm = info.pm_fabric_freq_mhz; + fabric_info.socket_id = id->pkg; + fabric_info.power_domain_id = id->punit; + fabric_info.level = config_index; + + ret = tpmi_process_ioctl(ISST_IF_GET_PERF_LEVEL_FABRIC_INFO, &fabric_info); + if (ret != -1) { + ctdp_level->uncore1_p0 = fabric_info.p0_fabric_freq_mhz[1]; + ctdp_level->uncore1_p1 = fabric_info.p1_fabric_freq_mhz[1]; + ctdp_level->uncore1_pm = fabric_info.pm_fabric_freq_mhz[1]; + } + debug_printf ("cpu:%d ctdp:%d CONFIG_TDP_GET_TDP_INFO tdp_ratio:%d pkg_tdp:%d ctdp_level->t_proc_hot:%d\n", id->cpu, config_index, ctdp_level->tdp_ratio, ctdp_level->pkg_tdp, @@ -440,13 +452,16 @@ static int tpmi_get_pbf_info(struct isst_id *id, int level, return _pbf_get_coremask_info(id, level, pbf_info); } +#define FEATURE_ENABLE_WAIT_US 1000 +#define FEATURE_ENABLE_RETRIES 5 + static int tpmi_set_pbf_fact_status(struct isst_id *id, int pbf, int enable) { struct isst_pkg_ctdp pkg_dev; struct isst_pkg_ctdp_level_info ctdp_level; int current_level; struct isst_perf_feature_control info; - int ret; + int ret, i; ret = isst_get_ctdp_levels(id, &pkg_dev); if (ret) @@ -491,6 +506,30 @@ static int tpmi_set_pbf_fact_status(struct isst_id *id, int pbf, int enable) if (ret == -1) return ret; + for (i = 0; i < FEATURE_ENABLE_RETRIES; ++i) { + + usleep(FEATURE_ENABLE_WAIT_US); + + /* Check status */ + ret = isst_get_ctdp_control(id, current_level, &ctdp_level); + if (ret) + return ret; + + debug_printf("pbf_enabled:%d fact_enabled:%d\n", + ctdp_level.pbf_enabled, ctdp_level.fact_enabled); + + if (pbf) { + if (ctdp_level.pbf_enabled == enable) + break; + } else { + if (ctdp_level.fact_enabled == enable) + break; + } + } + + if (i == FEATURE_ENABLE_RETRIES) + return -1; + return 0; } @@ -501,6 +540,7 @@ static int tpmi_get_fact_info(struct isst_id *id, int level, int fact_bucket, int i, j; int ret; + memset(&info, 0, sizeof(info)); info.socket_id = id->pkg; info.power_domain_id = id->punit; info.level = level; @@ -647,7 +687,8 @@ static int tpmi_pm_qos_config(struct isst_id *id, int enable_clos, int priority_type) { struct isst_core_power info; - int i, ret, saved_punit; + int cp_state = 0, cp_cap = 0; + int i, j, ret, saved_punit; info.get_set = 1; info.socket_id = id->pkg; @@ -667,6 +708,19 @@ static int tpmi_pm_qos_config(struct isst_id *id, int enable_clos, id->punit = saved_punit; return ret; } + /* Get status */ + for (j = 0; j < FEATURE_ENABLE_RETRIES; ++j) { + usleep(FEATURE_ENABLE_WAIT_US); + ret = tpmi_read_pm_config(id, &cp_state, &cp_cap); + debug_printf("ret:%d cp_state:%d enable_clos:%d\n", ret, + cp_state, enable_clos); + if (ret || cp_state == enable_clos) + break; + } + if (j == FEATURE_ENABLE_RETRIES) { + id->punit = saved_punit; + return -1; + } } } diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index da5a59a4c545..e4884eb02837 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -460,6 +460,26 @@ void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level format_and_print(outf, level + 2, header, value); } + if (ctdp_level->uncore1_p1) { + snprintf(header, sizeof(header), "uncore-1-frequency-base(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore1_p1 * isst_get_disp_freq_multiplier()); + format_and_print(outf, level + 2, header, value); + } + if (ctdp_level->uncore1_pm) { + snprintf(header, sizeof(header), "uncore-1-frequency-min(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore1_pm * isst_get_disp_freq_multiplier()); + format_and_print(outf, level + 2, header, value); + } + + if (ctdp_level->uncore1_p0) { + snprintf(header, sizeof(header), "uncore-1-frequency-max(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore1_p0 * isst_get_disp_freq_multiplier()); + format_and_print(outf, level + 2, header, value); + } + if (ctdp_level->mem_freq) { snprintf(header, sizeof(header), "max-mem-frequency(MHz)"); snprintf(value, sizeof(value), "%d", diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 39ee75677c2c..960f647cfc2d 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -147,6 +147,9 @@ struct isst_pkg_ctdp_level_info { int uncore_p0; int uncore_p1; int uncore_pm; + int uncore1_p0; + int uncore1_p1; + int uncore1_pm; int sse_p1; int avx2_p1; int avx512_p1; diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index b74ed916057e..1551fcdbfd8a 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -47,10 +47,11 @@ name as necessary to disambiguate it from others is necessary. Note that option MSRs are read as 64-bits, u32 truncates the displayed value to 32-bits. default: u64 - format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP} + format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP | \fBaverage\fP} 'raw' shows the MSR contents in hex. 'delta' shows the difference in values during the measurement interval. 'percent' shows the delta as a percentage of the cycles elapsed. + 'average' similar to raw, but also averaged for node/package summaries (or when using -S). default: delta name: "name_string" @@ -100,7 +101,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP -\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a comma-separated-list of CATEGORIES of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "cache", "llc", "other". "idle" (enabled by default), includes "hwidle" and "pct_idle". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "pct_idle". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". .PP \fB--Dump\fP displays the raw counter values. .PP @@ -158,6 +159,10 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. .PP +\fBLLCkRPS\fP Last Level Cache Thousands of References Per Second. For CPUs with an L3 LLC, this is the number of references that CPU made to the L3 (and the number of misses that CPU made to it's L2). For CPUs with an L2 LLC, this is the number of references to the L2 (and the number of misses to the CPU's L1). The system summary row shows the sum for all CPUs. In both cases, the value displayed is the actual value divided by 1000 in the interest of usually fitting into 8 columns. +.PP +\fBLLC%hit\fP Last Level Cache Hit Rate %. Hit Rate Percent = 100.0 * (References - Misses)/References. The system summary row shows the weighted average for all CPUs (100.0 * (Sum_References - Sum_Misses)/Sum_References). +.PP \fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. .PP \fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default. @@ -186,6 +191,14 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used. .PP +\fBTotl%C0\fP Weighted percentage of time that CPUs are busy. If N CPUs are busy during an interval, the percentage is N * 100%. +.PP +\fBAny%C0\fP Percentage of time that at least one CPU is busy. +.PP +\fBGFX%C0\fP Percentage of time that at least one GFX compute engine is busy. +.PP +\fBCPUGFX%\fP Percentage of time that at least one CPU is busy at the same time as at least one Graphics compute enginer is busy. +.PP \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. .PP \fBPkgWatt\fP Watts consumed by the whole package. @@ -204,8 +217,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBUncMHz\fP per-package uncore MHz, instantaneous sample. .PP -\fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages. -Intel Granite Rapids systems use domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0. +\fBUMHz1.0\fP per-package uncore MHz for pm_domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages. +Intel Granite Rapids systems use pm_domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0. For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group. .SH TOO MUCH INFORMATION EXAMPLE By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. @@ -401,25 +414,24 @@ CPU pCPU%c1 CPU%c1 .fi .SH ADD PERF COUNTER EXAMPLE #2 (using virtual cpu device) -Here we run on hybrid, Raptor Lake platform. -We limit turbostat to show output for just cpu0 (pcore) and cpu12 (ecore). +Here we run on hybrid, Meteor Lake platform. +We limit turbostat to show output for just cpu0 (pcore) and cpu4 (ecore). We add a counter showing number of L3 cache misses, using virtual "cpu" device, labeling it with the column header, "VCMISS". We add a counter showing number of L3 cache misses, using virtual "cpu_core" device, -labeling it with the column header, "PCMISS". This will fail on ecore cpu12. +labeling it with the column header, "PCMISS". This will fail on ecore cpu4. We add a counter showing number of L3 cache misses, using virtual "cpu_atom" device, labeling it with the column header, "ECMISS". This will fail on pcore cpu0. We display it only once, after the conclusion of 0.1 second sleep. .nf -sudo ./turbostat --quiet --cpu 0,12 --show CPU --add perf/cpu/cache-misses,cpu,delta,raw,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,raw,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,raw,ECMISS sleep .1 +sudo ./turbostat --quiet --cpu 0,4 --show CPU --add perf/cpu/cache-misses,cpu,delta,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,ECMISS sleep 5 turbostat: added_perf_counters_init_: perf/cpu_atom/cache-misses: failed to open counter on cpu0 -turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu12 -0.104630 sec -CPU ECMISS PCMISS VCMISS -- 0x0000000000000000 0x0000000000000000 0x0000000000000000 -0 0x0000000000000000 0x0000000000007951 0x0000000000007796 -12 0x000000000001137a 0x0000000000000000 0x0000000000011392 - +turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu4 +5.001207 sec +CPU ECMISS PCMISS VCMISS +- 41586506 46291219 87877749 +4 83173012 0 83173040 +0 0 92582439 92582458 .fi .SH ADD PMT COUNTER EXAMPLE diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0170d3cc6819..5ad45c2ac5bd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -67,6 +67,7 @@ #include <stdbool.h> #include <assert.h> #include <linux/kernel.h> +#include <limits.h> #define UNUSED(x) (void)(x) @@ -141,6 +142,7 @@ struct msr_counter { #define FLAGS_SHOW (1 << 1) #define SYSFS_PERCPU (1 << 1) }; +static int use_android_msr_path; struct msr_counter bic[] = { { 0x0, "usec", NULL, 0, 0, 0, NULL, 0 }, @@ -194,6 +196,7 @@ struct msr_counter bic[] = { { 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "Die", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "L3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 }, @@ -207,93 +210,250 @@ struct msr_counter bic[] = { { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "LLCkRPS", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "LLC%hit", NULL, 0, 0, 0, NULL, 0 }, }; -#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) -#define BIC_USEC (1ULL << 0) -#define BIC_TOD (1ULL << 1) -#define BIC_Package (1ULL << 2) -#define BIC_Node (1ULL << 3) -#define BIC_Avg_MHz (1ULL << 4) -#define BIC_Busy (1ULL << 5) -#define BIC_Bzy_MHz (1ULL << 6) -#define BIC_TSC_MHz (1ULL << 7) -#define BIC_IRQ (1ULL << 8) -#define BIC_SMI (1ULL << 9) -#define BIC_cpuidle (1ULL << 10) -#define BIC_CPU_c1 (1ULL << 11) -#define BIC_CPU_c3 (1ULL << 12) -#define BIC_CPU_c6 (1ULL << 13) -#define BIC_CPU_c7 (1ULL << 14) -#define BIC_ThreadC (1ULL << 15) -#define BIC_CoreTmp (1ULL << 16) -#define BIC_CoreCnt (1ULL << 17) -#define BIC_PkgTmp (1ULL << 18) -#define BIC_GFX_rc6 (1ULL << 19) -#define BIC_GFXMHz (1ULL << 20) -#define BIC_Pkgpc2 (1ULL << 21) -#define BIC_Pkgpc3 (1ULL << 22) -#define BIC_Pkgpc6 (1ULL << 23) -#define BIC_Pkgpc7 (1ULL << 24) -#define BIC_Pkgpc8 (1ULL << 25) -#define BIC_Pkgpc9 (1ULL << 26) -#define BIC_Pkgpc10 (1ULL << 27) -#define BIC_CPU_LPI (1ULL << 28) -#define BIC_SYS_LPI (1ULL << 29) -#define BIC_PkgWatt (1ULL << 30) -#define BIC_CorWatt (1ULL << 31) -#define BIC_GFXWatt (1ULL << 32) -#define BIC_PkgCnt (1ULL << 33) -#define BIC_RAMWatt (1ULL << 34) -#define BIC_PKG__ (1ULL << 35) -#define BIC_RAM__ (1ULL << 36) -#define BIC_Pkg_J (1ULL << 37) -#define BIC_Cor_J (1ULL << 38) -#define BIC_GFX_J (1ULL << 39) -#define BIC_RAM_J (1ULL << 40) -#define BIC_Mod_c6 (1ULL << 41) -#define BIC_Totl_c0 (1ULL << 42) -#define BIC_Any_c0 (1ULL << 43) -#define BIC_GFX_c0 (1ULL << 44) -#define BIC_CPUGFX (1ULL << 45) -#define BIC_Core (1ULL << 46) -#define BIC_CPU (1ULL << 47) -#define BIC_APIC (1ULL << 48) -#define BIC_X2APIC (1ULL << 49) -#define BIC_Die (1ULL << 50) -#define BIC_GFXACTMHz (1ULL << 51) -#define BIC_IPC (1ULL << 52) -#define BIC_CORE_THROT_CNT (1ULL << 53) -#define BIC_UNCORE_MHZ (1ULL << 54) -#define BIC_SAM_mc6 (1ULL << 55) -#define BIC_SAMMHz (1ULL << 56) -#define BIC_SAMACTMHz (1ULL << 57) -#define BIC_Diec6 (1ULL << 58) -#define BIC_SysWatt (1ULL << 59) -#define BIC_Sys_J (1ULL << 60) -#define BIC_NMI (1ULL << 61) -#define BIC_CPU_c1e (1ULL << 62) -#define BIC_pct_idle (1ULL << 63) - -#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) -#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) -#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) -#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) -#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) -#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) - -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) - -unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; - -#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) -#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) -#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) -#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) -#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) -#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) +/* n.b. bic_names must match the order in bic[], above */ +enum bic_names { + BIC_USEC, + BIC_TOD, + BIC_Package, + BIC_Node, + BIC_Avg_MHz, + BIC_Busy, + BIC_Bzy_MHz, + BIC_TSC_MHz, + BIC_IRQ, + BIC_SMI, + BIC_cpuidle, + BIC_CPU_c1, + BIC_CPU_c3, + BIC_CPU_c6, + BIC_CPU_c7, + BIC_ThreadC, + BIC_CoreTmp, + BIC_CoreCnt, + BIC_PkgTmp, + BIC_GFX_rc6, + BIC_GFXMHz, + BIC_Pkgpc2, + BIC_Pkgpc3, + BIC_Pkgpc6, + BIC_Pkgpc7, + BIC_Pkgpc8, + BIC_Pkgpc9, + BIC_Pkgpc10, + BIC_CPU_LPI, + BIC_SYS_LPI, + BIC_PkgWatt, + BIC_CorWatt, + BIC_GFXWatt, + BIC_PkgCnt, + BIC_RAMWatt, + BIC_PKG__, + BIC_RAM__, + BIC_Pkg_J, + BIC_Cor_J, + BIC_GFX_J, + BIC_RAM_J, + BIC_Mod_c6, + BIC_Totl_c0, + BIC_Any_c0, + BIC_GFX_c0, + BIC_CPUGFX, + BIC_Core, + BIC_CPU, + BIC_APIC, + BIC_X2APIC, + BIC_Die, + BIC_L3, + BIC_GFXACTMHz, + BIC_IPC, + BIC_CORE_THROT_CNT, + BIC_UNCORE_MHZ, + BIC_SAM_mc6, + BIC_SAMMHz, + BIC_SAMACTMHz, + BIC_Diec6, + BIC_SysWatt, + BIC_Sys_J, + BIC_NMI, + BIC_CPU_c1e, + BIC_pct_idle, + BIC_LLC_RPS, + BIC_LLC_HIT, + MAX_BIC +}; + +void print_bic_set(char *s, cpu_set_t *set) +{ + int i; + + assert(MAX_BIC < CPU_SETSIZE); + + printf("%s:", s); + + for (i = 0; i <= MAX_BIC; ++i) { + + if (CPU_ISSET(i, set)) { + assert(i < MAX_BIC); + printf(" %s", bic[i].name); + } + } + putchar('\n'); +} + +static cpu_set_t bic_group_topology; +static cpu_set_t bic_group_thermal_pwr; +static cpu_set_t bic_group_frequency; +static cpu_set_t bic_group_hw_idle; +static cpu_set_t bic_group_sw_idle; +static cpu_set_t bic_group_idle; +static cpu_set_t bic_group_cache; +static cpu_set_t bic_group_other; +static cpu_set_t bic_group_disabled_by_default; +static cpu_set_t bic_enabled; +static cpu_set_t bic_present; + +/* modify */ +#define BIC_INIT(set) CPU_ZERO(set) + +#define SET_BIC(COUNTER_NUMBER, set) CPU_SET(COUNTER_NUMBER, set) +#define CLR_BIC(COUNTER_NUMBER, set) CPU_CLR(COUNTER_NUMBER, set) + +#define BIC_PRESENT(COUNTER_NUMBER) SET_BIC(COUNTER_NUMBER, &bic_present) +#define BIC_NOT_PRESENT(COUNTER_NUMBER) CPU_CLR(COUNTER_NUMBER, &bic_present) + +/* test */ +#define BIC_IS_ENABLED(COUNTER_NUMBER) CPU_ISSET(COUNTER_NUMBER, &bic_enabled) +#define DO_BIC_READ(COUNTER_NUMBER) CPU_ISSET(COUNTER_NUMBER, &bic_present) +#define DO_BIC(COUNTER_NUMBER) (CPU_ISSET(COUNTER_NUMBER, &bic_enabled) && CPU_ISSET(COUNTER_NUMBER, &bic_present)) + +static void bic_set_all(cpu_set_t *set) +{ + int i; + + assert(MAX_BIC < CPU_SETSIZE); + + for (i = 0; i < MAX_BIC; ++i) + SET_BIC(i, set); +} + +/* + * bic_clear_bits() + * clear all the bits from "clr" in "dst" + */ +static void bic_clear_bits(cpu_set_t *dst, cpu_set_t *clr) +{ + int i; + + assert(MAX_BIC < CPU_SETSIZE); + + for (i = 0; i < MAX_BIC; ++i) + if (CPU_ISSET(i, clr)) + CLR_BIC(i, dst); +} + +static void bic_groups_init(void) +{ + BIC_INIT(&bic_group_topology); + SET_BIC(BIC_Package, &bic_group_topology); + SET_BIC(BIC_Node, &bic_group_topology); + SET_BIC(BIC_CoreCnt, &bic_group_topology); + SET_BIC(BIC_PkgCnt, &bic_group_topology); + SET_BIC(BIC_Core, &bic_group_topology); + SET_BIC(BIC_CPU, &bic_group_topology); + SET_BIC(BIC_Die, &bic_group_topology); + SET_BIC(BIC_L3, &bic_group_topology); + + BIC_INIT(&bic_group_thermal_pwr); + SET_BIC(BIC_CoreTmp, &bic_group_thermal_pwr); + SET_BIC(BIC_PkgTmp, &bic_group_thermal_pwr); + SET_BIC(BIC_PkgWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_CorWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_GFXWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_RAMWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_PKG__, &bic_group_thermal_pwr); + SET_BIC(BIC_RAM__, &bic_group_thermal_pwr); + SET_BIC(BIC_SysWatt, &bic_group_thermal_pwr); + + BIC_INIT(&bic_group_frequency); + SET_BIC(BIC_Avg_MHz, &bic_group_frequency); + SET_BIC(BIC_Busy, &bic_group_frequency); + SET_BIC(BIC_Bzy_MHz, &bic_group_frequency); + SET_BIC(BIC_TSC_MHz, &bic_group_frequency); + SET_BIC(BIC_GFXMHz, &bic_group_frequency); + SET_BIC(BIC_GFXACTMHz, &bic_group_frequency); + SET_BIC(BIC_SAMMHz, &bic_group_frequency); + SET_BIC(BIC_SAMACTMHz, &bic_group_frequency); + SET_BIC(BIC_UNCORE_MHZ, &bic_group_frequency); + + BIC_INIT(&bic_group_hw_idle); + SET_BIC(BIC_Busy, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c1, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c3, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c6, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c7, &bic_group_hw_idle); + SET_BIC(BIC_GFX_rc6, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc2, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc3, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc6, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc7, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc8, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc9, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc10, &bic_group_hw_idle); + SET_BIC(BIC_CPU_LPI, &bic_group_hw_idle); + SET_BIC(BIC_SYS_LPI, &bic_group_hw_idle); + SET_BIC(BIC_Mod_c6, &bic_group_hw_idle); + SET_BIC(BIC_Totl_c0, &bic_group_hw_idle); + SET_BIC(BIC_Any_c0, &bic_group_hw_idle); + SET_BIC(BIC_GFX_c0, &bic_group_hw_idle); + SET_BIC(BIC_CPUGFX, &bic_group_hw_idle); + SET_BIC(BIC_SAM_mc6, &bic_group_hw_idle); + SET_BIC(BIC_Diec6, &bic_group_hw_idle); + + BIC_INIT(&bic_group_sw_idle); + SET_BIC(BIC_Busy, &bic_group_sw_idle); + SET_BIC(BIC_cpuidle, &bic_group_sw_idle); + SET_BIC(BIC_pct_idle, &bic_group_sw_idle); + + BIC_INIT(&bic_group_idle); + + CPU_OR(&bic_group_idle, &bic_group_idle, &bic_group_hw_idle); + SET_BIC(BIC_pct_idle, &bic_group_idle); + + BIC_INIT(&bic_group_cache); + SET_BIC(BIC_LLC_RPS, &bic_group_cache); + SET_BIC(BIC_LLC_HIT, &bic_group_cache); + + BIC_INIT(&bic_group_other); + SET_BIC(BIC_IRQ, &bic_group_other); + SET_BIC(BIC_NMI, &bic_group_other); + SET_BIC(BIC_SMI, &bic_group_other); + SET_BIC(BIC_ThreadC, &bic_group_other); + SET_BIC(BIC_CoreTmp, &bic_group_other); + SET_BIC(BIC_IPC, &bic_group_other); + + BIC_INIT(&bic_group_disabled_by_default); + SET_BIC(BIC_USEC, &bic_group_disabled_by_default); + SET_BIC(BIC_TOD, &bic_group_disabled_by_default); + SET_BIC(BIC_cpuidle, &bic_group_disabled_by_default); + SET_BIC(BIC_APIC, &bic_group_disabled_by_default); + SET_BIC(BIC_X2APIC, &bic_group_disabled_by_default); + + BIC_INIT(&bic_enabled); + bic_set_all(&bic_enabled); + bic_clear_bits(&bic_enabled, &bic_group_disabled_by_default); + + BIC_INIT(&bic_present); + SET_BIC(BIC_USEC, &bic_present); + SET_BIC(BIC_TOD, &bic_present); + SET_BIC(BIC_cpuidle, &bic_present); + SET_BIC(BIC_APIC, &bic_present); + SET_BIC(BIC_X2APIC, &bic_present); + SET_BIC(BIC_pct_idle, &bic_present); +} /* * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: @@ -317,12 +477,11 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle #define PCL_10 14 /* PC10 */ #define PCLUNL 15 /* Unlimited */ -struct amperf_group_fd; - char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; int *fd_instr_count_percpu; +int *fd_llc_percpu; struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; @@ -333,11 +492,12 @@ unsigned int quiet; unsigned int shown; unsigned int sums_need_wide_columns; unsigned int rapl_joules; +unsigned int valid_rapl_msrs; unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; unsigned int force_load; -unsigned int has_aperf; +unsigned int cpuid_has_aperf_mperf; unsigned int has_aperf_access; unsigned int has_epb; unsigned int has_turbo; @@ -403,8 +563,7 @@ static struct gfx_sysfs_info gfx_info[GFX_MAX]; int get_msr(int cpu, off_t offset, unsigned long long *msr); int add_counter(unsigned int msr_num, char *path, char *name, - unsigned int width, enum counter_scope scope, - enum counter_type type, enum counter_format format, int flags, int package_num); + unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int package_num); /* Model specific support Start */ @@ -429,7 +588,7 @@ struct platform_features { bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */ - int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ + int plat_rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */ bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ @@ -539,7 +698,7 @@ enum rapl_msrs { #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) #define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY) -#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY) +#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLICY) #define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT) @@ -584,7 +743,7 @@ static const struct platform_features snb_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features snx_features = { @@ -596,7 +755,7 @@ static const struct platform_features snx_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, }; static const struct platform_features ivb_features = { @@ -609,7 +768,7 @@ static const struct platform_features ivb_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features ivx_features = { @@ -621,7 +780,7 @@ static const struct platform_features ivx_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_LIMIT1, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, }; static const struct platform_features hsw_features = { @@ -635,7 +794,7 @@ static const struct platform_features hsw_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features hsx_features = { @@ -649,7 +808,7 @@ static const struct platform_features hsx_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, .plr_msrs = PLR_CORE | PLR_RING, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, }; @@ -664,7 +823,7 @@ static const struct platform_features hswl_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features hswg_features = { @@ -678,7 +837,7 @@ static const struct platform_features hswg_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features bdw_features = { @@ -691,7 +850,7 @@ static const struct platform_features bdw_features = { .cst_limit = CST_LIMIT_HSW, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features bdwg_features = { @@ -704,7 +863,7 @@ static const struct platform_features bdwg_features = { .cst_limit = CST_LIMIT_HSW, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features bdx_features = { @@ -718,7 +877,7 @@ static const struct platform_features bdx_features = { .has_irtl_msrs = 1, .has_cst_auto_convension = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, }; @@ -735,7 +894,7 @@ static const struct platform_features skl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -752,7 +911,7 @@ static const struct platform_features cnl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -770,7 +929,7 @@ static const struct platform_features adl_features = { .has_ext_cst_msrs = cnl_features.has_ext_cst_msrs, .trl_msrs = cnl_features.trl_msrs, .tcc_offset_bits = cnl_features.tcc_offset_bits, - .rapl_msrs = cnl_features.rapl_msrs, + .plat_rapl_msrs = cnl_features.plat_rapl_msrs, .enable_tsc_tweak = cnl_features.enable_tsc_tweak, }; @@ -788,7 +947,7 @@ static const struct platform_features lnl_features = { .has_ext_cst_msrs = adl_features.has_ext_cst_msrs, .trl_msrs = adl_features.trl_msrs, .tcc_offset_bits = adl_features.tcc_offset_bits, - .rapl_msrs = adl_features.rapl_msrs, + .plat_rapl_msrs = adl_features.plat_rapl_msrs, .enable_tsc_tweak = adl_features.enable_tsc_tweak, }; @@ -803,7 +962,7 @@ static const struct platform_features skx_features = { .has_irtl_msrs = 1, .has_cst_auto_convension = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, }; @@ -819,7 +978,7 @@ static const struct platform_features icx_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, .has_fixed_rapl_unit = 1, }; @@ -836,7 +995,25 @@ static const struct platform_features spr_features = { .has_cst_prewake_bit = 1, .has_fixed_rapl_psys_unit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, +}; + +static const struct platform_features dmr_features = { + .has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control, + .has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt, + .has_nhm_msrs = spr_features.has_nhm_msrs, + .bclk_freq = spr_features.bclk_freq, + .supported_cstates = spr_features.supported_cstates, + .cst_limit = spr_features.cst_limit, + .has_msr_core_c1_res = spr_features.has_msr_core_c1_res, + .has_cst_prewake_bit = spr_features.has_cst_prewake_bit, + .has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit, + .trl_msrs = spr_features.trl_msrs, + .has_msr_module_c6_res_ms = 1, /* DMR has Dual-Core-Module and MC6 MSR */ + .plat_rapl_msrs = 0, /* DMR does not have RAPL MSRs */ + .plr_msrs = 0, /* DMR does not have PLR MSRs */ + .has_irtl_msrs = 0, /* DMR does not have IRTL MSRs */ + .has_config_tdp = 0, /* DMR does not have CTDP MSRs */ }; static const struct platform_features srf_features = { @@ -852,7 +1029,7 @@ static const struct platform_features srf_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features grr_features = { @@ -868,7 +1045,7 @@ static const struct platform_features grr_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features slv_features = { @@ -881,7 +1058,7 @@ static const struct platform_features slv_features = { .has_msr_c6_demotion_policy_config = 1, .has_msr_atom_pkg_c6_residency = 1, .trl_msrs = TRL_ATOM, - .rapl_msrs = RAPL_PKG | RAPL_CORE, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE, .has_rapl_divisor = 1, .rapl_quirk_tdp = 30, }; @@ -894,7 +1071,7 @@ static const struct platform_features slvd_features = { .cst_limit = CST_LIMIT_SLV, .has_msr_atom_pkg_c6_residency = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE, .rapl_quirk_tdp = 30, }; @@ -915,7 +1092,7 @@ static const struct platform_features gmt_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, }; static const struct platform_features gmtd_features = { @@ -928,7 +1105,7 @@ static const struct platform_features gmtd_features = { .has_irtl_msrs = 1, .has_msr_core_c1_res = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, }; static const struct platform_features gmtp_features = { @@ -940,7 +1117,7 @@ static const struct platform_features gmtp_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, }; static const struct platform_features tmt_features = { @@ -951,7 +1128,7 @@ static const struct platform_features tmt_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, .enable_tsc_tweak = 1, }; @@ -963,7 +1140,7 @@ static const struct platform_features tmtd_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL, }; static const struct platform_features knl_features = { @@ -975,7 +1152,7 @@ static const struct platform_features knl_features = { .cst_limit = CST_LIMIT_KNL, .has_msr_knl_core_c6_residency = 1, .trl_msrs = TRL_KNL, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, .need_perf_multiplier = 1, }; @@ -984,7 +1161,7 @@ static const struct platform_features default_features = { }; static const struct platform_features amd_features_with_rapl = { - .rapl_msrs = RAPL_AMD_F17H, + .plat_rapl_msrs = RAPL_AMD_F17H, .has_per_core_rapl = 1, .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ }; @@ -1028,12 +1205,14 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_EMERALDRAPIDS_X, &spr_features }, { INTEL_GRANITERAPIDS_X, &spr_features }, { INTEL_GRANITERAPIDS_D, &spr_features }, + { INTEL_DIAMONDRAPIDS_X, &dmr_features }, { INTEL_LAKEFIELD, &cnl_features }, { INTEL_ALDERLAKE, &adl_features }, { INTEL_ALDERLAKE_L, &adl_features }, { INTEL_RAPTORLAKE, &adl_features }, { INTEL_RAPTORLAKE_P, &adl_features }, { INTEL_RAPTORLAKE_S, &adl_features }, + { INTEL_BARTLETTLAKE, &adl_features }, { INTEL_METEORLAKE, &adl_features }, { INTEL_METEORLAKE_L, &adl_features }, { INTEL_ARROWLAKE_H, &adl_features }, @@ -1041,6 +1220,9 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ARROWLAKE, &adl_features }, { INTEL_LUNARLAKE_M, &lnl_features }, { INTEL_PANTHERLAKE_L, &lnl_features }, + { INTEL_NOVALAKE, &lnl_features }, + { INTEL_NOVALAKE_L, &lnl_features }, + { INTEL_WILDCATLAKE_L, &lnl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, { INTEL_ATOM_AIRMONT, &amt_features }, @@ -1072,7 +1254,6 @@ void probe_platform_features(unsigned int family, unsigned int model) { int i; - if (authentic_amd || hygon_genuine) { /* fallback to default features on unsupported models */ force_load++; @@ -1106,8 +1287,7 @@ end: if (platform) return; - fprintf(stderr, "Unsupported platform detected.\n" - "\tSee RUN THE LATEST VERSION on turbostat(8)\n"); + fprintf(stderr, "Unsupported platform detected.\n\tSee RUN THE LATEST VERSION on turbostat(8)\n"); exit(1); } @@ -1186,7 +1366,7 @@ struct rapl_counter_arch_info { int msr_shift; /* Positive mean shift right, negative mean shift left */ double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */ unsigned int rci_index; /* Maps data from perf counters to global variables */ - unsigned long long bic; + unsigned int bic_number; double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */ unsigned long long flags; }; @@ -1201,7 +1381,33 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, - .bic = BIC_PkgWatt | BIC_Pkg_J, + .bic_number = BIC_PkgWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_PKG, + .perf_subsys = "power", + .perf_name = "energy-pkg", + .msr = MSR_PKG_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, + .bic_number = BIC_Pkg_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_AMD_F17H, + .perf_subsys = "power", + .perf_name = "energy-pkg", + .msr = MSR_PKG_ENERGY_STAT, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, + .bic_number = BIC_PkgWatt, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1214,7 +1420,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, - .bic = BIC_PkgWatt | BIC_Pkg_J, + .bic_number = BIC_Pkg_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1227,7 +1433,33 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, - .bic = BIC_CorWatt | BIC_Cor_J, + .bic_number = BIC_CorWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_CORE_ENERGY_STATUS, + .perf_subsys = "power", + .perf_name = "energy-cores", + .msr = MSR_PP0_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, + .bic_number = BIC_Cor_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_DRAM, + .perf_subsys = "power", + .perf_name = "energy-ram", + .msr = MSR_DRAM_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_dram_energy_units, + .rci_index = RAPL_RCI_INDEX_DRAM, + .bic_number = BIC_RAMWatt, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1240,7 +1472,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_dram_energy_units, .rci_index = RAPL_RCI_INDEX_DRAM, - .bic = BIC_RAMWatt | BIC_RAM_J, + .bic_number = BIC_RAM_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1253,7 +1485,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_GFX, - .bic = BIC_GFXWatt | BIC_GFX_J, + .bic_number = BIC_GFXWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_GFX, + .perf_subsys = "power", + .perf_name = "energy-gpu", + .msr = MSR_PP1_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_GFX, + .bic_number = BIC_GFX_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1266,7 +1511,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_time_units, .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS, - .bic = BIC_PKG__, + .bic_number = BIC_PKG__, .compat_scale = 100.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1279,7 +1524,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_time_units, .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS, - .bic = BIC_RAM__, + .bic_number = BIC_RAM__, .compat_scale = 100.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1292,7 +1537,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, - .bic = BIC_CorWatt | BIC_Cor_J, + .bic_number = BIC_CorWatt, + .compat_scale = 1.0, + .flags = 0, + }, + { + .feature_mask = RAPL_AMD_F17H, + .perf_subsys = NULL, + .perf_name = NULL, + .msr = MSR_CORE_ENERGY_STAT, + .msr_mask = 0xFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, + .bic_number = BIC_Cor_J, .compat_scale = 1.0, .flags = 0, }, @@ -1305,7 +1563,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_psys_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, - .bic = BIC_SysWatt | BIC_Sys_J, + .bic_number = BIC_SysWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_PSYS, + .perf_subsys = "power", + .perf_name = "energy-psys", + .msr = MSR_PLATFORM_ENERGY_STATUS, + .msr_mask = 0x00000000FFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_psys_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, + .bic_number = BIC_Sys_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1354,7 +1625,7 @@ struct cstate_counter_arch_info { const char *perf_name; unsigned long long msr; unsigned int rci_index; /* Maps data from perf counters to global variables */ - unsigned long long bic; + unsigned int bic_number; unsigned long long flags; int pkg_cstate_limit; }; @@ -1366,7 +1637,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c1-residency", .msr = MSR_CORE_C1_RES, .rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY, - .bic = BIC_CPU_c1, + .bic_number = BIC_CPU_c1, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD, .pkg_cstate_limit = 0, }, @@ -1376,7 +1647,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c3-residency", .msr = MSR_CORE_C3_RESIDENCY, .rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY, - .bic = BIC_CPU_c3, + .bic_number = BIC_CPU_c3, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, .pkg_cstate_limit = 0, }, @@ -1386,7 +1657,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c6-residency", .msr = MSR_CORE_C6_RESIDENCY, .rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY, - .bic = BIC_CPU_c6, + .bic_number = BIC_CPU_c6, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, .pkg_cstate_limit = 0, }, @@ -1396,7 +1667,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c7-residency", .msr = MSR_CORE_C7_RESIDENCY, .rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY, - .bic = BIC_CPU_c7, + .bic_number = BIC_CPU_c7, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, .pkg_cstate_limit = 0, }, @@ -1406,7 +1677,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c2-residency", .msr = MSR_PKG_C2_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY, - .bic = BIC_Pkgpc2, + .bic_number = BIC_Pkgpc2, .flags = 0, .pkg_cstate_limit = PCL__2, }, @@ -1416,7 +1687,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c3-residency", .msr = MSR_PKG_C3_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY, - .bic = BIC_Pkgpc3, + .bic_number = BIC_Pkgpc3, .flags = 0, .pkg_cstate_limit = PCL__3, }, @@ -1426,7 +1697,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c6-residency", .msr = MSR_PKG_C6_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY, - .bic = BIC_Pkgpc6, + .bic_number = BIC_Pkgpc6, .flags = 0, .pkg_cstate_limit = PCL__6, }, @@ -1436,7 +1707,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c7-residency", .msr = MSR_PKG_C7_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY, - .bic = BIC_Pkgpc7, + .bic_number = BIC_Pkgpc7, .flags = 0, .pkg_cstate_limit = PCL__7, }, @@ -1446,7 +1717,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c8-residency", .msr = MSR_PKG_C8_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY, - .bic = BIC_Pkgpc8, + .bic_number = BIC_Pkgpc8, .flags = 0, .pkg_cstate_limit = PCL__8, }, @@ -1456,7 +1727,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c9-residency", .msr = MSR_PKG_C9_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY, - .bic = BIC_Pkgpc9, + .bic_number = BIC_Pkgpc9, .flags = 0, .pkg_cstate_limit = PCL__9, }, @@ -1466,7 +1737,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c10-residency", .msr = MSR_PKG_C10_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY, - .bic = BIC_Pkgpc10, + .bic_number = BIC_Pkgpc10, .flags = 0, .pkg_cstate_limit = PCL_10, }, @@ -1631,7 +1902,7 @@ int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b) sscanf((*a)->d_name, "telem%u", &aidx); sscanf((*b)->d_name, "telem%u", &bidx); - return aidx >= bidx; + return (aidx > bidx) ? 1 : (aidx < bidx) ? -1 : 0; } const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter) @@ -1732,6 +2003,10 @@ void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) pmt_counter_resize_(pcounter, new_size); } +struct llc_stats { + unsigned long long references; + unsigned long long misses; +}; struct thread_data { struct timeval tv_begin; struct timeval tv_end; @@ -1744,6 +2019,7 @@ struct thread_data { unsigned long long irq_count; unsigned long long nmi_count; unsigned int smi_count; + struct llc_stats llc; unsigned int cpu_id; unsigned int apic_id; unsigned int x2apic_id; @@ -1822,8 +2098,6 @@ struct pkg_data { ((node_no) * topo.cores_per_node) + \ (core_no)) -#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) - /* * The accumulated sum of MSR is defined as a monotonic * increasing MSR, it will be accumulated periodically, @@ -1861,7 +2135,7 @@ off_t idx_to_offset(int idx) switch (idx) { case IDX_PKG_ENERGY: - if (platform->rapl_msrs & RAPL_AMD_F17H) + if (valid_rapl_msrs & RAPL_AMD_F17H) offset = MSR_PKG_ENERGY_STAT; else offset = MSR_PKG_ENERGY_STATUS; @@ -1927,19 +2201,19 @@ int idx_valid(int idx) { switch (idx) { case IDX_PKG_ENERGY: - return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); + return valid_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); case IDX_DRAM_ENERGY: - return platform->rapl_msrs & RAPL_DRAM; + return valid_rapl_msrs & RAPL_DRAM; case IDX_PP0_ENERGY: - return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; + return valid_rapl_msrs & RAPL_CORE_ENERGY_STATUS; case IDX_PP1_ENERGY: - return platform->rapl_msrs & RAPL_GFX; + return valid_rapl_msrs & RAPL_GFX; case IDX_PKG_PERF: - return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; + return valid_rapl_msrs & RAPL_PKG_PERF_STATUS; case IDX_DRAM_PERF: - return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; + return valid_rapl_msrs & RAPL_DRAM_PERF_STATUS; case IDX_PSYS_ENERGY: - return platform->rapl_msrs & RAPL_PSYS; + return valid_rapl_msrs & RAPL_PSYS; default: return 0; } @@ -2018,6 +2292,7 @@ struct platform_counters { struct cpu_topology { int physical_package_id; int die_id; + int l3_id; int logical_cpu_id; int physical_node_id; int logical_node_id; /* 0-based count within the package */ @@ -2039,6 +2314,7 @@ struct topo_params { int max_core_id; int max_package_id; int max_die_id; + int max_l3_id; int max_node_num; int nodes_per_pkg; int cores_per_node; @@ -2072,6 +2348,8 @@ int cpu_is_not_allowed(int cpu) * skip non-present cpus */ +#define PER_THREAD_PARAMS struct thread_data *t, struct core_data *c, struct pkg_data *p + int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) { @@ -2085,16 +2363,15 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t; struct core_data *c; - struct pkg_data *p; + t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); if (cpu_is_not_allowed(t->cpu_id)) continue; c = GET_CORE(core_base, core_no, node_no, pkg_no); - p = GET_PKG(pkg_base, pkg_no); - retval |= func(t, c, p); + retval |= func(t, c, &pkg_base[pkg_no]); } } } @@ -2102,23 +2379,19 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk return retval; } -int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c) { - UNUSED(p); - return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); } -int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int is_cpu_first_core_in_package(struct thread_data *t, struct pkg_data *p) { - UNUSED(c); - return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); } int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) { - return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); + return is_cpu_first_thread_in_core(t, c) && is_cpu_first_core_in_package(t, p); } int cpu_migrate(int cpu) @@ -2140,13 +2413,11 @@ int get_msr_fd(int cpu) if (fd) return fd; - - sprintf(pathname, "/dev/cpu/%d/msr", cpu); + sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); if (fd < 0) - err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " - "or run with --no-msr, or run as root", pathname); - + err(-1, "%s open failed, try chown or chmod +r %s, " + "or run with --no-msr, or run as root", pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr"); fd_percpu[cpu] = fd; return fd; @@ -2154,14 +2425,24 @@ int get_msr_fd(int cpu) static void bic_disable_msr_access(void) { - const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | - BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; - - bic_enabled &= ~bic_msrs; + CLR_BIC(BIC_Mod_c6, &bic_enabled); + CLR_BIC(BIC_CoreTmp, &bic_enabled); + CLR_BIC(BIC_Totl_c0, &bic_enabled); + CLR_BIC(BIC_Any_c0, &bic_enabled); + CLR_BIC(BIC_GFX_c0, &bic_enabled); + CLR_BIC(BIC_CPUGFX, &bic_enabled); + CLR_BIC(BIC_PkgTmp, &bic_enabled); free_sys_msr_counters(); } +static void bic_disable_perf_access(void) +{ + CLR_BIC(BIC_IPC, &bic_enabled); + CLR_BIC(BIC_LLC_RPS, &bic_enabled); + CLR_BIC(BIC_LLC_HIT, &bic_enabled); +} + static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { assert(!no_perf); @@ -2215,32 +2496,52 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) return 0; } -int probe_rapl_msr(int cpu, off_t offset, int index) +int add_msr_counter(int cpu, off_t offset) { ssize_t retval; unsigned long long value; - assert(!no_msr); + if (no_msr) + return -1; + + if (!offset) + return -1; retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset); /* if the read failed, the probe fails */ if (retval != sizeof(value)) - return 1; + return -1; + + if (value == 0) + return 0; + + return 1; +} + +int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai) +{ + int ret; + + if (!(valid_rapl_msrs & cai->feature_mask)) + return -1; + + ret = add_msr_counter(cpu, cai->msr); + if (ret < 0) + return -1; - /* If an Energy Status Counter MSR returns 0, the probe fails */ - switch (index) { + switch (cai->rci_index) { case RAPL_RCI_INDEX_ENERGY_PKG: case RAPL_RCI_INDEX_ENERGY_CORES: case RAPL_RCI_INDEX_DRAM: case RAPL_RCI_INDEX_GFX: case RAPL_RCI_INDEX_ENERGY_PLATFORM: - if (value == 0) + if (ret == 0) return 1; } /* PKG,DRAM_PERF_STATUS MSRs, can return any value */ - return 0; + return 1; } /* Convert CPU ID to domain ID for given added perf counter. */ @@ -2265,6 +2566,8 @@ char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; int deferred_add_index; int deferred_skip_index; +unsigned int deferred_add_consumed; +unsigned int deferred_skip_consumed; /* * HIDE_LIST - hide this list of counters, show the rest [default] @@ -2327,8 +2630,7 @@ void help(void) " degrees Celsius\n" " -h, --help\n" " print this help message\n" - " -v, --version\n" - " print version information\n\nFor more help, run \"man turbostat\"\n"); + " -v, --version\n\t\tprint version information\n\nFor more help, run \"man turbostat\"\n"); } /* @@ -2336,10 +2638,9 @@ void help(void) * for all the strings in comma separate name_list, * set the approprate bit in return value. */ -unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) +void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) { unsigned int i; - unsigned long long retval = 0; while (name_list) { char *comma; @@ -2351,46 +2652,49 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) for (i = 0; i < MAX_BIC; ++i) { if (!strcmp(name_list, bic[i].name)) { - retval |= (1ULL << i); + SET_BIC(i, ret_set); break; } if (!strcmp(name_list, "all")) { - retval |= ~0; + bic_set_all(ret_set); break; } else if (!strcmp(name_list, "topology")) { - retval |= BIC_GROUP_TOPOLOGY; + CPU_OR(ret_set, ret_set, &bic_group_topology); break; } else if (!strcmp(name_list, "power")) { - retval |= BIC_GROUP_THERMAL_PWR; + CPU_OR(ret_set, ret_set, &bic_group_thermal_pwr); break; } else if (!strcmp(name_list, "idle")) { - retval |= BIC_GROUP_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_idle); + break; + } else if (!strcmp(name_list, "cache")) { + CPU_OR(ret_set, ret_set, &bic_group_cache); + break; + } else if (!strcmp(name_list, "llc")) { + CPU_OR(ret_set, ret_set, &bic_group_cache); break; } else if (!strcmp(name_list, "swidle")) { - retval |= BIC_GROUP_SW_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_sw_idle); break; } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ - retval |= BIC_GROUP_SW_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_sw_idle); break; } else if (!strcmp(name_list, "hwidle")) { - retval |= BIC_GROUP_HW_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_hw_idle); break; } else if (!strcmp(name_list, "frequency")) { - retval |= BIC_GROUP_FREQUENCY; + CPU_OR(ret_set, ret_set, &bic_group_frequency); break; } else if (!strcmp(name_list, "other")) { - retval |= BIC_OTHER; + CPU_OR(ret_set, ret_set, &bic_group_other); break; } - } if (i == MAX_BIC) { - fprintf(stderr, "deferred %s\n", name_list); if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { - fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", - MAX_DEFERRED, name_list); + fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", MAX_DEFERRED, name_list); help(); exit(1); } @@ -2399,8 +2703,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) if (debug) fprintf(stderr, "deferred \"%s\"\n", name_list); if (deferred_skip_index >= MAX_DEFERRED) { - fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", - MAX_DEFERRED, name_list); + fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", MAX_DEFERRED, name_list); help(); exit(1); } @@ -2412,7 +2715,47 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) name_list++; } - return retval; +} + +/* + * print_name() + * Print column header name for raw 64-bit counter in 16 columns (at least 8-char plus a tab) + * Otherwise, allow the name + tab to fit within 8-coumn tab-stop. + * In both cases, left justififed, just like other turbostat columns, + * to allow the column values to consume the tab. + * + * Yes, 32-bit counters can overflow 8-columns, and + * 64-bit counters can overflow 16-columns, but that is uncommon. + */ +static inline int print_name(int width, int *printed, char *delim, char *name, enum counter_type type, enum counter_format format) +{ + UNUSED(type); + + if (format == FORMAT_RAW && width >= 64) + return (sprintf(outp, "%s%-8s", (*printed++ ? delim : ""), name)); + else + return (sprintf(outp, "%s%s", (*printed++ ? delim : ""), name)); +} + +static inline int print_hex_value(int width, int *printed, char *delim, unsigned long long value) +{ + if (width <= 32) + return (sprintf(outp, "%s%08x", (*printed++ ? delim : ""), (unsigned int)value)); + else + return (sprintf(outp, "%s%016llx", (*printed++ ? delim : ""), value)); +} + +static inline int print_decimal_value(int width, int *printed, char *delim, unsigned long long value) +{ + if (width <= 32) + return (sprintf(outp, "%s%d", (*printed++ ? delim : ""), (unsigned int)value)); + else + return (sprintf(outp, "%s%-8lld", (*printed++ ? delim : ""), value)); +} + +static inline int print_float_value(int *printed, char *delim, double value) +{ + return (sprintf(outp, "%s%0.2f", (*printed++ ? delim : ""), value)); } void print_header(char *delim) @@ -2430,6 +2773,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); if (DO_BIC(BIC_Die)) outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); + if (DO_BIC(BIC_L3)) + outp += sprintf(outp, "%sL3", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -2468,50 +2813,28 @@ void print_header(char *delim) if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); - for (mp = sys.tp; mp; mp = mp->next) { + if (DO_BIC(BIC_LLC_RPS)) + outp += sprintf(outp, "%sLLCkRPS", (printed++ ? delim : "")); - if (mp->format == FORMAT_RAW) { - if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); - } else { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); - } - } + if (DO_BIC(BIC_LLC_HIT)) + outp += sprintf(outp, "%sLLC%%hit", (printed++ ? delim : "")); - for (pp = sys.perf_tp; pp; pp = pp->next) { + for (mp = sys.tp; mp; mp = mp->next) + outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); - if (pp->format == FORMAT_RAW) { - if (pp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); - } else { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); - } - } + for (pp = sys.perf_tp; pp; pp = pp->next) + outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format); ppmt = sys.pmt_tp; while (ppmt) { switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); - else - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); - + outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; case PMT_TYPE_XTAL_TIME: case PMT_TYPE_TCORE_CLOCK: - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; } @@ -2536,63 +2859,36 @@ void print_header(char *delim) if (DO_BIC(BIC_CORE_THROT_CNT)) outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); - if (platform->rapl_msrs && !rapl_joules) { + if (valid_rapl_msrs && !rapl_joules) { if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); - } else if (platform->rapl_msrs && rapl_joules) { + } else if (valid_rapl_msrs && rapl_joules) { if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); } - for (mp = sys.cp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW) { - if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", delim, mp->name); - else - outp += sprintf(outp, "%s%10.10s", delim, mp->name); - } else { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", delim, mp->name); - else - outp += sprintf(outp, "%s%s", delim, mp->name); - } - } - - for (pp = sys.perf_cp; pp; pp = pp->next) { + for (mp = sys.cp; mp; mp = mp->next) + outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); - if (pp->format == FORMAT_RAW) { - if (pp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); - } else { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); - } - } + for (pp = sys.perf_cp; pp; pp = pp->next) + outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format); ppmt = sys.pmt_cp; while (ppmt) { switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); - else - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; case PMT_TYPE_XTAL_TIME: case PMT_TYPE_TCORE_CLOCK: - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; } ppmt = ppmt->next; } - if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); @@ -2644,7 +2940,7 @@ void print_header(char *delim) if (DO_BIC(BIC_SYS_LPI)) outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); - if (platform->rapl_msrs && !rapl_joules) { + if (!rapl_joules) { if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) @@ -2657,7 +2953,7 @@ void print_header(char *delim) outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); if (DO_BIC(BIC_RAM__)) outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); - } else if (platform->rapl_msrs && rapl_joules) { + } else { if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) @@ -2674,51 +2970,22 @@ void print_header(char *delim) if (DO_BIC(BIC_UNCORE_MHZ)) outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); - for (mp = sys.pp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW) { - if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", delim, mp->name); - else if (mp->width == 32) - outp += sprintf(outp, "%s%10.10s", delim, mp->name); - else - outp += sprintf(outp, "%s%7.7s", delim, mp->name); - } else { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", delim, mp->name); - else - outp += sprintf(outp, "%s%7.7s", delim, mp->name); - } - } + for (mp = sys.pp; mp; mp = mp->next) + outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); - for (pp = sys.perf_pp; pp; pp = pp->next) { - - if (pp->format == FORMAT_RAW) { - if (pp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); - } else { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); - } - } + for (pp = sys.perf_pp; pp; pp = pp->next) + outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format); ppmt = sys.pmt_pp; while (ppmt) { switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); - else - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); - + outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; case PMT_TYPE_XTAL_TIME: case PMT_TYPE_TCORE_CLOCK: - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; } @@ -2733,7 +3000,26 @@ void print_header(char *delim) outp += sprintf(outp, "\n"); } -int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +/* + * pct() + * + * If absolute value is < 1.1, return percentage + * otherwise, return nan + * + * return value is appropriate for printing percentages with %f + * while flagging some obvious erroneous values. + */ +double pct(double d) +{ + + double abs = fabs(d); + + if (abs < 1.10) + return (100.0 * d); + return nan(""); +} + +int dump_counters(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -2758,14 +3044,16 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "SMI: %d\n", t->smi_count); + outp += sprintf(outp, "LLC refs: %lld", t->llc.references); + outp += sprintf(outp, "LLC miss: %lld", t->llc.misses); + outp += sprintf(outp, "LLC Hit%%: %.2f", pct((t->llc.references - t->llc.misses) / t->llc.references)); + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - outp += - sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, - t->counter[i], mp->sp->path); + outp += sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, t->counter[i], mp->sp->path); } } - if (c && is_cpu_first_thread_in_core(t, c, p)) { + if (c && is_cpu_first_thread_in_core(t, c)) { outp += sprintf(outp, "core: %d\n", c->core_id); outp += sprintf(outp, "c3: %016llX\n", c->c3); outp += sprintf(outp, "c6: %016llX\n", c->c6); @@ -2780,14 +3068,12 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - outp += - sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, - c->counter[i], mp->sp->path); + outp += sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, c->counter[i], mp->sp->path); } outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); } - if (p && is_cpu_first_core_in_package(t, c, p)) { + if (p && is_cpu_first_core_in_package(t, p)) { outp += sprintf(outp, "package: %d\n", p->package_id); outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); @@ -2817,9 +3103,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - outp += - sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, - p->counter[i], mp->sp->path); + outp += sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, p->counter[i], mp->sp->path); } } @@ -2845,16 +3129,37 @@ double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desir return scaled; } +void get_perf_llc_stats(int cpu, struct llc_stats *llc) +{ + struct read_format { + unsigned long long num_read; + struct llc_stats llc; + } r; + const ssize_t expected_read_size = sizeof(r); + ssize_t actual_read_size; + + actual_read_size = read(fd_llc_percpu[cpu], &r, expected_read_size); + + if (actual_read_size == -1) + err(-1, "%s(cpu%d,) %d,,%ld\n", __func__, cpu, fd_llc_percpu[cpu], expected_read_size); + + llc->references = r.llc.references; + llc->misses = r.llc.misses; + if (actual_read_size != expected_read_size) + warn("%s: failed to read perf_data (req %zu act %zu)", __func__, expected_read_size, actual_read_size); +} + /* * column formatting convention & formats */ -int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int format_counters(PER_THREAD_PARAMS) { static int count; struct platform_counters *pplat_cnt = NULL; double interval_float, tsc; - char *fmt8; + char *fmt8 = "%s%.2f"; + int i; struct msr_counter *mp; struct perf_counter_info *pp; @@ -2868,11 +3173,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } /* if showing only 1st thread in core and this isn't one, bail out */ - if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) + if (show_core_only && !is_cpu_first_thread_in_core(t, c)) return 0; /* if showing only 1st thread in pkg and this isn't one, bail out */ - if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) + if (show_pkg_only && !is_cpu_first_core_in_package(t, p)) return 0; /*if not summary line and --cpu is used */ @@ -2901,6 +3206,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Die)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_L3)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -2924,10 +3231,15 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } + if (DO_BIC(BIC_L3)) { + if (c) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].l3_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } if (DO_BIC(BIC_Node)) { if (t) - outp += sprintf(outp, "%s%d", - (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } @@ -2949,15 +3261,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->mperf / tsc)); if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) - outp += - sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); else - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), - tsc / units * t->aperf / t->mperf / interval_float); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), tsc / units * t->aperf / t->mperf / interval_float); } if (DO_BIC(BIC_TSC_MHz)) @@ -2986,96 +3296,81 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); - /* Added counters */ + /* LLC Stats */ + if (DO_BIC(BIC_LLC_RPS) || DO_BIC(BIC_LLC_HIT)) { + if (DO_BIC(BIC_LLC_RPS)) + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), t->llc.references / interval_float / 1000); + + if (DO_BIC(BIC_LLC_HIT)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), pct((t->llc.references - t->llc.misses) / t->llc.references)); + } + + /* Added Thread Counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) { - if (mp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); - } else if (mp->format == FORMAT_DELTA) { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); - } else if (mp->format == FORMAT_PERCENT) { + if (mp->format == FORMAT_RAW) + outp += print_hex_value(mp->width, &printed, delim, t->counter[i]); + else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(mp->width, &printed, delim, t->counter[i]); + else if (mp->format == FORMAT_PERCENT) { if (mp->type == COUNTER_USEC) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - t->counter[i] / interval_float / 10000); + outp += print_float_value(&printed, delim, t->counter[i] / interval_float / 10000); else - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); + outp += print_float_value(&printed, delim, pct(t->counter[i] / tsc)); } } - /* Added perf counters */ + /* Added perf Thread Counters */ for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { - if (pp->format == FORMAT_RAW) { - if (pp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)t->perf_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); - } else if (pp->format == FORMAT_DELTA) { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); - } else if (pp->format == FORMAT_PERCENT) { + if (pp->format == FORMAT_RAW) + outp += print_hex_value(pp->width, &printed, delim, t->perf_counter[i]); + else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(pp->width, &printed, delim, t->perf_counter[i]); + else if (pp->format == FORMAT_PERCENT) { if (pp->type == COUNTER_USEC) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - t->perf_counter[i] / interval_float / 10000); + outp += print_float_value(&printed, delim, t->perf_counter[i] / interval_float / 10000); else - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); + outp += print_float_value(&printed, delim, pct(t->perf_counter[i] / tsc)); } } + /* Added PMT Thread Counters */ for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = t->pmt_counter[i]; double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)t->pmt_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); - + outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, t->pmt_counter[i]); break; case PMT_TYPE_XTAL_TIME: - value_converted = 100.0 * value_raw / crystal_hz / interval_float; + value_converted = pct(value_raw / crystal_hz / interval_float); outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; case PMT_TYPE_TCORE_CLOCK: - value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; + value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float); outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); } } /* C1 */ if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->c1 / tsc)); /* print per-core data only for 1st thread in core */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) goto done; if (DO_BIC(BIC_CPU_c3)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c3 / tsc)); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c6 / tsc)); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c7 / tsc)); /* Mod%c6 */ if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->mc6_us / tsc)); if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); @@ -3084,77 +3379,53 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_CORE_THROT_CNT)) outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); + /* Added Core Counters */ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) { - if (mp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); - } else if (mp->format == FORMAT_DELTA) { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); - } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); - } + if (mp->format == FORMAT_RAW) + outp += print_hex_value(mp->width, &printed, delim, c->counter[i]); + else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(mp->width, &printed, delim, c->counter[i]); + else if (mp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(c->counter[i] / tsc)); } + /* Added perf Core counters */ for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { - if (pp->format == FORMAT_RAW) { - if (pp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)c->perf_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); - } else if (pp->format == FORMAT_DELTA) { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); - } else if (pp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); - } + if (pp->format == FORMAT_RAW) + outp += print_hex_value(pp->width, &printed, delim, c->perf_counter[i]); + else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(pp->width, &printed, delim, c->perf_counter[i]); + else if (pp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(c->perf_counter[i] / tsc)); } + /* Added PMT Core counters */ for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = c->pmt_counter[i]; double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)c->pmt_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); - + outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, c->pmt_counter[i]); break; case PMT_TYPE_XTAL_TIME: - value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / crystal_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); break; case PMT_TYPE_TCORE_CLOCK: - value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); } } - fmt8 = "%s%.2f"; - if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); /* print per-package data only for 1st core in package */ - if (!is_cpu_first_core_in_package(t, c, p)) + if (!is_cpu_first_core_in_package(t, p)) goto done; /* PkgTmp */ @@ -3166,8 +3437,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - p->gfx_rc6_ms / 10.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->gfx_rc6_ms / 10.0 / interval_float); } } @@ -3184,8 +3454,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - p->sam_mc6_ms / 10.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->sam_mc6_ms / 10.0 / interval_float); } } @@ -3199,150 +3468,112 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (DO_BIC(BIC_Totl_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100 * p->pkg_wtd_core_c0 / tsc); /* can exceed 100% */ if (DO_BIC(BIC_Any_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_core_c0 / tsc)); if (DO_BIC(BIC_GFX_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_gfxe_c0 / tsc)); if (DO_BIC(BIC_CPUGFX)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_both_core_gfxe_c0 / tsc)); if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc2 / tsc)); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc3 / tsc)); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc6 / tsc)); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc7 / tsc)); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc8 / tsc)); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc9 / tsc)); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc10 / tsc)); if (DO_BIC(BIC_Diec6)) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->die_c6 / crystal_hz / interval_float)); if (DO_BIC(BIC_CPU_LPI)) { if (p->cpu_lpi >= 0) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - 100.0 * p->cpu_lpi / 1000000.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->cpu_lpi / 1000000.0 / interval_float)); else outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); } if (DO_BIC(BIC_SYS_LPI)) { if (p->sys_lpi >= 0) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - 100.0 * p->sys_lpi / 1000000.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->sys_lpi / 1000000.0 / interval_float)); else outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); } if (DO_BIC(BIC_PkgWatt)) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_GFXWatt)) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_RAMWatt)) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_PKG__)) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); + sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_RAM__)) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); + sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); /* UncMHz */ if (DO_BIC(BIC_UNCORE_MHZ)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); + /* Added Package Counters */ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) { - if (mp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); - } else if (mp->format == FORMAT_DELTA) { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); - } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); - } else if (mp->type == COUNTER_K2M) + if (mp->format == FORMAT_RAW) + outp += print_hex_value(mp->width, &printed, delim, p->counter[i]); + else if (mp->type == COUNTER_K2M) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); + else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(mp->width, &printed, delim, p->counter[i]); + else if (mp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(p->counter[i] / tsc)); } + /* Added perf Package Counters */ for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { - if (pp->format == FORMAT_RAW) { - if (pp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)p->perf_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); - } else if (pp->format == FORMAT_DELTA) { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); - } else if (pp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); - } else if (pp->type == COUNTER_K2M) { - outp += - sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); - } + if (pp->format == FORMAT_RAW) + outp += print_hex_value(pp->width, &printed, delim, p->perf_counter[i]); + else if (pp->type == COUNTER_K2M) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); + else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(pp->width, &printed, delim, p->perf_counter[i]); + else if (pp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(p->perf_counter[i] / tsc)); } + /* Added PMT Package Counters */ for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = p->pmt_counter[i]; double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)p->pmt_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); - + outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, p->pmt_counter[i]); break; case PMT_TYPE_XTAL_TIME: - value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / crystal_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); break; case PMT_TYPE_TCORE_CLOCK: - value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); } } @@ -3382,7 +3613,7 @@ void flush_output_stderr(void) outp = output_buffer; } -void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +void format_all_counters(PER_THREAD_PARAMS) { static int count; @@ -3457,8 +3688,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value; old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; - old->rapl_dram_perf_status.raw_value = - new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; + old->rapl_dram_perf_status.raw_value = new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -3505,7 +3735,7 @@ void delta_core(struct core_data *new, struct core_data *old) DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; else old->counter[i] = new->counter[i] - old->counter[i]; @@ -3565,8 +3795,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d /* check for TSC < 1 Mcycles over interval */ if (old->tsc < (1000 * 1000)) errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" - "You can disable all c-states by booting with \"idle=poll\"\n" - "or just the deep ones with \"processor.max_cstate=1\""); + "You can disable all c-states by booting with \"idle=poll\"\n" "or just the deep ones with \"processor.max_cstate=1\""); old->c1 = new->c1 - old->c1; @@ -3595,8 +3824,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d old->c1 = 0; else { /* normal case, derive c1 */ - old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - - core_delta->c6 - core_delta->c7; + old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - core_delta->c6 - core_delta->c7; } } @@ -3618,8 +3846,14 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d if (DO_BIC(BIC_SMI)) old->smi_count = new->smi_count - old->smi_count; + if (DO_BIC(BIC_LLC_RPS)) + old->llc.references = new->llc.references - old->llc.references; + + if (DO_BIC(BIC_LLC_HIT)) + old->llc.misses = new->llc.misses - old->llc.misses; + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; else old->counter[i] = new->counter[i] - old->counter[i]; @@ -3642,20 +3876,19 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d return 0; } -int delta_cpu(struct thread_data *t, struct core_data *c, - struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) +int delta_cpu(struct thread_data *t, struct core_data *c, struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) { int retval = 0; /* calculate core delta only for 1st thread in core */ - if (is_cpu_first_thread_in_core(t, c, p)) + if (is_cpu_first_thread_in_core(t, c)) delta_core(c, c2); /* always calculate thread delta */ retval = delta_thread(t, t2, c2); /* c2 is core delta */ /* calculate package delta only for 1st core in package */ - if (is_cpu_first_core_in_package(t, c, p)) + if (is_cpu_first_core_in_package(t, p)) retval |= delta_package(p, p2); return retval; @@ -3673,7 +3906,7 @@ void rapl_counter_clear(struct rapl_counter *c) c->unit = RAPL_UNIT_INVALID; } -void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +void clear_counters(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -3696,6 +3929,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->nmi_count = 0; t->smi_count = 0; + t->llc.references = 0; + t->llc.misses = 0; + c->c3 = 0; c->c6 = 0; c->c7 = 0; @@ -3704,6 +3940,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data rapl_counter_clear(&c->core_energy); c->core_throt_cnt = 0; + t->llc.references = 0; + t->llc.misses = 0; + p->pkg_wtd_core_c0 = 0; p->pkg_any_core_c0 = 0; p->pkg_any_gfxe_c0 = 0; @@ -3770,7 +4009,7 @@ void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter dst->raw_value += src->raw_value; } -int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int sum_counters(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -3801,6 +4040,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.threads.nmi_count += t->nmi_count; average.threads.smi_count += t->smi_count; + average.threads.llc.references += t->llc.references; + average.threads.llc.misses += t->llc.misses; + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; @@ -3818,7 +4060,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* sum per-core values only for 1st thread in core */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) return 0; average.cores.c3 += c->c3; @@ -3848,7 +4090,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* sum per-pkg values only for 1st core in pkg */ - if (!is_cpu_first_core_in_package(t, c, p)) + if (!is_cpu_first_core_in_package(t, p)) return 0; if (DO_BIC(BIC_Totl_c0)) @@ -3918,7 +4160,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) * sum the counters for all cpus in the system * compute the weighted average */ -void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) +void compute_average(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -3943,7 +4185,6 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data if (average.threads.nmi_count > 9999999) sums_need_wide_columns = 1; - average.cores.c3 /= topo.allowed_cores; average.cores.c6 /= topo.allowed_cores; average.cores.c7 /= topo.allowed_cores; @@ -4115,8 +4356,7 @@ unsigned long long get_legacy_uncore_mhz(int package) */ for (die = 0; die <= topo.max_die_id; ++die) { - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", - package, die); + sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package, die); if (access(path, R_OK) == 0) return (snapshot_sysfs_counter(path) / 1000); @@ -4227,11 +4467,6 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt) return 0; } -struct amperf_group_fd { - int aperf; /* Also the group descriptor */ - int mperf; -}; - static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) { int fdmt; @@ -4431,8 +4666,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); if (actual_read_size != expected_read_size) - err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, - actual_read_size); + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size); } for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { @@ -4502,7 +4736,7 @@ char *find_sysfs_path_by_id(struct sysfs_path *sp, int id) return NULL; } -int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data *p) +int get_cstate_counters(unsigned int cpu, PER_THREAD_PARAMS) { /* * Overcommit memory a little bit here, @@ -4670,8 +4904,7 @@ int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); if (actual_read_size != expected_read_size) - err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, - actual_read_size); + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size); } for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { @@ -4766,12 +4999,43 @@ unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) return (value & value_mask) >> value_shift; } +/* Rapl domain enumeration helpers */ +static inline int get_rapl_num_domains(void) +{ + int num_packages = topo.max_package_id + 1; + int num_cores_per_package; + int num_cores; + + if (!platform->has_per_core_rapl) + return num_packages; + + num_cores_per_package = topo.max_core_id + 1; + num_cores = num_cores_per_package * num_packages; + + return num_cores; +} + +static inline int get_rapl_domain_id(int cpu) +{ + int nr_cores_per_package = topo.max_core_id + 1; + int rapl_core_id; + + if (!platform->has_per_core_rapl) + return cpus[cpu].physical_package_id; + + /* Compute the system-wide unique core-id for @cpu */ + rapl_core_id = cpus[cpu].physical_core_id; + rapl_core_id += cpus[cpu].physical_package_id * nr_cores_per_package; + + return rapl_core_id; +} + /* * get_counters(...) * migrate to cpu * acquire and record local counters for that cpu */ -int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int get_counters(PER_THREAD_PARAMS) { int cpu = t->cpu_id; unsigned long long msr; @@ -4794,6 +5058,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) get_smi_aperf_mperf(cpu, t); + if (DO_BIC(BIC_LLC_RPS) || DO_BIC(BIC_LLC_HIT)) + get_perf_llc_stats(cpu, &t->llc); + if (DO_BIC(BIC_IPC)) if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) return -4; @@ -4817,11 +5084,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); /* collect core counters only for 1st thread in core */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) goto done; if (platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, c->core_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -4861,7 +5128,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); /* collect package counters only for 1st core in package */ - if (!is_cpu_first_core_in_package(t, c, p)) + if (!is_cpu_first_core_in_package(t, p)) goto done; if (DO_BIC(BIC_Totl_c0)) { @@ -4887,7 +5154,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) p->sys_lpi = cpuidle_cur_sys_lpi_us; if (!platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, p->package_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -4950,48 +5217,39 @@ char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2", "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" }; -int nhm_pkg_cstate_limits[16] = - { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int nhm_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int snb_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int snb_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int hsw_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int hsw_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int slv_pkg_cstate_limits[16] = - { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int slv_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7 }; -int amt_pkg_cstate_limits[16] = - { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int amt_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int phi_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int phi_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int glm_pkg_cstate_limits[16] = - { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int glm_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int skx_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int skx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int icx_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int icx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; @@ -5066,8 +5324,7 @@ static void dump_power_ctl(void) return; get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); - fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", - base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); + fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ if (platform->has_cst_prewake_bit) @@ -5160,8 +5417,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset) ratio = (msr >> shift) & 0xFF; group_size = (core_counts >> shift) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", - ratio, bclk, ratio * bclk, group_size); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio, bclk, ratio * bclk, group_size); } return; @@ -5259,9 +5515,7 @@ static void dump_knl_turbo_ratio_limits(void) for (i = buckets_no - 1; i >= 0; i--) if (i > 0 ? ratio[i] != ratio[i - 1] : 1) - fprintf(outf, - "%d * %.1f = %.1f MHz max turbo %d active cores\n", - ratio[i], bclk, ratio[i] * bclk, cores[i]); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio[i], bclk, ratio[i] * bclk, cores[i]); } static void dump_cst_cfg(void) @@ -5346,43 +5600,37 @@ void print_irtl(void) if (platform->supported_cstates & PC3) { get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC6) { get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC7) { get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC8) { get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC9) { get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC10) { get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } } @@ -5416,6 +5664,20 @@ void free_fd_instr_count_percpu(void) fd_instr_count_percpu = NULL; } +void free_fd_llc_percpu(void) +{ + if (!fd_llc_percpu) + return; + + for (int i = 0; i < topo.max_cpu_num + 1; ++i) { + if (fd_llc_percpu[i] != 0) + close(fd_llc_percpu[i]); + } + + free(fd_llc_percpu); + fd_llc_percpu = NULL; +} + void free_fd_cstate(void) { if (!ccstate_counter_info) @@ -5540,6 +5802,7 @@ void free_all_buffers(void) free_fd_percpu(); free_fd_instr_count_percpu(); + free_fd_llc_percpu(); free_fd_msr(); free_fd_rapl_percpu(); free_fd_cstate(); @@ -5599,6 +5862,11 @@ int get_die_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); } +int get_l3_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/cache/index3/id", cpu); +} + int get_core_id(int cpu) { return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); @@ -5787,7 +6055,6 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t, *t2; struct core_data *c, *c2; - struct pkg_data *p, *p2; t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); @@ -5799,10 +6066,7 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, c = GET_CORE(core_base, core_no, node_no, pkg_no); c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); - p = GET_PKG(pkg_base, pkg_no); - p2 = GET_PKG(pkg_base2, pkg_no); - - retval |= func(t, c, p, t2, c2, p2); + retval |= func(t, c, &pkg_base[pkg_no], t2, c2, &pkg_base2[pkg_no]); } } } @@ -5885,6 +6149,7 @@ void linux_perf_init(void); void msr_perf_init(void); void rapl_perf_init(void); void cstate_perf_init(void); +void perf_llc_init(void); void added_perf_counters_init(void); void pmt_init(void); @@ -5896,10 +6161,10 @@ void re_initialize(void) msr_perf_init(); rapl_perf_init(); cstate_perf_init(); + perf_llc_init(); added_perf_counters_init(); pmt_init(); - fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, - topo.allowed_cpus); + fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } void set_max_cpu_num(void) @@ -6260,7 +6525,7 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) timer_t timerid; /* Timer callback, update the sum of MSRs periodically. */ -static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) +static int update_msr_sum(PER_THREAD_PARAMS) { int i, ret; int cpu = t->cpu_id; @@ -6345,6 +6610,7 @@ release_timer: timer_delete(timerid); release_msr: free(per_cpu_msr_sum); + per_cpu_msr_sum = NULL; } /* @@ -6469,18 +6735,43 @@ restart: } } -void check_dev_msr() +int probe_dev_msr(void) { struct stat sb; char pathname[32]; - if (no_msr) - return; + sprintf(pathname, "/dev/msr%d", base_cpu); + return !stat(pathname, &sb); +} + +int probe_dev_cpu_msr(void) +{ + struct stat sb; + char pathname[32]; sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); - if (stat(pathname, &sb)) - if (system("/sbin/modprobe msr > /dev/null 2>&1")) - no_msr = 1; + return !stat(pathname, &sb); +} + +int probe_msr_driver(void) +{ + if (probe_dev_msr()) { + use_android_msr_path = 1; + return 1; + } + return probe_dev_cpu_msr(); +} + +void check_msr_driver(void) +{ + if (probe_msr_driver()) + return; + + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + no_msr = 1; + + if (!probe_msr_driver()) + no_msr = 1; } /* @@ -6495,8 +6786,16 @@ int check_for_cap_sys_rawio(void) int ret = 0; caps = cap_get_proc(); - if (caps == NULL) + if (caps == NULL) { + /* + * CONFIG_MULTIUSER=n kernels have no cap_get_proc() + * Allow them to continue and attempt to access MSRs + */ + if (errno == ENOSYS) + return 0; + return 1; + } if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { ret = 1; @@ -6527,7 +6826,7 @@ void check_msr_permission(void) failed += check_for_cap_sys_rawio(); /* test file permissions */ - sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", base_cpu); if (euidaccess(pathname, R_OK)) { failed++; } @@ -6656,10 +6955,10 @@ static void probe_intel_uncore_frequency_legacy(void) int k, l; char path_base[128]; - sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, - j); + sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, j); - if (access(path_base, R_OK)) + sprintf(path, "%s/current_freq_khz", path_base); + if (access(path, R_OK)) continue; BIC_PRESENT(BIC_UNCORE_MHZ); @@ -6737,8 +7036,9 @@ static void probe_intel_uncore_frequency_cluster(void) * This allows "--show/--hide UncMHz" to be effective for * the clustered MHz counters, as a group. */ - if BIC_IS_ENABLED(BIC_UNCORE_MHZ) - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); + if BIC_IS_ENABLED + (BIC_UNCORE_MHZ) + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); if (quiet) continue; @@ -6747,8 +7047,7 @@ static void probe_intel_uncore_frequency_cluster(void) k = read_sysfs_int(path); sprintf(path, "%s/max_freq_khz", path_base); l = read_sysfs_int(path); - fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, - cluster_id, k / 1000, l / 1000); + fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, cluster_id, k / 1000, l / 1000); sprintf(path, "%s/initial_min_freq_khz", path_base); k = read_sysfs_int(path); @@ -6985,7 +7284,7 @@ static void dump_sysfs_pstate_config(void) * print_epb() * Decode the ENERGY_PERF_BIAS MSR */ -int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_epb(PER_THREAD_PARAMS) { char *epb_string; int cpu, epb; @@ -7034,7 +7333,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) * print_hwp() * Decode the MSR_HWP_CAPABILITIES */ -int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_hwp(PER_THREAD_PARAMS) { unsigned long long msr; int cpu; @@ -7075,8 +7374,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) "(high %d guar %d eff %d low %d)\n", cpu, msr, (unsigned int)HWP_HIGHEST_PERF(msr), - (unsigned int)HWP_GUARANTEED_PERF(msr), - (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); + (unsigned int)HWP_GUARANTEED_PERF(msr), (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) return 0; @@ -7087,8 +7385,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) (unsigned int)(((msr) >> 0) & 0xff), (unsigned int)(((msr) >> 8) & 0xff), (unsigned int)(((msr) >> 16) & 0xff), - (unsigned int)(((msr) >> 24) & 0xff), - (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); + (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); if (has_hwp_pkg) { if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) @@ -7099,23 +7396,20 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu, msr, (unsigned int)(((msr) >> 0) & 0xff), (unsigned int)(((msr) >> 8) & 0xff), - (unsigned int)(((msr) >> 16) & 0xff), - (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); + (unsigned int)(((msr) >> 16) & 0xff), (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); } if (has_hwp_notify) { if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) return 0; fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " - "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", - cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); + "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); } if (get_msr(cpu, MSR_HWP_STATUS, &msr)) return 0; fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " - "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", - cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); return 0; } @@ -7123,7 +7417,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) /* * print_perf_limit() */ -int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_perf_limit(PER_THREAD_PARAMS) { unsigned long long msr; int cpu; @@ -7160,8 +7454,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 5) ? "Auto-HWP, " : "", (msr & 1 << 4) ? "Graphics, " : "", - (msr & 1 << 2) ? "bit2, " : "", - (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); + (msr & 1 << 2) ? "bit2, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", (msr & 1 << 31) ? "bit31, " : "", (msr & 1 << 30) ? "bit30, " : "", @@ -7174,8 +7467,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 22) ? "VR-Therm, " : "", (msr & 1 << 21) ? "Auto-HWP, " : "", (msr & 1 << 20) ? "Graphics, " : "", - (msr & 1 << 18) ? "bit18, " : "", - (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); + (msr & 1 << 18) ? "bit18, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); } if (platform->plr_msrs & PLR_GFX) { @@ -7187,16 +7479,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 4) ? "Graphics, " : "", (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 8) ? "Amps, " : "", - (msr & 1 << 9) ? "GFXPwr, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); + (msr & 1 << 9) ? "GFXPwr, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 20) ? "Graphics, " : "", (msr & 1 << 22) ? "VR-Therm, " : "", (msr & 1 << 24) ? "Amps, " : "", - (msr & 1 << 25) ? "GFXPwr, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); + (msr & 1 << 25) ? "GFXPwr, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); } if (platform->plr_msrs & PLR_RING) { get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); @@ -7205,14 +7495,12 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 0) ? "PROCHOT, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 6) ? "VR-Therm, " : "", - (msr & 1 << 8) ? "Amps, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); + (msr & 1 << 8) ? "Amps, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 22) ? "VR-Therm, " : "", - (msr & 1 << 24) ? "Amps, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); + (msr & 1 << 24) ? "Amps, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); } return 0; } @@ -7232,7 +7520,7 @@ double get_tdp_intel(void) { unsigned long long msr; - if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) + if (valid_rapl_msrs & RAPL_PKG_POWER_INFO) if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; return get_quirk_tdp(); @@ -7248,18 +7536,28 @@ void rapl_probe_intel(void) unsigned long long msr; unsigned int time_unit; double tdp; - const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; - const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; - if (rapl_joules) - bic_enabled &= ~bic_watt_bits; - else - bic_enabled &= ~bic_joules_bits; + if (rapl_joules) { + CLR_BIC(BIC_SysWatt, &bic_enabled); + CLR_BIC(BIC_PkgWatt, &bic_enabled); + CLR_BIC(BIC_CorWatt, &bic_enabled); + CLR_BIC(BIC_RAMWatt, &bic_enabled); + CLR_BIC(BIC_GFXWatt, &bic_enabled); + } else { + CLR_BIC(BIC_Sys_J, &bic_enabled); + CLR_BIC(BIC_Pkg_J, &bic_enabled); + CLR_BIC(BIC_Cor_J, &bic_enabled); + CLR_BIC(BIC_RAM_J, &bic_enabled); + CLR_BIC(BIC_GFX_J, &bic_enabled); + } - if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) - bic_enabled &= ~BIC_PKG__; - if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) - bic_enabled &= ~BIC_RAM__; + if (!valid_rapl_msrs || no_msr) + return; + + if (!(valid_rapl_msrs & RAPL_PKG_PERF_STATUS)) + CLR_BIC(BIC_PKG__, &bic_enabled); + if (!(valid_rapl_msrs & RAPL_DRAM_PERF_STATUS)) + CLR_BIC(BIC_RAM__, &bic_enabled); /* units on package 0, verify later other packages match */ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) @@ -7298,13 +7596,17 @@ void rapl_probe_amd(void) { unsigned long long msr; double tdp; - const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt; - const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J; - if (rapl_joules) - bic_enabled &= ~bic_watt_bits; - else - bic_enabled &= ~bic_joules_bits; + if (rapl_joules) { + CLR_BIC(BIC_SysWatt, &bic_enabled); + CLR_BIC(BIC_CorWatt, &bic_enabled); + } else { + CLR_BIC(BIC_Pkg_J, &bic_enabled); + CLR_BIC(BIC_Cor_J, &bic_enabled); + } + + if (!valid_rapl_msrs || no_msr) + return; if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) return; @@ -7326,13 +7628,164 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label) cpu, label, ((msr >> 15) & 1) ? "EN" : "DIS", ((msr >> 0) & 0x7FFF) * rapl_power_units, - (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, - (((msr >> 16) & 1) ? "EN" : "DIS")); + (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, (((msr >> 16) & 1) ? "EN" : "DIS")); return; } -int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) +static int fread_int(char *path, int *val) +{ + FILE *filep; + int ret; + + filep = fopen(path, "r"); + if (!filep) + return -1; + + ret = fscanf(filep, "%d", val); + fclose(filep); + return ret; +} + +static int fread_ull(char *path, unsigned long long *val) +{ + FILE *filep; + int ret; + + filep = fopen(path, "r"); + if (!filep) + return -1; + + ret = fscanf(filep, "%llu", val); + fclose(filep); + return ret; +} + +static int fread_str(char *path, char *buf, int size) +{ + FILE *filep; + int ret; + char *cp; + + filep = fopen(path, "r"); + if (!filep) + return -1; + + ret = fread(buf, 1, size, filep); + fclose(filep); + + /* replace '\n' with '\0' */ + cp = strchr(buf, '\n'); + if (cp != NULL) + *cp = '\0'; + + return ret; +} + +#define PATH_RAPL_SYSFS "/sys/class/powercap" + +static int dump_one_domain(char *domain_path) +{ + char path[PATH_MAX]; + char str[PATH_MAX]; + unsigned long long val; + int constraint; + int enable; + int ret; + + snprintf(path, PATH_MAX, "%s/name", domain_path); + ret = fread_str(path, str, PATH_MAX); + if (ret <= 0) + return -1; + + fprintf(outf, "%s: %s", domain_path + strlen(PATH_RAPL_SYSFS) + 1, str); + + snprintf(path, PATH_MAX, "%s/enabled", domain_path); + ret = fread_int(path, &enable); + if (ret <= 0) + return -1; + + if (!enable) { + fputs(" disabled\n", outf); + return 0; + } + + for (constraint = 0;; constraint++) { + snprintf(path, PATH_MAX, "%s/constraint_%d_time_window_us", domain_path, constraint); + ret = fread_ull(path, &val); + if (ret <= 0) + break; + + if (val > 1000000) + fprintf(outf, " %0.1fs", (double)val / 1000000); + else if (val > 1000) + fprintf(outf, " %0.1fms", (double)val / 1000); + else + fprintf(outf, " %0.1fus", (double)val); + + snprintf(path, PATH_MAX, "%s/constraint_%d_power_limit_uw", domain_path, constraint); + ret = fread_ull(path, &val); + if (ret > 0 && val) + fprintf(outf, ":%lluW", val / 1000000); + + snprintf(path, PATH_MAX, "%s/constraint_%d_max_power_uw", domain_path, constraint); + ret = fread_ull(path, &val); + if (ret > 0 && val) + fprintf(outf, ",max:%lluW", val / 1000000); + } + fputc('\n', outf); + + return 0; +} + +static int print_rapl_sysfs(void) +{ + DIR *dir, *cdir; + struct dirent *entry, *centry; + char path[PATH_MAX]; + char str[PATH_MAX]; + + if ((dir = opendir(PATH_RAPL_SYSFS)) == NULL) { + warn("open %s failed", PATH_RAPL_SYSFS); + return 1; + } + + while ((entry = readdir(dir)) != NULL) { + if (strlen(entry->d_name) > 100) + continue; + + if (strncmp(entry->d_name, "intel-rapl", strlen("intel-rapl"))) + continue; + + snprintf(path, PATH_MAX, "%s/%s/name", PATH_RAPL_SYSFS, entry->d_name); + + /* Parse top level domains first, including package and psys */ + fread_str(path, str, PATH_MAX); + if (strncmp(str, "package", strlen("package")) && strncmp(str, "psys", strlen("psys"))) + continue; + + snprintf(path, PATH_MAX, "%s/%s", PATH_RAPL_SYSFS, entry->d_name); + if ((cdir = opendir(path)) == NULL) { + perror("opendir() error"); + return 1; + } + + dump_one_domain(path); + + while ((centry = readdir(cdir)) != NULL) { + if (strncmp(centry->d_name, "intel-rapl", strlen("intel-rapl"))) + continue; + snprintf(path, PATH_MAX, "%s/%s/%s", PATH_RAPL_SYSFS, entry->d_name, centry->d_name); + dump_one_domain(path); + } + closedir(cdir); + } + + closedir(dir); + return 0; +} + +int print_rapl(PER_THREAD_PARAMS) { unsigned long long msr; const char *msr_name; @@ -7341,7 +7794,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) UNUSED(c); UNUSED(p); - if (!platform->rapl_msrs) + if (!valid_rapl_msrs) return 0; /* RAPL counters are per package, so print only for 1st thread/package */ @@ -7354,7 +7807,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; } - if (platform->rapl_msrs & RAPL_AMD_F17H) { + if (valid_rapl_msrs & RAPL_AMD_F17H) { msr_name = "MSR_RAPL_PWR_UNIT"; if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) return -1; @@ -7367,7 +7820,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units); - if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { + if (valid_rapl_msrs & RAPL_PKG_POWER_INFO) { if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) return -5; @@ -7376,25 +7829,22 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (platform->rapl_msrs & RAPL_PKG) { + if (valid_rapl_msrs & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 63) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 63) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", cpu, ((msr >> 47) & 1) ? "EN" : "DIS", ((msr >> 32) & 0x7FFF) * rapl_power_units, - (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, - ((msr >> 48) & 1) ? "EN" : "DIS"); + (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, ((msr >> 48) & 1) ? "EN" : "DIS"); if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) return -9; @@ -7404,7 +7854,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); } - if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { + if (valid_rapl_msrs & RAPL_DRAM_POWER_INFO) { if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; @@ -7412,31 +7862,28 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (platform->rapl_msrs & RAPL_DRAM) { + if (valid_rapl_msrs & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "DRAM Limit"); } - if (platform->rapl_msrs & RAPL_CORE_POLICY) { + if (valid_rapl_msrs & RAPL_CORE_POLICY) { if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } - if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { + if (valid_rapl_msrs & RAPL_CORE_POWER_LIMIT) { if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "Cores Limit"); } - if (platform->rapl_msrs & RAPL_GFX) { + if (valid_rapl_msrs & RAPL_GFX) { if (get_msr(cpu, MSR_PP1_POLICY, &msr)) return -8; @@ -7444,22 +7891,57 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "GFX Limit"); } return 0; } /* + * probe_rapl_msrs + * + * initialize global valid_rapl_msrs to platform->plat_rapl_msrs + * only if PKG_ENERGY counter is enumerated and reads non-zero + */ +void probe_rapl_msrs(void) +{ + int ret; + off_t offset; + unsigned long long msr_value; + + if (no_msr) + return; + + if ((platform->plat_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H)) == 0) + return; + + offset = idx_to_offset(IDX_PKG_ENERGY); + if (offset < 0) + return; + + ret = get_msr(base_cpu, offset, &msr_value); + if (ret) { + if (debug) + fprintf(outf, "Can not read RAPL_PKG_ENERGY MSR(0x%llx)\n", (unsigned long long)offset); + return; + } + if (msr_value == 0) { + if (debug) + fprintf(outf, "RAPL_PKG_ENERGY MSR(0x%llx) == ZERO: disabling all RAPL MSRs\n", (unsigned long long)offset); + return; + } + + valid_rapl_msrs = platform->plat_rapl_msrs; /* success */ +} + +/* * probe_rapl() * * sets rapl_power_units, rapl_energy_units, rapl_time_units */ void probe_rapl(void) { - if (!platform->rapl_msrs || no_msr) - return; + probe_rapl_msrs(); if (genuine_intel) rapl_probe_intel(); @@ -7469,6 +7951,11 @@ void probe_rapl(void) if (quiet) return; + print_rapl_sysfs(); + + if (!valid_rapl_msrs || no_msr) + return; + for_all_cpus(print_rapl, ODD_COUNTERS); } @@ -7484,7 +7971,7 @@ void probe_rapl(void) * below this value, including the Digital Thermal Sensor (DTS), * Package Thermal Management Sensor (PTM), and thermal event thresholds. */ -int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int set_temperature_target(PER_THREAD_PARAMS) { unsigned long long msr; unsigned int tcc_default, tcc_offset; @@ -7552,7 +8039,7 @@ guess: return 0; } -int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_thermal(PER_THREAD_PARAMS) { unsigned long long msr; unsigned int dts, dts2; @@ -7570,7 +8057,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p cpu = t->cpu_id; /* DTS is per-core, no need to print for each thread */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) return 0; if (cpu_migrate(cpu)) { @@ -7578,7 +8065,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return -1; } - if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { + if (do_ptm && is_cpu_first_core_in_package(t, p)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return 0; @@ -7590,8 +8077,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tj_max - dts, tj_max - dts2); + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2); } if (do_dts && debug) { @@ -7602,16 +8088,14 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; resolution = (msr >> 27) & 0xF; - fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", - cpu, msr, tj_max - dts, resolution); + fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", cpu, msr, tj_max - dts, resolution); if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) return 0; dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tj_max - dts, tj_max - dts2); + fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2); } return 0; @@ -7632,7 +8116,7 @@ void probe_thermal(void) for_all_cpus(print_thermal, ODD_COUNTERS); } -int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int get_cpu_type(PER_THREAD_PARAMS) { unsigned int eax, ebx, ecx, edx; @@ -7685,8 +8169,7 @@ void decode_misc_enable_msr(void) msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", - msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", - msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); + msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } void decode_misc_feature_control(void) @@ -7725,8 +8208,7 @@ void decode_misc_pwr_mgmt_msr(void) if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", - base_cpu, msr, - msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); + base_cpu, msr, msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); } /* @@ -7779,66 +8261,60 @@ void print_dev_latency(void) close(fd); } -static int has_instr_count_access(void) +static int has_perf_instr_count_access(void) { int fd; - int has_access; if (no_perf) return 0; fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); - has_access = fd != -1; - if (fd != -1) close(fd); - if (!has_access) + if (fd == -1) warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "instructions retired perf counter", "--no-perf"); + "\tRun as root to enable them or use %s to disable the access explicitly", "perf instructions retired counter", + "'--hide IPC' or '--no-perf'"); - return has_access; + return (fd != -1); } -int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, - double *scale_, enum rapl_unit *unit_) +int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, double *scale_, enum rapl_unit *unit_) { + int ret = -1; + if (no_perf) return -1; + if (!cai->perf_name) + return -1; + const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); if (scale == 0.0) - return -1; + goto end; const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name); if (unit == RAPL_UNIT_INVALID) - return -1; + goto end; const unsigned int rapl_type = read_perf_type(cai->perf_subsys); const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); - const int fd_counter = - open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); - if (fd_counter == -1) - return -1; + ret = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); + if (ret == -1) + goto end; /* If it's the first counter opened, make it a group descriptor */ if (rci->fd_perf == -1) - rci->fd_perf = fd_counter; + rci->fd_perf = ret; *scale_ = scale; *unit_ = unit; - return fd_counter; -} - -int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, - double *scale, enum rapl_unit *unit) -{ - int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); +end: if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); @@ -7854,16 +8330,21 @@ void linux_perf_init(void) if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) return; - if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { + if (BIC_IS_ENABLED(BIC_IPC) && cpuid_has_aperf_mperf) { fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); if (fd_instr_count_percpu == NULL) err(-1, "calloc fd_instr_count_percpu"); } + if (BIC_IS_ENABLED(BIC_LLC_RPS)) { + fd_llc_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (fd_llc_percpu == NULL) + err(-1, "calloc fd_llc_percpu"); + } } void rapl_perf_init(void) { - const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1; + const unsigned int num_domains = get_rapl_num_domains(); bool *domain_visited = calloc(num_domains, sizeof(bool)); rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); @@ -7896,6 +8377,9 @@ void rapl_perf_init(void) enum rapl_unit unit; unsigned int next_domain; + if (!BIC_IS_ENABLED(cai->bic_number)) + continue; + memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { @@ -7904,8 +8388,7 @@ void rapl_perf_init(void) continue; /* Skip already seen and handled RAPL domains */ - next_domain = - platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; + next_domain = get_rapl_domain_id(cpu); assert(next_domain < num_domains); @@ -7919,27 +8402,37 @@ void rapl_perf_init(void) struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; - /* Check if the counter is enabled and accessible */ - if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) { + /* + * rapl_counter_arch_infos[] can have multiple entries describing the same + * counter, due to the difference from different platforms/Vendors. + * E.g. rapl_counter_arch_infos[0] and rapl_counter_arch_infos[1] share the + * same perf_subsys and perf_name, but with different MSR address. + * rapl_counter_arch_infos[0] is for Intel and rapl_counter_arch_infos[1] + * is for AMD. + * In this case, it is possible that multiple rapl_counter_arch_infos[] + * entries are probed just because their perf/msr is duplicate and valid. + * + * Thus need a check to avoid re-probe the same counters. + */ + if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) + break; - /* Use perf API for this counter */ - if (!no_perf && cai->perf_name - && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { - rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; - rci->scale[cai->rci_index] = scale * cai->compat_scale; - rci->unit[cai->rci_index] = unit; - rci->flags[cai->rci_index] = cai->flags; - - /* Use MSR for this counter */ - } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { - rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; - rci->msr[cai->rci_index] = cai->msr; - rci->msr_mask[cai->rci_index] = cai->msr_mask; - rci->msr_shift[cai->rci_index] = cai->msr_shift; - rci->unit[cai->rci_index] = RAPL_UNIT_JOULES; - rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale; - rci->flags[cai->rci_index] = cai->flags; - } + /* Use perf API for this counter */ + if (add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { + rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; + rci->scale[cai->rci_index] = scale * cai->compat_scale; + rci->unit[cai->rci_index] = unit; + rci->flags[cai->rci_index] = cai->flags; + + /* Use MSR for this counter */ + } else if (add_rapl_msr_counter(cpu, cai) >= 0) { + rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; + rci->msr[cai->rci_index] = cai->msr; + rci->msr_mask[cai->rci_index] = cai->msr_mask; + rci->msr_shift[cai->rci_index] = cai->msr_shift; + rci->unit[cai->rci_index] = RAPL_UNIT_JOULES; + rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale; + rci->flags[cai->rci_index] = cai->flags; } if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) @@ -7948,7 +8441,7 @@ void rapl_perf_init(void) /* If any CPU has access to the counter, make it present */ if (has_counter) - BIC_PRESENT(cai->bic); + BIC_PRESENT(cai->bic_number); } free(domain_visited); @@ -7957,7 +8450,7 @@ void rapl_perf_init(void) /* Assumes msr_counter_info is populated */ static int has_amperf_access(void) { - return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && + return cpuid_has_aperf_mperf && msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; } @@ -7972,65 +8465,63 @@ int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *gro return NULL; } -int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) +int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) { + int ret = -1; + if (no_perf) return -1; + if (!cai->perf_name) + return -1; + int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys); if (pfd_group == NULL) - return -1; + goto end; const unsigned int type = read_perf_type(cai->perf_subsys); const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); - const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); + ret = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); - if (fd_counter == -1) - return -1; + if (ret == -1) + goto end; /* If it's the first counter opened, make it a group descriptor */ if (*pfd_group == -1) - *pfd_group = fd_counter; - - return fd_counter; -} - -int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) -{ - int ret = add_cstate_perf_counter_(cpu, cci, cai); + *pfd_group = ret; +end: if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); return ret; } -int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) +int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) { + int ret = -1; + if (no_perf) return -1; + if (!cai->perf_name) + return -1; + const unsigned int type = read_perf_type(cai->perf_subsys); const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); - const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); + ret = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); - if (fd_counter == -1) - return -1; + if (ret == -1) + goto end; /* If it's the first counter opened, make it a group descriptor */ if (cci->fd_perf == -1) - cci->fd_perf = fd_counter; - - return fd_counter; -} - -int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) -{ - int ret = add_msr_perf_counter_(cpu, cci, cai); + cci->fd_perf = ret; +end: if (debug) fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); @@ -8064,12 +8555,12 @@ void msr_perf_init_(void) if (cai->needed) { /* Use perf API for this counter */ - if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) { + if (add_msr_perf_counter(cpu, cci, cai) != -1) { cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; cai->present = true; /* User MSR for this counter */ - } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { + } else if (add_msr_counter(cpu, cai->msr) >= 0) { cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; cci->msr_mask[cai->rci_index] = cai->msr_mask; @@ -8171,19 +8662,18 @@ void cstate_perf_init_(bool soft_c1) if (!per_core && pkg_visited[pkg_id]) continue; - const bool counter_needed = BIC_IS_ENABLED(cai->bic) || + const bool counter_needed = BIC_IS_ENABLED(cai->bic_number) || (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY)); const bool counter_supported = (platform->supported_cstates & cai->feature_mask); if (counter_needed && counter_supported) { /* Use perf API for this counter */ - if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) { + if (add_cstate_perf_counter(cpu, cci, cai) != -1) { cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; /* User MSR for this counter */ - } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit - && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { + } else if (pkg_cstate_limit >= cai->pkg_cstate_limit && add_msr_counter(cpu, cai->msr) >= 0) { cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; } @@ -8198,7 +8688,7 @@ void cstate_perf_init_(bool soft_c1) /* If any CPU has access to the counter, make it present */ if (has_counter) - BIC_PRESENT(cai->bic); + BIC_PRESENT(cai->bic_number); } free(cores_visited); @@ -8301,8 +8791,7 @@ void process_cpuid() hygon_genuine = 1; if (!quiet) - fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", - (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); + fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); __cpuid(1, fms, ebx, ecx, edx); family = (fms >> 8) & 0xf; @@ -8331,8 +8820,7 @@ void process_cpuid() __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); if (!quiet) { - fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", - family, model, stepping, family, model, stepping); + fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", family, model, stepping, family, model, stepping); if (ucode_patch_valid) fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); fputc('\n', outf); @@ -8346,8 +8834,7 @@ void process_cpuid() ecx_flags & (1 << 8) ? "TM2" : "-", edx_flags & (1 << 4) ? "TSC" : "-", edx_flags & (1 << 5) ? "MSR" : "-", - edx_flags & (1 << 22) ? "ACPI-TM" : "-", - edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); + edx_flags & (1 << 22) ? "ACPI-TM" : "-", edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); } probe_platform_features(family, model); @@ -8371,7 +8858,7 @@ void process_cpuid() */ __cpuid(0x6, eax, ebx, ecx, edx); - has_aperf = ecx & (1 << 0); + cpuid_has_aperf_mperf = ecx & (1 << 0); do_dts = eax & (1 << 0); if (do_dts) BIC_PRESENT(BIC_CoreTmp); @@ -8389,14 +8876,13 @@ void process_cpuid() if (!quiet) fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", - has_aperf ? "" : "No-", + cpuid_has_aperf_mperf ? "" : "No-", has_turbo ? "" : "No-", do_dts ? "" : "No-", do_ptm ? "" : "No-", has_hwp ? "" : "No-", has_hwp_notify ? "" : "No-", - has_hwp_activity_window ? "" : "No-", - has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); + has_hwp_activity_window ? "" : "No-", has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); if (!quiet) decode_misc_enable_msr(); @@ -8430,8 +8916,7 @@ void process_cpuid() if (ebx_tsc != 0) { if (!quiet && (ebx != 0)) - fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", - eax_crystal, ebx_tsc, crystal_hz); + fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); if (crystal_hz == 0) crystal_hz = platform->crystal_freq; @@ -8463,11 +8948,10 @@ void process_cpuid() tsc_tweak = base_hz / tsc_hz; if (!quiet) - fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", - base_mhz, max_mhz, bus_mhz); + fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz); } - if (has_aperf) + if (cpuid_has_aperf_mperf) aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; BIC_PRESENT(BIC_IRQ); @@ -8519,6 +9003,62 @@ void probe_pm_features(void) decode_misc_feature_control(); } +/* perf_llc_probe + * + * return 1 on success, else 0 + */ +int has_perf_llc_access(void) +{ + int fd; + + if (no_perf) + return 0; + + fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP); + if (fd != -1) + close(fd); + + if (fd == -1) + warnx("Failed to access %s. Some of the counters may not be available\n" + "\tRun as root to enable them or use %s to disable the access explicitly", "perf LLC counters", "'--hide LLC' or '--no-perf'"); + + return (fd != -1); +} + +void perf_llc_init(void) +{ + int cpu; + int retval; + + if (no_perf) + return; + if (!(BIC_IS_ENABLED(BIC_LLC_RPS) && BIC_IS_ENABLED(BIC_LLC_HIT))) + return; + + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { + + if (cpu_is_not_allowed(cpu)) + continue; + + assert(fd_llc_percpu != 0); + fd_llc_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP); + if (fd_llc_percpu[cpu] == -1) { + warnx("%s: perf REFS: failed to open counter on cpu%d", __func__, cpu); + free_fd_llc_percpu(); + return; + } + assert(fd_llc_percpu != 0); + retval = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, fd_llc_percpu[cpu], PERF_FORMAT_GROUP); + if (retval == -1) { + warnx("%s: perf MISS: failed to open counter on cpu%d", __func__, cpu); + free_fd_llc_percpu(); + return; + } + } + BIC_PRESENT(BIC_LLC_RPS); + BIC_PRESENT(BIC_LLC_HIT); +} + /* * in /dev/cpu/ return success for names that are numbers * ie. filter out ".", "..", "microcode". @@ -8694,6 +9234,11 @@ void topology_probe(bool startup) if (cpus[i].die_id > topo.max_die_id) topo.max_die_id = cpus[i].die_id; + /* get l3 information */ + cpus[i].l3_id = get_l3_id(i); + if (cpus[i].l3_id > topo.max_l3_id) + topo.max_l3_id = cpus[i].l3_id; + /* get numa node information */ cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); if (cpus[i].physical_node_id > topo.max_node_num) @@ -8726,6 +9271,9 @@ void topology_probe(bool startup) if (!summary_only && topo.num_die > 1) BIC_PRESENT(BIC_Die); + if (!summary_only && topo.max_l3_id > 0) + BIC_PRESENT(BIC_L3); + topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); @@ -8749,8 +9297,8 @@ void topology_probe(bool startup) if (cpu_is_not_present(i)) continue; fprintf(outf, - "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", - i, cpus[i].physical_package_id, cpus[i].die_id, + "cpu %d pkg %d die %d l3 %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, cpus[i].die_id, cpus[i].l3_id, cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); } @@ -8805,7 +9353,6 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, int thread_id = cpus[cpu_id].thread_id; struct thread_data *t; struct core_data *c; - struct pkg_data *p; /* Workaround for systems where physical_node_id==-1 * and logical_node_id==(-1 - topo.num_cpus) @@ -8815,18 +9362,18 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); c = GET_CORE(core_base, core_id, node_id, pkg_id); - p = GET_PKG(pkg_base, pkg_id); t->cpu_id = cpu_id; if (!cpu_is_not_allowed(cpu_id)) { + if (c->base_cpu < 0) c->base_cpu = t->cpu_id; - if (p->base_cpu < 0) - p->base_cpu = t->cpu_id; + if (pkg_base[pkg_id].base_cpu < 0) + pkg_base[pkg_id].base_cpu = t->cpu_id; } c->core_id = core_id; - p->package_id = pkg_id; + pkg_base[pkg_id].package_id = pkg_id; } int initialize_counters(int cpu_id) @@ -8866,7 +9413,7 @@ void allocate_irq_buffers(void) err(-1, "calloc %d NMI", topo.max_cpu_num + 1); } -int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int update_topo(PER_THREAD_PARAMS) { topo.allowed_cpus++; if ((int)t->cpu_id == c->base_cpu) @@ -8924,7 +9471,7 @@ bool has_added_counters(void) void check_msr_access(void) { - check_dev_msr(); + check_msr_driver(); check_msr_permission(); if (no_msr) @@ -8933,8 +9480,16 @@ void check_msr_access(void) void check_perf_access(void) { - if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) - bic_enabled &= ~BIC_IPC; + if (BIC_IS_ENABLED(BIC_IPC)) + if (!has_perf_instr_count_access()) + no_perf = 1; + + if (BIC_IS_ENABLED(BIC_LLC_RPS) || BIC_IS_ENABLED(BIC_LLC_HIT)) + if (!has_perf_llc_access()) + no_perf = 1; + + if (no_perf) + bic_disable_perf_access(); } bool perf_has_hybrid_devices(void) @@ -9044,22 +9599,20 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) perf_device = "cpu_atom"; break; - default: /* Don't change, we will probably fail and report a problem soon. */ + default: /* Don't change, we will probably fail and report a problem soon. */ break; } } perf_type = read_perf_type(perf_device); if (perf_type == (unsigned int)-1) { - warnx("%s: perf/%s/%s: failed to read %s", - __func__, perf_device, pinfo->event, "type"); + warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "type"); continue; } perf_config = read_perf_config(perf_device, pinfo->event); if (perf_config == (unsigned int)-1) { - warnx("%s: perf/%s/%s: failed to read %s", - __func__, perf_device, pinfo->event, "config"); + warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "config"); continue; } @@ -9070,8 +9623,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); if (fd_perf == -1) { - warnx("%s: perf/%s/%s: failed to open counter on cpu%d", - __func__, perf_device, pinfo->event, cpu); + warnx("%s: perf/%s/%s: failed to open counter on cpu%d", __func__, perf_device, pinfo->event, cpu); continue; } @@ -9080,8 +9632,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) pinfo->scale = perf_scale; if (debug) - fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", - perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); } pinfo = pinfo->next; @@ -9154,7 +9705,7 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) return NULL; } - for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) { + for (; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) { if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1) break; @@ -9395,8 +9946,7 @@ int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum } if (conflict) { - fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", - __func__, name); + fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", __func__, name); exit(1); } @@ -9439,8 +9989,7 @@ void pmt_init(void) * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK. */ pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK, - PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, - FORMAT_DELTA, cpu_num, PMT_OPEN_TRY); + PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, FORMAT_DELTA, cpu_num, PMT_OPEN_TRY); /* * Rather complex logic for each time we go to the next loop iteration, @@ -9490,6 +10039,7 @@ void turbostat_init() linux_perf_init(); rapl_perf_init(); cstate_perf_init(); + perf_llc_init(); added_perf_counters_init(); pmt_init(); @@ -9504,8 +10054,8 @@ void turbostat_init() * disable more BICs, since it can't be reported accurately. */ if (platform->enable_tsc_tweak && !has_base_hz) { - bic_enabled &= ~BIC_Busy; - bic_enabled &= ~BIC_Bzy_MHz; + CLR_BIC(BIC_Busy, &bic_enabled); + CLR_BIC(BIC_Bzy_MHz, &bic_enabled); } } @@ -9563,6 +10113,7 @@ int fork_it(char **argv) timersub(&tv_odd, &tv_even, &tv_delta); if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) fprintf(outf, "%s: Counter reset detected\n", progname); + delta_platform(&platform_counters_odd, &platform_counters_even); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); @@ -9594,7 +10145,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.04.06 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2025.12.02 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -9634,8 +10185,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) } int add_counter(unsigned int msr_num, char *path, char *name, - unsigned int width, enum counter_scope scope, - enum counter_type type, enum counter_format format, int flags, int id) + unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int id) { struct msr_counter *msrp; @@ -9744,9 +10294,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, struct perf_counter_info *make_perf_counter_info(const char *perf_device, const char *perf_event, const char *name, - unsigned int width, - enum counter_scope scope, - enum counter_type type, enum counter_format format) + unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format) { struct perf_counter_info *pinfo; @@ -9821,8 +10369,7 @@ int add_perf_counter(const char *perf_device, const char *perf_event, const char // FIXME: we might not have debug here yet if (debug) - fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", - __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); + fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); return 0; } @@ -9891,6 +10438,10 @@ void parse_add_command_msr(char *add_command) format = FORMAT_RAW; goto next; } + if (!strncmp(add_command, "average", strlen("average"))) { + format = FORMAT_AVERAGE; + goto next; + } if (!strncmp(add_command, "delta", strlen("delta"))) { format = FORMAT_DELTA; goto next; @@ -9987,8 +10538,7 @@ int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigne pmt_diriter_init(&pmt_iter); - for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; - dirname = pmt_diriter_next(&pmt_iter)) { + for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; dirname = pmt_diriter_next(&pmt_iter)) { fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY); if (fd_telem_dir == -1) @@ -10000,8 +10550,7 @@ int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigne } if (fstat(fd_telem_dir, &stat) == -1) { - fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, - dirname->d_name, strerror(errno)); + fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, dirname->d_name, strerror(errno)); continue; } @@ -10050,7 +10599,7 @@ void parse_add_command_pmt(char *add_command) unsigned int lsb; unsigned int msb; unsigned int guid; - unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */ + unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */ unsigned int domain_id; enum counter_scope scope = 0; enum pmt_datatype type = PMT_TYPE_RAW; @@ -10097,8 +10646,7 @@ void parse_add_command_pmt(char *add_command) } if (!has_scope) { - printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", - __func__); + printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", __func__); exit(1); } @@ -10163,13 +10711,18 @@ next: has_format = true; } + if (strcmp("average", format_name) == 0) { + format = FORMAT_AVERAGE; + has_format = true; + } + if (strcmp("delta", format_name) == 0) { format = FORMAT_DELTA; has_format = true; } if (!has_format) { - fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name); + fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n", __func__, format_name); exit(1); } } @@ -10259,8 +10812,10 @@ int is_deferred_add(char *name) int i; for (i = 0; i < deferred_add_index; ++i) - if (!strcmp(name, deferred_add_names[i])) + if (!strcmp(name, deferred_add_names[i])) { + deferred_add_consumed |= (1 << i); return 1; + } return 0; } @@ -10269,11 +10824,34 @@ int is_deferred_skip(char *name) int i; for (i = 0; i < deferred_skip_index; ++i) - if (!strcmp(name, deferred_skip_names[i])) + if (!strcmp(name, deferred_skip_names[i])) { + deferred_skip_consumed |= (1 << i); return 1; + } return 0; } +void verify_deferred_consumed(void) +{ + int i; + int fail = 0; + + for (i = 0; i < deferred_add_index; ++i) { + if (!(deferred_add_consumed & (1 << i))) { + warnx("Counter '%s' can not be added.", deferred_add_names[i]); + fail++; + } + } + for (i = 0; i < deferred_skip_index; ++i) { + if (!(deferred_skip_consumed & (1 << i))) { + warnx("Counter '%s' can not be skipped.", deferred_skip_names[i]); + fail++; + } + } + if (fail) + exit(-EINVAL); +} + void probe_cpuidle_residency(void) { char path[64]; @@ -10283,9 +10861,6 @@ void probe_cpuidle_residency(void) int min_state = 1024, max_state = 0; char *sp; - if (!DO_BIC(BIC_pct_idle)) - return; - for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10314,7 +10889,7 @@ void probe_cpuidle_residency(void) if (is_deferred_skip(name_buf)) continue; - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); + add_counter(0, path, name_buf, 32, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); if (state > max_state) max_state = state; @@ -10498,22 +11073,29 @@ void cmdline(int argc, char **argv) no_perf = 1; break; case 'e': - /* --enable specified counter */ - bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); + /* --enable specified counter, without clearning existing list */ + bic_lookup(&bic_enabled, optarg, SHOW_LIST); break; case 'f': force_load++; break; case 'd': debug++; - ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); + bic_set_all(&bic_enabled); break; case 'H': /* * --hide: do not show those specified * multiple invocations simply clear more bits in enabled mask */ - bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); + { + cpu_set_t bic_group_hide; + + BIC_INIT(&bic_group_hide); + + bic_lookup(&bic_group_hide, optarg, HIDE_LIST); + bic_clear_bits(&bic_enabled, &bic_group_hide); + } break; case 'h': default: @@ -10537,7 +11119,7 @@ void cmdline(int argc, char **argv) rapl_joules++; break; case 'l': - ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); + bic_set_all(&bic_enabled); list_header_only++; quiet++; break; @@ -10574,9 +11156,8 @@ void cmdline(int argc, char **argv) * subsequent invocations can add to it. */ if (shown == 0) - bic_enabled = bic_lookup(optarg, SHOW_LIST); - else - bic_enabled |= bic_lookup(optarg, SHOW_LIST); + BIC_INIT(&bic_enabled); + bic_lookup(&bic_enabled, optarg, SHOW_LIST); shown = 1; break; case 'S': @@ -10613,6 +11194,8 @@ int main(int argc, char **argv) { int fd, ret; + bic_groups_init(); + fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); if (fd < 0) goto skip_cgroup_setting; @@ -10635,6 +11218,8 @@ skip_cgroup_setting: probe_cpuidle_residency(); probe_cpuidle_counts(); + verify_deferred_consumed(); + if (!getuid()) set_rlimit(); diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index 666b325a62a2..d18284667400 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -1,8 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 CC = $(CROSS_COMPILE)gcc -BUILD_OUTPUT := $(CURDIR) +BUILD_OUTPUT := $(CURDIR) PREFIX := /usr DESTDIR := +DAY := $(shell date +%Y.%m.%d) +SNAPSHOT = x86_energy_perf_policy-$(DAY) + + ifeq ("$(origin O)", "command line") BUILD_OUTPUT := $(O) @@ -27,3 +31,26 @@ install : x86_energy_perf_policy install -d $(DESTDIR)$(PREFIX)/share/man/man8 install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 +snapshot: x86_energy_perf_policy + @rm -rf $(SNAPSHOT) + @mkdir $(SNAPSHOT) + @cp x86_energy_perf_policy Makefile x86_energy_perf_policy.c x86_energy_perf_policy.8 $(SNAPSHOT) + + @sed -e 's/^#include <linux\/bits.h>/#include "bits.h"/' -e 's/u64/unsigned long long/' ../../../../arch/x86/include/asm/msr-index.h > $(SNAPSHOT)/msr-index.h + @echo '#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))' >> $(SNAPSHOT)/msr-index.h + @echo "#define BIT(x) (1 << (x))" > $(SNAPSHOT)/bits.h + @echo "#define BIT_ULL(nr) (1ULL << (nr))" >> $(SNAPSHOT)/bits.h + @echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h + @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h + + @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h + @echo '#define __must_be_array(arr) 0' >> $(SNAPSHOT)/build_bug.h + + @echo PWD=. > $(SNAPSHOT)/Makefile + @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile + @echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile + @sed -e's/.*MSRHEADER.*//' Makefile >> $(SNAPSHOT)/Makefile + + @rm -f $(SNAPSHOT).tar.gz + tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT) + diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 index 78c6361898b1..0aa981c18e56 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 @@ -2,7 +2,7 @@ .\" Distributed under the GPL, Copyleft 1994. .TH X86_ENERGY_PERF_POLICY 8 .SH NAME -x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers +x86_energy_perf_policy \- Manage Energy vs. Performance Policy .SH SYNOPSIS .B x86_energy_perf_policy .RB "[ options ] [ scope ] [field \ value]" @@ -19,9 +19,14 @@ x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Spe .SH DESCRIPTION \fBx86_energy_perf_policy\fP displays and updates energy-performance policy settings specific to -Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR) -updates, no matter if the Linux cpufreq sub-system is enabled or not. +Intel Architecture Processors. It summarizes settings available +in standard Linux interfaces (eg. cpufreq), +and also decodes underlying Model Specific Register (MSRs). +While \fBx86_energy_perf_policy\fP can manage energy-performance policy +using only MSR access, it prefers standard +Linux kernel interfaces, when they are available. +.SH BACKGROUND Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB) may affect a wide range of hardware decisions, such as how aggressively the hardware enters and exits CPU idle states (C-states) @@ -200,7 +205,9 @@ runs only as root. .SH FILES .ta .nf -/dev/cpu/*/msr +EPB: /sys/devices/system/cpu/cpu*/power/energy_perf_bias +EPP: /sys/devices/system/cpu/cpu*/cpufreq/energy_performance_preference +MSR: /dev/cpu/*/msr .fi .SH "SEE ALSO" .nf diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index ebda9c366b2b..ac37132207a4 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -4,7 +4,7 @@ * policy preference bias on recent X86 processors. */ /* - * Copyright (c) 2010 - 2017 Intel Corporation. + * Copyright (c) 2010 - 2025 Intel Corporation. * Len Brown <len.brown@intel.com> */ @@ -62,6 +62,7 @@ unsigned char turbo_update_value; unsigned char update_hwp_epp; unsigned char update_hwp_min; unsigned char update_hwp_max; +unsigned char hwp_limits_done_via_sysfs; unsigned char update_hwp_desired; unsigned char update_hwp_window; unsigned char update_hwp_use_pkg; @@ -94,6 +95,8 @@ unsigned int bdx_highest_ratio; #define PATH_TO_CPU "/sys/devices/system/cpu/" #define SYSFS_PATH_MAX 255 +static int use_android_msr_path; + /* * maintain compatibility with original implementation, but don't document it: */ @@ -369,7 +372,7 @@ void validate_cpu_selected_set(void) for (cpu = 0; cpu <= max_cpu_num; ++cpu) { if (CPU_ISSET_S(cpu, cpu_setsize, cpu_selected_set)) if (!CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) - errx(1, "Requested cpu% is not present", cpu); + errx(1, "Requested cpu%d is not present", cpu); } } @@ -517,7 +520,7 @@ void for_packages(unsigned long long pkg_set, int (func)(int)) void print_version(void) { - printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n"); + printf("x86_energy_perf_policy 2025.11.22 Len Brown <lenb@kernel.org>\n"); } void cmdline(int argc, char **argv) @@ -630,7 +633,7 @@ void cmdline(int argc, char **argv) */ FILE *fopen_or_die(const char *path, const char *mode) { - FILE *filep = fopen(path, "r"); + FILE *filep = fopen(path, mode); if (!filep) err(1, "%s: open failed", path); @@ -644,7 +647,7 @@ void err_on_hypervisor(void) char *buffer; /* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */ - cpuinfo = fopen_or_die("/proc/cpuinfo", "ro"); + cpuinfo = fopen_or_die("/proc/cpuinfo", "r"); buffer = malloc(4096); if (!buffer) { @@ -659,6 +662,11 @@ void err_on_hypervisor(void) } flags = strstr(buffer, "flags"); + if (!flags) { + fclose(cpuinfo); + free(buffer); + err(1, "Failed to find 'flags' in /proc/cpuinfo"); + } rewind(cpuinfo); fseek(cpuinfo, flags - buffer, SEEK_SET); if (!fgets(buffer, 4096, cpuinfo)) { @@ -683,10 +691,12 @@ int get_msr(int cpu, int offset, unsigned long long *msr) char pathname[32]; int fd; - sprintf(pathname, "/dev/cpu/%d/msr", cpu); + sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); if (fd < 0) - err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + err(-1, "%s open failed, try chown or chmod +r %s, or run as root", + pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr"); + retval = pread(fd, msr, sizeof(*msr), offset); if (retval != sizeof(*msr)) { @@ -707,10 +717,11 @@ int put_msr(int cpu, int offset, unsigned long long new_msr) int retval; int fd; - sprintf(pathname, "/dev/cpu/%d/msr", cpu); + sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDWR); if (fd < 0) - err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + err(-1, "%s open failed, try chown or chmod +r %s, or run as root", + pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr"); retval = pwrite(fd, &new_msr, sizeof(new_msr), offset); if (retval != sizeof(new_msr)) @@ -809,7 +820,7 @@ void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str) h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp, h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7); } -void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +void read_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) { unsigned long long msr; @@ -823,7 +834,7 @@ void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1); } -void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +void write_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) { unsigned long long msr = 0; @@ -843,7 +854,7 @@ void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int ms put_msr(cpu, msr_offset, msr); } -static int get_epb(int cpu) +static int get_epb_sysfs(int cpu) { char path[SYSFS_PATH_MAX]; char linebuf[3]; @@ -865,7 +876,7 @@ static int get_epb(int cpu) return (int)val; } -static int set_epb(int cpu, int val) +static int set_epb_sysfs(int cpu, int val) { char path[SYSFS_PATH_MAX]; char linebuf[3]; @@ -895,14 +906,14 @@ int print_cpu_msrs(int cpu) struct msr_hwp_cap cap; int epb; - epb = get_epb(cpu); + epb = get_epb_sysfs(cpu); if (epb >= 0) printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb); if (!has_hwp) return 0; - read_hwp_request(cpu, &req, MSR_HWP_REQUEST); + read_hwp_request_msr(cpu, &req, MSR_HWP_REQUEST); print_hwp_request(cpu, &req, ""); read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); @@ -919,7 +930,7 @@ int print_pkg_msrs(int pkg) if (!has_hwp) return 0; - read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG); + read_hwp_request_msr(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG); print_hwp_request_pkg(pkg, &req, ""); if (has_hwp_notify) { @@ -951,8 +962,10 @@ int ratio_2_sysfs_khz(int ratio) } /* * If HWP is enabled and cpufreq sysfs attribtes are present, - * then update sysfs, so that it will not become - * stale when we write to MSRs. + * then update via sysfs. The intel_pstate driver may modify (clip) + * this request, say, when HWP_CAP is outside of PLATFORM_INFO limits, + * and the driver-chosen value takes precidence. + * * (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq, * so we don't have to touch that.) */ @@ -1007,6 +1020,8 @@ int update_sysfs(int cpu) if (update_hwp_max) update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max); + hwp_limits_done_via_sysfs = 1; + return 0; } @@ -1074,21 +1089,21 @@ int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, s return 0; } -int update_hwp_request(int cpu) +int update_hwp_request_msr(int cpu) { struct msr_hwp_request req; struct msr_hwp_cap cap; int msr_offset = MSR_HWP_REQUEST; - read_hwp_request(cpu, &req, msr_offset); + read_hwp_request_msr(cpu, &req, msr_offset); if (debug) print_hwp_request(cpu, &req, "old: "); - if (update_hwp_min) + if (update_hwp_min && !hwp_limits_done_via_sysfs) req.hwp_min = req_update.hwp_min; - if (update_hwp_max) + if (update_hwp_max && !hwp_limits_done_via_sysfs) req.hwp_max = req_update.hwp_max; if (update_hwp_desired) @@ -1111,15 +1126,15 @@ int update_hwp_request(int cpu) verify_hwp_req_self_consistency(cpu, &req); - write_hwp_request(cpu, &req, msr_offset); + write_hwp_request_msr(cpu, &req, msr_offset); if (debug) { - read_hwp_request(cpu, &req, msr_offset); + read_hwp_request_msr(cpu, &req, msr_offset); print_hwp_request(cpu, &req, "new: "); } return 0; } -int update_hwp_request_pkg(int pkg) +int update_hwp_request_pkg_msr(int pkg) { struct msr_hwp_request req; struct msr_hwp_cap cap; @@ -1127,7 +1142,7 @@ int update_hwp_request_pkg(int pkg) int msr_offset = MSR_HWP_REQUEST_PKG; - read_hwp_request(cpu, &req, msr_offset); + read_hwp_request_msr(cpu, &req, msr_offset); if (debug) print_hwp_request_pkg(pkg, &req, "old: "); @@ -1155,10 +1170,10 @@ int update_hwp_request_pkg(int pkg) verify_hwp_req_self_consistency(cpu, &req); - write_hwp_request(cpu, &req, msr_offset); + write_hwp_request_msr(cpu, &req, msr_offset); if (debug) { - read_hwp_request(cpu, &req, msr_offset); + read_hwp_request_msr(cpu, &req, msr_offset); print_hwp_request_pkg(pkg, &req, "new: "); } return 0; @@ -1166,30 +1181,39 @@ int update_hwp_request_pkg(int pkg) int enable_hwp_on_cpu(int cpu) { - unsigned long long msr; + unsigned long long old_msr, new_msr; + + get_msr(cpu, MSR_PM_ENABLE, &old_msr); + + if (old_msr & 1) + return 0; /* already enabled */ - get_msr(cpu, MSR_PM_ENABLE, &msr); - put_msr(cpu, MSR_PM_ENABLE, 1); + new_msr = old_msr | 1; + put_msr(cpu, MSR_PM_ENABLE, new_msr); if (verbose) - printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1); + printf("cpu%d: MSR_PM_ENABLE old: %llX new: %llX\n", cpu, old_msr, new_msr); return 0; } -int update_cpu_msrs(int cpu) +int update_cpu_epb_sysfs(int cpu) { - unsigned long long msr; int epb; - if (update_epb) { - epb = get_epb(cpu); - set_epb(cpu, new_epb); + epb = get_epb_sysfs(cpu); + set_epb_sysfs(cpu, new_epb); - if (verbose) - printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n", - cpu, epb, (unsigned int) new_epb); - } + if (verbose) + printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n", + cpu, epb, (unsigned int) new_epb); + + return 0; +} + +int update_cpu_msrs(int cpu) +{ + unsigned long long msr; if (update_turbo) { int turbo_is_present_and_disabled; @@ -1224,7 +1248,7 @@ int update_cpu_msrs(int cpu) if (!hwp_update_enabled()) return 0; - update_hwp_request(cpu); + update_hwp_request_msr(cpu); return 0; } @@ -1312,6 +1336,17 @@ void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int)) if (CPU_ISSET_S(cpu_num, set_size, cpu_set)) func(cpu_num); } +int for_all_cpus_in_set_and(size_t set_size, cpu_set_t *cpu_set, int (func)(int)) +{ + int cpu_num; + int retval = 1; + + for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num) + if (CPU_ISSET_S(cpu_num, set_size, cpu_set)) + retval &= func(cpu_num); + + return retval; +} void init_data_structures(void) { @@ -1326,21 +1361,38 @@ void init_data_structures(void) for_all_proc_cpus(mark_cpu_present); } -/* clear has_hwp if it is not enable (or being enabled) */ +int is_hwp_enabled_on_cpu(int cpu_num) +{ + unsigned long long msr; + int retval; + + /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ + get_msr(cpu_num, MSR_PM_ENABLE, &msr); + retval = (msr & 1); + + if (verbose) + fprintf(stderr, "cpu%d: %sHWP\n", cpu_num, retval ? "" : "No-"); + + return retval; +} +/* + * verify_hwp_is_enabled() + * + * Set (has_hwp=0) if no HWP feature or any of selected CPU set does not have HWP enabled + */ void verify_hwp_is_enabled(void) { - unsigned long long msr; + int retval; if (!has_hwp) /* set in early_cpuid() */ return; - /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ - get_msr(base_cpu, MSR_PM_ENABLE, &msr); - if ((msr & 1) == 0) { + retval = for_all_cpus_in_set_and(cpu_setsize, cpu_selected_set, is_hwp_enabled_on_cpu); + + if (retval == 0) { fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n"); has_hwp = 0; - return; } } @@ -1379,16 +1431,32 @@ void set_base_cpu(void) err(-ENODEV, "No valid cpus found"); } +static void probe_android_msr_path(void) +{ + struct stat sb; + char test_path[32]; + + sprintf(test_path, "/dev/msr%d", base_cpu); + if (stat(test_path, &sb) == 0) + use_android_msr_path = 1; +} void probe_dev_msr(void) { struct stat sb; char pathname[32]; - sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); - if (stat(pathname, &sb)) - if (system("/sbin/modprobe msr > /dev/null 2>&1")) - err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); + probe_android_msr_path(); + + sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", base_cpu); + if (stat(pathname, &sb)) { + if (system("/sbin/modprobe msr > /dev/null 2>&1")) { + if (use_android_msr_path) + err(-5, "no /dev/msr0, Try \"# modprobe msr\" "); + else + err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); + } + } } static void get_cpuid_or_exit(unsigned int leaf, @@ -1505,6 +1573,7 @@ void parse_cpuid(void) int main(int argc, char **argv) { set_base_cpu(); + probe_dev_msr(); init_data_structures(); @@ -1551,10 +1620,13 @@ int main(int argc, char **argv) /* update CPU set */ if (cpu_selected_set) { + if (update_epb) + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_epb_sysfs); for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs); for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs); + } else if (pkg_selected_set) - for_packages(pkg_selected_set, update_hwp_request_pkg); + for_packages(pkg_selected_set, update_hwp_request_pkg_msr); return 0; } |
