diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-11-23 12:19:49 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-11-23 12:20:58 -0800 |
commit | 45c226dde742a92e22dcd65b96bf7e02620a9c19 (patch) | |
tree | abaedb7f2ddf75914659c7b9a48af34ca89a9208 /tools | |
parent | c5b9f4792ea6b9abfcfb9486ba256f55e296aaa7 (diff) | |
parent | d3fa86b1a7b4cdc4367acacea16b72e0a200b3d7 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:
drivers/net/ethernet/intel/ice/ice_main.c
c9663f79cd82 ("ice: adjust switchdev rebuild path")
7758017911a4 ("ice: restore timestamp configuration after device reset")
https://lore.kernel.org/all/20231121211259.3348630-1-anthony.l.nguyen@intel.com/
Adjacent changes:
kernel/bpf/verifier.c
bb124da69c47 ("bpf: keep track of max number of bpf_loop callback iterations")
5f99f312bd3b ("bpf: add register bounds sanity checks and sanitization")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools')
20 files changed, 2111 insertions, 1680 deletions
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 264eeb9c46a9..318e2dad27e0 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1421,7 +1421,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error) goto setval_error; - if (new_val->addr_family == ADDR_FAMILY_IPV6) { + if (new_val->addr_family & ADDR_FAMILY_IPV6) { error = fprintf(nmfile, "\n[ipv6]\n"); if (error < 0) goto setval_error; @@ -1455,14 +1455,18 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error < 0) goto setval_error; - error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); - if (error < 0) - goto setval_error; - - error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); - if (error < 0) - goto setval_error; + /* we do not want ipv4 addresses in ipv6 section and vice versa */ + if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) { + error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); + if (error < 0) + goto setval_error; + } + if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) { + error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); + if (error < 0) + goto setval_error; + } fclose(nmfile); fclose(ifcfg_file); diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index ae5a7a8249a2..440a91b35823 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -53,7 +53,7 @@ # or "manual" if no boot-time protocol should be used) # # address1=ipaddr1/plen -# address=ipaddr2/plen +# address2=ipaddr2/plen # # gateway=gateway1;gateway2 # @@ -61,7 +61,7 @@ # # [ipv6] # address1=ipaddr1/plen -# address2=ipaddr1/plen +# address2=ipaddr2/plen # # gateway=gateway1;gateway2 # diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 64d139400db1..3110f84dd029 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -18,4 +18,4 @@ CFLAGS_devlink:=$(call 
get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) -CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_H,nfsd.h) +CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index bc5065bd99b2..c12ca87ca2bb 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -15,7 +15,7 @@ /* Enums */ static const char * const devlink_op_strmap[] = { [3] = "get", - [7] = "port-get", + // skip "port-get", duplicate reply value [DEVLINK_CMD_PORT_NEW] = "port-new", [13] = "sb-get", [17] = "sb-pool-get", diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index c4003a83cd5d..3bd6b928c14f 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1505,6 +1505,12 @@ def put_op_name(family, cw): cw.block_start(line=f"static const char * const {map_name}[] =") for op_name, op in family.msgs.items(): if op.rsp_value: + # Make sure we don't add duplicated entries, if multiple commands + # produce the same response in legacy families. 
+ if family.rsp_by_value[op.rsp_value] != op: + cw.p(f'// skip "{op_name}", duplicate reply value') + continue + if op.req_value == op.rsp_value: cw.p(f'[{op.enum_name}] = "{op_name}",') else: diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9a10512e3407..7a334377f92b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -211,9 +211,6 @@ int *fd_instr_count_percpu; struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; -/* Save original CPU model */ -unsigned int model_orig; - unsigned int num_iterations; unsigned int header_iterations; unsigned int debug; @@ -224,24 +221,16 @@ unsigned int rapl_joules; unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; -unsigned int do_snb_cstates; -unsigned int do_knl_cstates; -unsigned int do_slm_cstates; -unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; unsigned int has_turbo; unsigned int is_hybrid; -unsigned int do_irtl_snb; -unsigned int do_irtl_hsw; unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int authentic_amd; unsigned int hygon_genuine; unsigned int max_level, max_extended_level; unsigned int has_invariant_tsc; -unsigned int do_nhm_platform_info; -unsigned int no_MSR_MISC_PWR_MGMT; unsigned int aperf_mperf_multiplier = 1; double bclk; double base_hz; @@ -250,7 +239,6 @@ double tsc_tweak = 1.0; unsigned int show_pkg_only; unsigned int show_core_only; char *output_buffer, *outp; -unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; unsigned int do_ipc; @@ -261,65 +249,686 @@ unsigned int gfx_cur_mhz; unsigned int gfx_act_mhz; unsigned int tj_max; unsigned int tj_max_override; -int tcc_offset_bits; double rapl_power_units, rapl_time_units; double rapl_dram_energy_units, rapl_energy_units; double rapl_joule_counter_range; -unsigned int do_core_perf_limit_reasons; -unsigned int 
has_automatic_cstate_conversion; -unsigned int dis_cstate_prewake; -unsigned int do_gfx_perf_limit_reasons; -unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; -double discover_bclk(unsigned int family, unsigned int model); unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ -unsigned int has_misc_feature_control; unsigned int first_counter_read = 1; int ignore_stdin; -#define RAPL_PKG (1 << 0) - /* 0x610 MSR_PKG_POWER_LIMIT */ - /* 0x611 MSR_PKG_ENERGY_STATUS */ -#define RAPL_PKG_PERF_STATUS (1 << 1) - /* 0x613 MSR_PKG_PERF_STATUS */ -#define RAPL_PKG_POWER_INFO (1 << 2) - /* 0x614 MSR_PKG_POWER_INFO */ - -#define RAPL_DRAM (1 << 3) - /* 0x618 MSR_DRAM_POWER_LIMIT */ - /* 0x619 MSR_DRAM_ENERGY_STATUS */ -#define RAPL_DRAM_PERF_STATUS (1 << 4) - /* 0x61b MSR_DRAM_PERF_STATUS */ -#define RAPL_DRAM_POWER_INFO (1 << 5) - /* 0x61c MSR_DRAM_POWER_INFO */ - -#define RAPL_CORES_POWER_LIMIT (1 << 6) - /* 0x638 MSR_PP0_POWER_LIMIT */ -#define RAPL_CORE_POLICY (1 << 7) - /* 0x63a MSR_PP0_POLICY */ - -#define RAPL_GFX (1 << 8) - /* 0x640 MSR_PP1_POWER_LIMIT */ - /* 0x641 MSR_PP1_ENERGY_STATUS */ - /* 0x642 MSR_PP1_POLICY */ - -#define RAPL_CORES_ENERGY_STATUS (1 << 9) - /* 0x639 MSR_PP0_ENERGY_STATUS */ -#define RAPL_PER_CORE_ENERGY (1 << 10) - /* Indicates cores energy collection is per-core, - * not per-package. 
*/ -#define RAPL_AMD_F17H (1 << 11) - /* 0xc0010299 MSR_RAPL_PWR_UNIT */ - /* 0xc001029a MSR_CORE_ENERGY_STAT */ - /* 0xc001029b MSR_PKG_ENERGY_STAT */ -#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT) +int get_msr(int cpu, off_t offset, unsigned long long *msr); + +/* Model specific support Start */ + +/* List of features that may diverge among different platforms */ +struct platform_features { + bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */ + bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */ + bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */ + bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */ + int bclk_freq; /* CPU base clock */ + int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */ + int supported_cstates; /* Core cstates and Package cstates supported */ + int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */ + bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */ + bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */ + bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */ + bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */ + bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */ + bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */ + bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */ + bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */ + bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ + int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ + int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */ + int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ + bool 
has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */ + bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ + bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ + int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */ + int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */ + bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */ + bool need_perf_multiplier; /* mperf/aperf multiplier */ +}; + +struct platform_data { + unsigned int model; + const struct platform_features *features; +}; + +/* For BCLK */ +enum bclk_freq { + BCLK_100MHZ = 1, + BCLK_133MHZ, + BCLK_SLV, +}; + +#define SLM_BCLK_FREQS 5 +double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; + +double slm_bclk(void) +{ + unsigned long long msr = 3; + unsigned int i; + double freq; + + if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) + fprintf(outf, "SLM BCLK: unknown\n"); + + i = msr & 0xf; + if (i >= SLM_BCLK_FREQS) { + fprintf(outf, "SLM BCLK[%d] invalid\n", i); + i = 3; + } + freq = slm_freq_table[i]; + + if (!quiet) + fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); + + return freq; +} + +/* For Package cstate limit */ +enum package_cstate_limit { + CST_LIMIT_NHM = 1, + CST_LIMIT_SNB, + CST_LIMIT_HSW, + CST_LIMIT_SKX, + CST_LIMIT_ICX, + CST_LIMIT_SLV, + CST_LIMIT_AMT, + CST_LIMIT_KNL, + CST_LIMIT_GMT, +}; + +/* For Turbo Ratio Limit MSRs */ +enum turbo_ratio_limit_msrs { + TRL_BASE = BIT(0), + TRL_LIMIT1 = BIT(1), + TRL_LIMIT2 = BIT(2), + TRL_ATOM = BIT(3), + TRL_KNL = BIT(4), + TRL_CORECOUNT = BIT(5), +}; + +/* For Perf Limit Reason MSRs */ +enum perf_limit_reason_msrs { + PLR_CORE = BIT(0), + PLR_GFX = BIT(1), + PLR_RING = BIT(2), +}; + +/* For RAPL MSRs */ +enum rapl_msrs { + RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */ + RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 
MSR_PKG_ENERGY_STATUS */ + RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */ + RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */ + RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */ + RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */ + RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */ + RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */ + RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */ + RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */ + RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */ + RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */ + RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */ + RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */ + RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ + RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ + RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ +}; + +#define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) +#define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) +#define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) +#define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) + +#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) +#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) +#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY) +#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLICY) + +#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT) + +/* For Cstates */ +enum cstates { + CC1 = BIT(0), + CC3 = BIT(1), + CC6 = BIT(2), + CC7 = BIT(3), + PC2 = BIT(4), + PC3 = BIT(5), + PC6 = BIT(6), + PC7 = BIT(7), + PC8 = BIT(8), + PC9 = BIT(9), + PC10 = BIT(10), +}; + +static const struct platform_features nhm_features = { + .has_msr_misc_pwr_mgmt = 1, + 
.has_nhm_msrs = 1, + .bclk_freq = BCLK_133MHZ, + .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_NHM, + .trl_msrs = TRL_BASE, +}; + +static const struct platform_features nhx_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_133MHZ, + .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_NHM, +}; + +static const struct platform_features snb_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_SNB, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features snx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_SNB, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, +}; + +static const struct platform_features ivb_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_SNB, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features ivx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_SNB, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE | TRL_LIMIT1, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, +}; + +static const 
struct platform_features hsw_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features hsx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, + .plr_msrs = PLR_CORE | PLR_RING, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, +}; + +static const struct platform_features hswl_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features hswg_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features bdw_features = { + .has_msr_misc_feature_control = 1, + 
.has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features bdwg_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features bdx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .has_cst_auto_convension = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, +}; + +static const struct platform_features skl_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .crystal_freq = 24000000, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .has_ext_cst_msrs = 1, + .trl_msrs = TRL_BASE, + .tcc_offset_bits = 6, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .enable_tsc_tweak = 1, +}; + +static const struct platform_features cnl_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | 
CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .has_msr_core_c1_res = 1, + .has_ext_cst_msrs = 1, + .trl_msrs = TRL_BASE, + .tcc_offset_bits = 6, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .enable_tsc_tweak = 1, +}; + +static const struct platform_features adl_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .has_msr_core_c1_res = 1, + .has_ext_cst_msrs = 1, + .trl_msrs = TRL_BASE, + .tcc_offset_bits = 6, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .enable_tsc_tweak = 1, +}; + +static const struct platform_features skx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + .cst_limit = CST_LIMIT_SKX, + .has_irtl_msrs = 1, + .has_cst_auto_convension = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, +}; + +static const struct platform_features icx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + .cst_limit = CST_LIMIT_ICX, + .has_irtl_msrs = 1, + .has_cst_prewake_bit = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, +}; + +static const struct platform_features spr_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + 
.cst_limit = CST_LIMIT_SKX, + .has_msr_core_c1_res = 1, + .has_irtl_msrs = 1, + .has_cst_prewake_bit = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, +}; + +static const struct platform_features srf_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + .cst_limit = CST_LIMIT_SKX, + .has_msr_core_c1_res = 1, + .has_msr_module_c6_res_ms = 1, + .has_irtl_msrs = 1, + .has_cst_prewake_bit = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, +}; + +static const struct platform_features grr_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6, + .cst_limit = CST_LIMIT_SKX, + .has_msr_core_c1_res = 1, + .has_msr_module_c6_res_ms = 1, + .has_irtl_msrs = 1, + .has_cst_prewake_bit = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, +}; + +static const struct platform_features slv_features = { + .has_nhm_msrs = 1, + .bclk_freq = BCLK_SLV, + .supported_cstates = CC1 | CC6 | PC6, + .cst_limit = CST_LIMIT_SLV, + .has_msr_core_c1_res = 1, + .has_msr_module_c6_res_ms = 1, + .has_msr_c6_demotion_policy_config = 1, + .has_msr_atom_pkg_c6_residency = 1, + .trl_msrs = TRL_ATOM, + .rapl_msrs = RAPL_PKG | RAPL_CORE, + .has_rapl_divisor = 1, + .rapl_quirk_tdp = 30, +}; + +static const struct platform_features slvd_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_SLV, + .supported_cstates = CC1 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_SLV, + .has_msr_atom_pkg_c6_residency = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE, + .rapl_quirk_tdp = 30, +}; + +static const struct platform_features amt_features = { + .has_nhm_msrs = 1, + .bclk_freq = 
BCLK_133MHZ, + .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_AMT, + .trl_msrs = TRL_BASE, +}; + +static const struct platform_features gmt_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .crystal_freq = 19200000, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features gmtd_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .crystal_freq = 25000000, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .has_msr_core_c1_res = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, +}; + +static const struct platform_features gmtp_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .crystal_freq = 19200000, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features tmt_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .enable_tsc_tweak = 1, +}; + +static const struct platform_features tmtd_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + 
.rapl_msrs = RAPL_PKG_ALL, +}; + +static const struct platform_features knl_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_KNL, + .has_msr_knl_core_c6_residency = 1, + .trl_msrs = TRL_KNL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, + .need_perf_multiplier = 1, +}; + +static const struct platform_features default_features = { +}; + +static const struct platform_features amd_features_with_rapl = { + .rapl_msrs = RAPL_AMD_F17H, + .has_per_core_rapl = 1, + .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ +}; + +static const struct platform_data turbostat_pdata[] = { + { INTEL_FAM6_NEHALEM, &nhm_features }, + { INTEL_FAM6_NEHALEM_G, &nhm_features }, + { INTEL_FAM6_NEHALEM_EP, &nhm_features }, + { INTEL_FAM6_NEHALEM_EX, &nhx_features }, + { INTEL_FAM6_WESTMERE, &nhm_features }, + { INTEL_FAM6_WESTMERE_EP, &nhm_features }, + { INTEL_FAM6_WESTMERE_EX, &nhx_features }, + { INTEL_FAM6_SANDYBRIDGE, &snb_features }, + { INTEL_FAM6_SANDYBRIDGE_X, &snx_features }, + { INTEL_FAM6_IVYBRIDGE, &ivb_features }, + { INTEL_FAM6_IVYBRIDGE_X, &ivx_features }, + { INTEL_FAM6_HASWELL, &hsw_features }, + { INTEL_FAM6_HASWELL_X, &hsx_features }, + { INTEL_FAM6_HASWELL_L, &hswl_features }, + { INTEL_FAM6_HASWELL_G, &hswg_features }, + { INTEL_FAM6_BROADWELL, &bdw_features }, + { INTEL_FAM6_BROADWELL_G, &bdwg_features }, + { INTEL_FAM6_BROADWELL_X, &bdx_features }, + { INTEL_FAM6_BROADWELL_D, &bdx_features }, + { INTEL_FAM6_SKYLAKE_L, &skl_features }, + { INTEL_FAM6_SKYLAKE, &skl_features }, + { INTEL_FAM6_SKYLAKE_X, &skx_features }, + { INTEL_FAM6_KABYLAKE_L, &skl_features }, + { INTEL_FAM6_KABYLAKE, &skl_features }, + { INTEL_FAM6_COMETLAKE, &skl_features }, + { INTEL_FAM6_COMETLAKE_L, &skl_features }, + { INTEL_FAM6_CANNONLAKE_L, &cnl_features }, + { INTEL_FAM6_ICELAKE_X, &icx_features }, + 
{ INTEL_FAM6_ICELAKE_D, &icx_features }, + { INTEL_FAM6_ICELAKE_L, &cnl_features }, + { INTEL_FAM6_ICELAKE_NNPI, &cnl_features }, + { INTEL_FAM6_ROCKETLAKE, &cnl_features }, + { INTEL_FAM6_TIGERLAKE_L, &cnl_features }, + { INTEL_FAM6_TIGERLAKE, &cnl_features }, + { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features }, + { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features }, + { INTEL_FAM6_GRANITERAPIDS_X, &spr_features }, + { INTEL_FAM6_LAKEFIELD, &cnl_features }, + { INTEL_FAM6_ALDERLAKE, &adl_features }, + { INTEL_FAM6_ALDERLAKE_L, &adl_features }, + { INTEL_FAM6_RAPTORLAKE, &adl_features }, + { INTEL_FAM6_RAPTORLAKE_P, &adl_features }, + { INTEL_FAM6_RAPTORLAKE_S, &adl_features }, + { INTEL_FAM6_METEORLAKE, &cnl_features }, + { INTEL_FAM6_METEORLAKE_L, &cnl_features }, + { INTEL_FAM6_ARROWLAKE, &cnl_features }, + { INTEL_FAM6_LUNARLAKE_M, &cnl_features }, + { INTEL_FAM6_ATOM_SILVERMONT, &slv_features }, + { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features }, + { INTEL_FAM6_ATOM_AIRMONT, &amt_features }, + { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features }, + { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features }, + { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features }, + { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features }, + { INTEL_FAM6_ATOM_TREMONT, &tmt_features }, + { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features }, + { INTEL_FAM6_ATOM_GRACEMONT, &adl_features }, + { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features }, + { INTEL_FAM6_ATOM_CRESTMONT, &grr_features }, + { INTEL_FAM6_XEON_PHI_KNL, &knl_features }, + { INTEL_FAM6_XEON_PHI_KNM, &knl_features }, + /* + * Missing support for + * INTEL_FAM6_ICELAKE + * INTEL_FAM6_ATOM_SILVERMONT_MID + * INTEL_FAM6_ATOM_AIRMONT_MID + * INTEL_FAM6_ATOM_AIRMONT_NP + */ + { 0, NULL }, +}; + +static const struct platform_features *platform; + +void probe_platform_features(unsigned int family, unsigned int model) +{ + int i; + + platform = &default_features; + + if (authentic_amd || hygon_genuine) { + if (max_extended_level >= 0x80000007) { + unsigned int eax, ebx, ecx, edx; + 
+ __cpuid(0x80000007, eax, ebx, ecx, edx); + /* RAPL (Fam 17h+) */ + if ((edx & (1 << 14)) && family >= 0x17) + platform = &amd_features_with_rapl; + } + return; + } + + if (!genuine_intel || family != 6) + return; + + for (i = 0; turbostat_pdata[i].features; i++) { + if (turbostat_pdata[i].model == model) { + platform = turbostat_pdata[i].features; + return; + } + } +} + +/* Model specific support End */ + #define TJMAX_DEFAULT 100 /* MSRs that are not yet in the kernel-provided header. */ @@ -333,8 +942,8 @@ int backwards_count; char *progname; #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ -cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; -size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; +cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; +size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_COUNTERS 8 #define MAX_ADDED_THREAD_COUNTERS 24 #define BITMASK_SIZE 32 @@ -355,12 +964,11 @@ struct thread_data { unsigned int x2apic_id; unsigned int flags; bool is_atom; -#define CPU_IS_FIRST_THREAD_IN_CORE 0x2 -#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; } *thread_even, *thread_odd; struct core_data { + int base_cpu; unsigned long long c3; unsigned long long c6; unsigned long long c7; @@ -373,6 +981,7 @@ struct core_data { } *core_even, *core_odd; struct pkg_data { + int base_cpu; unsigned long long pc2; unsigned long long pc3; unsigned long long pc6; @@ -456,7 +1065,7 @@ off_t idx_to_offset(int idx) switch (idx) { case IDX_PKG_ENERGY: - if (do_rapl & RAPL_AMD_F17H) + if (platform->rapl_msrs & RAPL_AMD_F17H) offset = MSR_PKG_ENERGY_STAT; else offset = MSR_PKG_ENERGY_STATUS; @@ -516,17 +1125,17 @@ int idx_valid(int idx) { switch (idx) { case IDX_PKG_ENERGY: - return do_rapl & (RAPL_PKG | RAPL_AMD_F17H); + return platform->rapl_msrs & (RAPL_PKG | 
RAPL_AMD_F17H); case IDX_DRAM_ENERGY: - return do_rapl & RAPL_DRAM; + return platform->rapl_msrs & RAPL_DRAM; case IDX_PP0_ENERGY: - return do_rapl & RAPL_CORES_ENERGY_STATUS; + return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; case IDX_PP1_ENERGY: - return do_rapl & RAPL_GFX; + return platform->rapl_msrs & RAPL_GFX; case IDX_PKG_PERF: - return do_rapl & RAPL_PKG_PERF_STATUS; + return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; case IDX_DRAM_PERF: - return do_rapl & RAPL_DRAM_PERF_STATUS; + return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; default: return 0; } @@ -563,6 +1172,9 @@ struct topo_params { int num_die; int num_cpus; int num_cores; + int allowed_packages; + int allowed_cpus; + int allowed_cores; int max_cpu_num; int max_node_num; int nodes_per_pkg; @@ -575,7 +1187,7 @@ struct timeval tv_even, tv_odd, tv_delta; int *irq_column_2_cpu; /* /proc/interrupts column numbers */ int *irqs_per_cpu; /* indexed by cpu_num */ -void setup_all_buffers(void); +void setup_all_buffers(bool startup); char *sys_lpi_file; char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; @@ -586,6 +1198,11 @@ int cpu_is_not_present(int cpu) return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); } +int cpu_is_not_allowed(int cpu) +{ + return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set); +} + /* * run func(thread, core, package) in topology order * skip non-present cpus @@ -603,10 +1220,9 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk struct thread_data *t; struct core_data *c; struct pkg_data *p; - t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); - if (cpu_is_not_present(t->cpu_id)) + if (cpu_is_not_allowed(t->cpu_id)) continue; c = GET_CORE(core_base, core_no, node_no, pkg_no); @@ -622,6 +1238,25 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk return 0; } +int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, 
struct pkg_data *p) +{ + UNUSED(p); + + return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); +} + +int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + UNUSED(c); + + return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); +} + +int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); +} + int cpu_migrate(int cpu) { CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); @@ -904,11 +1539,11 @@ void print_header(char *delim) if (DO_BIC(BIC_CORE_THROT_CNT)) outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); - if (do_rapl && !rapl_joules) { - if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + if (platform->rapl_msrs && !rapl_joules) { + if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); - } else if (do_rapl && rapl_joules) { - if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + } else if (platform->rapl_msrs && rapl_joules) { + if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); } @@ -966,10 +1601,10 @@ void print_header(char *delim) if (DO_BIC(BIC_SYS_LPI)) outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); - if (do_rapl && !rapl_joules) { + if (platform->rapl_msrs && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); - if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); @@ -979,10 +1614,10 @@ void print_header(char *delim) outp += sprintf(outp, "%sPKG_%%", (printed++ ? 
delim : "")); if (DO_BIC(BIC_RAM__)) outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); - } else if (do_rapl && rapl_joules) { + } else if (platform->rapl_msrs && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); - if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); @@ -1106,11 +1741,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data int printed = 0; /* if showing only 1st thread in core and this isn't one, bail out */ - if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) return 0; /* if showing only 1st thread in pkg and this isn't one, bail out */ - if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) return 0; /*if not summary line and --cpu is used */ @@ -1244,7 +1879,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); /* print per-core data only for 1st thread in core */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + if (!is_cpu_first_thread_in_core(t, c, p)) goto done; if (DO_BIC(BIC_CPU_c3)) @@ -1284,14 +1919,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data fmt8 = "%s%.2f"; - if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); - if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), c->core_energy * rapl_energy_units); /* print per-package data only for 1st core in package */ - if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_core_in_package(t, c, p)) goto done; /* PkgTmp */ @@ -1352,7 +1987,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); - if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); if (DO_BIC(BIC_GFXWatt)) @@ -1364,7 +1999,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->energy_dram * rapl_dram_energy_units / interval_float); if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); - if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), p->energy_gfx * rapl_energy_units); @@ -1527,7 +2162,7 @@ void delta_core(struct core_data *new, struct core_data *old) int soft_c1_residency_display(int bic) { - if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr) + if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res) return 0; return DO_BIC_READ(bic); @@ -1567,7 +2202,8 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d old->c1 = new->c1 - old->c1; - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) + || soft_c1_residency_display(BIC_Avg_MHz)) { if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { old->aperf = new->aperf - old->aperf; old->mperf = new->mperf - old->mperf; @@ -1576,7 +2212,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d } } - if (use_c1_residency_msr) { + if (platform->has_msr_core_c1_res) { /* * Some models have a dedicated C1 residency MSR, * which should be more accurate than the derivation below. 
@@ -1626,7 +2262,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c, int retval = 0; /* calculate core delta only for 1st thread in core */ - if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) + if (is_cpu_first_thread_in_core(t, c, p)) delta_core(c, c2); /* always calculate thread delta */ @@ -1635,7 +2271,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c, return retval; /* calculate package delta only for 1st core in package */ - if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) + if (is_cpu_first_core_in_package(t, c, p)) retval = delta_package(p, p2); return retval; @@ -1663,9 +2299,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->irq_count = 0; t->smi_count = 0; - /* tells format_counters to dump all fields from this set */ - t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; - c->c3 = 0; c->c6 = 0; c->c7 = 0; @@ -1749,7 +2382,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* sum per-core values only for 1st thread in core */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + if (!is_cpu_first_thread_in_core(t, c, p)) return 0; average.cores.c3 += c->c3; @@ -1769,7 +2402,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* sum per-pkg values only for 1st core in pkg */ - if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_core_in_package(t, c, p)) return 0; if (DO_BIC(BIC_Totl_c0)) @@ -1834,40 +2467,40 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data /* Use the global time delta for the average. 
*/ average.threads.tv_delta = tv_delta; - average.threads.tsc /= topo.num_cpus; - average.threads.aperf /= topo.num_cpus; - average.threads.mperf /= topo.num_cpus; - average.threads.instr_count /= topo.num_cpus; - average.threads.c1 /= topo.num_cpus; + average.threads.tsc /= topo.allowed_cpus; + average.threads.aperf /= topo.allowed_cpus; + average.threads.mperf /= topo.allowed_cpus; + average.threads.instr_count /= topo.allowed_cpus; + average.threads.c1 /= topo.allowed_cpus; if (average.threads.irq_count > 9999999) sums_need_wide_columns = 1; - average.cores.c3 /= topo.num_cores; - average.cores.c6 /= topo.num_cores; - average.cores.c7 /= topo.num_cores; - average.cores.mc6_us /= topo.num_cores; + average.cores.c3 /= topo.allowed_cores; + average.cores.c6 /= topo.allowed_cores; + average.cores.c7 /= topo.allowed_cores; + average.cores.mc6_us /= topo.allowed_cores; if (DO_BIC(BIC_Totl_c0)) - average.packages.pkg_wtd_core_c0 /= topo.num_packages; + average.packages.pkg_wtd_core_c0 /= topo.allowed_packages; if (DO_BIC(BIC_Any_c0)) - average.packages.pkg_any_core_c0 /= topo.num_packages; + average.packages.pkg_any_core_c0 /= topo.allowed_packages; if (DO_BIC(BIC_GFX_c0)) - average.packages.pkg_any_gfxe_c0 /= topo.num_packages; + average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages; if (DO_BIC(BIC_CPUGFX)) - average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages; + average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages; - average.packages.pc2 /= topo.num_packages; + average.packages.pc2 /= topo.allowed_packages; if (DO_BIC(BIC_Pkgpc3)) - average.packages.pc3 /= topo.num_packages; + average.packages.pc3 /= topo.allowed_packages; if (DO_BIC(BIC_Pkgpc6)) - average.packages.pc6 /= topo.num_packages; + average.packages.pc6 /= topo.allowed_packages; if (DO_BIC(BIC_Pkgpc7)) - average.packages.pc7 /= topo.num_packages; + average.packages.pc7 /= topo.allowed_packages; - average.packages.pc8 /= topo.num_packages; - average.packages.pc9 /= topo.num_packages; - 
average.packages.pc10 /= topo.num_packages; + average.packages.pc8 /= topo.allowed_packages; + average.packages.pc9 /= topo.allowed_packages; + average.packages.pc10 /= topo.allowed_packages; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1877,7 +2510,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data sums_need_wide_columns = 1; continue; } - average.threads.counter[i] /= topo.num_cpus; + average.threads.counter[i] /= topo.allowed_cpus; } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1886,7 +2519,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data if (average.cores.counter[i] > 9999999) sums_need_wide_columns = 1; } - average.cores.counter[i] /= topo.num_cores; + average.cores.counter[i] /= topo.allowed_cores; } for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1895,7 +2528,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data if (average.packages.counter[i] > 9999999) sums_need_wide_columns = 1; } - average.packages.counter[i] /= topo.num_packages; + average.packages.counter[i] /= topo.allowed_packages; } } @@ -2092,7 +2725,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) + || soft_c1_residency_display(BIC_Avg_MHz)) { unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; /* @@ -2158,7 +2792,7 @@ retry: return -5; t->smi_count = msr & 0xFFFFFFFF; } - if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) { + if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) { if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) return -6; } @@ -2169,7 
+2803,7 @@ retry: } /* collect core counters only for 1st thread in core */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + if (!is_cpu_first_thread_in_core(t, c, p)) goto done; if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) { @@ -2177,10 +2811,10 @@ retry: return -6; } - if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) { + if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; - } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) { + } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) { if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) return -7; } @@ -2212,7 +2846,7 @@ retry: if (DO_BIC(BIC_CORE_THROT_CNT)) get_core_throt_cnt(cpu, &c->core_throt_cnt); - if (do_rapl & RAPL_AMD_F17H) { + if (platform->rapl_msrs & RAPL_AMD_F17H) { if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) return -14; c->core_energy = msr & 0xFFFFFFFF; @@ -2224,7 +2858,7 @@ retry: } /* collect package counters only for 1st core in package */ - if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_core_in_package(t, c, p)) goto done; if (DO_BIC(BIC_Totl_c0)) { @@ -2247,7 +2881,7 @@ retry: if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; if (DO_BIC(BIC_Pkgpc6)) { - if (do_slm_cstates) { + if (platform->has_msr_atom_pkg_c6_residency) { if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) return -10; } else { @@ -2277,37 +2911,37 @@ retry: if (DO_BIC(BIC_SYS_LPI)) p->sys_lpi = cpuidle_cur_sys_lpi_us; - if (do_rapl & RAPL_PKG) { + if (platform->rapl_msrs & RAPL_PKG) { if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; p->energy_pkg = msr; } - if (do_rapl & RAPL_CORES_ENERGY_STATUS) { + if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) { if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr)) return -14; p->energy_cores = msr; } - if 
(do_rapl & RAPL_DRAM) { + if (platform->rapl_msrs & RAPL_DRAM) { if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) return -15; p->energy_dram = msr; } - if (do_rapl & RAPL_GFX) { + if (platform->rapl_msrs & RAPL_GFX) { if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr)) return -16; p->energy_gfx = msr; } - if (do_rapl & RAPL_PKG_PERF_STATUS) { + if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) { if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr)) return -16; p->rapl_pkg_perf_status = msr; } - if (do_rapl & RAPL_DRAM_PERF_STATUS) { + if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) { if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr)) return -16; p->rapl_dram_perf_status = msr; } - if (do_rapl & RAPL_AMD_F17H) { + if (platform->rapl_msrs & RAPL_AMD_F17H) { if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr)) return -13; p->energy_pkg = msr; @@ -2414,18 +3048,58 @@ int icx_pkg_cstate_limits[16] = PCLRSV, PCLRSV }; -static void calculate_tsc_tweak() +void probe_cst_limit(void) { - tsc_tweak = base_hz / tsc_hz; -} + unsigned long long msr; + int *pkg_cstate_limits; + + if (!platform->has_nhm_msrs) + return; + + switch (platform->cst_limit) { + case CST_LIMIT_NHM: + pkg_cstate_limits = nhm_pkg_cstate_limits; + break; + case CST_LIMIT_SNB: + pkg_cstate_limits = snb_pkg_cstate_limits; + break; + case CST_LIMIT_HSW: + pkg_cstate_limits = hsw_pkg_cstate_limits; + break; + case CST_LIMIT_SKX: + pkg_cstate_limits = skx_pkg_cstate_limits; + break; + case CST_LIMIT_ICX: + pkg_cstate_limits = icx_pkg_cstate_limits; + break; + case CST_LIMIT_SLV: + pkg_cstate_limits = slv_pkg_cstate_limits; + break; + case CST_LIMIT_AMT: + pkg_cstate_limits = amt_pkg_cstate_limits; + break; + case CST_LIMIT_KNL: + pkg_cstate_limits = phi_pkg_cstate_limits; + break; + case CST_LIMIT_GMT: + pkg_cstate_limits = glm_pkg_cstate_limits; + break; + default: + return; + } -void prewake_cstate_probe(unsigned int family, unsigned int model); + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); + pkg_cstate_limit 
= pkg_cstate_limits[msr & 0xF]; +} -static void dump_nhm_platform_info(void) +static void dump_platform_info(void) { unsigned long long msr; unsigned int ratio; + if (!platform->has_nhm_msrs) + return; + get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); @@ -2435,19 +3109,27 @@ static void dump_nhm_platform_info(void) ratio = (msr >> 8) & 0xFF; fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); +} + +static void dump_power_ctl(void) +{ + unsigned long long msr; + + if (!platform->has_nhm_msrs) + return; get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ - if (dis_cstate_prewake) + if (platform->has_cst_prewake_bit) fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN"); return; } -static void dump_hsw_turbo_ratio_limits(void) +static void dump_turbo_ratio_limit2(void) { unsigned long long msr; unsigned int ratio; @@ -2466,7 +3148,7 @@ static void dump_hsw_turbo_ratio_limits(void) return; } -static void dump_ivt_turbo_ratio_limits(void) +static void dump_turbo_ratio_limit1(void) { unsigned long long msr; unsigned int ratio; @@ -2509,29 +3191,7 @@ static void dump_ivt_turbo_ratio_limits(void) return; } -int has_turbo_ratio_group_limits(int family, int model) -{ - - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_ATOM_GOLDMONT_D: - case INTEL_FAM6_ATOM_TREMONT_D: - return 1; - default: - return 0; - } -} - -static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model) +static void dump_turbo_ratio_limits(int trl_msr_offset) { unsigned long long msr, core_counts; int shift; @@ 
-2540,7 +3200,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model) fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr); - if (has_turbo_ratio_group_limits(family, model)) { + if (platform->trl_msrs & TRL_CORECOUNT) { get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); } else { @@ -2657,10 +3317,13 @@ static void dump_knl_turbo_ratio_limits(void) ratio[i], bclk, ratio[i] * bclk, cores[i]); } -static void dump_nhm_cst_cfg(void) +static void dump_cst_cfg(void) { unsigned long long msr; + if (!platform->has_nhm_msrs) + return; + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); @@ -2673,7 +3336,7 @@ static void dump_nhm_cst_cfg(void) (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) - if (has_automatic_cstate_conversion) { + if (platform->has_cst_auto_convension) { fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); } @@ -2730,39 +3393,50 @@ void print_irtl(void) { unsigned long long msr; - get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); - - get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? 
"" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (!platform->has_irtl_msrs) + return; - get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (platform->supported_cstates & PC3) { + get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } - if (!do_irtl_hsw) - return; + if (platform->supported_cstates & PC6) { + get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } - get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (platform->supported_cstates & PC7) { + get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } - get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (platform->supported_cstates & PC8) { + get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? 
"" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } - get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (platform->supported_cstates & PC9) { + get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } + if (platform->supported_cstates & PC10) { + get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } } void free_fd_percpu(void) @@ -2785,6 +3459,14 @@ void free_all_buffers(void) cpu_present_set = NULL; cpu_present_setsize = 0; + CPU_FREE(cpu_effective_set); + cpu_effective_set = NULL; + cpu_effective_setsize = 0; + + CPU_FREE(cpu_allowed_set); + cpu_allowed_set = NULL; + cpu_allowed_setsize = 0; + CPU_FREE(cpu_affinity_set); cpu_affinity_set = NULL; cpu_affinity_setsize = 0; @@ -2927,6 +3609,59 @@ int get_physical_node_id(struct cpu_topology *thiscpu) return -1; } +static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) +{ + unsigned int start, end; + char *next = cpu_str; + + while (next && *next) { + + if (*next == '-') /* no negative cpu numbers */ + return 1; + + start = strtoul(next, &next, 10); + + if (start >= CPU_SUBSET_MAXCPUS) + return 1; + CPU_SET_S(start, cpu_set_size, cpu_set); + + if (*next == '\0' || *next == '\n') + break; + + if (*next == ',') { + next += 1; + continue; + } + + if (*next == '-') { + next += 1; /* start range */ + } else if (*next == '.') { + next += 1; + if (*next == '.') + next += 1; /* start range */ + else + return 1; + } + + end = 
strtoul(next, &next, 10); + if (end <= start) + return 1; + + while (++start <= end) { + if (start >= CPU_SUBSET_MAXCPUS) + return 1; + CPU_SET_S(start, cpu_set_size, cpu_set); + } + + if (*next == ',') + next += 1; + else if (*next != '\0' && *next != '\n') + return 1; + } + + return 0; +} + int get_thread_siblings(struct cpu_topology *thiscpu) { char path[80], character; @@ -2998,7 +3733,7 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); - if (cpu_is_not_present(t->cpu_id)) + if (cpu_is_not_allowed(t->cpu_id)) continue; t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); @@ -3050,11 +3785,51 @@ int for_all_proc_cpus(int (func) (int)) return 0; } +#define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective" + +static char cpu_effective_str[1024]; + +static int update_effective_str(bool startup) +{ + FILE *fp; + char *pos; + char buf[1024]; + int ret; + + if (cpu_effective_str[0] == '\0' && !startup) + return 0; + + fp = fopen(PATH_EFFECTIVE_CPUS, "r"); + if (!fp) + return 0; + + pos = fgets(buf, 1024, fp); + if (!pos) + err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS); + + fclose(fp); + + ret = strncmp(cpu_effective_str, buf, 1024); + if (!ret) + return 0; + + strncpy(cpu_effective_str, buf, 1024); + return 1; +} + +static void update_effective_set(bool startup) +{ + update_effective_str(startup); + + if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize)) + err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str); +} + void re_initialize(void) { free_all_buffers(); - setup_all_buffers(); - fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); + setup_all_buffers(false); + fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } void set_max_cpu_num(void) @@ -3191,8 +3966,8 @@ int snapshot_gfx_rc6_ms(void) /* * 
snapshot_gfx_mhz() * - * record snapshot of - * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz + * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz + * when /sys/class/drm/card0/gt_cur_freq_mhz is not available. * * return 1 if config change requires a restart, else return 0 */ @@ -3201,9 +3976,11 @@ int snapshot_gfx_mhz(void) static FILE *fp; int retval; - if (fp == NULL) - fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); - else { + if (fp == NULL) { + fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r"); + if (!fp) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); + } else { rewind(fp); fflush(fp); } @@ -3218,8 +3995,8 @@ int snapshot_gfx_mhz(void) /* * snapshot_gfx_cur_mhz() * - * record snapshot of - * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz + * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz + * when /sys/class/drm/card0/gt_act_freq_mhz is not available. 
* * return 1 if config change requires a restart, else return 0 */ @@ -3228,9 +4005,11 @@ int snapshot_gfx_act_mhz(void) static FILE *fp; int retval; - if (fp == NULL) - fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); - else { + if (fp == NULL) { + fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r"); + if (!fp) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); + } else { rewind(fp); fflush(fp); } @@ -3562,6 +4341,10 @@ restart: re_initialize(); goto restart; } + if (update_effective_str(false)) { + re_initialize(); + goto restart; + } do_sleep(); if (snapshot_proc_sysfs_files()) goto restart; @@ -3674,395 +4457,31 @@ void check_permissions(void) exit(-6); } -/* - * NHM adds support for additional MSRs: - * - * MSR_SMI_COUNT 0x00000034 - * - * MSR_PLATFORM_INFO 0x000000ce - * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 - * - * MSR_MISC_PWR_MGMT 0x000001aa - * - * MSR_PKG_C3_RESIDENCY 0x000003f8 - * MSR_PKG_C6_RESIDENCY 0x000003f9 - * MSR_CORE_C3_RESIDENCY 0x000003fc - * MSR_CORE_C6_RESIDENCY 0x000003fd - * - * Side effect: - * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL - * sets has_misc_feature_control - */ -int probe_nhm_msrs(unsigned int family, unsigned int model) +void probe_bclk(void) { unsigned long long msr; unsigned int base_ratio; - int *pkg_cstate_limits; - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - bclk = discover_bclk(family, model); + if (!platform->has_nhm_msrs) + return; - switch (model) { - case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ - case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ - pkg_cstate_limits = nhm_pkg_cstate_limits; - break; - case INTEL_FAM6_SANDYBRIDGE: /* SNB */ - case INTEL_FAM6_SANDYBRIDGE_X: /* SNB Xeon */ - case INTEL_FAM6_IVYBRIDGE: /* IVB */ - case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ - pkg_cstate_limits = snb_pkg_cstate_limits; - 
has_misc_feature_control = 1; - break; - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - case INTEL_FAM6_HASWELL_X: /* HSX */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_BROADWELL_G: /* BDW */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - pkg_cstate_limits = hsw_pkg_cstate_limits; - has_misc_feature_control = 1; - break; - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ - pkg_cstate_limits = skx_pkg_cstate_limits; - has_misc_feature_control = 1; - break; - case INTEL_FAM6_ICELAKE_X: /* ICX */ - pkg_cstate_limits = icx_pkg_cstate_limits; - has_misc_feature_control = 1; - break; - case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ - no_MSR_MISC_PWR_MGMT = 1; - /* FALLTHRU */ - case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ - pkg_cstate_limits = slv_pkg_cstate_limits; - break; - case INTEL_FAM6_ATOM_AIRMONT: /* AMT */ - pkg_cstate_limits = amt_pkg_cstate_limits; - no_MSR_MISC_PWR_MGMT = 1; - break; - case INTEL_FAM6_XEON_PHI_KNL: /* PHI */ - pkg_cstate_limits = phi_pkg_cstate_limits; - break; - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ - pkg_cstate_limits = glm_pkg_cstate_limits; - break; - default: - return 0; - } - get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); - pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; + if (platform->bclk_freq == BCLK_100MHZ) + bclk = 100.00; + else if (platform->bclk_freq == BCLK_133MHZ) + bclk = 133.33; + else if (platform->bclk_freq == BCLK_SLV) + bclk = slm_bclk(); + else + return; get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); base_ratio = (msr >> 8) & 0xFF; base_hz = base_ratio * bclk * 1000000; has_base_hz = 1; - return 1; -} - -/* - * SLV client has support for unique MSRs: - * - * 
MSR_CC6_DEMOTION_POLICY_CONFIG - * MSR_MC6_DEMOTION_POLICY_CONFIG - */ - -int has_slv_msrs(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_SILVERMONT: - case INTEL_FAM6_ATOM_SILVERMONT_MID: - case INTEL_FAM6_ATOM_AIRMONT_MID: - return 1; - } - return 0; -} - -int is_dnv(unsigned int family, unsigned int model) -{ - - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_GOLDMONT_D: - return 1; - } - return 0; -} - -int is_bdx(unsigned int family, unsigned int model) -{ - - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_BROADWELL_X: - return 1; - } - return 0; -} - -int is_skx(unsigned int family, unsigned int model) -{ - - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_SKYLAKE_X: - return 1; - } - return 0; -} - -int is_icx(unsigned int family, unsigned int model) -{ - - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ICELAKE_X: - return 1; - } - return 0; -} - -int is_spr(unsigned int family, unsigned int model) -{ - - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_SAPPHIRERAPIDS_X: - return 1; - } - return 0; -} - -int is_ehl(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_TREMONT: - return 1; - } - return 0; -} - -int is_jvl(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_TREMONT_D: - return 1; - } - return 0; -} - -int has_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (has_slv_msrs(family, model)) - return 0; - - if (family != 6) - return 0; - - 
switch (model) { - /* Nehalem compatible, but do not include turbo-ratio limit support */ - case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ - case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */ - return 0; - default: - return 1; - } -} - -int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (has_slv_msrs(family, model)) - return 1; - - return 0; -} - -int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ - case INTEL_FAM6_HASWELL_X: /* HSW Xeon */ - return 1; - default: - return 0; - } -} - -int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_HASWELL_X: /* HSW Xeon */ - return 1; - default: - return 0; - } -} - -int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ - return 1; - default: - return 0; - } -} - -int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_SAPPHIRERAPIDS_X: - return 1; - default: - return 0; - } -} - -int has_config_tdp(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_IVYBRIDGE: /* IVB */ - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_X: /* HSX */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_BROADWELL_G: /* BDW */ - case 
INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_ICELAKE_X: /* ICX */ - case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ - case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ - return 1; - default: - return 0; - } -} - -/* - * tcc_offset_bits: - * 0: Tcc Offset not supported (Default) - * 6: Bit 29:24 of MSR_PLATFORM_INFO - * 4: Bit 27:24 of MSR_PLATFORM_INFO - */ -void check_tcc_offset(int model) -{ - unsigned long long msr; - - if (!genuine_intel) - return; - - switch (model) { - case INTEL_FAM6_SKYLAKE_L: - case INTEL_FAM6_SKYLAKE: - case INTEL_FAM6_KABYLAKE_L: - case INTEL_FAM6_KABYLAKE: - case INTEL_FAM6_ICELAKE_L: - case INTEL_FAM6_ICELAKE: - case INTEL_FAM6_TIGERLAKE_L: - case INTEL_FAM6_TIGERLAKE: - case INTEL_FAM6_COMETLAKE: - if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) { - msr = (msr >> 30) & 1; - if (msr) - tcc_offset_bits = 6; - } - return; - default: - return; - } + if (platform->enable_tsc_tweak) + tsc_tweak = base_hz / tsc_hz; } static void remove_underbar(char *s) @@ -4078,44 +4497,37 @@ static void remove_underbar(char *s) *to = 0; } -static void dump_turbo_ratio_info(unsigned int family, unsigned int model) +static void dump_turbo_ratio_info(void) { if (!has_turbo) return; - if (has_hsw_turbo_ratio_limit(family, model)) - dump_hsw_turbo_ratio_limits(); + if (!platform->has_nhm_msrs) + return; + + if (platform->trl_msrs & TRL_LIMIT2) + dump_turbo_ratio_limit2(); - if (has_ivt_turbo_ratio_limit(family, model)) - dump_ivt_turbo_ratio_limits(); + if (platform->trl_msrs & TRL_LIMIT1) + dump_turbo_ratio_limit1(); - if (has_turbo_ratio_limit(family, model)) { - dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model); + if (platform->trl_msrs & TRL_BASE) { + dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT); if (is_hybrid) - dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model); + 
dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT); } - if (has_atom_turbo_ratio_limit(family, model)) + if (platform->trl_msrs & TRL_ATOM) dump_atom_turbo_ratio_limits(); - if (has_knl_turbo_ratio_limit(family, model)) + if (platform->trl_msrs & TRL_KNL) dump_knl_turbo_ratio_limits(); - if (has_config_tdp(family, model)) + if (platform->has_config_tdp) dump_config_tdp(); } -static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model) -{ - if (!do_nhm_platform_info) - return; - - dump_nhm_platform_info(); - dump_turbo_ratio_info(family, model); - dump_nhm_cst_cfg(); -} - static int read_sysfs_int(char *path) { FILE *input; @@ -4152,7 +4564,7 @@ static void dump_sysfs_file(char *path) fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); } -static void intel_uncore_frequency_probe(void) +static void probe_intel_uncore_frequency(void) { int i, j; char path[128]; @@ -4163,6 +4575,10 @@ static void intel_uncore_frequency_probe(void) if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK)) return; + /* Cluster level sysfs not supported yet. 
*/ + if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK)) + return; + if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK)) BIC_PRESENT(BIC_UNCORE_MHZ); @@ -4194,6 +4610,20 @@ static void intel_uncore_frequency_probe(void) } } +static void probe_graphics(void) +{ + if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) + BIC_PRESENT(BIC_GFX_rc6); + + if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) || + !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXMHz); + + if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) || + !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXACTMHz); +} + static void dump_sysfs_cstate_config(void) { char path[64]; @@ -4310,7 +4740,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; /* EPB is per-package */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; if (cpu_migrate(cpu)) { @@ -4359,7 +4789,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; /* MSR_HWP_CAPABILITIES is per-package */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; if (cpu_migrate(cpu)) { @@ -4442,7 +4872,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data cpu = t->cpu_id; /* per-package */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; if (cpu_migrate(cpu)) { @@ -4450,7 +4880,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data return -1; } - if (do_core_perf_limit_reasons) { + if (platform->plr_msrs & PLR_CORE) 
{ get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", @@ -4483,7 +4913,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); } - if (do_gfx_perf_limit_reasons) { + if (platform->plr_msrs & PLR_GFX) { get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", @@ -4503,7 +4933,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 25) ? "GFXPwr, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); } - if (do_ring_perf_limit_reasons) { + if (platform->plr_msrs & PLR_RING) { get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(outf, " (Active: %s%s%s%s%s%s)", @@ -4525,208 +4955,74 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp_intel(unsigned int model) +double get_quirk_tdp(void) { - unsigned long long msr; + if (platform->rapl_quirk_tdp) + return platform->rapl_quirk_tdp; - if (do_rapl & RAPL_PKG_POWER_INFO) - if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) - return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; - - switch (model) { - case INTEL_FAM6_ATOM_SILVERMONT: - case INTEL_FAM6_ATOM_SILVERMONT_D: - return 30.0; - default: - return 135.0; - } + return 135.0; } -double get_tdp_amd(unsigned int family) +double get_tdp_intel(void) { - UNUSED(family); + unsigned long long msr; - /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ - return 280.0; + if (platform->rapl_msrs & 
RAPL_PKG_POWER_INFO) + if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) + return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; + return get_quirk_tdp(); } -/* - * rapl_dram_energy_units_probe() - * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. - */ -static double rapl_dram_energy_units_probe(int model, double rapl_energy_units) +double get_tdp_amd(void) { - /* only called for genuine_intel, family 6 */ - - switch (model) { - case INTEL_FAM6_HASWELL_X: /* HSX */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ - case INTEL_FAM6_ICELAKE_X: /* ICX */ - return (rapl_dram_energy_units = 15.3 / 1000000); - default: - return (rapl_energy_units); - } + return get_quirk_tdp(); } -void rapl_probe_intel(unsigned int family, unsigned int model) +void rapl_probe_intel(void) { unsigned long long msr; unsigned int time_unit; double tdp; - if (family != 6) - return; - - switch (model) { - case INTEL_FAM6_SANDYBRIDGE: - case INTEL_FAM6_IVYBRIDGE: - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_BROADWELL_G: /* BDW */ - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - BIC_PRESENT(BIC_GFX_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - BIC_PRESENT(BIC_GFXWatt); - } - break; - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; - if (rapl_joules) - BIC_PRESENT(BIC_Pkg_J); - else - BIC_PRESENT(BIC_PkgWatt); - break; - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - do_rapl = - RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS - | RAPL_GFX | RAPL_PKG_POWER_INFO; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - 
BIC_PRESENT(BIC_Cor_J); - BIC_PRESENT(BIC_RAM_J); - BIC_PRESENT(BIC_GFX_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - BIC_PRESENT(BIC_RAMWatt); - BIC_PRESENT(BIC_GFXWatt); - } - break; - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ - do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; - BIC_PRESENT(BIC_PKG__); - if (rapl_joules) - BIC_PRESENT(BIC_Pkg_J); - else - BIC_PRESENT(BIC_PkgWatt); - break; - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - do_rapl = - RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS - | RAPL_GFX | RAPL_PKG_POWER_INFO; - BIC_PRESENT(BIC_PKG__); - BIC_PRESENT(BIC_RAM__); - if (rapl_joules) { + if (rapl_joules) { + if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS) BIC_PRESENT(BIC_Pkg_J); + if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) BIC_PRESENT(BIC_Cor_J); + if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS) BIC_PRESENT(BIC_RAM_J); + if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS) BIC_PRESENT(BIC_GFX_J); - } else { + } else { + if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS) BIC_PRESENT(BIC_PkgWatt); + if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) BIC_PRESENT(BIC_CorWatt); + if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS) BIC_PRESENT(BIC_RAMWatt); + if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS) BIC_PRESENT(BIC_GFXWatt); - } - break; - case INTEL_FAM6_HASWELL_X: /* HSX */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_ICELAKE_X: /* ICX */ - case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ - case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ - do_rapl = - RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | - RAPL_PKG_POWER_INFO; - BIC_PRESENT(BIC_PKG__); - BIC_PRESENT(BIC_RAM__); - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_RAM_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_RAMWatt); - } - break; 
- case INTEL_FAM6_SANDYBRIDGE_X: - case INTEL_FAM6_IVYBRIDGE_X: - do_rapl = - RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | - RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; - BIC_PRESENT(BIC_PKG__); - BIC_PRESENT(BIC_RAM__); - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - BIC_PRESENT(BIC_RAM_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - BIC_PRESENT(BIC_RAMWatt); - } - break; - case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ - case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ - do_rapl = RAPL_PKG | RAPL_CORES; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - } - break; - case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - do_rapl = - RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | - RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; + } + + if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) BIC_PRESENT(BIC_PKG__); + if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) BIC_PRESENT(BIC_RAM__); - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - BIC_PRESENT(BIC_RAM_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - BIC_PRESENT(BIC_RAMWatt); - } - break; - default: - return; - } /* units on package 0, verify later other packages match */ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) return; rapl_power_units = 1.0 / (1 << (msr & 0xF)); - if (model == INTEL_FAM6_ATOM_SILVERMONT) + if (platform->has_rapl_divisor) rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; else rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); - rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units); + if (platform->has_fixed_rapl_unit) + rapl_dram_energy_units = (15.3 / 1000000); + else + rapl_dram_energy_units = rapl_energy_units; time_unit = msr >> 16 & 0xF; if (time_unit == 0) @@ -4734,32 
+5030,18 @@ void rapl_probe_intel(unsigned int family, unsigned int model) rapl_time_units = 1.0 / (1 << (time_unit)); - tdp = get_tdp_intel(model); + tdp = get_tdp_intel(); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (!quiet) fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); } -void rapl_probe_amd(unsigned int family, unsigned int model) +void rapl_probe_amd(void) { unsigned long long msr; - unsigned int eax, ebx, ecx, edx; - unsigned int has_rapl = 0; double tdp; - UNUSED(model); - - if (max_extended_level >= 0x80000007) { - __cpuid(0x80000007, eax, ebx, ecx, edx); - /* RAPL (Fam 17h+) */ - has_rapl = edx & (1 << 14); - } - - if (!has_rapl || family < 0x17) - return; - - do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; if (rapl_joules) { BIC_PRESENT(BIC_Pkg_J); BIC_PRESENT(BIC_Cor_J); @@ -4775,128 +5057,13 @@ void rapl_probe_amd(unsigned int family, unsigned int model) rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); rapl_power_units = ldexp(1.0, -(msr & 0xf)); - tdp = get_tdp_amd(family); + tdp = get_tdp_amd(); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (!quiet) fprintf(outf, "RAPL: %.0f sec. 
Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); } -/* - * rapl_probe() - * - * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units - */ -void rapl_probe(unsigned int family, unsigned int model) -{ - if (genuine_intel) - rapl_probe_intel(family, model); - if (authentic_amd || hygon_genuine) - rapl_probe_amd(family, model); -} - -void perf_limit_reasons_probe(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return; - - if (family != 6) - return; - - switch (model) { - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - do_gfx_perf_limit_reasons = 1; - /* FALLTHRU */ - case INTEL_FAM6_HASWELL_X: /* HSX */ - do_core_perf_limit_reasons = 1; - do_ring_perf_limit_reasons = 1; - default: - return; - } -} - -void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) -{ - if (family != 6) - return; - - switch (model) { - case INTEL_FAM6_BROADWELL_X: - case INTEL_FAM6_SKYLAKE_X: - has_automatic_cstate_conversion = 1; - } -} - -void prewake_cstate_probe(unsigned int family, unsigned int model) -{ - if (is_icx(family, model) || is_spr(family, model)) - dis_cstate_prewake = 1; -} - -int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) -{ - unsigned long long msr; - unsigned int dts, dts2; - int cpu; - - UNUSED(c); - UNUSED(p); - - if (!(do_dts || do_ptm)) - return 0; - - cpu = t->cpu_id; - - /* DTS is per-core, no need to print for each thread */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) - return 0; - - if (cpu_migrate(cpu)) { - fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); - return -1; - } - - if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { - if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) - return 0; - - dts = (msr >> 16) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); - - if (get_msr(cpu, 
MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) - return 0; - - dts = (msr >> 16) & 0x7F; - dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tj_max - dts, tj_max - dts2); - } - - if (do_dts && debug) { - unsigned int resolution; - - if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) - return 0; - - dts = (msr >> 16) & 0x7F; - resolution = (msr >> 27) & 0xF; - fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", - cpu, msr, tj_max - dts, resolution); - - if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) - return 0; - - dts = (msr >> 16) & 0x7F; - dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tj_max - dts, tj_max - dts2); - } - - return 0; -} - void print_power_limit_msr(int cpu, unsigned long long msr, char *label) { fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", @@ -4918,11 +5085,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) UNUSED(c); UNUSED(p); - if (!do_rapl) + if (!platform->rapl_msrs) return 0; /* RAPL counters are per package, so print only for 1st thread/package */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; cpu = t->cpu_id; @@ -4931,7 +5098,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; } - if (do_rapl & RAPL_AMD_F17H) { + if (platform->rapl_msrs & RAPL_AMD_F17H) { msr_name = "MSR_RAPL_PWR_UNIT"; if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) return -1; @@ -4944,7 +5111,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units); - if (do_rapl & RAPL_PKG_POWER_INFO) { + if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { if 
(get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) return -5; @@ -4957,7 +5124,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (do_rapl & RAPL_PKG) { + if (platform->rapl_msrs & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) return -9; @@ -4981,7 +5148,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); } - if (do_rapl & RAPL_DRAM_POWER_INFO) { + if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; @@ -4992,7 +5159,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (do_rapl & RAPL_DRAM) { + if (platform->rapl_msrs & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", @@ -5000,20 +5167,20 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) print_power_limit_msr(cpu, msr, "DRAM Limit"); } - if (do_rapl & RAPL_CORE_POLICY) { + if (platform->rapl_msrs & RAPL_CORE_POLICY) { if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } - if (do_rapl & RAPL_CORES_POWER_LIMIT) { + if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); print_power_limit_msr(cpu, msr, "Cores Limit"); } - if (do_rapl & RAPL_GFX) { + if (platform->rapl_msrs & RAPL_GFX) { if (get_msr(cpu, MSR_PP1_POLICY, &msr)) return -8; @@ -5029,217 +5196,24 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* - * SNB adds support for additional MSRs: + * probe_rapl() * - * MSR_PKG_C7_RESIDENCY 0x000003fa - * MSR_CORE_C7_RESIDENCY 0x000003fe - * MSR_PKG_C2_RESIDENCY 0x0000060d + * sets rapl_power_units, rapl_energy_units, rapl_time_units */ - -int has_snb_msrs(unsigned int family, unsigned int model) +void probe_rapl(void) { - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_SANDYBRIDGE: - case INTEL_FAM6_SANDYBRIDGE_X: - case INTEL_FAM6_IVYBRIDGE: /* IVB */ - case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_X: /* HSW */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_BROADWELL_G: /* BDW */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_ICELAKE_X: /* ICX */ - case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ - return 1; - } - return 0; -} - -/* - * HSW ULT added support for C8/C9/C10 MSRs: - * - * MSR_PKG_C8_RESIDENCY 0x00000630 - * MSR_PKG_C9_RESIDENCY 0x00000631 - * MSR_PKG_C10_RESIDENCY 0x00000632 - * - * MSR_PKGC8_IRTL 0x00000633 - * MSR_PKGC9_IRTL 0x00000634 - * MSR_PKGC10_IRTL 0x00000635 - * - */ -int has_c8910_msrs(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case 
INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - return 1; - } - return 0; -} - -/* - * SKL adds support for additional MSRS: - * - * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 - * MSR_PKG_ANY_CORE_C0_RES 0x00000659 - * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A - * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B - */ -int has_skl_msrs(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - return 1; - } - return 0; -} - -int is_slm(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ - case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ - return 1; - } - return 0; -} - -int is_knl(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ - return 1; - } - return 0; -} - -int is_cnl(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - return 1; - } - - return 0; -} - -unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) -{ - if (is_knl(family, model)) - return 1024; - return 1; -} - -#define SLM_BCLK_FREQS 5 -double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; - -double slm_bclk(void) -{ - unsigned long long msr = 3; - unsigned int i; - double freq; - - if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) - fprintf(outf, "SLM BCLK: unknown\n"); - - i = msr & 0xf; - if (i >= SLM_BCLK_FREQS) { - fprintf(outf, "SLM 
BCLK[%d] invalid\n", i); - i = 3; - } - freq = slm_freq_table[i]; - - if (!quiet) - fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); - - return freq; -} - -double discover_bclk(unsigned int family, unsigned int model) -{ - if (has_snb_msrs(family, model) || is_knl(family, model)) - return 100.00; - else if (is_slm(family, model)) - return slm_bclk(); - else - return 133.33; -} - -int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) -{ - unsigned int eax, ebx, ecx, edx; - - UNUSED(c); - UNUSED(p); - - if (!genuine_intel) - return 0; + if (!platform->rapl_msrs) + return; - if (cpu_migrate(t->cpu_id)) { - fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); - return -1; - } + if (genuine_intel) + rapl_probe_intel(); + if (authentic_amd || hygon_genuine) + rapl_probe_amd(); - if (max_level < 0x1a) - return 0; + if (quiet) + return; - __cpuid(0x1a, eax, ebx, ecx, edx); - eax = (eax >> 24) & 0xFF; - if (eax == 0x20) - t->is_atom = true; - return 0; + for_all_cpus(print_rapl, ODD_COUNTERS); } /* @@ -5268,7 +5242,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk return 0; /* this is a per-package concept */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; cpu = t->cpu_id; @@ -5284,7 +5258,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk } /* Temperature Target MSR is Nehalem and newer only */ - if (!do_nhm_platform_info) + if (!platform->has_nhm_msrs) goto guess; if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) @@ -5293,20 +5267,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk tcc_default = (msr >> 16) & 0xFF; if (!quiet) { - switch (tcc_offset_bits) { - case 4: - tcc_offset = (msr >> 24) & 0xF; - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", - cpu, msr, tcc_default - 
tcc_offset, tcc_default, tcc_offset); - break; - case 6: - tcc_offset = (msr >> 24) & 0x3F; + int bits = platform->tcc_offset_bits; + unsigned long long enabled = 0; + + if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled)) + enabled = (enabled >> 30) & 1; + + if (bits && enabled) { + tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0); fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); - break; - default: + } else { fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); - break; } } @@ -5324,6 +5296,108 @@ guess: return 0; } +int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + unsigned int dts, dts2; + int cpu; + + UNUSED(c); + UNUSED(p); + + if (!(do_dts || do_ptm)) + return 0; + + cpu = t->cpu_id; + + /* DTS is per-core, no need to print for each thread */ + if (!is_cpu_first_thread_in_core(t, c, p)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); + + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tj_max - dts, tj_max - dts2); + } + + if (do_dts && debug) { + unsigned int resolution; + + if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + resolution = (msr >> 27) & 0xF; + fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", + cpu, msr, tj_max - dts, resolution); + + if (get_msr(cpu, 
MSR_IA32_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tj_max - dts, tj_max - dts2); + } + + return 0; +} + +void probe_thermal(void) +{ + if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) + BIC_PRESENT(BIC_CORE_THROT_CNT); + else + BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); + + for_all_cpus(set_temperature_target, ODD_COUNTERS); + + if (quiet) + return; + + for_all_cpus(print_thermal, ODD_COUNTERS); +} + +int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned int eax, ebx, ecx, edx; + + UNUSED(c); + UNUSED(p); + + if (!genuine_intel) + return 0; + + if (cpu_migrate(t->cpu_id)) { + fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); + return -1; + } + + if (max_level < 0x1a) + return 0; + + __cpuid(0x1a, eax, ebx, ecx, edx); + eax = (eax >> 24) & 0xFF; + if (eax == 0x20) + t->is_atom = true; + return 0; +} + void decode_feature_control_msr(void) { unsigned long long msr; @@ -5354,7 +5428,7 @@ void decode_misc_feature_control(void) { unsigned long long msr; - if (!has_misc_feature_control) + if (!platform->has_msr_misc_feature_control) return; if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) @@ -5375,10 +5449,7 @@ void decode_misc_pwr_mgmt_msr(void) { unsigned long long msr; - if (!do_nhm_platform_info) - return; - - if (no_MSR_MISC_PWR_MGMT) + if (!platform->has_msr_misc_pwr_mgmt) return; if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) @@ -5397,6 +5468,9 @@ void decode_c6_demotion_policy_msr(void) { unsigned long long msr; + if (!platform->has_msr_c6_demotion_policy_config) + return; + if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", base_cpu, msr, msr & (1 << 0) ? 
"EN" : "DIS"); @@ -5406,67 +5480,6 @@ void decode_c6_demotion_policy_msr(void) base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); } -/* - * When models are the same, for the purpose of turbostat, reuse - */ -unsigned int intel_model_duplicates(unsigned int model) -{ - - switch (model) { - case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ - case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ - case 0x1F: /* Core i7 and i5 Processor - Nehalem */ - case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */ - case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */ - return INTEL_FAM6_NEHALEM; - - case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ - case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */ - return INTEL_FAM6_NEHALEM_EX; - - case INTEL_FAM6_XEON_PHI_KNM: - return INTEL_FAM6_XEON_PHI_KNL; - - case INTEL_FAM6_BROADWELL_X: - case INTEL_FAM6_BROADWELL_D: /* BDX-DE */ - return INTEL_FAM6_BROADWELL_X; - - case INTEL_FAM6_SKYLAKE_L: - case INTEL_FAM6_SKYLAKE: - case INTEL_FAM6_KABYLAKE_L: - case INTEL_FAM6_KABYLAKE: - case INTEL_FAM6_COMETLAKE_L: - case INTEL_FAM6_COMETLAKE: - return INTEL_FAM6_SKYLAKE_L; - - case INTEL_FAM6_ICELAKE_L: - case INTEL_FAM6_ICELAKE_NNPI: - case INTEL_FAM6_TIGERLAKE_L: - case INTEL_FAM6_TIGERLAKE: - case INTEL_FAM6_ROCKETLAKE: - case INTEL_FAM6_LAKEFIELD: - case INTEL_FAM6_ALDERLAKE: - case INTEL_FAM6_ALDERLAKE_L: - case INTEL_FAM6_ATOM_GRACEMONT: - case INTEL_FAM6_RAPTORLAKE: - case INTEL_FAM6_RAPTORLAKE_P: - case INTEL_FAM6_RAPTORLAKE_S: - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: - return INTEL_FAM6_CANNONLAKE_L; - - case INTEL_FAM6_ATOM_TREMONT_L: - return INTEL_FAM6_ATOM_TREMONT; - - case INTEL_FAM6_ICELAKE_D: - return INTEL_FAM6_ICELAKE_X; - - case INTEL_FAM6_EMERALDRAPIDS_X: - return INTEL_FAM6_SAPPHIRERAPIDS_X; - } - return model; -} - void print_dev_latency(void) { char *path = 
"/dev/cpu_dma_latency"; @@ -5510,6 +5523,101 @@ void linux_perf_init(void) BIC_PRESENT(BIC_IPC); } +void probe_cstates(void) +{ + probe_cst_limit(); + + if (platform->supported_cstates & CC1) + BIC_PRESENT(BIC_CPU_c1); + + if (platform->supported_cstates & CC3) + BIC_PRESENT(BIC_CPU_c3); + + if (platform->supported_cstates & CC6) + BIC_PRESENT(BIC_CPU_c6); + + if (platform->supported_cstates & CC7) + BIC_PRESENT(BIC_CPU_c7); + + if (platform->supported_cstates & PC2 && (pkg_cstate_limit >= PCL__2)) + BIC_PRESENT(BIC_Pkgpc2); + + if (platform->supported_cstates & PC3 && (pkg_cstate_limit >= PCL__3)) + BIC_PRESENT(BIC_Pkgpc3); + + if (platform->supported_cstates & PC6 && (pkg_cstate_limit >= PCL__6)) + BIC_PRESENT(BIC_Pkgpc6); + + if (platform->supported_cstates & PC7 && (pkg_cstate_limit >= PCL__7)) + BIC_PRESENT(BIC_Pkgpc7); + + if (platform->supported_cstates & PC8 && (pkg_cstate_limit >= PCL__8)) + BIC_PRESENT(BIC_Pkgpc8); + + if (platform->supported_cstates & PC9 && (pkg_cstate_limit >= PCL__9)) + BIC_PRESENT(BIC_Pkgpc9); + + if (platform->supported_cstates & PC10 && (pkg_cstate_limit >= PCL_10)) + BIC_PRESENT(BIC_Pkgpc10); + + if (platform->has_msr_module_c6_res_ms) + BIC_PRESENT(BIC_Mod_c6); + + if (platform->has_ext_cst_msrs) { + BIC_PRESENT(BIC_Totl_c0); + BIC_PRESENT(BIC_Any_c0); + BIC_PRESENT(BIC_GFX_c0); + BIC_PRESENT(BIC_CPUGFX); + } + + if (quiet) + return; + + dump_power_ctl(); + dump_cst_cfg(); + decode_c6_demotion_policy_msr(); + print_dev_latency(); + dump_sysfs_cstate_config(); + print_irtl(); +} + +void probe_lpi(void) +{ + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) + BIC_PRESENT(BIC_CPU_LPI); + else + BIC_NOT_PRESENT(BIC_CPU_LPI); + + if (!access(sys_lpi_file_sysfs, R_OK)) { + sys_lpi_file = sys_lpi_file_sysfs; + BIC_PRESENT(BIC_SYS_LPI); + } else if (!access(sys_lpi_file_debugfs, R_OK)) { + sys_lpi_file = sys_lpi_file_debugfs; + BIC_PRESENT(BIC_SYS_LPI); + } else { + sys_lpi_file_sysfs = NULL; + 
BIC_NOT_PRESENT(BIC_SYS_LPI); + } + +} + +void probe_pstates(void) +{ + probe_bclk(); + + if (quiet) + return; + + dump_platform_info(); + dump_turbo_ratio_info(); + dump_sysfs_pstate_config(); + decode_misc_pwr_mgmt_msr(); + + for_all_cpus(print_hwp, ODD_COUNTERS); + for_all_cpus(print_epb, ODD_COUNTERS); + for_all_cpus(print_perf_limit, ODD_COUNTERS); +} + void process_cpuid() { unsigned int eax, ebx, ecx, edx; @@ -5569,10 +5677,8 @@ void process_cpuid() edx_flags & (1 << 22) ? "ACPI-TM" : "-", edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); } - if (genuine_intel) { - model_orig = model; - model = intel_model_duplicates(model); - } + + probe_platform_features(family, model); if (!(edx_flags & (1 << 5))) errx(1, "CPUID: no MSR"); @@ -5656,26 +5762,12 @@ void process_cpuid() __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); if (ebx_tsc != 0) { - if (!quiet && (ebx != 0)) fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); if (crystal_hz == 0) - switch (model) { - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - crystal_hz = 24000000; /* 24.0 MHz */ - break; - case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - crystal_hz = 25000000; /* 25.0 MHz */ - break; - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - crystal_hz = 19200000; /* 19.2 MHz */ - break; - default: - crystal_hz = 0; - } + crystal_hz = platform->crystal_freq; if (crystal_hz) { tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; @@ -5700,147 +5792,33 @@ void process_cpuid() } if (has_aperf) - aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); + aperf_mperf_multiplier = platform->need_perf_multiplier ? 
1024 : 1; BIC_PRESENT(BIC_IRQ); BIC_PRESENT(BIC_TSC_MHz); +} - if (probe_nhm_msrs(family, model)) { - do_nhm_platform_info = 1; - BIC_PRESENT(BIC_CPU_c1); - BIC_PRESENT(BIC_CPU_c3); - BIC_PRESENT(BIC_CPU_c6); - BIC_PRESENT(BIC_SMI); - } - do_snb_cstates = has_snb_msrs(family, model); - - if (do_snb_cstates) - BIC_PRESENT(BIC_CPU_c7); - - do_irtl_snb = has_snb_msrs(family, model); - if (do_snb_cstates && (pkg_cstate_limit >= PCL__2)) - BIC_PRESENT(BIC_Pkgpc2); - if (pkg_cstate_limit >= PCL__3) - BIC_PRESENT(BIC_Pkgpc3); - if (pkg_cstate_limit >= PCL__6) - BIC_PRESENT(BIC_Pkgpc6); - if (do_snb_cstates && (pkg_cstate_limit >= PCL__7)) - BIC_PRESENT(BIC_Pkgpc7); - if (has_slv_msrs(family, model)) { - BIC_NOT_PRESENT(BIC_Pkgpc2); - BIC_NOT_PRESENT(BIC_Pkgpc3); - BIC_PRESENT(BIC_Pkgpc6); - BIC_NOT_PRESENT(BIC_Pkgpc7); - BIC_PRESENT(BIC_Mod_c6); - use_c1_residency_msr = 1; - } - if (is_jvl(family, model)) { - BIC_NOT_PRESENT(BIC_CPU_c3); - BIC_NOT_PRESENT(BIC_CPU_c7); - BIC_NOT_PRESENT(BIC_Pkgpc2); - BIC_NOT_PRESENT(BIC_Pkgpc3); - BIC_NOT_PRESENT(BIC_Pkgpc6); - BIC_NOT_PRESENT(BIC_Pkgpc7); - } - if (is_dnv(family, model)) { - BIC_PRESENT(BIC_CPU_c1); - BIC_NOT_PRESENT(BIC_CPU_c3); - BIC_NOT_PRESENT(BIC_Pkgpc3); - BIC_NOT_PRESENT(BIC_CPU_c7); - BIC_NOT_PRESENT(BIC_Pkgpc7); - use_c1_residency_msr = 1; - } - if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) { - BIC_NOT_PRESENT(BIC_CPU_c3); - BIC_NOT_PRESENT(BIC_Pkgpc3); - BIC_NOT_PRESENT(BIC_CPU_c7); - BIC_NOT_PRESENT(BIC_Pkgpc7); - } - if (is_bdx(family, model)) { - BIC_NOT_PRESENT(BIC_CPU_c7); - BIC_NOT_PRESENT(BIC_Pkgpc7); - } - if (has_c8910_msrs(family, model)) { - if (pkg_cstate_limit >= PCL__8) - BIC_PRESENT(BIC_Pkgpc8); - if (pkg_cstate_limit >= PCL__9) - BIC_PRESENT(BIC_Pkgpc9); - if (pkg_cstate_limit >= PCL_10) - BIC_PRESENT(BIC_Pkgpc10); - } - do_irtl_hsw = has_c8910_msrs(family, model); - if (has_skl_msrs(family, model)) { - BIC_PRESENT(BIC_Totl_c0); - BIC_PRESENT(BIC_Any_c0); - 
BIC_PRESENT(BIC_GFX_c0); - BIC_PRESENT(BIC_CPUGFX); - } - do_slm_cstates = is_slm(family, model); - do_knl_cstates = is_knl(family, model); - - if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model)) - BIC_NOT_PRESENT(BIC_CPU_c3); - - if (!quiet) - decode_misc_pwr_mgmt_msr(); - - if (!quiet && has_slv_msrs(family, model)) - decode_c6_demotion_policy_msr(); - - rapl_probe(family, model); - perf_limit_reasons_probe(family, model); - automatic_cstate_conversion_probe(family, model); - - check_tcc_offset(model_orig); - - if (!quiet) - dump_cstate_pstate_config_info(family, model); - intel_uncore_frequency_probe(); - - if (!quiet) - print_dev_latency(); - if (!quiet) - dump_sysfs_cstate_config(); - if (!quiet) - dump_sysfs_pstate_config(); +void probe_pm_features(void) +{ + probe_pstates(); - if (has_skl_msrs(family, model) || is_ehl(family, model)) - calculate_tsc_tweak(); + probe_cstates(); - if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) - BIC_PRESENT(BIC_GFX_rc6); + probe_lpi(); - if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) - BIC_PRESENT(BIC_GFXMHz); + probe_intel_uncore_frequency(); - if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) - BIC_PRESENT(BIC_GFXACTMHz); + probe_graphics(); - if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) - BIC_PRESENT(BIC_CPU_LPI); - else - BIC_NOT_PRESENT(BIC_CPU_LPI); + probe_rapl(); - if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) - BIC_PRESENT(BIC_CORE_THROT_CNT); - else - BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); + probe_thermal(); - if (!access(sys_lpi_file_sysfs, R_OK)) { - sys_lpi_file = sys_lpi_file_sysfs; - BIC_PRESENT(BIC_SYS_LPI); - } else if (!access(sys_lpi_file_debugfs, R_OK)) { - sys_lpi_file = sys_lpi_file_debugfs; - BIC_PRESENT(BIC_SYS_LPI); - } else { - sys_lpi_file_sysfs = NULL; - BIC_NOT_PRESENT(BIC_SYS_LPI); - } + if 
(platform->has_nhm_msrs) + BIC_PRESENT(BIC_SMI); if (!quiet) decode_misc_feature_control(); - - return; } /* @@ -5855,7 +5833,7 @@ int dir_filter(const struct dirent *dirp) return 0; } -void topology_probe() +void topology_probe(bool startup) { int i; int max_core_id = 0; @@ -5888,14 +5866,62 @@ void topology_probe() for_all_proc_cpus(mark_cpu_present); /* - * Validate that all cpus in cpu_subset are also in cpu_present_set + * Allocate and initialize cpu_effective_set + */ + cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_effective_set == NULL) + err(3, "CPU_ALLOC"); + cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set); + update_effective_set(startup); + + /* + * Allocate and initialize cpu_allowed_set + */ + cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_allowed_set == NULL) + err(3, "CPU_ALLOC"); + cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set); + + /* + * Validate and update cpu_allowed_set. + * + * Make sure all cpus in cpu_subset are also in cpu_present_set during startup. + * Give a warning when cpus in cpu_subset become unavailable at runtime. + * Give a warning when cpus are not effective because of cgroup setting. + * + * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset. 
*/ for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { - if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) - if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) - err(1, "cpu%d not present", i); + if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) + continue; + + if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) { + if (cpu_subset) { + /* cpus in cpu_subset must be in cpu_present_set during startup */ + if (startup) + err(1, "cpu%d not present", i); + else + fprintf(stderr, "cpu%d not present\n", i); + } + continue; + } + + if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) { + if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) { + fprintf(stderr, "cpu%d not effective\n", i); + continue; + } + } + + CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); } + if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) + err(-ENODEV, "No valid cpus found"); + sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set); + /* * Allocate and initialize cpu_affinity_set */ @@ -6009,15 +6035,19 @@ void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_ if (*c == NULL) goto error; - for (i = 0; i < num_cores; i++) + for (i = 0; i < num_cores; i++) { (*c)[i].core_id = -1; + (*c)[i].base_cpu = -1; + } *p = calloc(topo.num_packages, sizeof(struct pkg_data)); if (*p == NULL) goto error; - for (i = 0; i < topo.num_packages; i++) + for (i = 0; i < topo.num_packages; i++) { (*p)[i].package_id = i; + (*p)[i].base_cpu = -1; + } return; error: @@ -6050,10 +6080,11 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, p = GET_PKG(pkg_base, pkg_id); t->cpu_id = cpu_id; - if (thread_id == 0) { - t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; - if (cpu_is_first_core_in_package(cpu_id)) - t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; + if (!cpu_is_not_allowed(cpu_id)) { + if (c->base_cpu < 0) + c->base_cpu = t->cpu_id; + if (p->base_cpu < 0) + p->base_cpu = t->cpu_id; } c->core_id = core_id; @@ -6093,59 +6124,64 @@ 
void allocate_irq_buffers(void) err(-1, "calloc %d", topo.max_cpu_num + 1); } -void setup_all_buffers(void) +int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + topo.allowed_cpus++; + if ((int)t->cpu_id == c->base_cpu) + topo.allowed_cores++; + if ((int)t->cpu_id == p->base_cpu) + topo.allowed_packages++; + + return 0; +} + +void topology_update(void) { - topology_probe(); + topo.allowed_cpus = 0; + topo.allowed_cores = 0; + topo.allowed_packages = 0; + for_all_cpus(update_topo, ODD_COUNTERS); +} +void setup_all_buffers(bool startup) +{ + topology_probe(startup); allocate_irq_buffers(); allocate_fd_percpu(); allocate_counters(&thread_even, &core_even, &package_even); allocate_counters(&thread_odd, &core_odd, &package_odd); allocate_output_buffer(); for_all_proc_cpus(initialize_counters); + topology_update(); } void set_base_cpu(void) { - base_cpu = sched_getcpu(); - if (base_cpu < 0) - err(-ENODEV, "No valid cpus found"); + int i; - if (debug > 1) - fprintf(outf, "base_cpu = %d\n", base_cpu); + for (i = 0; i < topo.max_cpu_num + 1; ++i) { + if (cpu_is_not_allowed(i)) + continue; + base_cpu = i; + if (debug > 1) + fprintf(outf, "base_cpu = %d\n", base_cpu); + return; + } + err(-ENODEV, "No valid cpus found"); } void turbostat_init() { - setup_all_buffers(); + setup_all_buffers(true); set_base_cpu(); check_dev_msr(); check_permissions(); process_cpuid(); + probe_pm_features(); linux_perf_init(); - if (!quiet) - for_all_cpus(print_hwp, ODD_COUNTERS); - - if (!quiet) - for_all_cpus(print_epb, ODD_COUNTERS); - - if (!quiet) - for_all_cpus(print_perf_limit, ODD_COUNTERS); - - if (!quiet) - for_all_cpus(print_rapl, ODD_COUNTERS); - - for_all_cpus(set_temperature_target, ODD_COUNTERS); - for_all_cpus(get_cpu_type, ODD_COUNTERS); for_all_cpus(get_cpu_type, EVEN_COUNTERS); - if (!quiet) - for_all_cpus(print_thermal, ODD_COUNTERS); - - if (!quiet && do_irtl_snb) - print_irtl(); - if (DO_BIC(BIC_IPC)) (void)get_instr_count_fd(base_cpu); } @@ 
-6160,8 +6196,6 @@ int fork_it(char **argv) first_counter_read = 0; if (status) exit(status); - /* clear affinity side-effect of get_counters() */ - sched_setaffinity(0, cpu_present_setsize, cpu_present_set); gettimeofday(&tv_even, (struct timezone *)NULL); child_pid = fork(); @@ -6225,7 +6259,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2023.03.17 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -6508,9 +6542,6 @@ void probe_sysfs(void) */ void parse_cpu_command(char *optarg) { - unsigned int start, end; - char *next; - if (!strcmp(optarg, "core")) { if (cpu_subset) goto error; @@ -6533,52 +6564,8 @@ void parse_cpu_command(char *optarg) CPU_ZERO_S(cpu_subset_size, cpu_subset); - next = optarg; - - while (next && *next) { - - if (*next == '-') /* no negative cpu numbers */ - goto error; - - start = strtoul(next, &next, 10); - - if (start >= CPU_SUBSET_MAXCPUS) - goto error; - CPU_SET_S(start, cpu_subset_size, cpu_subset); - - if (*next == '\0') - break; - - if (*next == ',') { - next += 1; - continue; - } - - if (*next == '-') { - next += 1; /* start range */ - } else if (*next == '.') { - next += 1; - if (*next == '.') - next += 1; /* start range */ - else - goto error; - } - - end = strtoul(next, &next, 10); - if (end <= start) - goto error; - - while (++start <= end) { - if (start >= CPU_SUBSET_MAXCPUS) - goto error; - CPU_SET_S(start, cpu_subset_size, cpu_subset); - } - - if (*next == ',') - next += 1; - else if (*next != '\0') - goto error; - } + if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size)) + goto error; return; @@ -6719,6 +6706,19 @@ void cmdline(int argc, char **argv) int main(int argc, char **argv) { + int fd, ret; + + fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); + if (fd < 0) + goto skip_cgroup_setting; + + ret = write(fd, "0\n", 2); + if (ret == -1) + perror("Can't update cgroup\n"); + + 
close(fd); + +skip_cgroup_setting: outf = stderr; cmdline(argc, argv); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 6ee22c3b251a..518f143c5b0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -24,6 +24,7 @@ #include "test_progs.h" #include "network_helpers.h" +#include "netlink_helpers.h" #include "test_tc_neigh_fib.skel.h" #include "test_tc_neigh.skel.h" #include "test_tc_peer.skel.h" @@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb) } } +enum dev_mode { + MODE_VETH, + MODE_NETKIT, +}; + struct netns_setup_result { - int ifindex_veth_src; - int ifindex_veth_src_fwd; - int ifindex_veth_dst; - int ifindex_veth_dst_fwd; + enum dev_mode dev_mode; + int ifindex_src; + int ifindex_src_fwd; + int ifindex_dst; + int ifindex_dst_fwd; }; static int get_ifaddr(const char *name, char *ifaddr) @@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr) return 0; } +static int create_netkit(int mode, char *prim, char *peer) +{ + struct rtattr *linkinfo, *data, *peer_info; + struct rtnl_handle rth = { .fd = -1 }; + const char *type = "netkit"; + struct { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[1024]; + } req = {}; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + req.i.ifi_family = AF_UNSPEC; + + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim)); + linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO); + addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type)); + data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); + peer_info = 
addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO); + req.n.nlmsg_len += sizeof(struct ifinfomsg); + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer)); + addattr_nest_end(&req.n, peer_info); + addattr_nest_end(&req.n, data); + addattr_nest_end(&req.n, linkinfo); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + return err; +} + static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; - char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; - - SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd"); - SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd"); + char src_fwd_addr[IFADDR_STR_LEN+1] = {}; + int err; - SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD); - SYS(fail, "ip link set veth_dst address " MAC_DST); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip link add src type veth peer name src_fwd"); + SYS(fail, "ip link add dst type veth peer name dst_fwd"); + + SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD); + SYS(fail, "ip link set dst address " MAC_DST); + } else if (result->dev_mode == MODE_NETKIT) { + err = create_netkit(NETKIT_L3, "src", "src_fwd"); + if (!ASSERT_OK(err, "create_ifindex_src")) + goto fail; + err = create_netkit(NETKIT_L3, "dst", "dst_fwd"); + if (!ASSERT_OK(err, "create_ifindex_dst")) + goto fail; + } - if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) + if (get_ifaddr("src_fwd", src_fwd_addr)) goto fail; - result->ifindex_veth_src = if_nametoindex("veth_src"); - if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src")) + result->ifindex_src = if_nametoindex("src"); + if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src")) goto fail; - result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd"); - if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd")) + result->ifindex_src_fwd = if_nametoindex("src_fwd"); + if (!ASSERT_GT(result->ifindex_src_fwd, 0, 
"ifindex_src_fwd")) goto fail; - result->ifindex_veth_dst = if_nametoindex("veth_dst"); - if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst")) + result->ifindex_dst = if_nametoindex("dst"); + if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst")) goto fail; - result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd"); - if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) + result->ifindex_dst_fwd = if_nametoindex("dst_fwd"); + if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd")) goto fail; - SYS(fail, "ip link set veth_src netns " NS_SRC); - SYS(fail, "ip link set veth_src_fwd netns " NS_FWD); - SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD); - SYS(fail, "ip link set veth_dst netns " NS_DST); + SYS(fail, "ip link set src netns " NS_SRC); + SYS(fail, "ip link set src_fwd netns " NS_FWD); + SYS(fail, "ip link set dst_fwd netns " NS_FWD); + SYS(fail, "ip link set dst netns " NS_DST); /** setup in 'src' namespace */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto fail; - SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src"); - SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad"); - SYS(fail, "ip link set dev veth_src up"); + SYS(fail, "ip addr add " IP4_SRC "/32 dev src"); + SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad"); + SYS(fail, "ip link set dev src up"); - SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global"); - SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global"); - SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev src scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev src scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev src scope global"); - SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s", - veth_src_fwd_addr); - SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s", - veth_src_fwd_addr); + if (result->dev_mode == MODE_VETH) { + SYS(fail, 
"ip neigh add " IP4_DST " dev src lladdr %s", + src_fwd_addr); + SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s", + src_fwd_addr); + } close_netns(nstoken); @@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) * needs v4 one in order to start ARP probing. IP4_NET route is added * to the endpoints so that the ARP processing will reply. */ - SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd"); - SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); - SYS(fail, "ip link set dev veth_src_fwd up"); - SYS(fail, "ip link set dev veth_dst_fwd up"); + SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd"); + SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd"); + SYS(fail, "ip link set dev src_fwd up"); + SYS(fail, "ip link set dev dst_fwd up"); - SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); - SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); - SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); - SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global"); close_netns(nstoken); @@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_OK_PTR(nstoken, "setns dst")) goto fail; - SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst"); - SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad"); - SYS(fail, "ip link set dev veth_dst up"); + SYS(fail, "ip addr add " IP4_DST "/32 dev dst"); + SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad"); + SYS(fail, "ip link set dev dst up"); - SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global"); - SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope 
global"); - SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global"); - SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD); + } close_netns(nstoken); @@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog, const struct bpf_program *chk_prog, const struct netns_setup_result *setup_result) { - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); int err; - /* tc qdisc add dev veth_src_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); - /* tc filter add dev veth_src_fwd ingress bpf da src_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0); - /* tc filter add dev veth_src_fwd egress bpf da chk_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0); + /* tc qdisc add dev src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); + /* tc filter add dev src_fwd ingress bpf da src_prog */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0); + /* tc filter add dev src_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0); - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); - /* 
tc filter add dev veth_dst_fwd egress bpf da chk_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress bpf da dst_prog */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); + /* tc filter add dev dst_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); return 0; fail: @@ -539,10 +600,10 @@ done: static int netns_load_dtime_bpf(struct test_tc_dtime *skel, const struct netns_setup_result *setup_result) { - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst); struct nstoken *nstoken; int err; @@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel, nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) return -1; - /* tc qdisc add dev veth_src clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src); - /* tc filter add dev veth_src ingress bpf da ingress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); - /* tc filter add dev veth_src egress bpf da egress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); + /* tc qdisc add dev src clsact */ + QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src); + /* tc filter add dev src ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev src egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_dst tc 
progs */ nstoken = open_netns(NS_DST); if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) return -1; - /* tc qdisc add dev veth_dst clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst); - /* tc filter add dev veth_dst ingress bpf da ingress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); - /* tc filter add dev veth_dst egress bpf da egress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); + /* tc qdisc add dev dst clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst); + /* tc filter add dev dst ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev dst egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_fwd tc progs */ nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) return -1; - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio100, 100); - /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio101, 101); - /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, 
BPF_TC_EGRESS, + /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio100, 100); - /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio101, 101); - /* tc qdisc add dev veth_src_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); - /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + /* tc qdisc add dev src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); + /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio100, 100); - /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio101, 101); - /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio100, 100); - /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio101, 101); close_netns(nstoken); 
return 0; @@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) return; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_dtime__load(skel); if (!ASSERT_OK(err, "test_tc_dtime__load")) @@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) goto done; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_neigh__load(skel); if (!ASSERT_OK(err, "test_tc_neigh__load")) @@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) goto done; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_peer__load(skel); if (!ASSERT_OK(err, "test_tc_peer__load")) @@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd) static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) { LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); struct test_tc_peer *skel = NULL; struct nstoken *nstoken = NULL; int err; @@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) goto fail; skel->rodata->IFINDEX_SRC = ifindex; - 
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_peer__load(skel); if (!ASSERT_OK(err, "test_tc_peer__load")) @@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) /* Load "tc_src_l3" to the tun_fwd interface to redirect packets * towards dst, and "tc_dst" to redirect packets - * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. + * and "tc_chk" on dst_fwd to drop non-redirected packets. */ /* tc qdisc add dev tun_fwd clsact */ QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); - /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); + /* tc filter add dev dst_fwd egress bpf da tc_chk */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); @@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); - SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src 
scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global"); SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD " dev tun_src scope global"); - SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); - SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global"); SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD " dev tun_src scope global"); - SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global"); - SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD); if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) goto fail; @@ -1106,9 +1167,9 @@ fail: close_netns(nstoken); } -#define RUN_TEST(name) \ +#define RUN_TEST(name, mode) \ ({ \ - struct netns_setup_result setup_result; \ + struct netns_setup_result setup_result = { .dev_mode = mode, }; \ if (test__start_subtest(#name)) \ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ @@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg) { netns_setup_namespaces_nofail("delete"); - RUN_TEST(tc_redirect_peer); - RUN_TEST(tc_redirect_peer_l3); - RUN_TEST(tc_redirect_neigh); - RUN_TEST(tc_redirect_neigh_fib); - RUN_TEST(tc_redirect_dtime); + RUN_TEST(tc_redirect_peer, MODE_VETH); 
+ RUN_TEST(tc_redirect_peer, MODE_NETKIT); + RUN_TEST(tc_redirect_peer_l3, MODE_VETH); + RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT); + RUN_TEST(tc_redirect_neigh, MODE_VETH); + RUN_TEST(tc_redirect_neigh_fib, MODE_VETH); + RUN_TEST(tc_redirect_dtime, MODE_VETH); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e5c61aa6604a..5cfa7a6316b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -31,6 +31,7 @@ #include "verifier_helper_restricted.skel.h" #include "verifier_helper_value_access.skel.h" #include "verifier_int_ptr.skel.h" +#include "verifier_iterating_callbacks.skel.h" #include "verifier_jeq_infer_not_null.skel.h" #include "verifier_ld_ind.skel.h" #include "verifier_ldsx.skel.h" @@ -139,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); } void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } +void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c index 4ce76eb064c4..d461746fd3c1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c @@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data) return 0; } +static int outer_loop(__u32 index, void *data) +{ + bpf_loop(nr_loops, empty_callback, NULL, 0); + __sync_add_and_fetch(&hits, nr_loops); + return 0; +} + SEC("fentry/" SYS_PREFIX 
"sys_getpgid") int benchmark(void *ctx) { - for (int i = 0; i < 1000; i++) { - bpf_loop(nr_loops, empty_callback, NULL, 0); - - __sync_add_and_fetch(&hits, nr_loops); - } + bpf_loop(1000, outer_loop, NULL, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 76d661b20e87..56c764df8196 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -33,6 +33,7 @@ int underflow_prog(void *ctx) if (!p) return 0; bpf_for_each_map_elem(&array_map, cb1, &p, 0); + bpf_kfunc_call_test_release(p); return 0; } diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 4c39e920dac2..8c0ef2742208 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx) return 0; bpf_spin_lock(&lock); bpf_rbtree_add(&rbtree, &f->node, rbless); + bpf_spin_unlock(&lock); return 0; } @@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx) if (!f) return 0; bpf_loop(5, subprog_cb_ref, NULL, 0); + bpf_obj_drop(f); return 0; } diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index e02cfd380746..40df2cc26eaf 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -24,9 +24,11 @@ struct task_struct {}; #define STACK_TABLE_EPOCH_SHIFT 20 #define STROBE_MAX_STR_LEN 1 #define STROBE_MAX_CFGS 32 +#define READ_MAP_VAR_PAYLOAD_CAP \ + ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) #define STROBE_MAX_PAYLOAD \ (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \ - STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) + STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP) struct strobe_value_header { /* @@ -355,7 +357,7 @@ static __always_inline uint64_t 
read_str_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, struct strobe_value_generic *value, struct strobemeta_payload *data, - void *payload) + size_t off) { void *location; uint64_t len; @@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr); /* * if bpf_probe_read_user_str returns error (<0), due to casting to * unsinged int, it will become big number, so next check is @@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; data->str_lens[idx] = len; - return len; + return off + len; } -static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, - size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data, - void *payload) +static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data, + size_t off) { struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; @@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, location = calc_location(&cfg->map_locs[idx], tls_base); if (!location) - return payload; + return off; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr)) - return payload; + return off; descr->id = map.id; descr->cnt = map.cnt; @@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, data->req_meta_valid = 1; } - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag); 
if (len <= STROBE_MAX_STR_LEN) { descr->tag_len = len; - payload += len; + off += len; } #ifdef NO_UNROLL @@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, break; descr->key_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].key); if (len <= STROBE_MAX_STR_LEN) { descr->key_lens[i] = len; - payload += len; + off += len; } descr->val_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].val); if (len <= STROBE_MAX_STR_LEN) { descr->val_lens[i] = len; - payload += len; + off += len; } } - return payload; + return off; } #ifdef USE_BPF_LOOP @@ -455,14 +457,20 @@ struct read_var_ctx { struct strobemeta_payload *data; void *tls_base; struct strobemeta_cfg *cfg; - void *payload; + size_t payload_off; /* value gets mutated */ struct strobe_value_generic *value; enum read_type type; }; -static int read_var_callback(__u32 index, struct read_var_ctx *ctx) +static int read_var_callback(__u64 index, struct read_var_ctx *ctx) { + /* lose precision info for ctx->payload_off, verifier won't track + * double xor, barrier_var() is needed to force clang keep both xors. 
+ */ + ctx->payload_off ^= index; + barrier_var(ctx->payload_off); + ctx->payload_off ^= index; switch (ctx->type) { case READ_INT_VAR: if (index >= STROBE_MAX_INTS) @@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx) case READ_MAP_VAR: if (index >= STROBE_MAX_MAPS) return 1; - ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP) + return 1; + ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; case READ_STR_VAR: if (index >= STROBE_MAX_STRS) return 1; - ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN) + return 1; + ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; } return 0; @@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task, pid_t pid = bpf_get_current_pid_tgid() >> 32; struct strobe_value_generic value = {0}; struct strobemeta_cfg *cfg; - void *tls_base, *payload; + size_t payload_off; + void *tls_base; cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid); if (!cfg) @@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task, data->int_vals_set_mask = 0; data->req_meta_valid = 0; - payload = data->payload; + payload_off = 0; /* * we don't have struct task_struct definition, it should be: * tls_base = (void *)task->thread.fsbase; @@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task, .tls_base = tls_base, .value = &value, .data = data, - .payload = payload, + .payload_off = 0, }; int err; @@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task, err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); if (err != STROBE_MAX_MAPS) return NULL; + + payload_off = 
ctx.payload_off; + /* this should not really happen, here only to satisfy verifer */ + if (payload_off > sizeof(data->payload)) + payload_off = sizeof(data->payload); #else #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { - payload += read_str_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off); } #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { - payload = read_map_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off); } #endif /* USE_BPF_LOOP */ @@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task, * return pointer right after end of payload, so it's possible to * calculate exact amount of useful data that needs to be sent */ - return payload; + return &data->payload[payload_off]; } SEC("raw_tracepoint/kfree_skb") diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c new file mode 100644 index 000000000000..5905e036e0ea --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 8); + __type(key, __u32); + __type(value, __u64); +} map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_USER_RINGBUF); + __uint(max_entries, 8); +} ringbuf SEC(".maps"); + +struct vm_area_struct; +struct bpf_map; + +struct buf_context { + char *buf; +}; + +struct num_context { + __u64 i; + __u64 j; 
+}; + +__u8 choice_arr[2] = { 0, 1 }; + +static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx) +{ + if (idx == 0) { + ctx->buf = (char *)(0xDEAD); + return 0; + } + + if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE))) + return 1; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("R1 type=scalar expected=fp") +int unsafe_on_2nd_iter(void *unused) +{ + char buf[4]; + struct buf_context loop_ctx = { .buf = buf }; + + bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0); + return 0; +} + +static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i = 0; + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_on_zero_iter(void *unused) +{ + struct num_context loop_ctx = { .i = 32 }; + + bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static int widening_cb(__u32 idx, struct num_context *ctx) +{ + ++ctx->i; + return 0; +} + +SEC("?raw_tp") +__success +int widening(void *unused) +{ + struct num_context loop_ctx = { .i = 0, .j = 1 }; + + bpf_loop(100, widening_cb, &loop_ctx, 0); + /* loop_ctx.j is not changed during callback iteration, + * verifier should not apply widening to it. 
+ */ + return choice_arr[loop_ctx.j]; +} + +static int loop_detection_cb(__u32 idx, struct num_context *ctx) +{ + for (;;) {} + return 0; +} + +SEC("?raw_tp") +__failure __msg("infinite loop detected") +int loop_detection(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_loop(100, loop_detection_cb, &loop_ctx, 0); + return 0; +} + +static __always_inline __u64 oob_state_machine(struct num_context *ctx) +{ + switch (ctx->i) { + case 0: + ctx->i = 1; + break; + case 1: + ctx->i = 32; + break; + } + return 0; +} + +static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_for_each_map_elem(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_ringbuf_drain(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_find_vma(void *unused) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct num_context loop_ctx = { .i = 0 }; + + bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static int iter_limit_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i++; + return 0; +} + +SEC("?raw_tp") +__success +int bpf_loop_iter_limit_ok(void *unused) +{ + struct 
num_context ctx = { .i = 0 }; + + bpf_loop(1, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=2 size=1") +int bpf_loop_iter_limit_overflow(void *unused) +{ + struct num_context ctx = { .i = 0 }; + + bpf_loop(2, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 100; + return 0; +} + +static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 10; + return 0; +} + +static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 1; + bpf_loop(1, iter_limit_level2a_cb, ctx, 0); + bpf_loop(1, iter_limit_level2b_cb, ctx, 0); + return 0; +} + +/* Check that path visiting every callback function once had been + * reached by verifier. Variables 'ctx{1,2}i' below serve as flags, + * with each decimal digit corresponding to a callback visit marker. + */ +SEC("socket") +__success __retval(111111) +int bpf_loop_iter_limit_nested(void *unused) +{ + struct num_context ctx1 = { .i = 0 }; + struct num_context ctx2 = { .i = 0 }; + __u64 a, b, c; + + bpf_loop(1, iter_limit_level1_cb, &ctx1, 0); + bpf_loop(1, iter_limit_level1_cb, &ctx2, 0); + a = ctx1.i; + b = ctx2.i; + /* Force 'ctx1.i' and 'ctx2.i' precise. */ + c = choice_arr[(a + b) % 2]; + /* This makes 'c' zero, but neither clang nor verifier know it. */ + c /= 10; + /* Make sure that verifier does not visit 'impossible' states: + * enumerate all possible callback visit masks. 
+ */ + if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 && + b != 0 && b != 1 && b != 11 && b != 101 && b != 111) + asm volatile ("r0 /= 0;" ::: "r0"); + return 1000 * a + b + c; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index db6b3143338b..f61d623b1ce8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body, + * r1 and r4 are always precise for bpf_loop() calls. + */ +__msg("9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r4 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r1 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8") +__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* r6 precision propagation */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 10") +__msg("mark_precise: frame0: last_idx 14 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) 
if r6 > 0x3 goto pc+4") __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0") -__msg("mark_precise: frame0: parent state regs=r0 stack=:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit") +__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop") +/* State entering callback body popped from states stack */ +__msg("from 9 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +__msg("frame 0: propagating r1,r4") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit") +__msg("from 18 to 9: safe") __naked int callback_result_precise(void) { asm volatile ( @@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("12: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 12 first_idx 10") +__msg("mark_precise: frame0: last_idx 12 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs=r6 stack=:") -__msg("mark_precise: frame0: last_idx 16 first_idx 0") -__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) 
r2 = r8") __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 9 to 15: frame1:") +__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("15: (b7) r0 = 0") +__msg("16: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +/* r1, r4 are always precise for bpf_loop(), + * r6 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r1,r4,r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 16 to 9: safe") __naked int parent_callee_saved_reg_precise_with_callback(void) { asm volatile ( @@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 11") +__msg("mark_precise: frame0: last_idx 14 first_idx 10") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs= stack=-8:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 0 
subseq_idx 10") __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8") __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6") __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 10 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 10:") +/* r1, r4 are always precise for bpf_loop(), + * fp-8 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r1,r4,fp-8") +__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 18 to 10: safe") __naked int parent_stack_slot_precise_with_callback(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index e959336c7a73..80f620602d50 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -53,6 +53,8 @@ #define DEFAULT_TTL 64 #define MAX_ALLOWED_PORTS 8 +#define MAX_PACKET_OFF 0xffff + #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) @@ -183,63 +185,76 @@ static __always_inline __u32 tcp_clock_ms(void) } struct tcpopt_context { - __u8 *ptr; - __u8 *end; + void *data; void *data_end; __be32 *tsecr; __u8 wscale; bool option_timestamp; bool option_sack; + __u32 
off; }; -static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz) { - __u8 opcode, opsize; + __u64 off = ctx->off; + __u8 *data; - if (ctx->ptr >= ctx->end) - return 1; - if (ctx->ptr >= ctx->data_end) - return 1; + /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ + if (off > MAX_PACKET_OFF - sz) + return NULL; - opcode = ctx->ptr[0]; + data = ctx->data + off; + barrier_var(data); + if (data + sz >= ctx->data_end) + return NULL; - if (opcode == TCPOPT_EOL) - return 1; - if (opcode == TCPOPT_NOP) { - ++ctx->ptr; - return 0; - } + ctx->off += sz; + return data; +} - if (ctx->ptr + 1 >= ctx->end) - return 1; - if (ctx->ptr + 1 >= ctx->data_end) +static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +{ + __u8 *opcode, *opsize, *wscale, *tsecr; + __u32 off = ctx->off; + + opcode = next(ctx, 1); + if (!opcode) return 1; - opsize = ctx->ptr[1]; - if (opsize < 2) + + if (*opcode == TCPOPT_EOL) return 1; + if (*opcode == TCPOPT_NOP) + return 0; - if (ctx->ptr + opsize > ctx->end) + opsize = next(ctx, 1); + if (!opsize || *opsize < 2) return 1; - switch (opcode) { + switch (*opcode) { case TCPOPT_WINDOW: - if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) - ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; + wscale = next(ctx, 1); + if (!wscale) + return 1; + if (*opsize == TCPOLEN_WINDOW) + ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE; break; case TCPOPT_TIMESTAMP: - if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { + tsecr = next(ctx, 4); + if (!tsecr) + return 1; + if (*opsize == TCPOLEN_TIMESTAMP) { ctx->option_timestamp = true; /* Client's tsval becomes our tsecr. 
*/ - *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); + *ctx->tsecr = get_unaligned((__be32 *)tsecr); } break; case TCPOPT_SACK_PERM: - if (opsize == TCPOLEN_SACK_PERM) + if (*opsize == TCPOLEN_SACK_PERM) ctx->option_sack = true; break; } - ctx->ptr += opsize; + ctx->off = off + *opsize; return 0; } @@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context) static __always_inline bool tscookie_init(struct tcphdr *tcp_header, __u16 tcp_len, __be32 *tsval, - __be32 *tsecr, void *data_end) + __be32 *tsecr, void *data, void *data_end) { struct tcpopt_context loop_ctx = { - .ptr = (__u8 *)(tcp_header + 1), - .end = (__u8 *)tcp_header + tcp_len, + .data = data, .data_end = data_end, .tsecr = tsecr, .wscale = TS_OPT_WSCALE_MASK, .option_timestamp = false, .option_sack = false, + /* Note: currently verifier would track .off as unbound scalar. + * In case if verifier would at some point get smarter and + * compute bounded value for this var, beware that it might + * hinder bpf_loop() convergence validation. + */ + .off = (__u8 *)(tcp_header + 1) - (__u8 *)data, }; u32 cookie; @@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, cookie = (__u32)value; if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, - &tsopt_buf[0], &tsopt_buf[1], data_end)) + &tsopt_buf[0], &tsopt_buf[1], data, data_end)) tsopt = tsopt_buf; /* Check that there is enough space for a SYNACK. 
It also covers diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index cc920c79ff1c..4ff10ea61461 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -45,3 +45,4 @@ mdwe_test gup_longterm mkdirty va_high_addr_switch +hugetlb_fault_after_madv diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 0161fb49fc6e..befab43719ba 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -94,19 +94,19 @@ int init_uffd(void) uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); if (uffd == -1) - ksft_exit_fail_msg("uffd syscall failed\n"); + return uffd; uffdio_api.api = UFFD_API; uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC | UFFD_FEATURE_WP_HUGETLBFS_SHMEM; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) - ksft_exit_fail_msg("UFFDIO_API\n"); + return -1; if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) || !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) || !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) || !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) - ksft_exit_fail_msg("UFFDIO_API error %llu\n", uffdio_api.api); + return -1; return 0; } @@ -1151,7 +1151,7 @@ int sanity_tests(void) /* 9. 
Memory mapped file */ fd = open(__FILE__, O_RDONLY); if (fd < 0) - ksft_exit_fail_msg("%s Memory mapped file\n"); + ksft_exit_fail_msg("%s Memory mapped file\n", __func__); ret = stat(__FILE__, &sbuf); if (ret < 0) @@ -1159,7 +1159,7 @@ int sanity_tests(void) fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); tmp_buf = malloc(sbuf.st_size); memcpy(tmp_buf, fmem, sbuf.st_size); @@ -1189,7 +1189,7 @@ int sanity_tests(void) fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); wp_init(fmem, buf_size); wp_addr_range(fmem, buf_size); @@ -1479,6 +1479,10 @@ int main(void) struct stat sbuf; ksft_print_header(); + + if (init_uffd()) + return ksft_exit_pass(); + ksft_set_plan(115); page_size = getpagesize(); @@ -1488,9 +1492,6 @@ int main(void) if (pagemap_fd < 0) return -EINVAL; - if (init_uffd()) - ksft_exit_fail_msg("uffd init failed\n"); - /* 1. 
Sanity testing */ sanity_tests_sd(); @@ -1595,7 +1596,7 @@ int main(void) fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); wp_init(fmem, sbuf.st_size); wp_addr_range(fmem, sbuf.st_size); @@ -1623,7 +1624,7 @@ int main(void) fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); wp_init(fmem, buf_size); wp_addr_range(fmem, buf_size); diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index cc16f6ca8533..00757445278e 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -223,9 +223,12 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise +nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) # For this test, we need one and just one huge page echo 1 > /proc/sys/vm/nr_hugepages CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv +# Restore the previous number of huge pages, since further tests rely on it +echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages if test_selected "hugetlb"; then echo "NOTE: These hugetlb tests provide minimal coverage. 
Use" diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 5f2b3f6c0d74..38be9706c45f 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -859,7 +859,7 @@ kci_test_gretap() run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - run_cmd ip -netns "$testns" link set dev $DEV_NS ups + run_cmd ip -netns "$testns" link set dev $DEV_NS up run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 5b0e93f9996c..01fa816868bc 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -353,11 +353,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) } #define SOCK_BUF_SIZE (2 * 1024 * 1024) -#define MAX_MSG_SIZE (32 * 1024) +#define MAX_MSG_PAGES 4 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { unsigned long curr_hash; + size_t max_msg_size; int page_size; int msg_count; int fd; @@ -373,7 +374,8 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) curr_hash = 0; page_size = getpagesize(); - msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE; + max_msg_size = MAX_MSG_PAGES * page_size; + msg_count = SOCK_BUF_SIZE / max_msg_size; for (int i = 0; i < msg_count; i++) { size_t buf_size; @@ -383,7 +385,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) /* Use "small" buffers and "big" buffers. 
*/ if (i & 1) buf_size = page_size + - (rand() % (MAX_MSG_SIZE - page_size)); + (rand() % (max_msg_size - page_size)); else buf_size = 1 + (rand() % page_size); @@ -429,7 +431,6 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) unsigned long remote_hash; unsigned long curr_hash; int fd; - char buf[MAX_MSG_SIZE]; struct msghdr msg = {0}; struct iovec iov = {0}; @@ -457,8 +458,13 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) control_writeln("SRVREADY"); /* Wait, until peer sends whole data. */ control_expectln("SENDDONE"); - iov.iov_base = buf; - iov.iov_len = sizeof(buf); + iov.iov_len = MAX_MSG_PAGES * getpagesize(); + iov.iov_base = malloc(iov.iov_len); + if (!iov.iov_base) { + perror("malloc"); + exit(EXIT_FAILURE); + } + msg.msg_iov = &iov; msg.msg_iovlen = 1; @@ -483,6 +489,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size); } + free(iov.iov_base); close(fd); remote_hash = control_readulong(); |