summaryrefslogtreecommitdiff
path: root/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-26 15:04:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-26 15:04:47 -0800
commit3f59dbcace56fae7e4ed303bab90f1bedadcfdf4 (patch)
treec425529202b9dbe3e3b3dde072c1edf51b1b9e93 /tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
parentdf28204bb0f29cc475c0a8893c99b46a11a4903f (diff)
parentceb9e77324fa661b1001a0ae66f061b5fcb4e4e6 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "The main kernel side changes in this cycle were: - Various Intel-PT updates and optimizations (Alexander Shishkin) - Prohibit kprobes on Xen/KVM emulate prefixes (Masami Hiramatsu) - Add support for LSM and SELinux checks to control access to the perf syscall (Joel Fernandes) - Misc other changes, optimizations, fixes and cleanups - see the shortlog for details. There were numerous tooling changes as well - 254 non-merge commits. Here are the main changes - too many to list in detail: - Enhancements to core tooling infrastructure, perf.data, libperf, libtraceevent, event parsing, vendor events, Intel PT, callchains, BPF support and instruction decoding. - There were updates to the following tools: perf annotate perf diff perf inject perf kvm perf list perf maps perf parse perf probe perf record perf report perf script perf stat perf test perf trace - And a lot of other changes: please see the shortlog and Git log for more details" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (279 commits) perf parse: Fix potential memory leak when handling tracepoint errors perf probe: Fix spelling mistake "addrees" -> "address" libtraceevent: Fix memory leakage in copy_filter_type libtraceevent: Fix header installation perf intel-bts: Does not support AUX area sampling perf intel-pt: Add support for decoding AUX area samples perf intel-pt: Add support for recording AUX area samples perf pmu: When using default config, record which bits of config were changed by the user perf auxtrace: Add support for queuing AUX area samples perf session: Add facility to peek at all events perf auxtrace: Add support for dumping AUX area samples perf inject: Cut AUX area samples perf record: Add aux-sample-size config term perf record: Add support for AUX area sampling perf auxtrace: Add support for AUX area sample recording perf auxtrace: Move perf_evsel__find_pmu() perf record: Add a function to test for kernel support for AUX area sampling perf tools: Add kernel AUX area sampling definitions perf/core: Make the mlock accounting simple again perf report: Jump to symbol source view from total cycles view ...
Diffstat (limited to 'tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json')
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json170
1 files changed, 85 insertions, 85 deletions
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
index bc4d5fc284a0..e2446966b651 100644
--- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -1,340 +1,340 @@
[
{
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
"MetricGroup": "TopdownL1",
- "MetricName": "Frontend_Bound"
+ "MetricName": "Frontend_Bound",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
},
{
- "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
- "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
"MetricGroup": "TopdownL1_SMT",
- "MetricName": "Frontend_Bound_SMT"
+ "MetricName": "Frontend_Bound_SMT",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
- "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)",
- "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)",
"MetricGroup": "TopdownL1",
- "MetricName": "Bad_Speculation"
+ "MetricName": "Bad_Speculation",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
},
{
- "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
- "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.",
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
"MetricGroup": "TopdownL1_SMT",
- "MetricName": "Bad_Speculation_SMT"
+ "MetricName": "Bad_Speculation_SMT",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
- "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
- "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
"MetricGroup": "TopdownL1",
- "MetricName": "Backend_Bound"
+ "MetricName": "Backend_Bound",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
},
{
- "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
- "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
"MetricGroup": "TopdownL1_SMT",
- "MetricName": "Backend_Bound_SMT"
+ "MetricName": "Backend_Bound_SMT",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)",
- "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ",
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)",
"MetricGroup": "TopdownL1",
- "MetricName": "Retiring"
+ "MetricName": "Retiring",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. "
},
{
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
- "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.",
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
"MetricGroup": "TopdownL1_SMT",
- "MetricName": "Retiring_SMT"
+ "MetricName": "Retiring_SMT",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
},
{
+ "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
- "BriefDescription": "Instructions Per Cycle (per logical thread)",
"MetricGroup": "TopDownL1",
"MetricName": "IPC"
},
{
- "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
"BriefDescription": "Uops Per Instruction",
- "MetricGroup": "Pipeline;Retiring",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline;Retire",
"MetricName": "UPI"
},
{
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
"BriefDescription": "Instruction per taken branch",
- "MetricGroup": "Branches;PGO",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fetch_BW;PGO",
"MetricName": "IpTB"
},
{
- "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
"BriefDescription": "Branch instructions per taken branch. ",
+ "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
"MetricGroup": "Branches;PGO",
"MetricName": "BpTB"
},
{
- "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )",
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions",
- "MetricGroup": "PGO",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )",
+ "MetricGroup": "PGO;IcMiss",
"MetricName": "IFetch_Line_Utilization"
},
{
- "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
- "MetricGroup": "DSB;Frontend_Bandwidth",
+ "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
+ "MetricGroup": "DSB;Fetch_BW",
"MetricName": "DSB_Coverage"
},
{
+ "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
- "BriefDescription": "Cycles Per Instruction (threaded)",
"MetricGroup": "Pipeline;Summary",
"MetricName": "CPI"
},
{
+ "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
- "BriefDescription": "Per-thread actual clocks when the logical processor is active.",
"MetricGroup": "Summary",
"MetricName": "CLKS"
},
{
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
"MetricExpr": "4 * cycles",
- "BriefDescription": "Total issue-pipeline slots (per core)",
"MetricGroup": "TopDownL1",
"MetricName": "SLOTS"
},
{
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
"MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
- "BriefDescription": "Total issue-pipeline slots (per core)",
"MetricGroup": "TopDownL1_SMT",
"MetricName": "SLOTS_SMT"
},
{
+ "BriefDescription": "Instructions per Load (lower number means higher occurance rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
- "BriefDescription": "Instructions per Load (lower number means loads are more frequent)",
- "MetricGroup": "Instruction_Type;L1_Bound",
+ "MetricGroup": "Instruction_Type",
"MetricName": "IpL"
},
{
+ "BriefDescription": "Instructions per Store (lower number means higher occurance rate)",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
- "BriefDescription": "Instructions per Store",
- "MetricGroup": "Instruction_Type;Store_Bound",
+ "MetricGroup": "Instruction_Type",
"MetricName": "IpS"
},
{
+ "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
- "BriefDescription": "Instructions per Branch",
- "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6",
+ "MetricGroup": "Branches;Instruction_Type",
"MetricName": "IpB"
},
{
+ "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
- "BriefDescription": "Instruction per (near) call",
"MetricGroup": "Branches",
"MetricName": "IpCall"
},
{
- "MetricExpr": "INST_RETIRED.ANY",
"BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary",
"MetricName": "Instructions"
},
{
- "MetricExpr": "INST_RETIRED.ANY / cycles",
"BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / cycles",
"MetricGroup": "SMT",
"MetricName": "CoreIPC"
},
{
- "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "SMT",
"MetricName": "CoreIPC_SMT"
},
{
- "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles",
"BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles",
"MetricGroup": "FLOPS",
"MetricName": "FLOPc"
},
{
- "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"BriefDescription": "Floating Point Operations Per Cycle",
+ "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "FLOPS_SMT",
"MetricName": "FLOPc_SMT"
},
{
- "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
- "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Pipeline",
"MetricName": "ILP"
},
{
- "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
- "MetricGroup": "Branch_Mispredicts",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "BrMispredicts",
"MetricName": "IpMispredict"
},
{
+ "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
"MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
- "BriefDescription": "Core actual clocks when any thread is active on the physical core",
"MetricGroup": "SMT",
"MetricName": "CORE_CLKS"
},
{
- "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
"MetricGroup": "Memory_Bound;Memory_Lat",
"MetricName": "Load_Miss_Real_Latency"
},
{
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
"MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-thread)",
"MetricGroup": "Memory_Bound;Memory_BW",
"MetricName": "MLP"
},
{
- "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles",
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles",
"MetricGroup": "TLB",
"MetricName": "Page_Walks_Utilization"
},
{
- "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
"MetricGroup": "TLB_SMT",
"MetricName": "Page_Walks_Utilization_SMT"
},
{
- "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
"BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
+ "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
"MetricGroup": "Memory_BW",
"MetricName": "L1D_Cache_Fill_BW"
},
{
- "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
"BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
+ "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
"MetricGroup": "Memory_BW",
"MetricName": "L2_Cache_Fill_BW"
},
{
- "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+ "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
"MetricGroup": "Memory_BW",
"MetricName": "L3_Cache_Fill_BW"
},
{
- "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
- "MetricGroup": "Cache_Misses;",
+ "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "Cache_Misses",
"MetricName": "L1MPKI"
},
{
- "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
- "MetricGroup": "Cache_Misses;",
+ "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "Cache_Misses",
"MetricName": "L2MPKI"
},
{
- "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
- "MetricGroup": "Cache_Misses;",
+ "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "Cache_Misses",
"MetricName": "L2MPKI_All"
},
{
- "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
- "MetricGroup": "Cache_Misses;",
+ "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "Cache_Misses",
"MetricName": "L2HPKI_All"
},
{
- "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY",
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
- "MetricGroup": "Cache_Misses;",
+ "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "Cache_Misses",
"MetricName": "L3MPKI"
},
{
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "CPU_Utilization"
},
{
- "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time",
"BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time",
"MetricGroup": "FLOPS;Summary",
"MetricName": "GFLOPs"
},
{
- "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
"MetricName": "Turbo_Utilization"
},
{
+ "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
- "BriefDescription": "Fraction of cycles where both hardware threads were active",
"MetricGroup": "SMT;Summary",
"MetricName": "SMT_2T_Utilization"
},
{
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
"BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
{
- "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000",
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000",
"MetricGroup": "Memory_BW",
"MetricName": "DRAM_BW_Use"
},
{
+ "BriefDescription": "C3 residency percent per core",
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
- "BriefDescription": "C3 residency percent per core",
"MetricName": "C3_Core_Residency"
},
{
+ "BriefDescription": "C6 residency percent per core",
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
- "BriefDescription": "C6 residency percent per core",
"MetricName": "C6_Core_Residency"
},
{
+ "BriefDescription": "C7 residency percent per core",
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
- "BriefDescription": "C7 residency percent per core",
"MetricName": "C7_Core_Residency"
},
{
+ "BriefDescription": "C2 residency percent per package",
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
- "BriefDescription": "C2 residency percent per package",
"MetricName": "C2_Pkg_Residency"
},
{
+ "BriefDescription": "C3 residency percent per package",
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
- "BriefDescription": "C3 residency percent per package",
"MetricName": "C3_Pkg_Residency"
},
{
+ "BriefDescription": "C6 residency percent per package",
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
- "BriefDescription": "C6 residency percent per package",
"MetricName": "C6_Pkg_Residency"
},
{
+ "BriefDescription": "C7 residency percent per package",
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
- "BriefDescription": "C7 residency percent per package",
"MetricName": "C7_Pkg_Residency"
}
]