diff options
Diffstat (limited to 'tools/perf/pmu-events/arch/x86')
52 files changed, 1623 insertions, 248 deletions
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json index c2802fbb853b..5461576dafc7 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json @@ -728,7 +728,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in DRAM. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x80", "Unit": "cpu_atom" @@ -739,7 +738,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.HITM", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache, in which a snoop was required and modified data was forwarded from another core or module. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x20", "Unit": "cpu_atom" @@ -750,7 +748,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L1 data cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x1", "Unit": "cpu_atom" @@ -761,7 +758,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L1 data cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x8", "Unit": "cpu_atom" @@ -772,7 +768,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L2 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x2", "Unit": "cpu_atom" @@ -783,7 +778,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L2 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x10", "Unit": "cpu_atom" @@ -794,7 +788,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x4", "Unit": "cpu_atom" @@ -805,7 +798,6 @@ "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_UOPS_RETIRED_MISC.HIT_E_F", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache, in which a snoop was required, and non-modified data was forwarded. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x40", "Unit": "cpu_atom" @@ -816,7 +808,6 @@ "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_UOPS_RETIRED_MISC.L3_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L3 cache. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x20", "Unit": "cpu_atom" @@ -873,7 +864,7 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "Counts the total number of load uops retired. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of load uops retired.", "SampleAfterValue": "200003", "UMask": "0x81", "Unit": "cpu_atom" @@ -884,111 +875,111 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "Counts the total number of store uops retired. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of store uops retired.", "SampleAfterValue": "200003", "UMask": "0x82", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "MSRValue": "0x80", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "MSRValue": "0x10", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "MSRValue": "0x100", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "MSRValue": "0x20", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "MSRValue": "0x4", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "MSRValue": "0x200", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "MSRValue": "0x40", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "MSRValue": "0x8", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5", "Unit": "cpu_atom" @@ -999,7 +990,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", - "PublicDescription": "Counts the number of load uops retired that performed one or more locks. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x21", "Unit": "cpu_atom" @@ -1010,7 +1000,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "Counts the number of retired split load uops. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x41", "Unit": "cpu_atom" @@ -1021,7 +1010,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS", - "PublicDescription": "Counts the total number of load and store uops retired that missed in the second level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x13", "Unit": "cpu_atom" @@ -1032,7 +1020,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "Counts the number of load ops retired that miss in the second Level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x11", "Unit": "cpu_atom" @@ -1043,7 +1030,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "Counts the number of store ops retired that miss in the second level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x12", "Unit": "cpu_atom" @@ -1054,7 +1040,7 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", - "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x6", "Unit": "cpu_atom" @@ -1478,12 +1464,12 @@ "Unit": "cpu_core" }, { - "BriefDescription": "For every cycle where the core is waiting on at least 1 outstanding Demand RFO request, increments by 1.", + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", "CounterMask": "1", "EventCode": "0x20", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO Available PDIST counters: 0", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4", "Unit": "cpu_core" diff --git a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json index ce570b96360a..d01f1b163ed8 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json @@ -213,7 +213,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.FPDIV", - "PublicDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt). Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x8", "Unit": "cpu_atom" diff --git a/tools/perf/pmu-events/arch/x86/alderlake/other.json b/tools/perf/pmu-events/arch/x86/alderlake/other.json index e4e75b088ccc..5f64138edfe4 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/other.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/other.json @@ -55,7 +55,6 @@ "Deprecated": "1", "EventCode": "0xe4", "EventName": "LBR_INSERTS.ANY", - "PublicDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS] Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_atom" diff --git a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json index 7e0e33792c45..48ef2a8cc49a 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json @@ -128,7 +128,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", "SampleAfterValue": "200003", "Unit": "cpu_atom" }, @@ -147,7 +147,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf9", "Unit": "cpu_atom" @@ -157,7 +156,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND", - "PublicDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e", "Unit": "cpu_atom" @@ -187,7 +185,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND_TAKEN", - "PublicDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe", "Unit": "cpu_atom" @@ -207,7 +204,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "PublicDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xbf", "Unit": "cpu_atom" @@ -227,7 +223,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT", - "PublicDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb", "Unit": "cpu_atom" @@ -247,7 +242,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT_CALL", - "PublicDescription": "Counts the number of near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb", "Unit": "cpu_atom" @@ -258,7 +252,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.IND_CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb", "Unit": "cpu_atom" @@ -269,7 +262,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e", "Unit": "cpu_atom" @@ -279,7 +271,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "Counts the number of near CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf9", "Unit": "cpu_atom" @@ -299,7 +290,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "Counts the number of near RET branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7", "Unit": "cpu_atom" @@ -319,7 +309,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "Counts the number of near taken branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xc0", "Unit": "cpu_atom" @@ -340,7 +329,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NON_RETURN_IND", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb", "Unit": "cpu_atom" @@ -350,7 +338,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.REL_CALL", - "PublicDescription": "Counts the number of near relative CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfd", "Unit": "cpu_atom" @@ -361,7 +348,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.RETURN", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7", "Unit": "cpu_atom" @@ -372,7 +358,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.TAKEN_JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe", "Unit": "cpu_atom" @@ -382,7 +367,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", "SampleAfterValue": "200003", "Unit": "cpu_atom" }, @@ -400,7 +385,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND", - "PublicDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e", "Unit": "cpu_atom" @@ -430,7 +414,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND_TAKEN", - "PublicDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe", "Unit": "cpu_atom" @@ -450,7 +433,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT", - "PublicDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb", "Unit": "cpu_atom" @@ -470,7 +452,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", - "PublicDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb", "Unit": "cpu_atom" @@ -491,7 +472,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.IND_CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb", "Unit": "cpu_atom" @@ -502,7 +482,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e", "Unit": "cpu_atom" @@ -512,7 +491,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Counts the number of mispredicted near taken branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x80", "Unit": "cpu_atom" @@ -533,7 +511,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb", "Unit": "cpu_atom" @@ -553,7 +530,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.RETURN", - "PublicDescription": "Counts the number of mispredicted near RET branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7", "Unit": "cpu_atom" @@ -564,7 +540,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.TAKEN_JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe", "Unit": "cpu_atom" @@ -934,7 +909,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc0", "EventName": "INST_RETIRED.ANY_P", - "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.", "SampleAfterValue": "2000003", "Unit": "cpu_atom" }, @@ -1126,7 +1101,6 @@ "Deprecated": "1", "EventCode": "0x03", "EventName": "LD_BLOCKS.4K_ALIAS", - "PublicDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4", "Unit": "cpu_atom" @@ -1136,7 +1110,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x03", "EventName": "LD_BLOCKS.ADDRESS_ALIAS", - "PublicDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4", "Unit": "cpu_atom" @@ -1156,7 +1129,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x03", "EventName": "LD_BLOCKS.DATA_UNKNOWN", - "PublicDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_atom" @@ -1186,7 +1158,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1", "Unit": "cpu_core" @@ -1306,7 +1278,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xe4", "EventName": "MISC_RETIRED.LBR_INSERTS", - "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY] Available PDIST counters: 0", + "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY]", "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_atom" @@ -1681,7 +1653,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "TOPDOWN_RETIRING.ALL", - "PublicDescription": "Counts the total number of consumed retirement slots. Available PDIST counters: 0", "SampleAfterValue": "1000003", "Unit": "cpu_atom" }, @@ -1933,7 +1904,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "Counts the total number of uops retired. Available PDIST counters: 0", "SampleAfterValue": "2000003", "Unit": "cpu_atom" }, @@ -1963,7 +1933,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.IDIV", - "PublicDescription": "Counts the number of integer divide uops retired. Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x10", "Unit": "cpu_atom" @@ -1973,7 +1942,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.MS", - "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows. Available PDIST counters: 0", + "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", "SampleAfterValue": "2000003", "UMask": "0x1", "Unit": "cpu_atom" @@ -2030,7 +1999,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.X87", - "PublicDescription": "Counts the number of x87 uops retired, includes those in MS flows. Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x2", "Unit": "cpu_atom" diff --git a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json index 3d15275eca61..ffbbd08acc68 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json @@ -266,7 +266,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x13", "Unit": "cpu_atom" @@ -278,7 +277,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS_LOADS Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x11", "Unit": "cpu_atom" @@ -290,7 +288,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS_STORES Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x12", "Unit": "cpu_atom" diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json index bf691aee1ef4..669f4979b651 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json @@ -118,7 +118,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in DRAM. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x80" }, @@ -128,7 +127,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.HITM", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache, in which a snoop was required and modified data was forwarded from another core or module. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x20" }, @@ -138,7 +136,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L1 data cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x1" }, @@ -148,7 +145,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L1 data cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x8" }, @@ -158,7 +154,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L2 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x2" }, @@ -168,7 +163,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L2 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x10" }, @@ -178,7 +172,6 @@ "Data_LA": "1", "EventCode": "0xd1", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x4" }, @@ -188,7 +181,6 @@ "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_UOPS_RETIRED_MISC.HIT_E_F", - "PublicDescription": "Counts the number of load uops retired that hit in the L3 cache, in which a snoop was required, and non-modified data was forwarded. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x40" }, @@ -198,7 +190,6 @@ "Data_LA": "1", "EventCode": "0xd2", "EventName": "MEM_LOAD_UOPS_RETIRED_MISC.L3_MISS", - "PublicDescription": "Counts the number of load uops retired that miss in the L3 cache. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x20" }, @@ -240,7 +231,7 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "Counts the total number of load uops retired. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of load uops retired.", "SampleAfterValue": "200003", "UMask": "0x81" }, @@ -250,103 +241,103 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "Counts the total number of store uops retired. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of store uops retired.", "SampleAfterValue": "200003", "UMask": "0x82" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "MSRValue": "0x80", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "MSRValue": "0x10", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "MSRValue": "0x100", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "MSRValue": "0x20", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "MSRValue": "0x4", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "MSRValue": "0x200", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "MSRValue": "0x40", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, { "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", - "Counter": "0,1", + "Counter": "0,1,2,3,4,5", "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "MSRValue": "0x8", - "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x5" }, @@ -356,7 +347,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", - "PublicDescription": "Counts the number of load uops retired that performed one or more locks. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x21" }, @@ -366,7 +356,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "Counts the number of retired split load uops. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x41" }, @@ -376,7 +365,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS", - "PublicDescription": "Counts the total number of load and store uops retired that missed in the second level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x13" }, @@ -386,7 +374,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "Counts the number of load ops retired that miss in the second Level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x11" }, @@ -396,7 +383,6 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "Counts the number of store ops retired that miss in the second level TLB. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x12" }, @@ -406,7 +392,7 @@ "Data_LA": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", - "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly. Available PDIST counters: 0", + "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.", "SampleAfterValue": "1000003", "UMask": "0x6" }, diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json index f44da31ff1f1..ed963fcb6485 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json @@ -29,7 +29,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.FPDIV", - "PublicDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt). Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x8" } diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/other.json b/tools/perf/pmu-events/arch/x86/alderlaken/other.json index 8c2b5a284f2a..144d7b06f240 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/other.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/other.json @@ -5,7 +5,6 @@ "Deprecated": "1", "EventCode": "0xe4", "EventName": "LBR_INSERTS.ANY", - "PublicDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS] Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json index 9616bf0e9f1f..1dd61baec1a9 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json @@ -54,7 +54,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", "SampleAfterValue": "200003" }, { @@ -63,7 +63,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf9" }, @@ -72,7 +71,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND", - "PublicDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e" }, @@ -81,7 +79,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND_TAKEN", - "PublicDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe" }, @@ -90,7 +87,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "PublicDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xbf" }, @@ -99,7 +95,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT", - "PublicDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb" }, @@ -108,7 +103,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT_CALL", - "PublicDescription": "Counts the number of near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb" }, @@ -118,7 +112,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.IND_CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb" }, @@ -128,7 +121,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e" }, @@ -137,7 +129,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "Counts the number of near CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf9" }, @@ -146,7 +137,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "Counts the number of near RET branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7" }, @@ -155,7 +145,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "Counts the number of near taken branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xc0" }, @@ -165,7 +154,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NON_RETURN_IND", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb" }, @@ -174,7 +162,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.REL_CALL", - "PublicDescription": "Counts the number of near relative CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfd" }, @@ -184,7 +171,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.RETURN", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7" }, @@ -194,7 +180,6 @@ "Deprecated": "1", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.TAKEN_JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe" }, @@ -203,7 +188,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", "SampleAfterValue": "200003" }, { @@ -211,7 +196,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND", - "PublicDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e" }, @@ -220,7 +204,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.COND_TAKEN", - "PublicDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe" }, @@ -229,7 +212,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT", - "PublicDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb" }, @@ -238,7 +220,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", - "PublicDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb" }, @@ -248,7 +229,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.IND_CALL", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfb" }, @@ -258,7 +238,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x7e" }, @@ -267,7 +246,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Counts the number of mispredicted near taken branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x80" }, @@ -277,7 +255,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xeb" }, @@ -286,7 +263,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.RETURN", - "PublicDescription": "Counts the number of mispredicted near RET branch instructions retired. Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xf7" }, @@ -296,7 +272,6 @@ "Deprecated": "1", "EventCode": "0xc5", "EventName": "BR_MISP_RETIRED.TAKEN_JCC", - "PublicDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0xfe" }, @@ -371,7 +346,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc0", "EventName": "INST_RETIRED.ANY_P", - "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter. Available PDIST counters: 0", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.", "SampleAfterValue": "2000003" }, { @@ -380,7 +355,6 @@ "Deprecated": "1", "EventCode": "0x03", "EventName": "LD_BLOCKS.4K_ALIAS", - "PublicDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4" }, @@ -389,7 +363,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x03", "EventName": "LD_BLOCKS.ADDRESS_ALIAS", - "PublicDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x4" }, @@ -398,7 +371,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0x03", "EventName": "LD_BLOCKS.DATA_UNKNOWN", - "PublicDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready. Available PDIST counters: 0", "SampleAfterValue": "1000003", "UMask": "0x1" }, @@ -448,7 +420,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xe4", "EventName": "MISC_RETIRED.LBR_INSERTS", - "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY] Available PDIST counters: 0", + "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY]", "SampleAfterValue": "1000003", "UMask": "0x1" }, @@ -651,7 +623,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "TOPDOWN_RETIRING.ALL", - "PublicDescription": "Counts the total number of consumed retirement slots. Available PDIST counters: 0", "SampleAfterValue": "1000003" }, { @@ -667,7 +638,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "Counts the total number of uops retired. Available PDIST counters: 0", "SampleAfterValue": "2000003" }, { @@ -675,7 +645,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.IDIV", - "PublicDescription": "Counts the number of integer divide uops retired. Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x10" }, @@ -684,7 +653,7 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.MS", - "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows. Available PDIST counters: 0", + "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", "SampleAfterValue": "2000003", "UMask": "0x1" }, @@ -693,7 +662,6 @@ "Counter": "0,1,2,3,4,5", "EventCode": "0xc2", "EventName": "UOPS_RETIRED.X87", - "PublicDescription": "Counts the number of x87 uops retired, includes those in MS flows. Available PDIST counters: 0", "SampleAfterValue": "2000003", "UMask": "0x2" } diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json index c348046696bf..d9c737a17df0 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json @@ -57,7 +57,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x13" }, @@ -68,7 +67,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS_LOADS Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x11" }, @@ -79,7 +77,6 @@ "Deprecated": "1", "EventCode": "0xd0", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES", - "PublicDescription": "This event is deprecated. Refer to new event MEM_UOPS_RETIRED.STLB_MISS_STORES Available PDIST counters: 0", "SampleAfterValue": "200003", "UMask": "0x12" } diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/cache.json b/tools/perf/pmu-events/arch/x86/arrowlake/cache.json index 70175404540d..91929d8bcf47 100644 --- a/tools/perf/pmu-events/arch/x86/arrowlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/arrowlake/cache.json @@ -237,7 +237,7 @@ "Unit": "cpu_lowpower" }, { - "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects (includes those counted in L2_reject_XQ.any), per core event", + "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects, per core event", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x24", "EventName": "L2_REQUEST.REJECTS", @@ -728,6 +728,17 @@ "EventName": "MEM_LOAD_RETIRED.L1_HIT", "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0", "SampleAfterValue": "1000003", + "UMask": "0x101", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "Counter": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT_L0", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0", + "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_core" }, diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json b/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json index 67cc83de18d3..56cf1ec63200 100644 --- a/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json @@ -59,6 +59,22 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Counts the number of instructions retired that were tagged with having preceded with frontend bound behavior", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged with having preceded with frontend bound behavior", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_lowpower" + }, + { "BriefDescription": "Retired ANT branches", "Counter": "0,1,2,3,4,5,6,7,8,9", "EventCode": "0xc6", @@ -83,6 +99,80 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a baclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_DETECT", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a baclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_DETECT", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_lowpower" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles /empty issue slots due to a btclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_RESTEER", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles /empty issue slots due to a btclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_RESTEER", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_lowpower" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.CISC", + "PublicDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.CISC", + "PublicDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_lowpower" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged every cycle the decoder is unable to send 4 uops", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged every cycle the decoder is unable to send 3 uops per cycle.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_lowpower" + }, + { "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", "Counter": "0,1,2,3,4,5,6,7,8,9", "EventCode": "0xc6", @@ -104,6 +194,15 @@ "Unit": "cpu_atom" }, { + "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to icache miss", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_lowpower" + }, + { "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", @@ -302,6 +401,42 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Counts the number of instruction retired tagged after a wasted issue slot if none of the previous events occurred", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired tagged after a wasted issue slot if none of the previous events occurred", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_lowpower" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a predecode wrong", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a predecode wrong.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_lowpower" + }, + { "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", "Counter": "0,1,2,3,4,5,6,7,8,9", "EventCode": "0xc6", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json b/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json index 1c709983b65f..3ef6f00f1135 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/floating-point.json @@ -111,7 +111,7 @@ "Counter": "0,1,2,3", "EventCode": "0xCF", "EventName": "FP_ARITH_INST_RETIRED2.128BIT_PACKED_BF16", - "PublicDescription": "Counts once for each Intel AVX-512 computational 512-bit packed BFloat16 floating-point instruction retired. Applies to the ZMM based VDPBF16PS instruction. Each count represents 64 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", + "PublicDescription": "Counts once for each Intel AVX-512 computational 128-bit packed BFloat16 floating-point instruction retired. Applies to the XMM based VDPBF16PS instruction. Each count represents 16 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", "SampleAfterValue": "2000003", "UMask": "0x20" }, @@ -120,7 +120,7 @@ "Counter": "0,1,2,3", "EventCode": "0xCF", "EventName": "FP_ARITH_INST_RETIRED2.256BIT_PACKED_BF16", - "PublicDescription": "Counts once for each Intel AVX-512 computational 128-bit packed BFloat16 floating-point instruction retired. Applies to the XMM based VDPBF16PS instruction. Each count represents 16 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", + "PublicDescription": "Counts once for each Intel AVX-512 computational 256-bit packed BFloat16 floating-point instruction retired. Applies to the YMM based VDPBF16PS instruction. Each count represents 32 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", "SampleAfterValue": "2000003", "UMask": "0x40" }, @@ -129,7 +129,7 @@ "Counter": "0,1,2,3", "EventCode": "0xCF", "EventName": "FP_ARITH_INST_RETIRED2.512BIT_PACKED_BF16", - "PublicDescription": "Counts once for each Intel AVX-512 computational 256-bit packed BFloat16 floating-point instruction retired. Applies to the YMM based VDPBF16PS instruction. Each count represents 32 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", + "PublicDescription": "Counts once for each Intel AVX-512 computational 512-bit packed BFloat16 floating-point instruction retired. Applies to the ZMM based VDPBF16PS instruction. Each count represents 64 computation operations. This event is only supported on products formerly named Cooper Lake and is not supported on products formerly named Cascade Lake.", "SampleAfterValue": "2000003", "UMask": "0x80" }, diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json index 3dd296ab4d78..9a1349527b66 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json @@ -542,7 +542,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4C", "EventName": "LOAD_HIT_PRE.SW_PF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json index 00b05a77c289..48bec483b49a 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json @@ -684,7 +684,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json index 94340dee1c9c..d4cf2199d46b 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json @@ -1822,6 +1822,18 @@ "Unit": "IIO" }, { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "Experimental": "1", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x00FF", + "UMask": "0x4", + "Unit": "IIO" + }, + { "BriefDescription": "ITC address map 1", "Counter": "0,1,2,3", "EventCode": "0x8f", diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json index aa06088dd26f..68be01dad7c9 100644 --- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json @@ -2146,6 +2146,16 @@ "Unit": "MCHBM" }, { + "BriefDescription": "ECC Correctable Errors", + "Counter": "0,1,2,3", + "EventCode": "0x09", + "EventName": "UNC_MCHBM_ECC_CORRECTABLE_ERRORS", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "ECC Correctable Errors. Counts the number of ECC errors detected and corrected by the iMC on this channel. This counter is only useful with ECC devices. This count will increment one time for each correction regardless of the number of bits corrected. The iMC can correct up to 4 bit errors in independent channel mode and 8 bit errors in lockstep mode.", + "Unit": "MCHBM" + }, + { "BriefDescription": "HBM Precharge All Commands", "Counter": "0,1,2,3", "EventCode": "0x44", @@ -2760,6 +2770,16 @@ "Unit": "iMC" }, { + "BriefDescription": "ECC Correctable Errors", + "Counter": "0,1,2,3", + "EventCode": "0x09", + "EventName": "UNC_M_ECC_CORRECTABLE_ERRORS", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "ECC Correctable Errors : Counts the number of ECC errors detected and corrected by the iMC on this channel. This counter is only useful with ECC DRAM devices. This count will increment one time for each correction regardless of the number of bits corrected. The iMC can correct up to 4 bit errors in independent channel mode and 8 bit errors in lockstep mode.", + "Unit": "iMC" + }, + { "BriefDescription": "IMC Clockticks at HCLK frequency", "Counter": "0,1,2,3", "EventCode": "0x01", diff --git a/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json b/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json index 1c6dba7b2822..878b1caf12de 100644 --- a/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json @@ -107,6 +107,30 @@ "ScaleUnit": "1MB/s" }, { + "BriefDescription": "The percent of inbound full cache line writes initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM", + "MetricName": "io_full_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Message Signaled Interrupts (MSI) per second sent by the integrated I/O traffic controller (IIO) to System Configuration Controller (Ubox)", + "MetricExpr": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED / duration_time", + "MetricName": "io_msi", + "ScaleUnit": "1per_sec" + }, + { + "BriefDescription": "The percent of inbound partial writes initiated by IO that miss the L3 cache", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)", + "MetricName": "io_partial_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "The percent of inbound reads initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR", + "MetricName": "io_read_l3_miss", + "ScaleUnit": "100%" + }, + { "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions", "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY", "MetricName": "itlb_2nd_level_large_page_mpi", @@ -163,12 +187,6 @@ "ScaleUnit": "1per_instr" }, { - "BriefDescription": "Average latency of a last level cache (LLC) demand data read miss (read memory access) in nano seconds", - "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT) / (UNC_CHA_CLOCKTICKS / (source_count(UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT) * #num_packages)) * duration_time", - "MetricName": "llc_demand_data_read_miss_latency", - "ScaleUnit": "1ns" - }, - { "BriefDescription": "Load operations retired per instruction", "MetricExpr": "MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY", "MetricName": "loads_retired_per_instr", diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json index 2c18767511f3..c7250332d8aa 100644 --- a/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json @@ -261,5 +261,15 @@ "PerPkg": "1", "UMask": "0x8", "Unit": "IRP" + }, + { + "BriefDescription": "Message Received : MSI", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.MSI_RCVD", + "PerPkg": "1", + "PublicDescription": "Message Received : MSI : Message Signaled Interrupts - interrupts sent by devices (including PCIe via IOxAPIC) (Socket Mode only)", + "UMask": "0x2", + "Unit": "UBOX" } ] diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json index c5b05c71c56d..764cf2f0b4a8 100644 --- a/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json @@ -908,6 +908,18 @@ "Unit": "IIO" }, { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x0FF", + "PublicDescription": "-", + "UMask": "0x4", + "Unit": "IIO" + }, + { "BriefDescription": "All 9 bits of Page Walk Tracker Occupancy", "Counter": "0,1,2,3", "EventCode": "0x42", diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json index 32f99a8a3871..dbdeade6fe6f 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json @@ -978,6 +978,15 @@ "UMask": "0x4" }, { + "BriefDescription": "Offcore Uncacheable memory data read transactions.", + "Counter": "0,1,2,3", + "EventCode": "0x21", + "EventName": "OFFCORE_REQUESTS.MEM_UC", + "PublicDescription": "This event counts noncacheable memory data read transactions. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", "CounterMask": "1", diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/counter.json b/tools/perf/pmu-events/arch/x86/graniterapids/counter.json index 5d3b202eadd3..d97211a0227e 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/counter.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/counter.json @@ -60,6 +60,11 @@ "CountersNumGeneric": 4 }, { + "Unit": "UBOX", + "CountersNumFixed": "0", + "CountersNumGeneric": "2" + }, + { "Unit": "PCU", "CountersNumFixed": "0", "CountersNumGeneric": "4" @@ -73,10 +78,5 @@ "Unit": "MDF", "CountersNumFixed": "0", "CountersNumGeneric": "4" - }, - { - "Unit": "UBOX", - "CountersNumFixed": "0", - "CountersNumGeneric": "2" } ]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json index af527f7f9d0c..9a620e1b8de8 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json @@ -96,6 +96,12 @@ "ScaleUnit": "1MB/s" }, { + "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device controllers that are requesting memory from the CPU and miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_read_l3_miss", + "ScaleUnit": "1MB/s" + }, + { "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket", "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time", "MetricName": "io_bandwidth_read_local", @@ -114,6 +120,12 @@ "ScaleUnit": "1MB/s" }, { + "BriefDescription": "Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_write_l3_miss", + "ScaleUnit": "1MB/s" + }, + { "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket", "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time", "MetricName": "io_bandwidth_write_local", @@ -126,6 +138,30 @@ "ScaleUnit": "1MB/s" }, { + "BriefDescription": "The percent of inbound full cache line writes initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM", + "MetricName": "io_full_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Message Signaled Interrupts (MSI) per second sent by the integrated I/O traffic controller (IIO) to System Configuration Controller (Ubox)", + "MetricExpr": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED / duration_time", + "MetricName": "io_msi", + "ScaleUnit": "1per_sec" + }, + { + "BriefDescription": "The percent of inbound partial writes initiated by IO that miss the L3 cache", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)", + "MetricName": "io_partial_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "The percent of inbound reads initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR", + "MetricName": "io_read_l3_miss", + "ScaleUnit": "100%" + }, + { "BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions", "MetricExpr": "ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY", "MetricName": "itlb_2nd_level_mpi", diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json index 1edfdad1600d..27af3bd6bacf 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json @@ -738,7 +738,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json index e5bd11b27bcd..6667fbc50452 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json @@ -1916,24 +1916,6 @@ "Unit": "UPI" }, { - "BriefDescription": "Tx Flit Buffer Allocations : Number of allocations into the UPI Tx Flit Buffer. Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This event can be used in conjunction with the Flit Buffer Occupancy event in order to calculate the average flit buffer lifetime.", - "Counter": "0,1,2,3", - "EventCode": "0x40", - "EventName": "UNC_UPI_TxL_INSERTS", - "Experimental": "1", - "PerPkg": "1", - "Unit": "UPI" - }, - { - "BriefDescription": "Tx Flit Buffer Occupancy : Accumulates the number of flits in the TxQ. Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This can be used with the cycles not empty event to track average occupancy, or the allocations event to track average lifetime in the TxQ.", - "Counter": "0,1,2,3", - "EventCode": "0x42", - "EventName": "UNC_UPI_TxL_OCCUPANCY", - "Experimental": "1", - "PerPkg": "1", - "Unit": "UPI" - }, - { "BriefDescription": "Message Received : Doorbell", "Counter": "0,1", "EventCode": "0x42", @@ -1970,7 +1952,6 @@ "Counter": "0,1", "EventCode": "0x42", "EventName": "UNC_U_EVENT_MSG.MSI_RCVD", - "Experimental": "1", "PerPkg": "1", "PublicDescription": "Message Received : MSI : Message Signaled Interrupts - interrupts sent by devices (including PCIe via IOxAPIC) (Socket Mode only)", "UMask": "0x2", diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json index 886b99a971be..f4f956966e16 100644 --- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json @@ -1121,8 +1121,9 @@ "Unit": "IIO" }, { - "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing", + "BriefDescription": "This event is deprecated. [This event is alias to UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO]", "Counter": "2,3", + "Deprecated": "1", "EventCode": "0xc5", "EventName": "UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO", "Experimental": "1", @@ -1133,6 +1134,18 @@ "Unit": "IIO" }, { + "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing [This event is alias to UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO]", + "Counter": "2,3", + "EventCode": "0xc5", + "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO", + "Experimental": "1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x0FF", + "UMask": "0x8", + "Unit": "IIO" + }, + { "BriefDescription": "Passing data to be written", "Counter": "0,1,2,3", "EventCode": "0x88", @@ -1301,6 +1314,18 @@ "Unit": "IIO" }, { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x0FF", + "PublicDescription": "-", + "UMask": "0x4", + "Unit": "IIO" + }, + { "BriefDescription": "All 9 bits of Page Walk Tracker Occupancy", "Counter": "0,1,2,3", "EventCode": "0x42", diff --git a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json index f1446f1b67c6..f3a0d7f49af4 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json @@ -477,7 +477,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json index 8c73708befef..6f84ad47276d 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json @@ -8193,7 +8193,6 @@ "Counter": "0,1,2,3", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_RFO", - "Experimental": "1", "PerPkg": "1", "PublicDescription": "TOR Inserts : RFOs issued by IO Devices that missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.", "UMask": "0xc803fe04", @@ -8234,7 +8233,6 @@ "Counter": "0,1,2,3", "EventCode": "0x35", "EventName": "UNC_CHA_TOR_INSERTS.IO_RFO", - "Experimental": "1", "PerPkg": "1", "PublicDescription": "TOR Inserts : RFOs issued by IO Devices : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.", "UMask": "0xc803ff04", diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json index b1a6bb867a1e..ff37d49611c3 100644 --- a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json @@ -790,6 +790,17 @@ "EventName": "MEM_LOAD_RETIRED.L1_HIT", "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0", "SampleAfterValue": "1000003", + "UMask": "0x101", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "Counter": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT_L0", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0", + "SampleAfterValue": "1000003", "UMask": "0x1", "Unit": "cpu_core" }, diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json index 4875047fb65c..6ac410510628 100644 --- a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json @@ -1247,9 +1247,19 @@ "Unit": "cpu_core" }, { - "BriefDescription": "Counts the number of demand loads that match on a wcb (request buffer) allocated by an L1 hardware prefetch", + "BriefDescription": "Counts the number of demand loads that match on a wcb (request buffer) allocated by an L1 hardware prefetch [This event is alias to LOAD_HIT_PREFETCH.HW_PF]", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x4c", + "EventName": "LOAD_HIT_PREFETCH.HWPF", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event is deprecated. [This event is alias to LOAD_HIT_PREFETCH.HWPF]", + "Counter": "0,1,2,3,4,5,6,7", + "Deprecated": "1", + "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.HW_PF", "SampleAfterValue": "1000003", "UMask": "0x2", @@ -1664,7 +1674,7 @@ }, { "BriefDescription": "Fixed Counter: Counts the number of issue slots not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", - "Counter": "36", + "Counter": "Fixed counter 4", "EventName": "TOPDOWN_BAD_SPECULATION.ALL", "PublicDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the IQ. Also, includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", "SampleAfterValue": "1000003", @@ -1797,7 +1807,7 @@ }, { "BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls.", - "Counter": "37", + "Counter": "Fixed counter 5", "EventName": "TOPDOWN_FE_BOUND.ALL", "SampleAfterValue": "1000003", "UMask": "0x6", @@ -1903,7 +1913,7 @@ }, { "BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.", - "Counter": "38", + "Counter": "Fixed counter 6", "EventName": "TOPDOWN_RETIRING.ALL", "SampleAfterValue": "1000003", "UMask": "0x7", diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/lunarlake/virtual-memory.json index defa3a967754..e60a5e904da2 100644 --- a/tools/perf/pmu-events/arch/x86/lunarlake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/lunarlake/virtual-memory.json @@ -37,24 +37,6 @@ "Unit": "cpu_core" }, { - "BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Account for 4k page size only. Will result in a DTLB write from STLB.", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x08", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K", - "SampleAfterValue": "200003", - "UMask": "0x20", - "Unit": "cpu_atom" - }, - { - "BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Account for large page sizes only. Will result in a DTLB write from STLB.", - "Counter": "0,1,2,3,4,5,6,7", - "EventCode": "0x08", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT_LGPG", - "SampleAfterValue": "200003", - "UMask": "0x40", - "Unit": "cpu_atom" - }, - { "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.", "Counter": "0,1,2,3,4,5,6,7,8,9", "CounterMask": "1", diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index bde2f32423a1..354ce241500b 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -1,40 +1,41 @@ Family-model,Version,Filename,EventType -GenuineIntel-6-(97|9A|B7|BA|BF),v1.29,alderlake,core -GenuineIntel-6-BE,v1.29,alderlaken,core -GenuineIntel-6-C[56],v1.08,arrowlake,core +GenuineIntel-6-(97|9A|B7|BA|BF),v1.31,alderlake,core +GenuineIntel-6-BE,v1.31,alderlaken,core +GenuineIntel-6-C[56],v1.09,arrowlake,core GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core GenuineIntel-6-(3D|47),v30,broadwell,core GenuineIntel-6-56,v12,broadwellde,core GenuineIntel-6-4F,v23,broadwellx,core -GenuineIntel-6-55-[56789ABCDEF],v1.23,cascadelakex,core +GenuineIntel-6-55-[56789ABCDEF],v1.25,cascadelakex,core GenuineIntel-6-DD,v1.00,clearwaterforest,core GenuineIntel-6-9[6C],v1.05,elkhartlake,core -GenuineIntel-6-CF,v1.11,emeraldrapids,core +GenuineIntel-6-CF,v1.14,emeraldrapids,core GenuineIntel-6-5[CF],v13,goldmont,core GenuineIntel-6-7A,v1.01,goldmontplus,core -GenuineIntel-6-B6,v1.07,grandridge,core -GenuineIntel-6-A[DE],v1.08,graniterapids,core +GenuineIntel-6-B6,v1.09,grandridge,core +GenuineIntel-6-A[DE],v1.10,graniterapids,core GenuineIntel-6-(3C|45|46),v36,haswell,core GenuineIntel-6-3F,v29,haswellx,core GenuineIntel-6-7[DE],v1.24,icelake,core -GenuineIntel-6-6[AC],v1.27,icelakex,core +GenuineIntel-6-6[AC],v1.28,icelakex,core GenuineIntel-6-3A,v24,ivybridge,core GenuineIntel-6-3E,v24,ivytown,core GenuineIntel-6-2D,v24,jaketown,core GenuineIntel-6-(57|85),v16,knightslanding,core -GenuineIntel-6-BD,v1.11,lunarlake,core -GenuineIntel-6-(AA|AC|B5),v1.13,meteorlake,core +GenuineIntel-6-BD,v1.14,lunarlake,core +GenuineIntel-6-(AA|AC|B5),v1.14,meteorlake,core GenuineIntel-6-1[AEF],v4,nehalemep,core GenuineIntel-6-2E,v4,nehalemex,core +GenuineIntel-6-CC,v1.00,pantherlake,core GenuineIntel-6-A7,v1.04,rocketlake,core GenuineIntel-6-2A,v19,sandybridge,core -GenuineIntel-6-8F,v1.25,sapphirerapids,core -GenuineIntel-6-AF,v1.09,sierraforest,core +GenuineIntel-6-8F,v1.28,sapphirerapids,core +GenuineIntel-6-AF,v1.11,sierraforest,core GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v59,skylake,core -GenuineIntel-6-55-[01234],v1.36,skylakex,core +GenuineIntel-6-55-[01234],v1.37,skylakex,core GenuineIntel-6-86,v1.23,snowridgex,core -GenuineIntel-6-8[CD],v1.17,tigerlake,core +GenuineIntel-6-8[CD],v1.18,tigerlake,core GenuineIntel-6-2C,v5,westmereep-dp,core GenuineIntel-6-25,v4,westmereep-sp,core GenuineIntel-6-2F,v4,westmereex,core diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json index c980bbee6146..82b115183924 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json @@ -231,7 +231,7 @@ "Unit": "cpu_core" }, { - "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects (includes those counted in L2_reject_XQ.any), per core event", + "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects, per core event", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x24", "EventName": "L2_REQUEST.REJECTS", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json index 509ce68c2ea6..82727022efb6 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json @@ -50,6 +50,14 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Counts the number of instructions retired that were tagged with having preceded with frontend bound behavior", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ALL", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Retired ANT branches", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", @@ -74,6 +82,43 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a baclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_DETECT", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles /empty issue slots due to a btclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_RESTEER", + "SampleAfterValue": "1000003", + "UMask": "0x40", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.CISC", + "PublicDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged every cycle the decoder is unable to send 3 uops per cycle.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", @@ -86,6 +131,15 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to icache miss", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", @@ -287,6 +341,24 @@ "Unit": "cpu_core" }, { + "BriefDescription": "Counts the number of instruction retired tagged after a wasted issue slot if none of the previous events occurred", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a predecode wrong.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4", + "Unit": "cpu_atom" + }, + { "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json index a833d6f53d0e..22b25708e799 100644 --- a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json @@ -1076,7 +1076,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1", "Unit": "cpu_core" diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/cache.json b/tools/perf/pmu-events/arch/x86/pantherlake/cache.json new file mode 100644 index 000000000000..c84f3d9fdb10 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/cache.json @@ -0,0 +1,278 @@ +[ + { + "BriefDescription": "Counts the number of L2 cache accesses from front door requests for Code Read, Data Read, RFO, ITOM, and L2 Prefetches. Does not include rejects or recycles, per core event.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_REQUEST.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x1ff", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "L2 code requests", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "PublicDescription": "Counts the total number of L2 code requests.", + "SampleAfterValue": "200003", + "UMask": "0xe4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Demand Data Read access L2 cache", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.", + "SampleAfterValue": "200003", + "UMask": "0xe1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "1000003", + "UMask": "0x41", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x41", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "1000003", + "UMask": "0x4f", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x4f", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts all retired load instructions.", + "Counter": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0", + "SampleAfterValue": "1000003", + "UMask": "0x81", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired store instructions.", + "Counter": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0", + "SampleAfterValue": "1000003", + "UMask": "0x82", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of load ops retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "SampleAfterValue": "1000003", + "UMask": "0x81", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of store ops retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "SampleAfterValue": "1000003", + "UMask": "0x82", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_1024", + "MSRIndex": "0x3F6", + "MSRValue": "0x400", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_2048", + "MSRIndex": "0x3F6", + "MSRValue": "0x800", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "PublicDescription": "Counts demand data reads that have any type of response. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "PublicDescription": "Counts demand data reads that have any type of response. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "PublicDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "PublicDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + } +] diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/counter.json b/tools/perf/pmu-events/arch/x86/pantherlake/counter.json new file mode 100644 index 000000000000..69f158a97707 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/counter.json @@ -0,0 +1,12 @@ +[ + { + "Unit": "cpu_atom", + "CountersNumFixed": "3", + "CountersNumGeneric": "39" + }, + { + "Unit": "cpu_core", + "CountersNumFixed": "4", + "CountersNumGeneric": "10" + } +]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json b/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json new file mode 100644 index 000000000000..aedf631e3c0f --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/frontend.json @@ -0,0 +1,30 @@ +[ + { + "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE.ACCESSES", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump and the instruction cache registers bytes are not present. -", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE.MISSES", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x9c", + "EventName": "IDQ_BUBBLES.CORE", + "PublicDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + } +] diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/memory.json b/tools/perf/pmu-events/arch/x86/pantherlake/memory.json new file mode 100644 index 000000000000..47daee8cc00f --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/memory.json @@ -0,0 +1,215 @@ +[ + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024", + "MSRIndex": "0x3F6", + "MSRValue": "0x400", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "53", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "1009", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "20011", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048", + "MSRIndex": "0x3F6", + "MSRValue": "0x800", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "23", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "503", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "100007", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "101", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "2003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", + "Counter": "2,3,4,5,6,7,8,9", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0", + "SampleAfterValue": "50021", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Retired memory store access operations. A PDist event for PEBS Store Latency Facility.", + "Counter": "0,1", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE", + "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by DRAM.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x7BC000001", + "PublicDescription": "Counts demand data reads that were supplied by DRAM. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by DRAM.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1E780000001", + "PublicDescription": "Counts demand data reads that were supplied by DRAM. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x13FBFC00001", + "PublicDescription": "Counts demand data reads that were not supplied by the L3 cache. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x9E7FA000001", + "PublicDescription": "Counts demand data reads that were not supplied by the L3 cache. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x13FBFC00002", + "PublicDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "Counter": "0,1,2,3", + "EventCode": "0x2A,0x2B", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x9E7FA000002", + "PublicDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache. Available PDIST counters: 0", + "SampleAfterValue": "100003", + "UMask": "0x1", + "Unit": "cpu_core" + } +] diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json b/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json new file mode 100644 index 000000000000..2caf2f85327f --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json @@ -0,0 +1,325 @@ +[ + { + "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "All branch instructions retired.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0", + "SampleAfterValue": "400009", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "All mispredicted branch instructions retired.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0", + "SampleAfterValue": "400009", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.CORE", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core cycles when the core is not in a halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.CORE", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the programmable counters available for other events.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. [This event is alias to CPU_CLK_UNHALTED.THREAD_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Thread cycles when thread is not in halt state [This event is alias to CPU_CLK_UNHALTED.THREAD_P]", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. [This event is alias to CPU_CLK_UNHALTED.THREAD_P]", + "SampleAfterValue": "2000003", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles.", + "Counter": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "SampleAfterValue": "1000003", + "UMask": "0x3", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Reference cycles when the core is not in halt state.", + "Counter": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "SampleAfterValue": "2000003", + "UMask": "0x3", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Reference cycles when the core is not in halt state.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Core cycles when the thread is not in a halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the programmable counters available for other events.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. [This event is alias to CPU_CLK_UNHALTED.CORE_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Thread cycles when thread is not in halt state [This event is alias to CPU_CLK_UNHALTED.CORE_P]", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. [This event is alias to CPU_CLK_UNHALTED.CORE_P]", + "SampleAfterValue": "2000003", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of instructions retired.", + "Counter": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "PublicDescription": "Fixed Counter: Counts the number of instructions retired. Available PDIST counters: 32", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event", + "Counter": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 32", + "SampleAfterValue": "2000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of instructions retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0", + "SampleAfterValue": "2000003", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", + "SampleAfterValue": "100003", + "UMask": "0x82", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xe4", + "EventName": "MISC_RETIRED.LBR_INSERTS", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "LBR record is inserted", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xe4", + "EventName": "MISC_RETIRED.LBR_INSERTS", + "PublicDescription": "LBR record is inserted Available PDIST counters: 0", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xa4", + "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS", + "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.", + "SampleAfterValue": "10000003", + "UMask": "0x2", + "Unit": "cpu_core" + }, + { + "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event", + "Counter": "Fixed counter 3", + "EventName": "TOPDOWN.SLOTS", + "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).", + "SampleAfterValue": "10000003", + "UMask": "0x4", + "Unit": "cpu_core" + }, + { + "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xa4", + "EventName": "TOPDOWN.SLOTS_P", + "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method.", + "SampleAfterValue": "10000003", + "UMask": "0x1", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "Counter": "36", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x5", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL_P", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "SampleAfterValue": "1000003", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. [This event is alias to TOPDOWN_BE_BOUND.ALL_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xa4", + "EventName": "TOPDOWN_BE_BOUND.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. [This event is alias to TOPDOWN_BE_BOUND.ALL]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xa4", + "EventName": "TOPDOWN_BE_BOUND.ALL_P", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls.", + "Counter": "37", + "EventName": "TOPDOWN_FE_BOUND.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x6", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x9c", + "EventName": "TOPDOWN_FE_BOUND.ALL_P", + "SampleAfterValue": "1000003", + "UMask": "0x1", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.", + "Counter": "38", + "EventName": "TOPDOWN_RETIRING.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x7", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Counts the number of consumed retirement slots.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc2", + "EventName": "TOPDOWN_RETIRING.ALL_P", + "PublicDescription": "Counts the number of consumed retirement slots. Available PDIST counters: 0,1", + "SampleAfterValue": "1000003", + "UMask": "0x2", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric.", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.SLOTS", + "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method.", + "SampleAfterValue": "2000003", + "UMask": "0x2", + "Unit": "cpu_core" + } +] diff --git a/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json new file mode 100644 index 000000000000..690c5dff9d9e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json @@ -0,0 +1,62 @@ +[ + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x12", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x13", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe", + "Unit": "cpu_core" + }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe", + "Unit": "cpu_atom" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3,4,5,6,7,8,9", + "EventCode": "0x11", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe", + "Unit": "cpu_core" + } +] diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json index 00b05a77c289..48bec483b49a 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json @@ -684,7 +684,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json index aab082ff9402..dac7e6c50f31 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json @@ -1902,6 +1902,18 @@ "Unit": "IIO" }, { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "Experimental": "1", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x00FF", + "UMask": "0x4", + "Unit": "IIO" + }, + { "BriefDescription": "ITC address map 1", "Counter": "0,1,2,3", "EventCode": "0x8f", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json index aa06088dd26f..68be01dad7c9 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json @@ -2146,6 +2146,16 @@ "Unit": "MCHBM" }, { + "BriefDescription": "ECC Correctable Errors", + "Counter": "0,1,2,3", + "EventCode": "0x09", + "EventName": "UNC_MCHBM_ECC_CORRECTABLE_ERRORS", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "ECC Correctable Errors. Counts the number of ECC errors detected and corrected by the iMC on this channel. This counter is only useful with ECC devices. This count will increment one time for each correction regardless of the number of bits corrected. The iMC can correct up to 4 bit errors in independent channel mode and 8 bit errors in lockstep mode.", + "Unit": "MCHBM" + }, + { "BriefDescription": "HBM Precharge All Commands", "Counter": "0,1,2,3", "EventCode": "0x44", @@ -2760,6 +2770,16 @@ "Unit": "iMC" }, { + "BriefDescription": "ECC Correctable Errors", + "Counter": "0,1,2,3", + "EventCode": "0x09", + "EventName": "UNC_M_ECC_CORRECTABLE_ERRORS", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "ECC Correctable Errors : Counts the number of ECC errors detected and corrected by the iMC on this channel. This counter is only useful with ECC DRAM devices. This count will increment one time for each correction regardless of the number of bits corrected. The iMC can correct up to 4 bit errors in independent channel mode and 8 bit errors in lockstep mode.", + "Unit": "iMC" + }, + { "BriefDescription": "IMC Clockticks at HCLK frequency", "Counter": "0,1,2,3", "EventCode": "0x01", diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/frontend.json b/tools/perf/pmu-events/arch/x86/sierraforest/frontend.json index fef5cba533bb..8a591e31d331 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/frontend.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/frontend.json @@ -9,6 +9,54 @@ "UMask": "0x1" }, { + "BriefDescription": "Counts the number of instructions retired that were tagged with having preceded with frontend bound behavior", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ALL", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a baclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_DETECT", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles /empty issue slots due to a btclear", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.BRANCH_RESTEER", + "SampleAfterValue": "1000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.CISC", + "PublicDescription": "Counts the number of instructions retired that were tagged following an ms flow due to the bubble/wasted issue slot from exiting long ms flow", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged every cycle the decoder is unable to send 3 uops per cycle.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to icache miss", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20" + }, + { "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0xc6", @@ -17,6 +65,22 @@ "UMask": "0x10" }, { + "BriefDescription": "Counts the number of instruction retired tagged after a wasted issue slot if none of the previous events occurred", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of instruction retired that are tagged after a branch instruction causes bubbles/empty issue slots due to a predecode wrong.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.", "Counter": "0,1,2,3,4,5,6,7", "EventCode": "0x80", diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json b/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json index f56d8d816e53..70af13143024 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json @@ -11,6 +11,7 @@ { "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF6, SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.ALL_BRANCHES", "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", @@ -19,6 +20,7 @@ { "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.COND", "SampleAfterValue": "200003", @@ -35,6 +37,7 @@ { "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "200003", @@ -43,6 +46,7 @@ { "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT", "SampleAfterValue": "200003", @@ -51,6 +55,7 @@ { "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.INDIRECT_CALL", "SampleAfterValue": "200003", @@ -68,6 +73,7 @@ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL", "Counter": "0,1,2,3,4,5,6,7", "Deprecated": "1", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.IND_CALL", "SampleAfterValue": "200003", @@ -76,6 +82,7 @@ { "BriefDescription": "Counts the number of near CALL branch instructions retired.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF6, SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_CALL", "SampleAfterValue": "200003", @@ -92,6 +99,7 @@ { "BriefDescription": "Counts the number of near taken branch instructions retired.", "Counter": "0,1,2,3,4,5,6,7", + "Errata": "SRF7", "EventCode": "0xc4", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "SampleAfterValue": "200003", diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json b/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json index ef629e4e91ce..b9f3c611d87b 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json @@ -62,6 +62,18 @@ "ScaleUnit": "1per_instr" }, { + "BriefDescription": "The average number of cores that are in cstate C0 as observed by the power control unit (PCU)", + "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C0 / pcu_0@UNC_P_CLOCKTICKS@ * #num_packages", + "MetricGroup": "cpu_cstate", + "MetricName": "cpu_cstate_c0" + }, + { + "BriefDescription": "The average number of cores that are in cstate C6 as observed by the power control unit (PCU)", + "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C6 / pcu_0@UNC_P_CLOCKTICKS@ * #num_packages", + "MetricGroup": "cpu_cstate", + "MetricName": "cpu_cstate_c6" + }, + { "BriefDescription": "CPU operating frequency (in GHz)", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9", "MetricName": "cpu_operating_frequency", @@ -113,6 +125,12 @@ "ScaleUnit": "1MB/s" }, { + "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device controllers that are requesting memory from the CPU and miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_read_l3_miss", + "ScaleUnit": "1MB/s" + }, + { "BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket", "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time", "MetricName": "io_bandwidth_read_local", @@ -131,6 +149,12 @@ "ScaleUnit": "1MB/s" }, { + "BriefDescription": "Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time", + "MetricName": "io_bandwidth_write_l3_miss", + "ScaleUnit": "1MB/s" + }, + { "BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket", "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time", "MetricName": "io_bandwidth_write_local", @@ -143,6 +167,30 @@ "ScaleUnit": "1MB/s" }, { + "BriefDescription": "The percent of inbound full cache line writes initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM", + "MetricName": "io_full_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Message Signaled Interrupts (MSI) per second sent by the integrated I/O traffic controller (IIO) to System Configuration Controller (Ubox)", + "MetricExpr": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED / duration_time", + "MetricName": "io_msi", + "ScaleUnit": "1per_sec" + }, + { + "BriefDescription": "The percent of inbound partial writes initiated by IO that miss the L3 cache", + "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)", + "MetricName": "io_partial_write_l3_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "The percent of inbound reads initiated by IO that miss the L3 cache", + "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR", + "MetricName": "io_read_l3_miss", + "ScaleUnit": "100%" + }, + { "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions", "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY", "MetricName": "itlb_2nd_level_large_page_mpi", diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json index 7182ca00ef8d..3d1fb5f0417e 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json @@ -874,7 +874,7 @@ "Unit": "CHA" }, { - "BriefDescription": "Counts snoop filter capacity evictions for entries tracking exclusive lines in the cores? cache.? Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry.? Does not count clean evictions such as when a core?s cache replaces a tracked cacheline with a new cacheline.", + "BriefDescription": "Counts snoop filter capacity evictions for entries tracking exclusive lines in the core's cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a core's cache replaces a tracked cacheline with a new cacheline.", "Counter": "0,1,2,3", "EventCode": "0x3d", "EventName": "UNC_CHA_SF_EVICTION.E_STATE", @@ -885,7 +885,7 @@ "Unit": "CHA" }, { - "BriefDescription": "Counts snoop filter capacity evictions for entries tracking modified lines in the cores? cache.? Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry.? Does not count clean evictions such as when a core?s cache replaces a tracked cacheline with a new cacheline.", + "BriefDescription": "Counts snoop filter capacity evictions for entries tracking modified lines in the core's cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a core's cache replaces a tracked cacheline with a new cacheline.", "Counter": "0,1,2,3", "EventCode": "0x3d", "EventName": "UNC_CHA_SF_EVICTION.M_STATE", @@ -895,7 +895,7 @@ "Unit": "CHA" }, { - "BriefDescription": "Counts snoop filter capacity evictions for entries tracking shared lines in the cores? cache.? Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry.? Does not count clean evictions such as when a core?s cache replaces a tracked cacheline with a new cacheline.", + "BriefDescription": "Counts snoop filter capacity evictions for entries tracking shared lines in the core's cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a core's cache replaces a tracked cacheline with a new cacheline.", "Counter": "0,1,2,3", "EventCode": "0x3d", "EventName": "UNC_CHA_SF_EVICTION.S_STATE", diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json index 2ccbc8bca24e..952b6de3fefc 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json @@ -1562,21 +1562,56 @@ "Unit": "UPI" }, { - "BriefDescription": "Tx Flit Buffer Allocations : Number of allocations into the UPI Tx Flit Buffer. Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This event can be used in conjunction with the Flit Buffer Occupancy event in order to calculate the average flit buffer lifetime.", - "Counter": "0,1,2,3", - "EventCode": "0x40", - "EventName": "UNC_UPI_TxL_INSERTS", + "BriefDescription": "Message Received : Doorbell", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.DOORBELL_RCVD", "Experimental": "1", "PerPkg": "1", - "Unit": "UPI" + "UMask": "0x8", + "Unit": "UBOX" }, { - "BriefDescription": "Tx Flit Buffer Occupancy : Accumulates the number of flits in the TxQ. Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link. However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This can be used with the cycles not empty event to track average occupancy, or the allocations event to track average lifetime in the TxQ.", - "Counter": "0,1,2,3", + "BriefDescription": "Message Received : Interrupt", + "Counter": "0,1", "EventCode": "0x42", - "EventName": "UNC_UPI_TxL_OCCUPANCY", + "EventName": "UNC_U_EVENT_MSG.INT_PRIO", "Experimental": "1", "PerPkg": "1", - "Unit": "UPI" + "PublicDescription": "Message Received : Interrupt : Interrupts", + "UMask": "0x10", + "Unit": "UBOX" + }, + { + "BriefDescription": "Message Received : IPI", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.IPI_RCVD", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "Message Received : IPI : Inter Processor Interrupts", + "UMask": "0x4", + "Unit": "UBOX" + }, + { + "BriefDescription": "Message Received : MSI", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.MSI_RCVD", + "PerPkg": "1", + "PublicDescription": "Message Received : MSI : Message Signaled Interrupts - interrupts sent by devices (including PCIe via IOxAPIC) (Socket Mode only)", + "UMask": "0x2", + "Unit": "UBOX" + }, + { + "BriefDescription": "Message Received : VLW", + "Counter": "0,1", + "EventCode": "0x42", + "EventName": "UNC_U_EVENT_MSG.VLW_RCVD", + "Experimental": "1", + "PerPkg": "1", + "PublicDescription": "Message Received : VLW : Virtual Logical Wire (legacy) message were received from Uncore.", + "UMask": "0x1", + "Unit": "UBOX" } ] diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json index 886b99a971be..f4f956966e16 100644 --- a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json +++ b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json @@ -1121,8 +1121,9 @@ "Unit": "IIO" }, { - "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing", + "BriefDescription": "This event is deprecated. [This event is alias to UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO]", "Counter": "2,3", + "Deprecated": "1", "EventCode": "0xc5", "EventName": "UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO", "Experimental": "1", @@ -1133,6 +1134,18 @@ "Unit": "IIO" }, { + "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing [This event is alias to UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO]", + "Counter": "2,3", + "EventCode": "0xc5", + "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_FROM_CPU.TO_IO", + "Experimental": "1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x0FF", + "UMask": "0x8", + "Unit": "IIO" + }, + { "BriefDescription": "Passing data to be written", "Counter": "0,1,2,3", "EventCode": "0x88", @@ -1301,6 +1314,18 @@ "Unit": "IIO" }, { + "BriefDescription": "Posted requests sent by the integrated IO (IIO) controller to the Ubox, useful for counting message signaled interrupts (MSI).", + "Counter": "0,1,2,3", + "EventCode": "0x8e", + "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX_POSTED", + "FCMask": "0x01", + "PerPkg": "1", + "PortMask": "0x0FF", + "PublicDescription": "-", + "UMask": "0x4", + "Unit": "IIO" + }, + { "BriefDescription": "All 9 bits of Page Walk Tracker Occupancy", "Counter": "0,1,2,3", "EventCode": "0x42", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index 3dd296ab4d78..9a1349527b66 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -542,7 +542,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4C", "EventName": "LOAD_HIT_PRE.SW_PF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", "UMask": "0x1" }, diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json index 7ef1bac08463..b417e1db9e07 100644 --- a/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json @@ -497,7 +497,7 @@ "Counter": "0,1,2,3", "EventCode": "0x4c", "EventName": "LOAD_HIT_PREFETCH.SWPF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", "UMask": "0x1" }, |