Diffstat (limited to 'arch/x86/events/intel/core.c')
-rw-r--r--	arch/x86/events/intel/core.c	4732
1 file changed, 4306 insertions(+), 426 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 98b0f0729527..853fe073bab3 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Per core/cpu state
*
@@ -13,11 +14,16 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>
+#include <linux/kvm_host.h>
#include <asm/cpufeature.h>
+#include <asm/debugreg.h>
#include <asm/hardirq.h>
#include <asm/intel-family.h>
+#include <asm/intel_pt.h>
#include <asm/apic.h>
+#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "../perf_event.h"
@@ -134,7 +140,7 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
- INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */
+ INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
@@ -178,6 +184,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ FIXED_EVENT_CONSTRAINT(0x0500, 4),
+ FIXED_EVENT_CONSTRAINT(0x0600, 5),
+ FIXED_EVENT_CONSTRAINT(0x0700, 6),
+ FIXED_EVENT_CONSTRAINT(0x0800, 7),
+ FIXED_EVENT_CONSTRAINT(0x0900, 8),
+ FIXED_EVENT_CONSTRAINT(0x0a00, 9),
+ FIXED_EVENT_CONSTRAINT(0x0b00, 10),
+ FIXED_EVENT_CONSTRAINT(0x0c00, 11),
+ FIXED_EVENT_CONSTRAINT(0x0d00, 12),
+ FIXED_EVENT_CONSTRAINT(0x0e00, 13),
+ FIXED_EVENT_CONSTRAINT(0x0f00, 14),
+ FIXED_EVENT_CONSTRAINT(0x1000, 15),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -186,6 +213,25 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_grt_event_constraints[] __read_mostly = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_skt_event_constraints[] __read_mostly = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
+ FIXED_EVENT_CONSTRAINT(0x0073, 4), /* TOPDOWN_BAD_SPECULATION.ALL */
+ FIXED_EVENT_CONSTRAINT(0x019c, 5), /* TOPDOWN_FE_BOUND.ALL */
+ FIXED_EVENT_CONSTRAINT(0x02c2, 6), /* TOPDOWN_RETIRING.ALL */
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -238,11 +284,162 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
+static struct event_constraint intel_icl_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* old INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
+ INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
+ INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */
+ INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */
+ INTEL_UEVENT_CONSTRAINT(0x14a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
+ INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */
+ INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
+ INTEL_EVENT_CONSTRAINT(0xef, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+ EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_glc_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+ INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+ INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_glc_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),
+
+ INTEL_EVENT_CONSTRAINT(0x2e, 0xff),
+ INTEL_EVENT_CONSTRAINT(0x3c, 0xff),
+ /*
+ * Generally event codes < 0x90 are restricted to counters 0-3.
+	 * Events 0x2E and 0x3C are exceptions and have no restriction.
+ */
+ INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),
+
+ INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
+ INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
+ INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1),
+ INTEL_EVENT_CONSTRAINT(0xce, 0x1),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
+ /*
+ * Generally event codes >= 0x90 are likely to have no restrictions.
+	 * The exceptions are defined above.
+ */
+ INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff),
+
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_rwc_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE),
+ INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+ INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+ INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_lnc_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),
+
+ INTEL_EVENT_CONSTRAINT(0x20, 0xf),
+
+ INTEL_UEVENT_CONSTRAINT(0x012a, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x012b, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x0148, 0x4),
+ INTEL_UEVENT_CONSTRAINT(0x0175, 0x4),
+
+ INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff),
+ INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff),
+
+ INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
+ INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
+ INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
+ INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
+ INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1),
+ INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8),
+ INTEL_UEVENT_CONSTRAINT(0x01cd, 0x3fc),
+ INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3),
+
+ INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
+
+ INTEL_UEVENT_CONSTRAINT(0x00e0, 0xf),
+
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_lnc_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0xfffffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0xfffffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE),
+ INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+ INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE),
+ INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+ EVENT_EXTRA_END
+};
+
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-static struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_mem_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
@@ -277,9 +474,17 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
"4", "2");
+EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
+EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
+EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
+EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
+EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
+EVENT_ATTR_STR(topdown-heavy-ops, td_heavy_ops, "event=0x00,umask=0x84");
+EVENT_ATTR_STR(topdown-br-mispredict, td_br_mispredict, "event=0x00,umask=0x85");
+EVENT_ATTR_STR(topdown-fetch-lat, td_fetch_lat, "event=0x00,umask=0x86");
+EVENT_ATTR_STR(topdown-mem-bound, td_mem_bound, "event=0x00,umask=0x87");
+
static struct attribute *snb_events_attrs[] = {
- EVENT_PTR(mem_ld_snb),
- EVENT_PTR(mem_st_snb),
EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired),
EVENT_PTR(td_fetch_bubbles),
@@ -290,6 +495,12 @@ static struct attribute *snb_events_attrs[] = {
NULL,
};
+static struct attribute *snb_mem_events_attrs[] = {
+ EVENT_PTR(mem_ld_snb),
+ EVENT_PTR(mem_st_snb),
+ NULL,
+};
+
static struct event_constraint intel_hsw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -337,6 +548,108 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
+static __initconst const u64 glc_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0,
+ [ C(RESULT_MISS) ] = 0xe124,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_MISS) ] = 0xe424,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x12a,
+ [ C(RESULT_MISS) ] = 0x12a,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x12a,
+ [ C(RESULT_MISS) ] = 0x12a,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0,
+ [ C(RESULT_MISS) ] = 0xe12,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0,
+ [ C(RESULT_MISS) ] = 0xe13,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = 0xe11,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4c4,
+ [ C(RESULT_MISS) ] = 0x4c5,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x12a,
+ [ C(RESULT_MISS) ] = 0x12a,
+ },
+ },
+};
+
+static __initconst const u64 glc_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x10001,
+ [ C(RESULT_MISS) ] = 0x3fbfc00001,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x3f3ffc0002,
+ [ C(RESULT_MISS) ] = 0x3f3fc00002,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x10c000001,
+ [ C(RESULT_MISS) ] = 0x3fb3000001,
+ },
+ },
+};
+
/*
* Notes on the events:
* - data reads do not include code reads (comparable to earlier tables)
@@ -1822,6 +2135,108 @@ static __initconst const u64 glp_hw_cache_extra_regs
},
};
+#define TNT_LOCAL_DRAM BIT_ULL(26)
+#define TNT_DEMAND_READ GLM_DEMAND_DATA_RD
+#define TNT_DEMAND_WRITE GLM_DEMAND_RFO
+#define TNT_LLC_ACCESS GLM_ANY_RESPONSE
+#define TNT_SNP_ANY (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \
+ SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM)
+#define TNT_LLC_MISS (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM)
+
+static __initconst const u64 tnt_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TNT_DEMAND_READ|
+ TNT_LLC_ACCESS,
+ [C(RESULT_MISS)] = TNT_DEMAND_READ|
+ TNT_LLC_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TNT_DEMAND_WRITE|
+ TNT_LLC_ACCESS,
+ [C(RESULT_MISS)] = TNT_DEMAND_WRITE|
+ TNT_LLC_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ },
+};
+
+EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_tnt, "event=0x71,umask=0x0");
+EVENT_ATTR_STR(topdown-retiring, td_retiring_tnt, "event=0xc2,umask=0x0");
+EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_tnt, "event=0x73,umask=0x6");
+EVENT_ATTR_STR(topdown-be-bound, td_be_bound_tnt, "event=0x74,umask=0x0");
+
+static struct attribute *tnt_events_attrs[] = {
+ EVENT_PTR(td_fe_bound_tnt),
+ EVENT_PTR(td_retiring_tnt),
+ EVENT_PTR(td_bad_spec_tnt),
+ EVENT_PTR(td_be_bound_tnt),
+ NULL,
+};
+
+static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
+EVENT_ATTR_STR(mem-loads, mem_ld_grt, "event=0xd0,umask=0x5,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_grt, "event=0xd0,umask=0x6");
+
+static struct attribute *grt_mem_attrs[] = {
+ EVENT_PTR(mem_ld_grt),
+ EVENT_PTR(mem_st_grt),
+ NULL
+};
+
+static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0),
+ EVENT_EXTRA_END
+};
+
+EVENT_ATTR_STR(topdown-retiring, td_retiring_cmt, "event=0x72,umask=0x0");
+EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_cmt, "event=0x73,umask=0x0");
+
+static struct attribute *cmt_events_attrs[] = {
+ EVENT_PTR(td_fe_bound_tnt),
+ EVENT_PTR(td_retiring_cmt),
+ EVENT_PTR(td_bad_spec_cmt),
+ EVENT_PTR(td_be_bound_tnt),
+ NULL
+};
+
+static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff3ffffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0),
+ INTEL_UEVENT_EXTRA_REG(0x0127, MSR_SNOOP_RSP_0, 0xffffffffffffffffull, SNOOP_0),
+ INTEL_UEVENT_EXTRA_REG(0x0227, MSR_SNOOP_RSP_1, 0xffffffffffffffffull, SNOOP_1),
+ EVENT_EXTRA_END
+};
+
+EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_skt, "event=0x9c,umask=0x01");
+EVENT_ATTR_STR(topdown-retiring, td_retiring_skt, "event=0xc2,umask=0x02");
+EVENT_ATTR_STR(topdown-be-bound, td_be_bound_skt, "event=0xa4,umask=0x02");
+
+static struct attribute *skt_events_attrs[] = {
+ EVENT_PTR(td_fe_bound_skt),
+ EVENT_PTR(td_retiring_skt),
+ EVENT_PTR(td_bad_spec_cmt),
+ EVENT_PTR(td_be_bound_skt),
+ NULL,
+};
+
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -1870,33 +2285,46 @@ static __initconst const u64 knl_hw_cache_extra_regs
* intel_bts events don't coexist with intel PMU's BTS events because of
* x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
* disabled around intel PMU's event batching etc, only inside the PMI handler.
+ *
+ * Avoid PEBS_ENABLE MSR access in PMIs.
+ * The GLOBAL_CTRL has been disabled, so none of the counters count anymore;
+ * it doesn't matter whether PEBS is enabled or not.
+ * Usually, the PEBS status is not changed in PMIs, so it's unnecessary to
+ * access the PEBS_ENABLE MSR in disable_all()/enable_all().
+ * However, some cases may change the PEBS status, e.g. PMI
+ * throttling. PEBS_ENABLE should be updated where the status changes.
*/
-static void __intel_pmu_disable_all(void)
+static __always_inline void __intel_pmu_disable_all(bool bts)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+ if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
-
- intel_pmu_pebs_disable_all();
}
-static void intel_pmu_disable_all(void)
+static __always_inline void intel_pmu_disable_all(void)
{
- __intel_pmu_disable_all();
+ __intel_pmu_disable_all(true);
+ static_call_cond(x86_pmu_pebs_disable_all)();
intel_pmu_lbr_disable_all();
}
static void __intel_pmu_enable_all(int added, bool pmi)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
- intel_pmu_pebs_enable_all();
intel_pmu_lbr_enable_all(pmi);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
- x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
+
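+	/* Only rewrite MSR_ARCH_PERFMON_FIXED_CTR_CTRL when the cached value has changed. */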
+ if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) {
+ wrmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
+ cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val;
+ }
+
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL,
+ intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
struct perf_event *event =
@@ -1911,9 +2339,51 @@ static void __intel_pmu_enable_all(int added, bool pmi)
static void intel_pmu_enable_all(int added)
{
+ static_call_cond(x86_pmu_pebs_enable_all)();
__intel_pmu_enable_all(added, false);
}
+static noinline int
+__intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries,
+ unsigned int cnt, unsigned long flags)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ intel_pmu_lbr_read();
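+	/* Clamp the requested count to the number of LBR entries the hardware provides. */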
+ cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr);
+
+ memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
+ intel_pmu_enable_all(0);
+ local_irq_restore(flags);
+ return cnt;
+}
+
+static int
+intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+ unsigned long flags;
+
+ /* must not have branches... */
+ local_irq_save(flags);
+ __intel_pmu_disable_all(false); /* we don't care about BTS */
+ __intel_pmu_lbr_disable();
+ /* ... until here */
+ return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
+}
+
+static int
+intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+ unsigned long flags;
+
+ /* must not have branches... */
+ local_irq_save(flags);
+ __intel_pmu_disable_all(false); /* we don't care about BTS */
+ __intel_pmu_arch_lbr_disable();
+ /* ... until here */
+ return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
+}
+
/*
* Workaround for:
* Intel Errata AAK100 (model 26)
@@ -1925,8 +2395,8 @@ static void intel_pmu_enable_all(int added)
* magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
* in sequence on the same PMC or on different PMCs.
*
- * In practise it appears some of these events do in fact count, and
- * we need to programm all 4 events.
+ * In practice it appears some of these events do in fact count, and
+ * we need to program all 4 events.
*/
static void intel_pmu_nhm_workaround(void)
{
@@ -1965,26 +2435,26 @@ static void intel_pmu_nhm_workaround(void)
for (i = 0; i < 4; i++) {
event = cpuc->events[i];
if (event)
- x86_perf_event_update(event);
+ static_call(x86_pmu_update)(event);
}
for (i = 0; i < 4; i++) {
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
- wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+ wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+ wrmsrq(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
}
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
for (i = 0; i < 4; i++) {
event = cpuc->events[i];
if (event) {
- x86_perf_event_set_period(event);
+ static_call(x86_pmu_set_period)(event);
__x86_pmu_enable_event(&event->hw,
ARCH_PERFMON_EVENTSEL_ENABLE);
} else
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
+ wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
}
}
@@ -1995,140 +2465,680 @@ static void intel_pmu_nhm_enable_all(int added)
intel_pmu_enable_all(added);
}
+static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
+{
+ u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;
+
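+	/* The shadow copy avoids redundant WRMSRs when the value is unchanged. */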
+ if (cpuc->tfa_shadow != val) {
+ cpuc->tfa_shadow = val;
+ wrmsrq(MSR_TSX_FORCE_ABORT, val);
+ }
+}
+
+static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+ /*
+ * We're going to use PMC3, make sure TFA is set before we touch it.
+ */
+ if (cntr == 3)
+ intel_set_tfa(cpuc, true);
+}
+
+static void intel_tfa_pmu_enable_all(int added)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * If we find PMC3 is no longer used when we enable the PMU, we can
+ * clear TFA.
+ */
+ if (!test_bit(3, cpuc->active_mask))
+ intel_set_tfa(cpuc, false);
+
+ intel_pmu_enable_all(added);
+}
+
static inline u64 intel_pmu_get_status(void)
{
u64 status;
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status);
return status;
}
static inline void intel_pmu_ack_status(u64 ack)
{
- wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
-static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+static inline bool event_is_checkpointed(struct perf_event *event)
{
- int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
- u64 ctrl_val, mask;
+ return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
- mask = 0xfULL << (idx * 4);
+static inline void intel_set_masks(struct perf_event *event, int idx)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- rdmsrl(hwc->config_base, ctrl_val);
- ctrl_val &= ~mask;
- wrmsrl(hwc->config_base, ctrl_val);
+ if (event->attr.exclude_host)
+ __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+ if (event->attr.exclude_guest)
+ __set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
+ if (event_is_checkpointed(event))
+ __set_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
}
-static inline bool event_is_checkpointed(struct perf_event *event)
+static inline void intel_clear_masks(struct perf_event *event, int idx)
{
- return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+ __clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
+ __clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
}
-static void intel_pmu_disable_event(struct perf_event *event)
+static void intel_pmu_disable_fixed(struct perf_event *event)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u64 mask;
+
+ if (is_topdown_idx(idx)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * When there are other active TopDown events,
+ * don't disable the fixed counter 3.
+ */
+ if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
+ return;
+ idx = INTEL_PMC_IDX_FIXED_SLOTS;
+ }
+
+ intel_clear_masks(event, idx);
+
+ mask = intel_fixed_bits_by_idx(idx - INTEL_PMC_IDX_FIXED, INTEL_FIXED_BITS_MASK);
+ cpuc->fixed_ctrl_val &= ~mask;
+}
+
+static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
+{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u32 msr;
- if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
- intel_pmu_disable_bts();
- intel_pmu_drain_bts_buffer();
- return;
+ if (idx < INTEL_PMC_IDX_FIXED) {
+ msr = MSR_IA32_PMC_V6_GP0_CFG_C +
+ x86_pmu.addr_offset(idx, false);
+ } else {
+ msr = MSR_IA32_PMC_V6_FX0_CFG_C +
+ x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
}
- cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
- cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
- cpuc->intel_cp_status &= ~(1ull << hwc->idx);
+ cpuc->cfg_c_val[idx] = ext;
+ wrmsrq(msr, ext);
+}
+
+static void intel_pmu_disable_event_ext(struct perf_event *event)
+{
+ /*
+	 * Only clear the CFG_C MSR for PEBS counter group events;
+	 * this prevents the HW counter's value from being added into
+	 * other PEBS records incorrectly after the PEBS counter
+	 * group events are disabled.
+	 *
+	 * For other events, it's unnecessary to clear the CFG_C MSRs,
+	 * since CFG_C doesn't take effect while the counter is in the
+	 * disabled state. That helps reduce the WRMSR overhead
+	 * in context switches.
+ */
+ if (!is_pebs_counter_event_group(event))
+ return;
+
+ __intel_pmu_update_event_ext(event->hw.idx, 0);
+}
+
+DEFINE_STATIC_CALL_NULL(intel_pmu_disable_event_ext, intel_pmu_disable_event_ext);
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
- intel_pmu_disable_fixed(hwc);
+static void intel_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ switch (idx) {
+ case 0 ... INTEL_PMC_IDX_FIXED - 1:
+ intel_clear_masks(event, idx);
+ static_call_cond(intel_pmu_disable_event_ext)(event);
+ x86_pmu_disable_event(event);
+ break;
+ case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+ static_call_cond(intel_pmu_disable_event_ext)(event);
+ fallthrough;
+ case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
+ intel_pmu_disable_fixed(event);
+ break;
+ case INTEL_PMC_IDX_FIXED_BTS:
+ intel_pmu_disable_bts();
+ intel_pmu_drain_bts_buffer();
+ return;
+ case INTEL_PMC_IDX_FIXED_VLBR:
+ intel_clear_masks(event, idx);
+ break;
+ default:
+ intel_clear_masks(event, idx);
+ pr_warn("Failed to disable the event with invalid index %d\n",
+ idx);
return;
}
- x86_pmu_disable_event(event);
-
+ /*
+ * Needs to be called after x86_pmu_disable_event,
+ * so we don't trigger the event without PEBS bit set.
+ */
if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
+ static_call(x86_pmu_pebs_disable)(event);
+}
+
+static void intel_pmu_assign_event(struct perf_event *event, int idx)
+{
+ if (is_pebs_pt(event))
+ perf_report_aux_output_id(event, idx);
+}
+
+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+ return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
}
static void intel_pmu_del_event(struct perf_event *event)
{
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_del(event);
if (event->attr.precise_ip)
intel_pmu_pebs_del(event);
+ if (is_pebs_counter_event_group(event) ||
+ is_acr_event_group(event))
+ this_cpu_ptr(&cpu_hw_events)->n_late_setup--;
+}
+
+static int icl_set_topdown_event_period(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ s64 left = local64_read(&hwc->period_left);
+
+ /*
+ * The values in PERF_METRICS MSR are derived from fixed counter 3.
+ * Software should start both registers, PERF_METRICS and fixed
+ * counter 3, from zero.
+	 * Clear PERF_METRICS and fixed counter 3 at initialization.
+	 * After that, both MSRs will be cleared on each read, so there
+	 * is no need to clear them again.
+ */
+ if (left == x86_pmu.max_period) {
+ wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrq(MSR_PERF_METRICS, 0);
+ hwc->saved_slots = 0;
+ hwc->saved_metric = 0;
+ }
+
+ if ((hwc->saved_slots) && is_slots_event(event)) {
+ wrmsrq(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
+ wrmsrq(MSR_PERF_METRICS, hwc->saved_metric);
+ }
+
+ perf_event_update_userpage(event);
+
+ return 0;
}
-static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period);
+
+static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
{
- int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
- u64 ctrl_val, bits, mask;
+ u32 val;
/*
- * Enable IRQ generation (0x8),
+ * The metric is reported as an 8bit integer fraction
+ * summing up to 0xff.
+ * slots-in-metric = (Metric / 0xff) * slots
+ */
+ val = (metric >> ((idx - INTEL_PMC_IDX_METRIC_BASE) * 8)) & 0xff;
+ return mul_u64_u32_div(slots, val, 0xff);
+}
+
+static u64 icl_get_topdown_value(struct perf_event *event,
+ u64 slots, u64 metrics)
+{
+ int idx = event->hw.idx;
+ u64 delta;
+
+ if (is_metric_idx(idx))
+ delta = icl_get_metrics_event_value(metrics, slots, idx);
+ else
+ delta = slots;
+
+ return delta;
+}
+
+static void __icl_update_topdown_event(struct perf_event *event,
+ u64 slots, u64 metrics,
+ u64 last_slots, u64 last_metrics)
+{
+ u64 delta, last = 0;
+
+ delta = icl_get_topdown_value(event, slots, metrics);
+ if (last_slots)
+ last = icl_get_topdown_value(event, last_slots, last_metrics);
+
+ /*
+	 * The 8-bit integer fraction of the metric may not be accurate,
+	 * especially when the change is very small.
+	 * For example, if only a few bad_spec events happen, the fraction
+	 * may be reduced from 1 to 0. If so, the bad_spec event value
+	 * will be 0, which is definitely less than the last value.
+	 * Avoid updating event->count in this case.
+ */
+ if (delta > last) {
+ delta -= last;
+ local64_add(delta, &event->count);
+ }
+}
+
+static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
+ u64 metrics, int metric_end)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *other;
+ int idx;
+
+ event->hw.saved_slots = slots;
+ event->hw.saved_metric = metrics;
+
+ for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
+ if (!is_topdown_idx(idx))
+ continue;
+ other = cpuc->events[idx];
+ other->hw.saved_slots = slots;
+ other->hw.saved_metric = metrics;
+ }
+}
+
+/*
+ * Update all active Topdown events.
+ *
+ * PERF_METRICS and fixed counter 3 are read separately. The values may be
+ * modified by an NMI. The PMU has to be disabled before calling this function.
+ */
+
+static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *other;
+ u64 slots, metrics;
+ bool reset = true;
+ int idx;
+
+ if (!val) {
+ /* read Fixed counter 3 */
+ slots = rdpmc(3 | INTEL_PMC_FIXED_RDPMC_BASE);
+ if (!slots)
+ return 0;
+
+ /* read PERF_METRICS */
+ metrics = rdpmc(INTEL_PMC_FIXED_RDPMC_METRICS);
+ } else {
+ slots = val[0];
+ metrics = val[1];
+ /*
+ * Don't reset the PERF_METRICS and Fixed counter 3
+ * for each PEBS record read. Utilize the RDPMC metrics
+ * clear mode.
+ */
+ reset = false;
+ }
+
+ for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
+ if (!is_topdown_idx(idx))
+ continue;
+ other = cpuc->events[idx];
+ __icl_update_topdown_event(other, slots, metrics,
+ event ? event->hw.saved_slots : 0,
+ event ? event->hw.saved_metric : 0);
+ }
+
+ /*
+ * Check and update this event, which may have been cleared
+	 * in active_mask, e.g. by x86_pmu_stop().
+ */
+ if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
+ __icl_update_topdown_event(event, slots, metrics,
+ event->hw.saved_slots,
+ event->hw.saved_metric);
+
+ /*
+		 * In x86_pmu_stop(), the event is cleared in active_mask first
+		 * and the delta is drained afterwards, which indicates a context
+		 * switch for counting.
+		 * Save the metric and slots values for the context switch.
+		 * There is no need to reset PERF_METRICS and fixed counter 3,
+		 * because the values will be restored at the next schedule-in.
+ */
+ update_saved_topdown_regs(event, slots, metrics, metric_end);
+ reset = false;
+ }
+
+ if (reset) {
+ /* The fixed counter 3 has to be written before the PERF_METRICS. */
+ wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrq(MSR_PERF_METRICS, 0);
+ if (event)
+ update_saved_topdown_regs(event, 0, 0, metric_end);
+ }
+
+ return slots;
+}
+
+static u64 icl_update_topdown_event(struct perf_event *event, u64 *val)
+{
+ return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
+ x86_pmu.num_topdown_events - 1,
+ val);
+}
+
+DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update);
+
+static void intel_pmu_read_event(struct perf_event *event)
+{
+ if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN) ||
+ is_pebs_counter_event_group(event)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ bool pmu_enabled = cpuc->enabled;
+
+ /* Only need to call update_topdown_event() once for group read. */
+ if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ))
+ return;
+
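+		/* Disable the PMU so the counter and PERF_METRICS reads stay consistent. */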
+ cpuc->enabled = 0;
+ if (pmu_enabled)
+ intel_pmu_disable_all();
+
+ /*
+		 * If PEBS counter snapshotting is enabled,
+ * the topdown event is available in PEBS records.
+ */
+ if (is_topdown_count(event) && !is_pebs_counter_event_group(event))
+ static_call(intel_pmu_update_topdown_event)(event, NULL);
+ else
+ intel_pmu_drain_pebs_buffer();
+
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
+ intel_pmu_enable_all(0);
+
+ return;
+ }
+
+ x86_perf_event_update(event);
+}
+
+static void intel_pmu_enable_fixed(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u64 bits = 0;
+
+ if (is_topdown_idx(idx)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ /*
+ * When there are other active TopDown events,
+ * don't enable the fixed counter 3 again.
+ */
+ if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
+ return;
+
+ idx = INTEL_PMC_IDX_FIXED_SLOTS;
+
+ if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR)
+ bits |= INTEL_FIXED_3_METRICS_CLEAR;
+ }
+
+ intel_set_masks(event, idx);
+
+ /*
+ * Enable IRQ generation (0x8), if not PEBS,
* and enable ring-3 counting (0x2) and ring-0 counting (0x1)
* if requested:
*/
- bits = 0x8ULL;
+ if (!event->attr.precise_ip)
+ bits |= INTEL_FIXED_0_ENABLE_PMI;
if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
- bits |= 0x2;
+ bits |= INTEL_FIXED_0_USER;
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
- bits |= 0x1;
+ bits |= INTEL_FIXED_0_KERNEL;
/*
* ANY bit is supported in v3 and up
*/
if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
- bits |= 0x4;
+ bits |= INTEL_FIXED_0_ANYTHREAD;
- bits <<= (idx * 4);
- mask = 0xfULL << (idx * 4);
+ idx -= INTEL_PMC_IDX_FIXED;
+ bits = intel_fixed_bits_by_idx(idx, bits);
+ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip)
+ bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
- rdmsrl(hwc->config_base, ctrl_val);
- ctrl_val &= ~mask;
- ctrl_val |= bits;
- wrmsrl(hwc->config_base, ctrl_val);
+ cpuc->fixed_ctrl_val &= ~intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
+ cpuc->fixed_ctrl_val |= bits;
}
-static void intel_pmu_enable_event(struct perf_event *event)
+static void intel_pmu_config_acr(int idx, u64 mask, u32 reload)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int msr_b, msr_c;
+ int msr_offset;
+
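+	/* Nothing to do if both the new mask and the cached CFG_B value are clear. */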
+ if (!mask && !cpuc->acr_cfg_b[idx])
+ return;
+
+ if (idx < INTEL_PMC_IDX_FIXED) {
+ msr_b = MSR_IA32_PMC_V6_GP0_CFG_B;
+ msr_c = MSR_IA32_PMC_V6_GP0_CFG_C;
+ msr_offset = x86_pmu.addr_offset(idx, false);
+ } else {
+ msr_b = MSR_IA32_PMC_V6_FX0_CFG_B;
+ msr_c = MSR_IA32_PMC_V6_FX0_CFG_C;
+ msr_offset = x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
+ }
+
+ if (cpuc->acr_cfg_b[idx] != mask) {
+ wrmsrl(msr_b + msr_offset, mask);
+ cpuc->acr_cfg_b[idx] = mask;
+ }
+ /* Only need to update the reload value when there is a valid config value. */
+ if (mask && cpuc->acr_cfg_c[idx] != reload) {
+ wrmsrl(msr_c + msr_offset, reload);
+ cpuc->acr_cfg_c[idx] = reload;
+ }
+}
+
+static void intel_pmu_enable_acr(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
+
+ if (!is_acr_event_group(event) || !event->attr.config2) {
+ /*
+		 * Disabling the event doesn't clear the ACR CFG register,
+		 * so check and clear it here.
+ */
+ intel_pmu_config_acr(hwc->idx, 0, 0);
+ return;
+ }
+
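+	/* The counter counts up, so the reload value is the negated sample period. */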
+ intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period);
+}
+
+DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
+
+static void intel_pmu_enable_event_ext(struct perf_event *event)
+{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ union arch_pebs_index old, new;
+ struct arch_pebs_cap cap;
+ u64 ext = 0;
+
+ cap = hybrid(cpuc->pmu, arch_pebs_cap);
+
+ if (event->attr.precise_ip) {
+ u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);
- if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+ ext |= ARCH_PEBS_EN;
+ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
+ ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;
+
+ if (pebs_data_cfg && cap.caps) {
+ if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
+ ext |= ARCH_PEBS_AUX & cap.caps;
+
+ if (pebs_data_cfg & PEBS_DATACFG_GP)
+ ext |= ARCH_PEBS_GPR & cap.caps;
+
+ if (pebs_data_cfg & PEBS_DATACFG_XMMS)
+ ext |= ARCH_PEBS_VECR_XMM & cap.caps;
+
+ if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+ ext |= ARCH_PEBS_LBR & cap.caps;
+
+ if (pebs_data_cfg &
+ (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT))
+ ext |= ARCH_PEBS_CNTR_GP & cap.caps;
+
+ if (pebs_data_cfg &
+ (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT))
+ ext |= ARCH_PEBS_CNTR_FIXED & cap.caps;
+
+ if (pebs_data_cfg & PEBS_DATACFG_METRICS)
+ ext |= ARCH_PEBS_CNTR_METRICS & cap.caps;
+ }
+
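+		/* Use the multi-record threshold only if every PEBS event can use large PEBS. */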
+ if (cpuc->n_pebs == cpuc->n_large_pebs)
+ new.thresh = ARCH_PEBS_THRESH_MULTI;
+ else
+ new.thresh = ARCH_PEBS_THRESH_SINGLE;
+
+ rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
+ if (new.thresh != old.thresh || !old.en) {
+ if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
+ /*
+ * Large PEBS was enabled.
+				 * Drain the PEBS buffer before switching to single-record PEBS.
+ */
+ intel_pmu_drain_pebs_buffer();
+ } else {
+ new.wr = 0;
+ new.full = 0;
+ new.en = 1;
+ wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
+ }
+ }
+ }
+
+ if (is_pebs_counter_event_group(event))
+ ext |= ARCH_PEBS_CNTR_ALLOW;
+
+ if (cpuc->cfg_c_val[hwc->idx] != ext)
+ __intel_pmu_update_event_ext(hwc->idx, ext);
+}
+
+DEFINE_STATIC_CALL_NULL(intel_pmu_enable_event_ext, intel_pmu_enable_event_ext);
+
+static void intel_pmu_enable_event(struct perf_event *event)
+{
+ u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (unlikely(event->attr.precise_ip))
+ static_call(x86_pmu_pebs_enable)(event);
+
+ switch (idx) {
+ case 0 ... INTEL_PMC_IDX_FIXED - 1:
+ if (branch_sample_counters(event))
+ enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
+ intel_set_masks(event, idx);
+ static_call_cond(intel_pmu_enable_acr_event)(event);
+ static_call_cond(intel_pmu_enable_event_ext)(event);
+ __x86_pmu_enable_event(hwc, enable_mask);
+ break;
+ case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+ static_call_cond(intel_pmu_enable_acr_event)(event);
+ static_call_cond(intel_pmu_enable_event_ext)(event);
+ fallthrough;
+ case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
+ intel_pmu_enable_fixed(event);
+ break;
+ case INTEL_PMC_IDX_FIXED_BTS:
if (!__this_cpu_read(cpu_hw_events.enabled))
return;
-
intel_pmu_enable_bts(hwc->config);
- return;
+ break;
+ case INTEL_PMC_IDX_FIXED_VLBR:
+ intel_set_masks(event, idx);
+ break;
+ default:
+ pr_warn("Failed to enable the event with invalid index %d\n",
+ idx);
}
+}
- if (event->attr.exclude_host)
- cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
- if (event->attr.exclude_guest)
- cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
+static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc)
+{
+ struct perf_event *event, *leader;
+ int i, j, idx;
- if (unlikely(event_is_checkpointed(event)))
- cpuc->intel_cp_status |= (1ull << hwc->idx);
+ for (i = 0; i < cpuc->n_events; i++) {
+ leader = cpuc->event_list[i];
+ if (!is_acr_event_group(leader))
+ continue;
- if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
- intel_pmu_enable_fixed(hwc);
- return;
+ /* The ACR events must be contiguous. */
+ for (j = i; j < cpuc->n_events; j++) {
+ event = cpuc->event_list[j];
+ if (event->group_leader != leader->group_leader)
+ break;
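+			/*
+			 * Each config2 bit selects a group sibling by position;
+			 * record that sibling's assigned counter in this event's
+			 * config1, which forms the ACR counter mask.
+			 */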
+ for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
+ if (i + idx >= cpuc->n_events ||
+ !is_acr_event_group(cpuc->event_list[i + idx]))
+ return;
+ __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1);
+ }
+ }
+ i = j - 1;
}
+}
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_enable(event);
+void intel_pmu_late_setup(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ if (!cpuc->n_late_setup)
+ return;
+
+ intel_pmu_pebs_late_setup(cpuc);
+ intel_pmu_acr_late_setup(cpuc);
}
static void intel_pmu_add_event(struct perf_event *event)
{
if (event->attr.precise_ip)
intel_pmu_pebs_add(event);
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_add(event);
+ if (is_pebs_counter_event_group(event) ||
+ is_acr_event_group(event))
+ this_cpu_ptr(&cpu_hw_events)->n_late_setup++;
}
/*
@@ -2137,7 +3147,7 @@ static void intel_pmu_add_event(struct perf_event *event)
*/
int intel_pmu_save_and_restart(struct perf_event *event)
{
- x86_perf_event_update(event);
+ static_call(x86_pmu_update)(event);
/*
* For a checkpointed counter always reset back to 0. This
* avoids a situation where the counter overflows, aborts the
@@ -2146,31 +3156,53 @@ int intel_pmu_save_and_restart(struct perf_event *event)
*/
if (unlikely(event_is_checkpointed(event))) {
/* No race with NMIs because the counter should not be armed */
- wrmsrl(event->hw.event_base, 0);
+ wrmsrq(event->hw.event_base, 0);
local64_set(&event->hw.prev_count, 0);
}
+ return static_call(x86_pmu_set_period)(event);
+}
+
+static int intel_pmu_set_period(struct perf_event *event)
+{
+ if (unlikely(is_topdown_count(event)))
+ return static_call(intel_pmu_set_topdown_event_period)(event);
+
return x86_perf_event_set_period(event);
}
+static u64 intel_pmu_update(struct perf_event *event)
+{
+ if (unlikely(is_topdown_count(event)))
+ return static_call(intel_pmu_update_topdown_event)(event, NULL);
+
+ return x86_perf_event_update(event);
+}
+
static void intel_pmu_reset(void)
{
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+ unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
unsigned long flags;
int idx;
- if (!x86_pmu.num_counters)
+ if (!*(u64 *)cntr_mask)
return;
local_irq_save(flags);
pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
- wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
+ for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) {
+ wrmsrq_safe(x86_pmu_config_addr(idx), 0ull);
+ wrmsrq_safe(x86_pmu_event_addr(idx), 0ull);
+ }
+ for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
+ if (fixed_counter_disabled(idx, cpuc->pmu))
+ continue;
+ wrmsrq_safe(x86_pmu_fixed_ctr_addr(idx), 0ull);
}
- for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
- wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
if (ds)
ds->bts_index = ds->bts_buffer_base;
@@ -2178,7 +3210,7 @@ static void intel_pmu_reset(void)
/* Ack all overflows and disable fixed counters */
if (x86_pmu.version >= 2) {
intel_pmu_ack_status(intel_pmu_get_status());
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
/* Reset LBRs and LBR freezing */
@@ -2191,50 +3223,52 @@ static void intel_pmu_reset(void)
}
/*
- * This handler is triggered by the local APIC, so the APIC IRQ handling
- * rules apply:
+ * We may be running with guest PEBS events created by KVM, and the
+ * PEBS records are logged into the guest's DS and invisible to the host.
+ *
+ * In the case of a guest PEBS overflow, we only trigger a fake event
+ * to emulate the PEBS overflow PMI for guest PEBS counters in KVM.
+ * The guest will then do a VM entry and check the guest DS area to read
+ * the guest PEBS records.
+ *
+ * The contents and other behavior of the guest event do not matter.
*/
-static int intel_pmu_handle_irq(struct pt_regs *regs)
+static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
+ struct perf_sample_data *data)
{
- struct perf_sample_data data;
- struct cpu_hw_events *cpuc;
- int bit, loops;
- u64 status;
- int handled;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
+ struct perf_event *event = NULL;
+ int bit;
- cpuc = this_cpu_ptr(&cpu_hw_events);
+ if (!unlikely(perf_guest_state()))
+ return;
- /*
- * No known reason to not always do late ACK,
- * but just in case do it opt-in.
- */
- if (!x86_pmu.late_ack)
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- intel_bts_disable_local();
- __intel_pmu_disable_all();
- handled = intel_pmu_drain_bts_buffer();
- handled += intel_bts_interrupt();
- status = intel_pmu_get_status();
- if (!status)
- goto done;
+ if (!x86_pmu.pebs_ept || !x86_pmu.pebs_active ||
+ !guest_pebs_idxs)
+ return;
- loops = 0;
-again:
- intel_pmu_lbr_read();
- intel_pmu_ack_status(status);
- if (++loops > 100) {
- static bool warned = false;
- if (!warned) {
- WARN(1, "perfevents: irq loop stuck!\n");
- perf_event_print_debug();
- warned = true;
- }
- intel_pmu_reset();
- goto done;
+ for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) {
+ event = cpuc->events[bit];
+ if (!event->attr.precise_ip)
+ continue;
+
+ perf_sample_data_init(data, 0, event->hw.last_period);
+ perf_event_overflow(event, data, regs);
+
+		/* Injecting one fake event is enough. */
+ break;
}
+}
- inc_irq_stat(apic_perf_irqs);
+static int handle_pmi_common(struct pt_regs *regs, u64 status)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int bit;
+ int handled = 0;
+ inc_irq_stat(apic_perf_irqs);
/*
* Ignore a range of extra bits in status that do not indicate
@@ -2244,7 +3278,7 @@ again:
GLOBAL_STATUS_ASIF |
GLOBAL_STATUS_LBRS_FROZEN);
if (!status)
- goto done;
+ return 0;
/*
* In case multiple PEBS events are sampled at the same time,
* it is possible to have GLOBAL_STATUS bit 62 set indicating
@@ -2256,7 +3290,7 @@ again:
* processing loop coming after that the function, otherwise
* phony regular samples may be generated in the sampling buffer
* not marked with the EXACT tag. Another possibility is to have
- * one PEBS event and at least one non-PEBS event whic hoverflows
+ * one PEBS event and at least one non-PEBS event which overflows
* while PEBS has armed. In this case, bit 62 of GLOBAL_STATUS will
* not be set, yet the overflow status bit for the PEBS counter will
* be on Skylake.
@@ -2265,26 +3299,71 @@ again:
* counters from the GLOBAL_STATUS mask and we always process PEBS
* events via drain_pebs().
*/
- status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+ status &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable);
/*
* PEBS overflow sets bit 62 in the global status register
*/
- if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+ if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, (unsigned long *)&status)) {
+ u64 pebs_enabled = cpuc->pebs_enabled;
+
+ handled++;
+ x86_pmu_handle_guest_pebs(regs, &data);
+ static_call(x86_pmu_drain_pebs)(regs, &data);
+
+ /*
+		 * PMI throttling may be triggered, which stops the PEBS event.
+		 * Although cpuc->pebs_enabled is updated accordingly,
+		 * MSR_IA32_PEBS_ENABLE is not, because cpuc->enabled
+		 * has been forced to 0 in the PMI.
+		 * Update the MSR if pebs_enabled has changed.
+ */
+ if (pebs_enabled != cpuc->pebs_enabled)
+ wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+
+ /*
+		 * The PEBS handler above (PEBS counter snapshotting) has already
+		 * updated fixed counter 3 and the perf metrics counts if they are
+		 * in a counter group, so it's unnecessary to update them again.
+ */
+ if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
+ is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
+ status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
+ }
+
+ /*
+ * Arch PEBS sets bit 54 in the global status register
+ */
+ if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT,
+ (unsigned long *)&status)) {
handled++;
- x86_pmu.drain_pebs(regs);
- status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
+ static_call(x86_pmu_drain_pebs)(regs, &data);
+
+ if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
+ is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
+ status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
}
/*
* Intel PT
*/
- if (__test_and_clear_bit(55, (unsigned long *)&status)) {
+ if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) {
handled++;
- intel_pt_interrupt();
+ if (!perf_guest_handle_intel_pt_intr())
+ intel_pt_interrupt();
}
/*
+ * Intel Perf metrics
+ */
+ if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
+ handled++;
+ static_call(intel_pmu_update_topdown_event)(NULL, NULL);
+ }
+
+ status &= hybrid(cpuc->pmu, intel_ctrl);
+
+ /*
* Checkpointed counters can lead to 'spurious' PMIs because the
* rollback caused by the PMI will have cleared the overflow status
* bit. Therefore always force probe these counters.
@@ -2293,24 +3372,106 @@ again:
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ u64 last_period;
handled++;
if (!test_bit(bit, cpuc->active_mask))
continue;
+ /*
+ * There may be unprocessed PEBS records in the PEBS buffer,
+ * which still stores the previous values.
+ * Process those records first before handling the latest value.
+ * For example,
+ * A is a regular counter
+ * B is a PEBS event which reads A
+ * C is a PEBS event
+ *
+ * The following can happen:
+ * B-assist A=1
+ * C A=2
+ * B-assist A=3
+ * A-overflow-PMI A=4
+ * C-assist-PMI (PEBS buffer) A=5
+ *
+ * The PEBS buffer has to be drained before handling the A-PMI
+ */
+ if (is_pebs_counter_event_group(event))
+ static_call(x86_pmu_drain_pebs)(regs, &data);
+
+ last_period = event->hw.last_period;
+
if (!intel_pmu_save_and_restart(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
if (has_branch_stack(event))
- data.br_stack = &cpuc->lbr_stack;
+ intel_pmu_lbr_save_brstack(&data, cpuc, event);
+
+ perf_event_overflow(event, &data, regs);
+ }
+
+ return handled;
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
+ bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
+ int loops;
+ u64 status;
+ int handled;
+ int pmu_enabled;
+
+ /*
+ * Save the PMU state.
+ * It needs to be restored when leaving the handler.
+ */
+ pmu_enabled = cpuc->enabled;
+ /*
+	 * In general, the early ACK is only applied on old platforms.
+	 * For big cores starting from Haswell, the late ACK should be
+	 * applied.
+	 * For small cores after Tremont, we have to do the ACK right
+	 * before re-enabling counters, which is in the middle of the
+	 * NMI handler.
+ */
+ if (!late_ack && !mid_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ intel_bts_disable_local();
+ cpuc->enabled = 0;
+ __intel_pmu_disable_all(true);
+ handled = intel_pmu_drain_bts_buffer();
+ handled += intel_bts_interrupt();
+ status = intel_pmu_get_status();
+ if (!status)
+ goto done;
+
+ loops = 0;
+again:
+ intel_pmu_lbr_read();
+ intel_pmu_ack_status(status);
+ if (++loops > 100) {
+ static bool warned;
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ if (!warned) {
+ WARN(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ warned = true;
+ }
+ intel_pmu_reset();
+ goto done;
}
+ handled += handle_pmi_common(regs, status);
+
/*
* Repeat if there is more work to be done:
*/
@@ -2319,8 +3480,11 @@ again:
goto again;
done:
+ if (mid_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
/* Only restore PMU state when it's active. See x86_pmu_disable(). */
- if (cpuc->enabled)
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
__intel_pmu_enable_all(0, true);
intel_bts_enable_local();
@@ -2329,7 +3493,7 @@ done:
* have been reset. This avoids spurious NMIs on
* Haswell CPUs.
*/
- if (x86_pmu.late_ack)
+ if (late_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
return handled;
}
@@ -2337,23 +3501,32 @@ done:
static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
- unsigned int hw_event, bts_event;
+ if (unlikely(intel_pmu_has_bts(event)))
+ return &bts_constraint;
- if (event->attr.freq)
- return NULL;
+ return NULL;
+}
- hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
- bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+/*
+ * Note: matches a fake event, like Fixed2.
+ */
+static struct event_constraint *
+intel_vlbr_constraints(struct perf_event *event)
+{
+ struct event_constraint *c = &vlbr_constraint;
- if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
- return &bts_constraint;
+ if (unlikely(constraint_match(c, event->hw.config))) {
+ event->hw.flags |= c->flags;
+ return c;
+ }
return NULL;
}
-static int intel_alt_er(int idx, u64 config)
+static int intel_alt_er(struct cpu_hw_events *cpuc,
+ int idx, u64 config)
{
+ struct extra_reg *extra_regs = hybrid(cpuc->pmu, extra_regs);
int alt_idx = idx;
if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
@@ -2365,7 +3538,7 @@ static int intel_alt_er(int idx, u64 config)
if (idx == EXTRA_REG_RSP_1)
alt_idx = EXTRA_REG_RSP_0;
- if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
+ if (config & ~extra_regs[alt_idx].valid_mask)
return idx;
return alt_idx;
@@ -2373,15 +3546,16 @@ static int intel_alt_er(int idx, u64 config)
static void intel_fixup_er(struct perf_event *event, int idx)
{
+ struct extra_reg *extra_regs = hybrid(event->pmu, extra_regs);
event->hw.extra_reg.idx = idx;
if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
+ event->hw.config |= extra_regs[EXTRA_REG_RSP_0].event;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
} else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
+ event->hw.config |= extra_regs[EXTRA_REG_RSP_1].event;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
}
@@ -2457,7 +3631,7 @@ again:
*/
c = NULL;
} else {
- idx = intel_alt_er(idx, reg->config);
+ idx = intel_alt_er(cpuc, idx, reg->config);
if (idx != reg->idx) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
@@ -2522,18 +3696,19 @@ struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
+ struct event_constraint *event_constraints = hybrid(cpuc->pmu, event_constraints);
struct event_constraint *c;
- if (x86_pmu.event_constraints) {
- for_each_event_constraint(c, x86_pmu.event_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
+ if (event_constraints) {
+ for_each_event_constraint(c, event_constraints) {
+ if (constraint_match(c, event->hw.config)) {
event->hw.flags |= c->flags;
return c;
}
}
}
- return &unconstrained;
+ return &hybrid_var(cpuc->pmu, unconstrained);
}
static struct event_constraint *
@@ -2542,6 +3717,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
struct event_constraint *c;
+ c = intel_vlbr_constraints(event);
+ if (c)
+ return c;
+
c = intel_bts_constraints(event);
if (c)
return c;
@@ -2641,13 +3820,42 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
}
static struct event_constraint *
+dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx)
+{
+ WARN_ON_ONCE(!cpuc->constraint_list);
+
+ if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+ struct event_constraint *cx;
+
+ /*
+ * grab pre-allocated constraint entry
+ */
+ cx = &cpuc->constraint_list[idx];
+
+ /*
+ * initialize dynamic constraint
+ * with static constraint
+ */
+ *cx = *c;
+
+ /*
+ * mark constraint as dynamic
+ */
+ cx->flags |= PERF_X86_EVENT_DYNAMIC;
+ c = cx;
+ }
+
+ return c;
+}
+
+static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
int idx, struct event_constraint *c)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xlo;
int tid = cpuc->excl_thread_id;
- int is_excl, i;
+ int is_excl, i, w;
/*
* validating a group does not require
@@ -2670,27 +3878,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* only needed when constraint has not yet
* been cloned (marked dynamic)
*/
- if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
- struct event_constraint *cx;
-
- /*
- * grab pre-allocated constraint entry
- */
- cx = &cpuc->constraint_list[idx];
-
- /*
- * initialize dynamic constraint
- * with static constraint
- */
- *cx = *c;
-
- /*
- * mark constraint as dynamic, so we
- * can free it later on
- */
- cx->flags |= PERF_X86_EVENT_DYNAMIC;
- c = cx;
- }
+ c = dyn_constraint(cpuc, c, idx);
/*
* From here on, the constraint is dynamic.
@@ -2723,36 +3911,40 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* SHARED : sibling counter measuring non-exclusive event
* UNUSED : sibling counter unused
*/
+ w = c->weight;
for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
/*
* exclusive event in sibling counter
* our corresponding counter cannot be used
* regardless of our event
*/
- if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+ if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
__clear_bit(i, c->idxmsk);
+ w--;
+ continue;
+ }
/*
* if measuring an exclusive event, sibling
* measuring non-exclusive, then counter cannot
* be used
*/
- if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+ if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
__clear_bit(i, c->idxmsk);
+ w--;
+ continue;
+ }
}
/*
- * recompute actual bit weight for scheduling algorithm
- */
- c->weight = hweight64(c->idxmsk64);
-
- /*
* if we return an empty mask, then switch
* back to static empty constraint to avoid
* the cost of freeing later on
*/
- if (c->weight == 0)
+ if (!w)
c = &emptyconstraint;
+ c->weight = w;
+
return c;
}
@@ -2760,11 +3952,9 @@ static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
- struct event_constraint *c1 = NULL;
- struct event_constraint *c2;
+ struct event_constraint *c1, *c2;
- if (idx >= 0) /* fake does < 0 */
- c1 = cpuc->event_constraint[idx];
+ c1 = cpuc->event_constraint[idx];
/*
* first time only
@@ -2772,7 +3962,8 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
* - dynamic constraint: handled by intel_get_excl_constraints()
*/
c2 = __intel_get_event_constraints(cpuc, idx, event);
- if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
+ if (c1) {
+ WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC));
bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
c1->weight = c2->weight;
c2 = c1;
@@ -2781,6 +3972,12 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
if (cpuc->excl_cntrs)
return intel_get_excl_constraints(cpuc, event, idx, c2);
+ if (event->hw.dyn_constraint != ~0ULL) {
+ c2 = dyn_constraint(cpuc, c2, idx);
+ c2->idxmsk64 &= event->hw.dyn_constraint;
+ c2->weight = hweight64(c2->idxmsk64);
+ }
+
return c2;
}
@@ -2952,15 +4149,221 @@ static void intel_pebs_aliases_skl(struct perf_event *event)
return intel_pebs_aliases_precdist(event);
}
-static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
{
- unsigned long flags = x86_pmu.free_running_flags;
+ unsigned long flags = x86_pmu.large_pebs_flags;
if (event->attr.use_clockid)
flags &= ~PERF_SAMPLE_TIME;
+ if (!event->attr.exclude_kernel)
+ flags &= ~PERF_SAMPLE_REGS_USER;
+ if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
+ flags &= ~PERF_SAMPLE_REGS_USER;
+ if (event->attr.sample_regs_intr & ~PEBS_GP_REGS)
+ flags &= ~PERF_SAMPLE_REGS_INTR;
return flags;
}
+static int intel_pmu_bts_config(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ if (unlikely(intel_pmu_has_bts(event))) {
+ /* BTS is not supported by this architecture. */
+ if (!x86_pmu.bts_active)
+ return -EOPNOTSUPP;
+
+ /* BTS is currently only allowed for user-mode. */
+ if (!attr->exclude_kernel)
+ return -EOPNOTSUPP;
+
+ /* BTS is not allowed for precise events. */
+ if (attr->precise_ip)
+ return -EOPNOTSUPP;
+
+ /* disallow bts if conflicting events are present */
+ if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+ return -EBUSY;
+
+ event->destroy = hw_perf_lbr_event_destroy;
+ }
+
+ return 0;
+}
+
+static int core_pmu_hw_config(struct perf_event *event)
+{
+ int ret = x86_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+
+ return intel_pmu_bts_config(event);
+}
+
+#define INTEL_TD_METRIC_AVAILABLE_MAX (INTEL_TD_METRIC_RETIRING + \
+ ((x86_pmu.num_topdown_events - 1) << 8))
+
+static bool is_available_metric_event(struct perf_event *event)
+{
+ return is_metric_event(event) &&
+ event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX;
+}
+
+static inline bool is_mem_loads_event(struct perf_event *event)
+{
+ return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01);
+}
+
+static inline bool is_mem_loads_aux_event(struct perf_event *event)
+{
+ return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82);
+}
+
+static inline bool require_mem_loads_aux_event(struct perf_event *event)
+{
+ if (!(x86_pmu.flags & PMU_FL_MEM_LOADS_AUX))
+ return false;
+
+ if (is_hybrid())
+ return hybrid_pmu(event->pmu)->pmu_type == hybrid_big;
+
+ return true;
+}
+
+static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
+{
+ union perf_capabilities *intel_cap = &hybrid(event->pmu, intel_cap);
+
+ return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
+}
+
+static u64 intel_pmu_freq_start_period(struct perf_event *event)
+{
+ int type = event->attr.type;
+ u64 config, factor;
+ s64 start;
+
+ /*
+ * 127 is the lowest recommended SAV (sample after value) for a
+ * 4000 Hz freq (the default), according to the event list JSON file.
+ * Also assume the workload is idle 50% of the time, which halves
+ * the rate: 127 / 2 ~= 64, hence factor = 64 * 4000 below.
+ */
+ factor = 64 * 4000;
+ if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
+ goto end;
+
+ /*
+ * The estimation of the start period in freq mode is based
+ * on the assumptions below.
+ *
+ * For a cycles or an instructions event, assume a 1GHz platform
+ * and an IPC of 1, with the workload idle 50% of the time.
+ * The start period = 1,000,000,000 * 1 / freq / 2
+ *                  = 500,000,000 / freq
+ *
+ * Usually, branch-related events occur less often than the
+ * instructions event. According to the Intel event list JSON
+ * file, the SAV (sample after value) of a branch-related event
+ * is usually 1/4 that of an instruction event.
+ * The start period of branch-related events = 125,000,000 / freq.
+ *
+ * Cache-related events occur even less often. The SAV is usually
+ * 1/20 that of an instruction event.
+ * The start period of cache-related events = 25,000,000 / freq.
+ */
+ config = event->attr.config & PERF_HW_EVENT_MASK;
+ if (type == PERF_TYPE_HARDWARE) {
+ switch (config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ case PERF_COUNT_HW_INSTRUCTIONS:
+ case PERF_COUNT_HW_BUS_CYCLES:
+ case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
+ case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
+ case PERF_COUNT_HW_REF_CPU_CYCLES:
+ factor = 500000000;
+ break;
+ case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
+ case PERF_COUNT_HW_BRANCH_MISSES:
+ factor = 125000000;
+ break;
+ case PERF_COUNT_HW_CACHE_REFERENCES:
+ case PERF_COUNT_HW_CACHE_MISSES:
+ factor = 25000000;
+ break;
+ default:
+ goto end;
+ }
+ }
+
+ if (type == PERF_TYPE_HW_CACHE)
+ factor = 25000000;
+end:
+ /*
+ * Usually, a prime or a number with few factors (close to prime)
+ * is chosen as an SAV, which makes it less likely that the sampling
+ * period synchronizes with some periodic event in the workload.
+ * Subtract 1 so that, at least for the default freq, values near
+ * a power of two are avoided.
+ */
+ start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1;
+
+ if (start > x86_pmu.max_period)
+ start = x86_pmu.max_period;
+
+ if (x86_pmu.limit_period)
+ x86_pmu.limit_period(event, &start);
+
+ return start;
+}
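
As a worked illustration of the estimate above (numbers computed from the formulas in the comment, for the default 4000 Hz frequency):

/*
 * sample_freq = 4000:
 *   cycles/instructions: DIV_ROUND_UP(500000000, 4000) - 1 = 124999
 *   branch events:       DIV_ROUND_UP(125000000, 4000) - 1 = 31249
 *   cache events:        DIV_ROUND_UP(25000000, 4000)  - 1 = 6249
 */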
+
+static inline bool intel_pmu_has_acr(struct pmu *pmu)
+{
+ return !!hybrid(pmu, acr_cause_mask64);
+}
+
+static bool intel_pmu_is_acr_group(struct perf_event *event)
+{
+ /* The group leader has the ACR flag set */
+ if (is_acr_event_group(event))
+ return true;
+
+ /* The acr_mask is set */
+ if (event->attr.config2)
+ return true;
+
+ return false;
+}
+
+static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu)
+{
+ u64 caps;
+
+ if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline)
+ return true;
+
+ caps = hybrid(pmu, arch_pebs_cap).caps;
+ if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK))
+ return true;
+
+ return false;
+}
+
+static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
+ u64 *cause_mask, int *num)
+{
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cntr_mask64);
+ *cause_mask |= event->attr.config2;
+ *num += 1;
+}
+
+static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event,
+ int idx, u64 cause_mask)
+{
+ if (test_bit(idx, (unsigned long *)&cause_mask))
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64);
+}
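
To make the acr_mask semantics concrete: bit i of an event's config2 refers to the i-th member of its group. A hypothetical user-space sketch (the perf_event_attr layout is the standard UAPI; EVENT_A/EVENT_B are placeholder encodings):

struct perf_event_attr a = {
	.type          = PERF_TYPE_RAW,
	.config        = EVENT_A,	/* cause event, group member 0 */
	.sample_period = 0x8000,
};
struct perf_event_attr b = {
	.type          = PERF_TYPE_RAW,
	.config        = EVENT_B,	/* reloaded when member 0 overflows */
	.config2       = 0x1,		/* acr_mask selects group member 0 */
	.sample_period = 0x10000,
};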
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -2968,26 +4371,116 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ ret = intel_pmu_bts_config(event);
+ if (ret)
+ return ret;
+
+ if (event->attr.freq && event->attr.sample_freq) {
+ event->hw.sample_period = intel_pmu_freq_start_period(event);
+ event->hw.last_period = event->hw.sample_period;
+ local64_set(&event->hw.period_left, event->hw.sample_period);
+ }
+
if (event->attr.precise_ip) {
- if (!event->attr.freq) {
+ struct arch_pebs_cap pebs_cap = hybrid(event->pmu, arch_pebs_cap);
+
+ if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
+ return -EINVAL;
+
+ if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
- if (!(event->attr.sample_type &
- ~intel_pmu_free_running_flags(event)))
- event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+ if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event)) &&
+ !has_aux_action(event)) {
+ event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+ }
}
if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
+
+ if (x86_pmu.arch_pebs) {
+ u64 cntr_mask = hybrid(event->pmu, intel_ctrl) &
+ ~GLOBAL_CTRL_EN_PERF_METRICS;
+ u64 pebs_mask = event->attr.precise_ip >= 3 ?
+ pebs_cap.pdists : pebs_cap.counters;
+ if (cntr_mask != pebs_mask)
+ event->hw.dyn_constraint &= pebs_mask;
+ }
}
if (needs_branch_stack(event)) {
+ /* Avoid branch stack setup for counting events in SAMPLE READ */
+ if (is_sampling_event(event) ||
+ !(event->attr.sample_type & PERF_SAMPLE_READ))
+ event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+ }
+
+ if (branch_sample_counters(event)) {
+ struct perf_event *leader, *sibling;
+ int num = 0;
+
+ if (!(x86_pmu.flags & PMU_FL_BR_CNTR) ||
+ (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+ return -EINVAL;
+
+ /*
+ * The branch counter logging is not supported in the call stack
+ * mode yet, since we cannot simply flush the LBR during e.g.,
+ * multiplexing. Also, there is no obvious usage with the call
+ * stack mode. Simply forbid it for now.
+ *
+ * If any events in the group enable the branch counter logging
+ * feature, the group is treated as a branch counter logging
+ * group, which requires the extra space to store the counters.
+ */
+ leader = event->group_leader;
+ if (branch_sample_call_stack(leader))
+ return -EINVAL;
+ if (branch_sample_counters(leader)) {
+ num++;
+ leader->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
+ leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
+
+ for_each_sibling_event(sibling, leader) {
+ if (branch_sample_call_stack(sibling))
+ return -EINVAL;
+ if (branch_sample_counters(sibling)) {
+ num++;
+ sibling->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
+ }
+
+ if (num > fls(x86_pmu.lbr_counters))
+ return -EINVAL;
+ /*
+ * Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't
+ * require any branch stack setup.
+ * Clear the bit to avoid unnecessary branch stack setup.
+ */
+ if (0 == (event->attr.branch_sample_type &
+ ~(PERF_SAMPLE_BRANCH_PLM_ALL |
+ PERF_SAMPLE_BRANCH_COUNTERS)))
+ event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+ /*
+ * Force the leader to be an LBR event, so LBRs can be reset
+ * with the leader event. See intel_pmu_lbr_del() for details.
+ */
+ if (!intel_pmu_needs_branch_stack(leader))
+ return -EINVAL;
+ }
+
+ if (intel_pmu_needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
/*
* BTS is set up earlier in this path, so don't account twice
*/
- if (!intel_pmu_has_bts(event)) {
+ if (!unlikely(intel_pmu_has_bts(event))) {
/* disallow lbr if conflicting events are present */
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
return -EBUSY;
@@ -2996,60 +4489,319 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
}
- if (event->attr.type != PERF_TYPE_RAW)
+ if (event->attr.aux_output) {
+ if (!event->attr.precise_ip)
+ return -EINVAL;
+
+ event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
+ }
+
+ if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
+ intel_pmu_has_pebs_counter_group(event->pmu) &&
+ is_sampling_event(event) &&
+ event->attr.precise_ip)
+ event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+
+ if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) {
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct pmu *pmu = event->pmu;
+ bool has_sw_event = false;
+ int num = 0, idx = 0;
+ u64 cause_mask = 0;
+
+ /* Perf metrics are not supported */
+ if (is_metric_event(event))
+ return -EINVAL;
+
+ /* Not support freq mode */
+ if (event->attr.freq)
+ return -EINVAL;
+
+ /* PDist is not supported */
+ if (event->attr.config2 && event->attr.precise_ip > 2)
+ return -EINVAL;
+
+ /* The reload value cannot exceed the max period */
+ if (event->attr.sample_period > x86_pmu.max_period)
+ return -EINVAL;
+ /*
+ * The counter-constraints of each event cannot be finalized
+ * unless the whole group is scanned. However, it's hard
+ * to know whether the event is the last one of the group.
+ * Recalculate the counter-constraints for each event when
+ * adding a new event.
+ *
+ * The group is traversed twice, which may be optimized later.
+ * In the first round,
+ * - Find all events which do reload when other events
+ * overflow and set the corresponding counter-constraints
+ * - Add all events, which can cause other events reload,
+ * in the cause_mask
+ * - Error out if the number of events exceeds the HW limit
+ * - The ACR events must be contiguous.
+ * Error out if there are non-X86 events between ACR events.
+ * This is not a HW limit, but a SW limit.
+ * With this assumption, intel_pmu_acr_late_setup() can
+ * easily convert an event idx to a counter idx without
+ * traversing the whole event list.
+ */
+ if (!is_x86_event(leader))
+ return -EINVAL;
+
+ if (leader->attr.config2)
+ intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader) {
+ if (!is_x86_event(sibling)) {
+ has_sw_event = true;
+ continue;
+ }
+ if (!sibling->attr.config2)
+ continue;
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num);
+ }
+ }
+ if (leader != event && event->attr.config2) {
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num);
+ }
+
+ if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) ||
+ num > hweight64(hybrid(event->pmu, acr_cntr_mask64)))
+ return -EINVAL;
+ /*
+ * In the second round, apply the counter-constraints for
+ * the events which can cause other events reload.
+ */
+ intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader)
+ intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask);
+ }
+
+ if (leader != event)
+ intel_pmu_set_acr_caused_constr(event, idx, cause_mask);
+
+ leader->hw.flags |= PERF_X86_EVENT_ACR;
+ }
+
+ if ((event->attr.type == PERF_TYPE_HARDWARE) ||
+ (event->attr.type == PERF_TYPE_HW_CACHE))
return 0;
+ /*
+ * Config Topdown slots and metric events
+ *
+ * The slots event on Fixed Counter 3 can support sampling,
+ * which will be handled normally in x86_perf_event_update().
+ *
+ * Metric events don't support sampling and require being paired
+ * with a slots event as group leader. When the slots event
+ * is used in a metrics group, it too cannot support sampling.
+ */
+ if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
+ /* The metrics_clear can only be set for the slots event */
+ if (event->attr.config1 &&
+ (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR)))
+ return -EINVAL;
+
+ if (event->attr.config2)
+ return -EINVAL;
+
+ /*
+ * The TopDown metrics events and slots event don't
+ * support any filters.
+ */
+ if (event->attr.config & X86_ALL_EVENT_FLAGS)
+ return -EINVAL;
+
+ if (is_available_metric_event(event)) {
+ struct perf_event *leader = event->group_leader;
+
+ /* The metric events don't support sampling. */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ /* The metric events require a slots group leader. */
+ if (!is_slots_event(leader))
+ return -EINVAL;
+
+ /*
+ * The leader/SLOTS must not be a sampling event for
+ * metric use; hardware requires it starts at 0 when used
+ * in conjunction with MSR_PERF_METRICS.
+ */
+ if (is_sampling_event(leader))
+ return -EINVAL;
+
+ event->event_caps |= PERF_EV_CAP_SIBLING;
+ /*
+ * Only once we have a METRICs sibling do we
+ * need TopDown magic.
+ */
+ leader->hw.flags |= PERF_X86_EVENT_TOPDOWN;
+ event->hw.flags |= PERF_X86_EVENT_TOPDOWN;
+ }
+ }
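
As the checks above imply, a valid metrics group pairs a non-sampling slots leader with metric siblings; counts for the siblings come from MSR_PERF_METRICS relative to the slots count. Illustrative grouping only:

/*
 * {slots, topdown-retiring, topdown-bad-spec, ...}
 *   - slots leads the group and must not sample
 *   - each metric sibling carries PERF_EV_CAP_SIBLING
 *   - both leader and siblings get PERF_X86_EVENT_TOPDOWN
 */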
+
+ /*
+ * The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR
+ * doesn't function quite right. As a work-around it needs to always be
+ * co-scheduled with an auxiliary event X86_CONFIG(.event=0x03, .umask=0x82).
+ * The actual count of this second event is irrelevant; it just needs
+ * to be active to make the first event function correctly.
+ *
+ * In a group, the auxiliary event must come before the load latency
+ * event. This rule simplifies the check, because perf cannot see
+ * the complete group at event creation time.
+ */
+ if (require_mem_loads_aux_event(event) &&
+ (event->attr.sample_type & PERF_SAMPLE_DATA_SRC) &&
+ is_mem_loads_event(event)) {
+ struct perf_event *leader = event->group_leader;
+ struct perf_event *sibling = NULL;
+
+ /*
+ * When this memload event is also the first event (no group
+ * exists yet), then there is no aux event before it.
+ */
+ if (leader == event)
+ return -ENODATA;
+
+ if (!is_mem_loads_aux_event(leader)) {
+ for_each_sibling_event(sibling, leader) {
+ if (is_mem_loads_aux_event(sibling))
+ break;
+ }
+ if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list))
+ return -ENODATA;
+ }
+ }
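
For illustration, a minimal user-space sketch of a valid group on such a PMU, assuming the standard perf_event_open() UAPI (encodings taken from the comment above; ldlat and other config details omitted):

struct perf_event_attr aux = {
	.type   = PERF_TYPE_RAW,
	.config = 0x8203,		/* .event=0x03, .umask=0x82 */
};
struct perf_event_attr ld = {
	.type          = PERF_TYPE_RAW,
	.config        = 0x01cd,	/* .event=0xcd, .umask=0x01 */
	.sample_type   = PERF_SAMPLE_DATA_SRC,
	.precise_ip    = 3,
	.sample_period = 1009,
};
int aux_fd = perf_event_open(&aux, 0, -1, -1, 0);	/* leader */
int ld_fd  = perf_event_open(&ld,  0, -1, aux_fd, 0);	/* sibling */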
+
if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
return 0;
if (x86_pmu.version < 3)
return -EINVAL;
- if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ ret = perf_allow_cpu();
+ if (ret)
+ return ret;
event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
return 0;
}
-struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
- if (x86_pmu.guest_get_msrs)
- return x86_pmu.guest_get_msrs(nr);
- *nr = 0;
- return NULL;
-}
-EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
-
-static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
+/*
+ * Currently, the only caller of this function is atomic_switch_perf_msrs().
+ * The host perf context helps to prepare the values of the real hardware for
+ * a set of MSRs that need to be switched atomically in a VMX transaction.
+ *
+ * For example, the pseudocode needed to add a new msr should look like:
+ *
+ * arr[(*nr)++] = (struct perf_guest_switch_msr){
+ * .msr = the hardware msr address,
+ * .host = the value the hardware has when it doesn't run a guest,
+ * .guest = the value the hardware has when it runs a guest,
+ * };
+ *
+ * These values have nothing to do with the emulated values the guest sees
+ * when it uses {RD,WR}MSR, which should be handled by the KVM context,
+ * specifically in the intel_pmu_{get,set}_msr().
+ */
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+ struct kvm_pmu *kvm_pmu = (struct kvm_pmu *)data;
+ u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
+ u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable;
+ int global_ctrl, pebs_enable;
- arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
- arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
- arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
/*
- * If PMU counter has PEBS enabled it is not enough to disable counter
- * on a guest entry since PEBS memory write can overshoot guest entry
- * and corrupt guest memory. Disabling PEBS solves the problem.
+ * In addition to obeying exclude_guest/exclude_host, remove bits being
+ * used for PEBS when running a guest, because PEBS writes to virtual
+ * addresses (not physical addresses).
*/
- arr[1].msr = MSR_IA32_PEBS_ENABLE;
- arr[1].host = cpuc->pebs_enabled;
- arr[1].guest = 0;
+ *nr = 0;
+ global_ctrl = (*nr)++;
+ arr[global_ctrl] = (struct perf_guest_switch_msr){
+ .msr = MSR_CORE_PERF_GLOBAL_CTRL,
+ .host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask,
+ .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask,
+ };
+
+ if (!x86_pmu.ds_pebs)
+ return arr;
+
+ /*
+ * If PMU counter has PEBS enabled it is not enough to
+ * disable counter on a guest entry since PEBS memory
+ * write can overshoot guest entry and corrupt guest
+ * memory. Disabling PEBS solves the problem.
+ *
+ * Don't do this if the CPU already enforces it.
+ */
+ if (x86_pmu.pebs_no_isolation) {
+ arr[(*nr)++] = (struct perf_guest_switch_msr){
+ .msr = MSR_IA32_PEBS_ENABLE,
+ .host = cpuc->pebs_enabled,
+ .guest = 0,
+ };
+ return arr;
+ }
+
+ if (!kvm_pmu || !x86_pmu.pebs_ept)
+ return arr;
+
+ arr[(*nr)++] = (struct perf_guest_switch_msr){
+ .msr = MSR_IA32_DS_AREA,
+ .host = (unsigned long)cpuc->ds,
+ .guest = kvm_pmu->ds_area,
+ };
+
+ if (x86_pmu.intel_cap.pebs_baseline) {
+ arr[(*nr)++] = (struct perf_guest_switch_msr){
+ .msr = MSR_PEBS_DATA_CFG,
+ .host = cpuc->active_pebs_data_cfg,
+ .guest = kvm_pmu->pebs_data_cfg,
+ };
+ }
+
+ pebs_enable = (*nr)++;
+ arr[pebs_enable] = (struct perf_guest_switch_msr){
+ .msr = MSR_IA32_PEBS_ENABLE,
+ .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
+ .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable,
+ };
+
+ if (arr[pebs_enable].host) {
+ /* Disable guest PEBS if host PEBS is enabled. */
+ arr[pebs_enable].guest = 0;
+ } else {
+ /* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */
+ arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
+ arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask;
+ /* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
+ arr[global_ctrl].guest |= arr[pebs_enable].guest;
+ }
- *nr = 2;
return arr;
}
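
On the consumer side, the comment above implies a loop of roughly this shape in the VMX entry path (a sketch only; the exact KVM helper names and signatures may differ):

struct perf_guest_switch_msr *msrs;
int i, nr;

msrs = perf_guest_get_msrs(&nr, kvm_pmu);
for (i = 0; i < nr; i++)
	add_atomic_switch_msr(vmx, msrs[i].msr,
			      msrs[i].guest, msrs[i].host);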
-static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx];
arr[idx].msr = x86_pmu_config_addr(idx);
@@ -3067,7 +4819,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
}
- *nr = x86_pmu.num_counters;
+ *nr = x86_pmu_max_num_counters(cpuc->pmu);
return arr;
}
@@ -3082,7 +4834,7 @@ static void core_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask) ||
@@ -3133,9 +4885,27 @@ static int hsw_hw_config(struct perf_event *event)
static struct event_constraint counter0_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
+static struct event_constraint counter1_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x2);
+
+static struct event_constraint counter0_1_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x3);
+
static struct event_constraint counter2_constraint =
EVENT_CONSTRAINT(0, 0x4, 0);
+static struct event_constraint fixed0_constraint =
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0);
+
+static struct event_constraint fixed0_counter0_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
+
+static struct event_constraint fixed0_counter0_1_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL);
+
+static struct event_constraint counters_1_7_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xfeULL);
+
static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@@ -3155,6 +4925,46 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
}
static struct event_constraint *
+icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ /*
+ * Fixed counter 0 has less skid.
+ * Force instruction:ppp in Fixed counter 0
+ */
+ if ((event->attr.precise_ip == 3) &&
+ constraint_match(&fixed0_constraint, event->hw.config))
+ return &fixed0_constraint;
+
+ return hsw_get_event_constraints(cpuc, idx, event);
+}
+
+static struct event_constraint *
+glc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = icl_get_event_constraints(cpuc, idx, event);
+
+ /*
+ * The :ppp indicates the Precise Distribution (PDist) facility, which
+ * is only supported on GP counter 0. If a :ppp event is not
+ * available on GP counter 0, error out.
+ * Exception: Instruction PDIR is only available on the fixed counter 0.
+ */
+ if ((event->attr.precise_ip == 3) &&
+ !constraint_match(&fixed0_constraint, event->hw.config)) {
+ if (c->idxmsk64 & BIT_ULL(0))
+ return &counter0_constraint;
+
+ return &emptyconstraint;
+ }
+
+ return c;
+}
+
+static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
@@ -3169,6 +4979,200 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return c;
}
+static struct event_constraint *
+tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = intel_get_event_constraints(cpuc, idx, event);
+
+ /*
+ * :ppp means to do reduced skid PEBS,
+ * which is available on PMC0 and fixed counter 0.
+ */
+ if (event->attr.precise_ip == 3) {
+ /* Force instruction:ppp on PMC0 and Fixed counter 0 */
+ if (constraint_match(&fixed0_constraint, event->hw.config))
+ return &fixed0_counter0_constraint;
+
+ return &counter0_constraint;
+ }
+
+ return c;
+}
+
+static bool allow_tsx_force_abort = true;
+
+static struct event_constraint *
+tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event);
+
+ /*
+ * Without TFA we must not use PMC3.
+ */
+ if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
+ c = dyn_constraint(cpuc, c, idx);
+ c->idxmsk64 &= ~(1ULL << 3);
+ c->weight--;
+ }
+
+ return c;
+}
+
+static struct event_constraint *
+adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_big)
+ return glc_get_event_constraints(cpuc, idx, event);
+ else if (pmu->pmu_type == hybrid_small)
+ return tnt_get_event_constraints(cpuc, idx, event);
+
+ WARN_ON(1);
+ return &emptyconstraint;
+}
+
+static struct event_constraint *
+cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = intel_get_event_constraints(cpuc, idx, event);
+
+ /*
+ * The :ppp indicates the Precise Distribution (PDist) facility, which
+ * is only supported on GP counters 0 & 1 and fixed counter 0.
+ * If a :ppp event is not available on one of these eligible
+ * counters, error out.
+ */
+ if (event->attr.precise_ip == 3) {
+ /* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
+ if (constraint_match(&fixed0_constraint, event->hw.config)) {
+ /* The fixed counter 0 doesn't support LBR event logging. */
+ if (branch_sample_counters(event))
+ return &counter0_1_constraint;
+ else
+ return &fixed0_counter0_1_constraint;
+ }
+
+ switch (c->idxmsk64 & 0x3ull) {
+ case 0x1:
+ return &counter0_constraint;
+ case 0x2:
+ return &counter1_constraint;
+ case 0x3:
+ return &counter0_1_constraint;
+ }
+ return &emptyconstraint;
+ }
+
+ return c;
+}
+
+static struct event_constraint *
+rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = glc_get_event_constraints(cpuc, idx, event);
+
+ /* The Retire Latency is not supported by the fixed counter 0. */
+ if (event->attr.precise_ip &&
+ (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
+ constraint_match(&fixed0_constraint, event->hw.config)) {
+ /*
+ * The Instruction PDIR is only available
+ * on the fixed counter 0. Error out for this case.
+ */
+ if (event->attr.precise_ip == 3)
+ return &emptyconstraint;
+ return &counters_1_7_constraint;
+ }
+
+ return c;
+}
+
+static struct event_constraint *
+mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_big)
+ return rwc_get_event_constraints(cpuc, idx, event);
+ if (pmu->pmu_type == hybrid_small)
+ return cmt_get_event_constraints(cpuc, idx, event);
+
+ WARN_ON(1);
+ return &emptyconstraint;
+}
+
+static int adl_hw_config(struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_big)
+ return hsw_hw_config(event);
+ else if (pmu->pmu_type == hybrid_small)
+ return intel_pmu_hw_config(event);
+
+ WARN_ON(1);
+ return -EOPNOTSUPP;
+}
+
+static enum intel_cpu_type adl_get_hybrid_cpu_type(void)
+{
+ return INTEL_CPU_TYPE_CORE;
+}
+
+static inline bool erratum_hsw11(struct perf_event *event)
+{
+ return (event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+ X86_CONFIG(.event=0xc0, .umask=0x01);
+}
+
+static struct event_constraint *
+arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_tiny)
+ return cmt_get_event_constraints(cpuc, idx, event);
+
+ return mtl_get_event_constraints(cpuc, idx, event);
+}
+
+static int arl_h_hw_config(struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_tiny)
+ return intel_pmu_hw_config(event);
+
+ return adl_hw_config(event);
+}
+
+/*
+ * HSW11 requires a period larger than 100, the same as BDM11.
+ * A minimum period of 128 is therefore enforced for INST_RETIRED.ALL.
+ *
+ * The message 'interrupt took too long' can be observed on any counter
+ * which was armed with a period < 32 when two events expired in the same
+ * NMI. A minimum period of 32 is enforced for the remaining events.
+ */
+static void hsw_limit_period(struct perf_event *event, s64 *left)
+{
+ *left = max(*left, erratum_hsw11(event) ? 128 : 32);
+}
+
/*
* Broadwell:
*
@@ -3184,15 +5188,24 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
* Therefore the effective (average) period matches the requested period,
* despite coarser hardware granularity.
*/
-static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+static void bdw_limit_period(struct perf_event *event, s64 *left)
{
- if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
- X86_CONFIG(.event=0xc0, .umask=0x01)) {
- if (left < 128)
- left = 128;
- left &= ~0x3fu;
+ if (erratum_hsw11(event)) {
+ if (*left < 128)
+ *left = 128;
+ *left &= ~0x3fULL;
}
- return left;
+}
+
+static void nhm_limit_period(struct perf_event *event, s64 *left)
+{
+ *left = max(*left, 32LL);
+}
+
+static void glc_limit_period(struct perf_event *event, s64 *left)
+{
+ if (event->attr.precise_ip == 3)
+ *left = max(*left, 128LL);
}
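
A quick numeric check of the BDM11 workaround above, computed from bdw_limit_period():

/*
 * INST_RETIRED.ALL on Broadwell:
 *   requested 100 -> raised to 128 -> 128 & ~0x3f = 128
 *   requested 200 ->                  200 & ~0x3f = 192
 * The effective period is always a multiple of 64 and at least 128.
 */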
PMU_FORMAT_ATTR(event, "config:0-7" );
@@ -3202,8 +5215,65 @@ PMU_FORMAT_ATTR(pc, "config:19" );
PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );
-PMU_FORMAT_ATTR(in_tx, "config:32");
-PMU_FORMAT_ATTR(in_tx_cp, "config:33");
+PMU_FORMAT_ATTR(in_tx, "config:32" );
+PMU_FORMAT_ATTR(in_tx_cp, "config:33" );
+PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */
+
+PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+
+static ssize_t umask2_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ u64 mask = hybrid(dev_get_drvdata(dev), config_mask) & ARCH_PERFMON_EVENTSEL_UMASK2;
+
+ if (mask == ARCH_PERFMON_EVENTSEL_UMASK2)
+ return sprintf(page, "config:8-15,40-47\n");
+
+ /* Roll back to the old format if umask2 is not supported. */
+ return sprintf(page, "config:8-15\n");
+}
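
For illustration, the resulting sysfs format string (as emitted by umask2_show() above):

/*
 * umask2 enumerated:     "config:8-15,40-47"
 * umask2 not enumerated: "config:8-15"	(old format)
 */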
+
+static struct device_attribute format_attr_umask2 =
+ __ATTR(umask, 0444, umask2_show, NULL);
+
+static struct attribute *format_evtsel_ext_attrs[] = {
+ &format_attr_umask2.attr,
+ &format_attr_eq.attr,
+ &format_attr_metrics_clear.attr,
+ NULL
+};
+
+static umode_t
+evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ u64 mask;
+
+ /*
+ * The umask and umask2 have different formats but share the
+ * same attr name. In update mode, the previous value of the
+ * umask is unconditionally removed before is_visible. If
+ * umask2 format is not enumerated, it's impossible to roll
+ * back to the old format.
+ * Do the check in umask2_show() rather than in is_visible().
+ */
+ if (i == 0)
+ return attr->mode;
+
+ mask = hybrid(dev_get_drvdata(dev), config_mask);
+ if (i == 1)
+ return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
+
+ /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+ if (i == 2) {
+ union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap);
+
+ return intel_cap.rdpmc_metrics_clear ? attr->mode : 0;
+ }
+
+ return 0;
+}
static struct attribute *intel_arch_formats_attr[] = {
&format_attr_event.attr,
@@ -3222,7 +5292,7 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
return x86_event_sysfs_show(page, config, event);
}
-struct intel_shared_regs *allocate_shared_regs(int cpu)
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
{
struct intel_shared_regs *regs;
int i;
@@ -3254,23 +5324,26 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
return c;
}
-static int intel_pmu_cpu_prepare(int cpu)
+
+int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ cpuc->pebs_record_size = x86_pmu.pebs_record_size;
- if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+ if (is_hybrid() || x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
cpuc->shared_regs = allocate_shared_regs(cpu);
if (!cpuc->shared_regs)
goto err;
}
- if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
- cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+ cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
if (!cpuc->constraint_list)
goto err_shared_regs;
+ }
+ if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
if (!cpuc->excl_cntrs)
goto err_constraint_list;
@@ -3292,6 +5365,17 @@ err:
return -ENOMEM;
}
+static int intel_pmu_cpu_prepare(int cpu)
+{
+ int ret;
+
+ ret = intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
+ if (ret)
+ return ret;
+
+ return alloc_arch_pebs_buf_on_cpu(cpu);
+}
+
static void flip_smm_bit(void *data)
{
unsigned long set = *(unsigned long *)data;
@@ -3305,21 +5389,445 @@ static void flip_smm_bit(void *data)
}
}
+static void intel_pmu_check_counters_mask(u64 *cntr_mask,
+ u64 *fixed_cntr_mask,
+ u64 *intel_ctrl)
+{
+ unsigned int bit;
+
+ bit = fls64(*cntr_mask);
+ if (bit > INTEL_PMC_MAX_GENERIC) {
+ WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+ bit, INTEL_PMC_MAX_GENERIC);
+ *cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
+ }
+ *intel_ctrl = *cntr_mask;
+
+ bit = fls64(*fixed_cntr_mask);
+ if (bit > INTEL_PMC_MAX_FIXED) {
+ WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+ bit, INTEL_PMC_MAX_FIXED);
+ *fixed_cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
+ }
+
+ *intel_ctrl |= *fixed_cntr_mask << INTEL_PMC_IDX_FIXED;
+}
+
+static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
+ u64 cntr_mask,
+ u64 fixed_cntr_mask,
+ u64 intel_ctrl);
+
+enum dyn_constr_type {
+ DYN_CONSTR_NONE,
+ DYN_CONSTR_BR_CNTR,
+ DYN_CONSTR_ACR_CNTR,
+ DYN_CONSTR_ACR_CAUSE,
+ DYN_CONSTR_PEBS,
+ DYN_CONSTR_PDIST,
+
+ DYN_CONSTR_MAX,
+};
+
+static const char * const dyn_constr_type_name[] = {
+ [DYN_CONSTR_NONE] = "a normal event",
+ [DYN_CONSTR_BR_CNTR] = "a branch counter logging event",
+ [DYN_CONSTR_ACR_CNTR] = "an auto-counter reload event",
+ [DYN_CONSTR_ACR_CAUSE] = "an auto-counter reload cause event",
+ [DYN_CONSTR_PEBS] = "a PEBS event",
+ [DYN_CONSTR_PDIST] = "a PEBS PDIST event",
+};
+
+static void __intel_pmu_check_dyn_constr(struct event_constraint *constr,
+ enum dyn_constr_type type, u64 mask)
+{
+ struct event_constraint *c1, *c2;
+ int new_weight, check_weight;
+ u64 new_mask, check_mask;
+
+ for_each_event_constraint(c1, constr) {
+ new_mask = c1->idxmsk64 & mask;
+ new_weight = hweight64(new_mask);
+
+ /* ignore topdown perf metrics event */
+ if (c1->idxmsk64 & INTEL_PMC_MSK_TOPDOWN)
+ continue;
+
+ if (!new_weight && fls64(c1->idxmsk64) < INTEL_PMC_IDX_FIXED) {
+ pr_info("The event 0x%llx is not supported as %s.\n",
+ c1->code, dyn_constr_type_name[type]);
+ }
+
+ if (new_weight <= 1)
+ continue;
+
+ for_each_event_constraint(c2, c1 + 1) {
+ bool check_fail = false;
+
+ check_mask = c2->idxmsk64 & mask;
+ check_weight = hweight64(check_mask);
+
+ if (c2->idxmsk64 & INTEL_PMC_MSK_TOPDOWN ||
+ !check_weight)
+ continue;
+
+ /* The same constraints or no overlap */
+ if (new_mask == check_mask ||
+ (new_mask ^ check_mask) == (new_mask | check_mask))
+ continue;
+
+ /*
+ * A scheduler issue may be triggered in the following cases.
+ * - Two overlap constraints have the same weight.
+ * E.g., A constraints: 0x3, B constraints: 0x6
+ * event counter failure case
+ * B PMC[2:1] 1
+ * A PMC[1:0] 0
+ * A PMC[1:0] FAIL
+ * - Two overlap constraints have different weight.
+ * The constraint has a low weight, but has high last bit.
+ * E.g., A constraints: 0x7, B constraints: 0xC
+ * event counter failure case
+ * B PMC[3:2] 2
+ * A PMC[2:0] 0
+ * A PMC[2:0] 1
+ * A PMC[2:0] FAIL
+ */
+ if (new_weight == check_weight) {
+ check_fail = true;
+ } else if (new_weight < check_weight) {
+ if ((new_mask | check_mask) != check_mask &&
+ fls64(new_mask) > fls64(check_mask))
+ check_fail = true;
+ } else {
+ if ((new_mask | check_mask) != new_mask &&
+ fls64(new_mask) < fls64(check_mask))
+ check_fail = true;
+ }
+
+ if (check_fail) {
+ pr_info("The two events 0x%llx and 0x%llx may not be "
+ "fully scheduled under some circumstances as "
+ "%s.\n",
+ c1->code, c2->code, dyn_constr_type_name[type]);
+ }
+ }
+ }
+}
+
+static void intel_pmu_check_dyn_constr(struct pmu *pmu,
+ struct event_constraint *constr,
+ u64 cntr_mask)
+{
+ enum dyn_constr_type i;
+ u64 mask;
+
+ for (i = DYN_CONSTR_NONE; i < DYN_CONSTR_MAX; i++) {
+ mask = 0;
+ switch (i) {
+ case DYN_CONSTR_NONE:
+ mask = cntr_mask;
+ break;
+ case DYN_CONSTR_BR_CNTR:
+ if (x86_pmu.flags & PMU_FL_BR_CNTR)
+ mask = x86_pmu.lbr_counters;
+ break;
+ case DYN_CONSTR_ACR_CNTR:
+ mask = hybrid(pmu, acr_cntr_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
+ break;
+ case DYN_CONSTR_ACR_CAUSE:
+ if (hybrid(pmu, acr_cntr_mask64) == hybrid(pmu, acr_cause_mask64))
+ continue;
+ mask = hybrid(pmu, acr_cause_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
+ break;
+ case DYN_CONSTR_PEBS:
+ if (x86_pmu.arch_pebs)
+ mask = hybrid(pmu, arch_pebs_cap).counters;
+ break;
+ case DYN_CONSTR_PDIST:
+ if (x86_pmu.arch_pebs)
+ mask = hybrid(pmu, arch_pebs_cap).pdists;
+ break;
+ default:
+ pr_warn("Unsupported dynamic constraint type %d\n", i);
+ }
+
+ if (mask)
+ __intel_pmu_check_dyn_constr(constr, i, mask);
+ }
+}
+
+static void intel_pmu_check_event_constraints_all(struct pmu *pmu)
+{
+ struct event_constraint *event_constraints = hybrid(pmu, event_constraints);
+ struct event_constraint *pebs_constraints = hybrid(pmu, pebs_constraints);
+ u64 cntr_mask = hybrid(pmu, cntr_mask64);
+ u64 fixed_cntr_mask = hybrid(pmu, fixed_cntr_mask64);
+ u64 intel_ctrl = hybrid(pmu, intel_ctrl);
+
+ intel_pmu_check_event_constraints(event_constraints, cntr_mask,
+ fixed_cntr_mask, intel_ctrl);
+
+ if (event_constraints)
+ intel_pmu_check_dyn_constr(pmu, event_constraints, cntr_mask);
+
+ if (pebs_constraints)
+ intel_pmu_check_dyn_constr(pmu, pebs_constraints, cntr_mask);
+}
+
+static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
+
+static inline bool intel_pmu_broken_perf_cap(void)
+{
+ /* The Perf Metric (Bit 15) is always cleared */
+ if (boot_cpu_data.x86_vfm == INTEL_METEORLAKE ||
+ boot_cpu_data.x86_vfm == INTEL_METEORLAKE_L)
+ return true;
+
+ return false;
+}
+
+static inline void __intel_update_pmu_caps(struct pmu *pmu)
+{
+ struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id());
+
+ if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
+ dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
+}
+
+static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
+{
+ u64 caps = hybrid(pmu, arch_pebs_cap).caps;
+
+ x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
+ if (caps & ARCH_PEBS_LBR)
+ x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
+ if (caps & ARCH_PEBS_CNTR_MASK)
+ x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
+
+ if (!(caps & ARCH_PEBS_AUX))
+ x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
+ if (!(caps & ARCH_PEBS_GPR)) {
+ x86_pmu.large_pebs_flags &=
+ ~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
+ }
+}
+
+#define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
+
+static void update_pmu_cap(struct pmu *pmu)
+{
+ unsigned int eax, ebx, ecx, edx;
+ union cpuid35_eax eax_0;
+ union cpuid35_ebx ebx_0;
+ u64 cntrs_mask = 0;
+ u64 pebs_mask = 0;
+ u64 pdists_mask = 0;
+
+ cpuid(ARCH_PERFMON_EXT_LEAF, &eax_0.full, &ebx_0.full, &ecx, &edx);
+
+ if (ebx_0.split.umask2)
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
+ if (ebx_0.split.eq)
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;
+
+ if (eax_0.split.cntr_subleaf) {
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
+ &eax, &ebx, &ecx, &edx);
+ hybrid(pmu, cntr_mask64) = eax;
+ hybrid(pmu, fixed_cntr_mask64) = ebx;
+ cntrs_mask = counter_mask(eax, ebx);
+ }
+
+ if (eax_0.split.acr_subleaf) {
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
+ &eax, &ebx, &ecx, &edx);
+ /* The mask of the counters which can be reloaded */
+ hybrid(pmu, acr_cntr_mask64) = counter_mask(eax, ebx);
+ /* The mask of the counters which can cause a reload of reloadable counters */
+ hybrid(pmu, acr_cause_mask64) = counter_mask(ecx, edx);
+ }
+
+ /* Bits[5:4] should be set simultaneously if arch-PEBS is supported */
+ if (eax_0.split.pebs_caps_subleaf && eax_0.split.pebs_cnts_subleaf) {
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_CAP_LEAF,
+ &eax, &ebx, &ecx, &edx);
+ hybrid(pmu, arch_pebs_cap).caps = (u64)ebx << 32;
+
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_COUNTER_LEAF,
+ &eax, &ebx, &ecx, &edx);
+ pebs_mask = counter_mask(eax, ecx);
+ pdists_mask = counter_mask(ebx, edx);
+ hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
+ hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
+
+ if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
+ x86_pmu.arch_pebs = 0;
+ } else {
+ __intel_update_pmu_caps(pmu);
+ __intel_update_large_pebs_flags(pmu);
+ }
+ } else {
+ WARN_ON(x86_pmu.arch_pebs == 1);
+ x86_pmu.arch_pebs = 0;
+ }
+
+ if (!intel_pmu_broken_perf_cap()) {
+ /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities);
+ }
+}
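
The shape of the leaf 0x23 enumeration consumed above, summarized (register layout as used by this function):

/*
 * counter sub-leaf:  EAX = GP counter bitmap, EBX = fixed counter bitmap
 * ACR sub-leaf:      EAX/EBX = reloadable counters,
 *                    ECX/EDX = counters that can cause a reload
 * PEBS sub-leaves:   capability bits (EBX), then PEBS- and
 *                    PDist-capable counter bitmaps
 */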
+
+static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
+{
+ intel_pmu_check_counters_mask(&pmu->cntr_mask64, &pmu->fixed_cntr_mask64,
+ &pmu->intel_ctrl);
+ pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
+ pmu->unconstrained = (struct event_constraint)
+ __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+ 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
+
+ if (pmu->intel_cap.perf_metrics)
+ pmu->intel_ctrl |= GLOBAL_CTRL_EN_PERF_METRICS;
+ else
+ pmu->intel_ctrl &= ~GLOBAL_CTRL_EN_PERF_METRICS;
+
+ intel_pmu_check_event_constraints_all(&pmu->pmu);
+
+ intel_pmu_check_extra_regs(pmu->extra_regs);
+}
+
+static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ enum intel_cpu_type cpu_type = c->topo.intel_type;
+ int i;
+
+ /*
+ * This is running on a CPU model that is known to have hybrid
+ * configurations. But the CPU told us it is not hybrid, shame
+ * on it. There should be a fixup function provided for these
+ * troublesome CPUs (->get_hybrid_cpu_type).
+ */
+ if (cpu_type == INTEL_CPU_TYPE_UNKNOWN) {
+ if (x86_pmu.get_hybrid_cpu_type)
+ cpu_type = x86_pmu.get_hybrid_cpu_type();
+ else
+ return NULL;
+ }
+
+ /*
+ * This essentially just maps between the 'hybrid_cpu_type' and
+ * 'hybrid_pmu_type' enums, except for the ARL-H processor, which
+ * needs to compare the atom uarch native id, since ARL-H contains
+ * two different atom uarchs.
+ */
+ for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
+ enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
+ u32 native_id;
+
+ if (cpu_type == INTEL_CPU_TYPE_CORE && pmu_type == hybrid_big)
+ return &x86_pmu.hybrid_pmu[i];
+ if (cpu_type == INTEL_CPU_TYPE_ATOM) {
+ if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small)
+ return &x86_pmu.hybrid_pmu[i];
+
+ native_id = c->topo.intel_native_model_id;
+ if (native_id == INTEL_ATOM_SKT_NATIVE_ID && pmu_type == hybrid_small)
+ return &x86_pmu.hybrid_pmu[i];
+ if (native_id == INTEL_ATOM_CMT_NATIVE_ID && pmu_type == hybrid_tiny)
+ return &x86_pmu.hybrid_pmu[i];
+ }
+ }
+
+ return NULL;
+}
+
+static bool init_hybrid_pmu(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct x86_hybrid_pmu *pmu = find_hybrid_pmu_for_cpu();
+
+ if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) {
+ cpuc->pmu = NULL;
+ return false;
+ }
+
+ /* Only check and dump the PMU information for the first CPU */
+ if (!cpumask_empty(&pmu->supported_cpus))
+ goto end;
+
+ if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
+ update_pmu_cap(&pmu->pmu);
+
+ intel_pmu_check_hybrid_pmus(pmu);
+
+ if (!check_hw_exists(&pmu->pmu, pmu->cntr_mask, pmu->fixed_cntr_mask))
+ return false;
+
+ pr_info("%s PMU driver: ", pmu->name);
+
+ pr_cont("\n");
+
+ x86_pmu_show_pmu_cap(&pmu->pmu);
+
+end:
+ cpumask_set_cpu(cpu, &pmu->supported_cpus);
+ cpuc->pmu = &pmu->pmu;
+
+ return true;
+}
+
static void intel_pmu_cpu_starting(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
int core_id = topology_core_id(cpu);
int i;
+ if (is_hybrid() && !init_hybrid_pmu(cpu))
+ return;
+
init_debug_store_on_cpu(cpu);
+ init_arch_pebs_on_cpu(cpu);
/*
- * Deal with CPUs that don't clear their LBRs on power-up.
+ * Deal with CPUs that don't clear their LBRs on power-up, and that may
+ * even boot with LBRs enabled.
*/
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr)
+ msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT);
intel_pmu_lbr_reset();
cpuc->lbr_sel = NULL;
- flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+ if (x86_pmu.flags & PMU_FL_TFA) {
+ WARN_ON_ONCE(cpuc->tfa_shadow);
+ cpuc->tfa_shadow = ~0ULL;
+ intel_set_tfa(cpuc, false);
+ }
+
+ if (x86_pmu.version > 1)
+ flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+
+ /*
+ * Disable perf metrics if any added CPU doesn't support it.
+ *
+ * Turn off the check for a hybrid architecture, because the
+ * architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicates
+ * the architectural features. Perf metrics is a model-specific
+ * feature for now. The corresponding bit should always be 0 on
+ * a hybrid platform, e.g., Alder Lake.
+ */
+ if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) {
+ union perf_capabilities perf_cap;
+
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
+ if (!perf_cap.perf_metrics) {
+ x86_pmu.intel_cap.perf_metrics = 0;
+ x86_pmu.intel_ctrl &= ~GLOBAL_CTRL_EN_PERF_METRICS;
+ }
+ }
+
+ __intel_update_pmu_caps(cpuc->pmu);
if (!cpuc->shared_regs)
return;
@@ -3362,9 +5870,8 @@ static void intel_pmu_cpu_starting(int cpu)
}
}
-static void free_excl_cntrs(int cpu)
+static void free_excl_cntrs(struct cpu_hw_events *cpuc)
{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_excl_cntrs *c;
c = cpuc->excl_cntrs;
@@ -3372,14 +5879,20 @@ static void free_excl_cntrs(int cpu)
if (c->core_id == -1 || --c->refcnt == 0)
kfree(c);
cpuc->excl_cntrs = NULL;
- kfree(cpuc->constraint_list);
- cpuc->constraint_list = NULL;
}
+
+ kfree(cpuc->constraint_list);
+ cpuc->constraint_list = NULL;
}
static void intel_pmu_cpu_dying(int cpu)
{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ fini_debug_store_on_cpu(cpu);
+ fini_arch_pebs_on_cpu(cpu);
+}
+
+void intel_cpuc_finish(struct cpu_hw_events *cpuc)
+{
struct intel_shared_regs *pc;
pc = cpuc->shared_regs;
@@ -3389,16 +5902,53 @@ static void intel_pmu_cpu_dying(int cpu)
cpuc->shared_regs = NULL;
}
- free_excl_cntrs(cpu);
+ free_excl_cntrs(cpuc);
+}
- fini_debug_store_on_cpu(cpu);
+static void intel_pmu_cpu_dead(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+ release_arch_pebs_buf_on_cpu(cpu);
+ intel_cpuc_finish(cpuc);
+
+ if (is_hybrid() && cpuc->pmu)
+ cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus);
}
-static void intel_pmu_sched_task(struct perf_event_context *ctx,
- bool sched_in)
+static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
- intel_pmu_pebs_sched_task(ctx, sched_in);
- intel_pmu_lbr_sched_task(ctx, sched_in);
+ intel_pmu_pebs_sched_task(pmu_ctx, sched_in);
+ intel_pmu_lbr_sched_task(pmu_ctx, task, sched_in);
+}
+
+static int intel_pmu_check_period(struct perf_event *event, u64 value)
+{
+ return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
+}
+
+static void intel_aux_output_init(void)
+{
+ /* Refer also to intel_pmu_aux_output_match() */
+ if (x86_pmu.intel_cap.pebs_output_pt_available)
+ x86_pmu.assign = intel_pmu_assign_event;
+}
+
+static int intel_pmu_aux_output_match(struct perf_event *event)
+{
+ /* intel_pmu_assign_event() is needed; refer to intel_aux_output_init() */
+ if (!x86_pmu.intel_cap.pebs_output_pt_available)
+ return 0;
+
+ return is_intel_pt_event(event);
+}
+
+static void intel_pmu_filter(struct pmu *pmu, int cpu, bool *ret)
+{
+ struct x86_hybrid_pmu *hpmu = hybrid_pmu(pmu);
+
+ *ret = !cpumask_test_cpu(cpu, &hpmu->supported_cpus);
}
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
@@ -3407,6 +5957,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");
PMU_FORMAT_ATTR(frontend, "config1:0-23");
+PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
+
static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -3415,12 +5967,33 @@ static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_any.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
+ NULL,
+};
+
+static struct attribute *hsw_format_attr[] = {
&format_attr_in_tx.attr,
&format_attr_in_tx_cp.attr,
+ &format_attr_offcore_rsp.attr,
+ &format_attr_ldlat.attr,
+ NULL
+};
- &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
- &format_attr_ldlat.attr, /* PEBS load latency */
- NULL,
+static struct attribute *nhm_format_attr[] = {
+ &format_attr_offcore_rsp.attr,
+ &format_attr_ldlat.attr,
+ NULL
+};
+
+static struct attribute *slm_format_attr[] = {
+ &format_attr_offcore_rsp.attr,
+ NULL
+};
+
+static struct attribute *cmt_format_attr[] = {
+ &format_attr_offcore_rsp.attr,
+ &format_attr_ldlat.attr,
+ &format_attr_snoop_rsp.attr,
+ NULL
};
static struct attribute *skl_format_attr[] = {
@@ -3435,14 +6008,15 @@ static __initconst const struct x86_pmu core_pmu = {
.enable_all = core_pmu_enable_all,
.enable = core_pmu_enable_event,
.disable = x86_pmu_disable_event,
- .hw_config = x86_pmu_hw_config,
+ .hw_config = core_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .fixedctr = MSR_ARCH_PERFMON_FIXED_CTR0,
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
- .free_running_flags = PEBS_FREERUNNING_FLAGS,
+ .large_pebs_flags = LARGE_PEBS_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32-bit width,
@@ -3466,6 +6040,14 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
+ .check_period = intel_pmu_check_period,
+
+ .lbr_reset = intel_pmu_lbr_reset_64,
+ .lbr_read = intel_pmu_lbr_read_64,
+ .lbr_save = intel_pmu_lbr_save,
+ .lbr_restore = intel_pmu_lbr_restore,
};
static __initconst const struct x86_pmu intel_pmu = {
@@ -3477,14 +6059,18 @@ static __initconst const struct x86_pmu intel_pmu = {
.disable = intel_pmu_disable_event,
.add = intel_pmu_add_event,
.del = intel_pmu_del_event,
+ .read = intel_pmu_read_event,
+ .set_period = intel_pmu_set_period,
+ .update = intel_pmu_update,
.hw_config = intel_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .fixedctr = MSR_ARCH_PERFMON_FIXED_CTR0,
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
- .free_running_flags = PEBS_FREERUNNING_FLAGS,
+ .large_pebs_flags = LARGE_PEBS_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
@@ -3501,8 +6087,32 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
+
+ .check_period = intel_pmu_check_period,
+
+ .aux_output_match = intel_pmu_aux_output_match,
+
+ .lbr_reset = intel_pmu_lbr_reset_64,
+ .lbr_read = intel_pmu_lbr_read_64,
+ .lbr_save = intel_pmu_lbr_save,
+ .lbr_restore = intel_pmu_lbr_restore,
+
+ /*
+ * SMM has access to all 4 rings and while traditionally SMM code only
+ * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM.
+ *
+ * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction
+ * between SMM or not, this results in what should be pure userspace
+ * counters including SMM data.
+ *
+ * This is a clear privilege issue, therefore globally disable
+ * counting SMM by default.
+ */
+ .attr_freeze_on_smi = 1,
};
static __init void intel_clovertown_quirk(void)
@@ -3527,40 +6137,60 @@ static __init void intel_clovertown_quirk(void)
* these chips.
*/
pr_warn("PEBS disabled due to CPU errata\n");
- x86_pmu.pebs = 0;
+ x86_pmu.ds_pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
-static int intel_snb_pebs_broken(int cpu)
+static const struct x86_cpu_id isolation_ucodes[] = {
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL, 3, 3, 0x0000001f),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_L, 1, 1, 0x0000001e),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_G, 1, 1, 0x00000015),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 2, 2, 0x00000037),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 4, 4, 0x0000000a),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL, 4, 4, 0x00000023),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_G, 1, 1, 0x00000014),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 2, 2, 0x00000010),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 3, 3, 0x07000009),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 4, 4, 0x0f000009),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 5, 5, 0x0e000002),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_X, 1, 1, 0x0b000014),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 3, 3, 0x00000021),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 4, 7, 0x00000000),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 11, 11, 0x00000000),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_L, 3, 3, 0x0000007c),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE, 3, 3, 0x0000007c),
+ X86_MATCH_VFM_STEPS(INTEL_KABYLAKE, 9, 13, 0x0000004e),
+ X86_MATCH_VFM_STEPS(INTEL_KABYLAKE_L, 9, 12, 0x0000004e),
+ {}
+};
+
+static void intel_check_pebs_isolation(void)
{
- u32 rev = UINT_MAX; /* default to broken for unknown models */
+ x86_pmu.pebs_no_isolation = !x86_match_min_microcode_rev(isolation_ucodes);
+}
- switch (cpu_data(cpu).x86_model) {
- case INTEL_FAM6_SANDYBRIDGE:
- rev = 0x28;
- break;
+static __init void intel_pebs_isolation_quirk(void)
+{
+ WARN_ON_ONCE(x86_pmu.check_microcode);
+ x86_pmu.check_microcode = intel_check_pebs_isolation;
+ intel_check_pebs_isolation();
+}
- case INTEL_FAM6_SANDYBRIDGE_X:
- switch (cpu_data(cpu).x86_mask) {
- case 6: rev = 0x618; break;
- case 7: rev = 0x70c; break;
- }
- }
+static const struct x86_cpu_id pebs_ucodes[] = {
+ X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE, 7, 7, 0x00000028),
+ X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X, 6, 6, 0x00000618),
+ X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X, 7, 7, 0x0000070c),
+ {}
+};
- return (cpu_data(cpu).microcode < rev);
+static bool intel_snb_pebs_broken(void)
+{
+ return !x86_match_min_microcode_rev(pebs_ucodes);
}
static void intel_snb_check_microcode(void)
{
- int pebs_broken = 0;
- int cpu;
-
- for_each_online_cpu(cpu) {
- if ((pebs_broken = intel_snb_pebs_broken(cpu)))
- break;
- }
-
- if (pebs_broken == x86_pmu.pebs_broken)
+ if (intel_snb_pebs_broken() == x86_pmu.pebs_broken)
return;
/*
@@ -3591,28 +6221,35 @@ static bool check_msr(unsigned long msr, u64 mask)
u64 val_old, val_new, val_tmp;
/*
+ * Disable the check for real HW, so we don't
+ * mess with potentially enabled registers:
+ */
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
+
+ /*
* Read the current value, change it and read it back to see if it
* matches, this is needed to detect certain hardware emulators
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
- if (rdmsrl_safe(msr, &val_old))
+ if (rdmsrq_safe(msr, &val_old))
return false;
/*
- * Only change the bits which can be updated by wrmsrl.
+ * Only change the bits which can be updated by wrmsrq.
*/
val_tmp = val_old ^ mask;
if (is_lbr_from(msr))
val_tmp = lbr_from_signext_quirk_wr(val_tmp);
- if (wrmsrl_safe(msr, val_tmp) ||
- rdmsrl_safe(msr, &val_new))
+ if (wrmsrq_safe(msr, val_tmp) ||
+ rdmsrq_safe(msr, &val_new))
return false;
/*
- * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
- * should equal rdmsrl()'s even with the quirk.
+ * Quirk only affects validation in wrmsr(), so wrmsrq()'s value
+ * should equal rdmsrq()'s even with the quirk.
*/
if (val_new != val_tmp)
return false;
@@ -3623,7 +6260,7 @@ static bool check_msr(unsigned long msr, u64 mask)
/* Here it's sure that the MSR can be safely accessed.
* Restore the old value and return.
*/
- wrmsrl(msr, val_old);
+ wrmsrq(msr, val_old);
return true;
}
@@ -3650,7 +6287,7 @@ static __init void intel_arch_events_quirk(void)
{
int bit;
- /* disable event that reported as not presend by cpuid */
+	/* disable events that are reported as not present by CPUID */
for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
pr_warn("CPUID marked event: \'%s\' unavailable\n",
@@ -3716,6 +6353,23 @@ EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
static struct attribute *hsw_events_attrs[] = {
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
+ NULL
+};
+
+static struct attribute *hsw_mem_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ NULL,
+};
+
+static struct attribute *hsw_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_commit),
EVENT_PTR(tx_abort),
@@ -3728,16 +6382,81 @@ static struct attribute *hsw_events_attrs[] = {
EVENT_PTR(el_conflict),
EVENT_PTR(cycles_t),
EVENT_PTR(cycles_ct),
+ NULL
+};
+
+EVENT_ATTR_STR(tx-capacity-read, tx_capacity_read, "event=0x54,umask=0x80");
+EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-capacity-read, el_capacity_read, "event=0x54,umask=0x80");
+EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");
+
+static struct attribute *icl_events_attrs[] = {
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
- EVENT_PTR(td_slots_issued),
- EVENT_PTR(td_slots_retired),
- EVENT_PTR(td_fetch_bubbles),
- EVENT_PTR(td_total_slots),
- EVENT_PTR(td_total_slots_scale),
- EVENT_PTR(td_recovery_bubbles),
- EVENT_PTR(td_recovery_bubbles_scale),
- NULL
+ NULL,
+};
+
+static struct attribute *icl_td_events_attrs[] = {
+ EVENT_PTR(slots),
+ EVENT_PTR(td_retiring),
+ EVENT_PTR(td_bad_spec),
+ EVENT_PTR(td_fe_bound),
+ EVENT_PTR(td_be_bound),
+ NULL,
+};
+
+static struct attribute *icl_tsx_events_attrs[] = {
+ EVENT_PTR(tx_start),
+ EVENT_PTR(tx_abort),
+ EVENT_PTR(tx_commit),
+ EVENT_PTR(tx_capacity_read),
+ EVENT_PTR(tx_capacity_write),
+ EVENT_PTR(tx_conflict),
+ EVENT_PTR(el_start),
+ EVENT_PTR(el_abort),
+ EVENT_PTR(el_commit),
+ EVENT_PTR(el_capacity_read),
+ EVENT_PTR(el_capacity_write),
+ EVENT_PTR(el_conflict),
+ EVENT_PTR(cycles_t),
+ EVENT_PTR(cycles_ct),
+ NULL,
+};
+
+EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2");
+EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82");
+
+static struct attribute *glc_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_spr),
+ EVENT_PTR(mem_ld_aux),
+ NULL,
+};
+
+static struct attribute *glc_td_events_attrs[] = {
+ EVENT_PTR(slots),
+ EVENT_PTR(td_retiring),
+ EVENT_PTR(td_bad_spec),
+ EVENT_PTR(td_fe_bound),
+ EVENT_PTR(td_be_bound),
+ EVENT_PTR(td_heavy_ops),
+ EVENT_PTR(td_br_mispredict),
+ EVENT_PTR(td_fetch_lat),
+ EVENT_PTR(td_mem_bound),
+ NULL,
+};
+
+static struct attribute *glc_tsx_events_attrs[] = {
+ EVENT_PTR(tx_start),
+ EVENT_PTR(tx_abort),
+ EVENT_PTR(tx_commit),
+ EVENT_PTR(tx_capacity_read),
+ EVENT_PTR(tx_capacity_write),
+ EVENT_PTR(tx_conflict),
+ EVENT_PTR(cycles_t),
+ EVENT_PTR(cycles_ct),
+ NULL,
};
static ssize_t freeze_on_smi_show(struct device *cdev,
@@ -3770,41 +6489,762 @@ static ssize_t freeze_on_smi_store(struct device *cdev,
x86_pmu.attr_freeze_on_smi = val;
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu(flip_smm_bit, &val, 1);
- put_online_cpus();
+ cpus_read_unlock();
done:
mutex_unlock(&freeze_on_smi_mutex);
return count;
}
+static void update_tfa_sched(void *ignored)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+	 * Check if PMC3 is used and, if so, force a reschedule for all
+	 * event types in all contexts.
+ */
+ if (test_bit(3, cpuc->active_mask))
+ perf_pmu_resched(x86_get_pmu(smp_processor_id()));
+}
+
+static ssize_t show_sysctl_tfa(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 40, "%d\n", allow_tsx_force_abort);
+}
+
+static ssize_t set_sysctl_tfa(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ bool val;
+ ssize_t ret;
+
+ ret = kstrtobool(buf, &val);
+ if (ret)
+ return ret;
+
+ /* no change */
+ if (val == allow_tsx_force_abort)
+ return count;
+
+ allow_tsx_force_abort = val;
+
+ cpus_read_lock();
+ on_each_cpu(update_tfa_sched, NULL, 1);
+ cpus_read_unlock();
+
+ return count;
+}
+
static DEVICE_ATTR_RW(freeze_on_smi);
+static ssize_t branches_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
+}
+
+static DEVICE_ATTR_RO(branches);
+
+static ssize_t branch_counter_nr_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters));
+}
+
+static DEVICE_ATTR_RO(branch_counter_nr);
+
+static ssize_t branch_counter_width_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS);
+}
+
+static DEVICE_ATTR_RO(branch_counter_width);
+
+static struct attribute *lbr_attrs[] = {
+ &dev_attr_branches.attr,
+ &dev_attr_branch_counter_nr.attr,
+ &dev_attr_branch_counter_width.attr,
+ NULL
+};
+
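+/*
+ * "branches" (index 0) only requires an LBR stack; the branch counter
+ * attributes additionally require the PMU_FL_BR_CNTR feature flag.
+ */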
+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ /* branches */
+ if (i == 0)
+ return x86_pmu.lbr_nr ? attr->mode : 0;
+
+ return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0;
+}
+
+static char pmu_name_str[30];
+
+static DEVICE_STRING_ATTR_RO(pmu_name, 0444, pmu_name_str);
+
+static struct attribute *intel_pmu_caps_attrs[] = {
+ &dev_attr_pmu_name.attr.attr,
+ NULL
+};
+
+static DEVICE_ATTR(allow_tsx_force_abort, 0644,
+ show_sysctl_tfa,
+ set_sysctl_tfa);
+
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
+ &dev_attr_allow_tsx_force_abort.attr,
+ NULL,
+};
+
+static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ if (attr == &dev_attr_allow_tsx_force_abort.attr)
+ return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+ return attr->mode;
+}
+
+static umode_t
+tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0;
+}
+
+static umode_t
+pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return intel_pmu_has_pebs() ? attr->mode : 0;
+}
+
+static umode_t
+mem_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ if (attr == &event_attr_mem_ld_aux.attr.attr)
+ return x86_pmu.flags & PMU_FL_MEM_LOADS_AUX ? attr->mode : 0;
+
+ return pebs_is_visible(kobj, attr, i);
+}
+
+static umode_t
+exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return x86_pmu.version >= 2 ? attr->mode : 0;
+}
+
+static umode_t
+td_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ /*
+ * Hide the perf metrics topdown events
+ * if the feature is not enumerated.
+ */
+ if (x86_pmu.num_topdown_events)
+ return x86_pmu.intel_cap.perf_metrics ? attr->mode : 0;
+
+ return attr->mode;
+}
+
+PMU_FORMAT_ATTR(acr_mask, "config2:0-63");
+
+static struct attribute *format_acr_attrs[] = {
+ &format_attr_acr_mask.attr,
+ NULL
+};
+
+static umode_t
+acr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+
+ return intel_pmu_has_acr(dev_get_drvdata(dev)) ? attr->mode : 0;
+}
+
+static struct attribute_group group_events_td = {
+ .name = "events",
+ .is_visible = td_is_visible,
+};
+
+static struct attribute_group group_events_mem = {
+ .name = "events",
+ .is_visible = mem_is_visible,
+};
+
+static struct attribute_group group_events_tsx = {
+ .name = "events",
+ .is_visible = tsx_is_visible,
+};
+
+static struct attribute_group group_caps_gen = {
+ .name = "caps",
+ .attrs = intel_pmu_caps_attrs,
+};
+
+static struct attribute_group group_caps_lbr = {
+ .name = "caps",
+ .attrs = lbr_attrs,
+ .is_visible = lbr_is_visible,
+};
+
+static struct attribute_group group_format_extra = {
+ .name = "format",
+ .is_visible = exra_is_visible,
+};
+
+static struct attribute_group group_format_extra_skl = {
+ .name = "format",
+ .is_visible = exra_is_visible,
+};
+
+static struct attribute_group group_format_evtsel_ext = {
+ .name = "format",
+ .attrs = format_evtsel_ext_attrs,
+ .is_visible = evtsel_ext_is_visible,
+};
+
+static struct attribute_group group_format_acr = {
+ .name = "format",
+ .attrs = format_acr_attrs,
+ .is_visible = acr_is_visible,
+};
+
+static struct attribute_group group_default = {
+ .attrs = intel_pmu_attrs,
+ .is_visible = default_is_visible,
+};
+
+static const struct attribute_group *attr_update[] = {
+ &group_events_td,
+ &group_events_mem,
+ &group_events_tsx,
+ &group_caps_gen,
+ &group_caps_lbr,
+ &group_format_extra,
+ &group_format_extra_skl,
+ &group_format_evtsel_ext,
+ &group_format_acr,
+ &group_default,
+ NULL,
+};
+
+EVENT_ATTR_STR_HYBRID(slots, slots_adl, "event=0x00,umask=0x4", hybrid_big);
+EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_adl, "event=0xc2,umask=0x0;event=0x00,umask=0x80", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-bad-spec, td_bad_spec_adl, "event=0x73,umask=0x0;event=0x00,umask=0x81", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_adl, "event=0x71,umask=0x0;event=0x00,umask=0x82", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_adl, "event=0x74,umask=0x0;event=0x00,umask=0x83", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-heavy-ops, td_heavy_ops_adl, "event=0x00,umask=0x84", hybrid_big);
+EVENT_ATTR_STR_HYBRID(topdown-br-mispredict, td_br_mis_adl, "event=0x00,umask=0x85", hybrid_big);
+EVENT_ATTR_STR_HYBRID(topdown-fetch-lat, td_fetch_lat_adl, "event=0x00,umask=0x86", hybrid_big);
+EVENT_ATTR_STR_HYBRID(topdown-mem-bound, td_mem_bound_adl, "event=0x00,umask=0x87", hybrid_big);
+
+static struct attribute *adl_hybrid_events_attrs[] = {
+ EVENT_PTR(slots_adl),
+ EVENT_PTR(td_retiring_adl),
+ EVENT_PTR(td_bad_spec_adl),
+ EVENT_PTR(td_fe_bound_adl),
+ EVENT_PTR(td_be_bound_adl),
+ EVENT_PTR(td_heavy_ops_adl),
+ EVENT_PTR(td_br_mis_adl),
+ EVENT_PTR(td_fetch_lat_adl),
+ EVENT_PTR(td_mem_bound_adl),
+ NULL,
+};
+
+EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_lnl, "event=0xc2,umask=0x02;event=0x00,umask=0x80", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_lnl, "event=0x9c,umask=0x01;event=0x00,umask=0x82", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_lnl, "event=0xa4,umask=0x02;event=0x00,umask=0x83", hybrid_big_small);
+
+static struct attribute *lnl_hybrid_events_attrs[] = {
+ EVENT_PTR(slots_adl),
+ EVENT_PTR(td_retiring_lnl),
+ EVENT_PTR(td_bad_spec_adl),
+ EVENT_PTR(td_fe_bound_lnl),
+ EVENT_PTR(td_be_bound_lnl),
+ EVENT_PTR(td_heavy_ops_adl),
+ EVENT_PTR(td_br_mis_adl),
+ EVENT_PTR(td_fetch_lat_adl),
+ EVENT_PTR(td_mem_bound_adl),
+ NULL
+};
+
+/* The event string must be in PMU IDX order. */
+EVENT_ATTR_STR_HYBRID(topdown-retiring,
+ td_retiring_arl_h,
+ "event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(topdown-bad-spec,
+ td_bad_spec_arl_h,
+ "event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(topdown-fe-bound,
+ td_fe_bound_arl_h,
+ "event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(topdown-be-bound,
+ td_be_bound_arl_h,
+ "event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0",
+ hybrid_big_small_tiny);
+
+static struct attribute *arl_h_hybrid_events_attrs[] = {
+ EVENT_PTR(slots_adl),
+ EVENT_PTR(td_retiring_arl_h),
+ EVENT_PTR(td_bad_spec_arl_h),
+ EVENT_PTR(td_fe_bound_arl_h),
+ EVENT_PTR(td_be_bound_arl_h),
+ EVENT_PTR(td_heavy_ops_adl),
+ EVENT_PTR(td_br_mis_adl),
+ EVENT_PTR(td_fetch_lat_adl),
+ EVENT_PTR(td_mem_bound_adl),
+ NULL,
+};
+
+/* Must be in IDX order */
+EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(mem-loads-aux, mem_ld_aux_adl, "event=0x03,umask=0x82", hybrid_big);
+
+static struct attribute *adl_hybrid_mem_attrs[] = {
+ EVENT_PTR(mem_ld_adl),
+ EVENT_PTR(mem_st_adl),
+ EVENT_PTR(mem_ld_aux_adl),
NULL,
};
+static struct attribute *mtl_hybrid_mem_attrs[] = {
+ EVENT_PTR(mem_ld_adl),
+ EVENT_PTR(mem_st_adl),
+ NULL
+};
+
+EVENT_ATTR_STR_HYBRID(mem-loads,
+ mem_ld_arl_h,
+ "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(mem-stores,
+ mem_st_arl_h,
+ "event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6",
+ hybrid_big_small_tiny);
+
+static struct attribute *arl_h_hybrid_mem_attrs[] = {
+ EVENT_PTR(mem_ld_arl_h),
+ EVENT_PTR(mem_st_arl_h),
+ NULL,
+};
+
+EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
+EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
+EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
+EVENT_ATTR_STR_HYBRID(tx-conflict, tx_conflict_adl, "event=0x54,umask=0x1", hybrid_big);
+EVENT_ATTR_STR_HYBRID(cycles-t, cycles_t_adl, "event=0x3c,in_tx=1", hybrid_big);
+EVENT_ATTR_STR_HYBRID(cycles-ct, cycles_ct_adl, "event=0x3c,in_tx=1,in_tx_cp=1", hybrid_big);
+EVENT_ATTR_STR_HYBRID(tx-capacity-read, tx_capacity_read_adl, "event=0x54,umask=0x80", hybrid_big);
+EVENT_ATTR_STR_HYBRID(tx-capacity-write, tx_capacity_write_adl, "event=0x54,umask=0x2", hybrid_big);
+
+static struct attribute *adl_hybrid_tsx_attrs[] = {
+ EVENT_PTR(tx_start_adl),
+ EVENT_PTR(tx_abort_adl),
+ EVENT_PTR(tx_commit_adl),
+ EVENT_PTR(tx_capacity_read_adl),
+ EVENT_PTR(tx_capacity_write_adl),
+ EVENT_PTR(tx_conflict_adl),
+ EVENT_PTR(cycles_t_adl),
+ EVENT_PTR(cycles_ct_adl),
+ NULL,
+};
+
+FORMAT_ATTR_HYBRID(in_tx, hybrid_big);
+FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big);
+FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny);
+FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny);
+FORMAT_ATTR_HYBRID(frontend, hybrid_big);
+
+#define ADL_HYBRID_RTM_FORMAT_ATTR \
+ FORMAT_HYBRID_PTR(in_tx), \
+ FORMAT_HYBRID_PTR(in_tx_cp)
+
+#define ADL_HYBRID_FORMAT_ATTR \
+ FORMAT_HYBRID_PTR(offcore_rsp), \
+ FORMAT_HYBRID_PTR(ldlat), \
+ FORMAT_HYBRID_PTR(frontend)
+
+static struct attribute *adl_hybrid_extra_attr_rtm[] = {
+ ADL_HYBRID_RTM_FORMAT_ATTR,
+ ADL_HYBRID_FORMAT_ATTR,
+ NULL
+};
+
+static struct attribute *adl_hybrid_extra_attr[] = {
+ ADL_HYBRID_FORMAT_ATTR,
+ NULL
+};
+
+FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny);
+
+static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
+ ADL_HYBRID_RTM_FORMAT_ATTR,
+ ADL_HYBRID_FORMAT_ATTR,
+ FORMAT_HYBRID_PTR(snoop_rsp),
+ NULL
+};
+
+static struct attribute *mtl_hybrid_extra_attr[] = {
+ ADL_HYBRID_FORMAT_ATTR,
+ FORMAT_HYBRID_PTR(snoop_rsp),
+ NULL
+};
+
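+/* Check whether a hybrid event/format attribute belongs to this PMU type. */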
+static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct x86_hybrid_pmu *pmu =
+ container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
+ struct perf_pmu_events_hybrid_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr);
+
+ return pmu->pmu_type & pmu_attr->pmu_type;
+}
+
+static umode_t hybrid_events_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ return is_attr_for_this_pmu(kobj, attr) ? attr->mode : 0;
+}
+
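+/* Return the first CPU mapped to this hybrid PMU, or -1 if there is none. */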
+static inline int hybrid_find_supported_cpu(struct x86_hybrid_pmu *pmu)
+{
+ int cpu = cpumask_first(&pmu->supported_cpus);
+
+ return (cpu >= nr_cpu_ids) ? -1 : cpu;
+}
+
+static umode_t hybrid_tsx_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct x86_hybrid_pmu *pmu =
+ container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
+ int cpu = hybrid_find_supported_cpu(pmu);
+
+ return (cpu >= 0) && is_attr_for_this_pmu(kobj, attr) && cpu_has(&cpu_data(cpu), X86_FEATURE_RTM) ? attr->mode : 0;
+}
+
+static umode_t hybrid_format_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct x86_hybrid_pmu *pmu =
+ container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
+ struct perf_pmu_format_hybrid_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr);
+ int cpu = hybrid_find_supported_cpu(pmu);
+
+ return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0;
+}
+
+static umode_t hybrid_td_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct x86_hybrid_pmu *pmu =
+ container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
+
+ if (!is_attr_for_this_pmu(kobj, attr))
+ return 0;
+
+ /* Only the big core supports perf metrics */
+ if (pmu->pmu_type == hybrid_big)
+ return pmu->intel_cap.perf_metrics ? attr->mode : 0;
+
+ return attr->mode;
+}
+
+static struct attribute_group hybrid_group_events_td = {
+ .name = "events",
+ .is_visible = hybrid_td_is_visible,
+};
+
+static struct attribute_group hybrid_group_events_mem = {
+ .name = "events",
+ .is_visible = hybrid_events_is_visible,
+};
+
+static struct attribute_group hybrid_group_events_tsx = {
+ .name = "events",
+ .is_visible = hybrid_tsx_is_visible,
+};
+
+static struct attribute_group hybrid_group_format_extra = {
+ .name = "format",
+ .is_visible = hybrid_format_is_visible,
+};
+
+static ssize_t intel_hybrid_get_attr_cpus(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct x86_hybrid_pmu *pmu =
+ container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
+
+ return cpumap_print_to_pagebuf(true, buf, &pmu->supported_cpus);
+}
+
+static DEVICE_ATTR(cpus, S_IRUGO, intel_hybrid_get_attr_cpus, NULL);
+static struct attribute *intel_hybrid_cpus_attrs[] = {
+ &dev_attr_cpus.attr,
+ NULL,
+};
+
+static struct attribute_group hybrid_group_cpus = {
+ .attrs = intel_hybrid_cpus_attrs,
+};
+
+static const struct attribute_group *hybrid_attr_update[] = {
+ &hybrid_group_events_td,
+ &hybrid_group_events_mem,
+ &hybrid_group_events_tsx,
+ &group_caps_gen,
+ &group_caps_lbr,
+ &hybrid_group_format_extra,
+ &group_format_evtsel_ext,
+ &group_format_acr,
+ &group_default,
+ &hybrid_group_cpus,
+ NULL,
+};
+
+static struct attribute *empty_attrs;
+
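+/*
+ * Clip every constraint's counter mask to the counters that CPUID actually
+ * enumerated and recompute its weight; topdown and fixed-counter pseudo
+ * events need the special handling below.
+ */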
+static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
+ u64 cntr_mask,
+ u64 fixed_cntr_mask,
+ u64 intel_ctrl)
+{
+ struct event_constraint *c;
+
+ if (!event_constraints)
+ return;
+
+ /*
+	 * The event on fixed counter 2 (REF_CYCLES) only works on that
+	 * counter, so do not extend its mask to the generic counters.
+ */
+ for_each_event_constraint(c, event_constraints) {
+ /*
+ * Don't extend the topdown slots and metrics
+ * events to the generic counters.
+ */
+ if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
+ /*
+			 * Disable the topdown slots and metrics events
+			 * if the slots event is not in CPUID.
+ */
+ if (!(INTEL_PMC_MSK_FIXED_SLOTS & intel_ctrl))
+ c->idxmsk64 = 0;
+ c->weight = hweight64(c->idxmsk64);
+ continue;
+ }
+
+ if (c->cmask == FIXED_EVENT_FLAGS) {
+			/* Disable fixed counters which are not in CPUID */
+ c->idxmsk64 &= intel_ctrl;
+
+ /*
+ * Don't extend the pseudo-encoding to the
+ * generic counters
+ */
+ if (!use_fixed_pseudo_encoding(c->code))
+ c->idxmsk64 |= cntr_mask;
+ }
+ c->idxmsk64 &= cntr_mask | (fixed_cntr_mask << INTEL_PMC_IDX_FIXED);
+ c->weight = hweight64(c->idxmsk64);
+ }
+}
+
+static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs)
+{
+ struct extra_reg *er;
+
+ /*
+	 * Accessing an extra MSR may cause #GP under certain circumstances,
+	 * e.g. KVM doesn't support the offcore MSRs.
+	 * Check all extra_regs here.
+ */
+ if (!extra_regs)
+ return;
+
+ for (er = extra_regs; er->msr; er++) {
+ er->extra_msr_access = check_msr(er->msr, 0x11UL);
+ /* Disable LBR select mapping */
+ if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
+ x86_pmu.lbr_sel_map = NULL;
+ }
+}
+
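+/* Perfmon v6 PMC MSRs are laid out at a fixed stride per counter index. */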
+static inline int intel_pmu_v6_addr_offset(int index, bool eventsel)
+{
+ return MSR_IA32_PMC_V6_STEP * index;
+}
+
+static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
+ { hybrid_small, "cpu_atom" },
+ { hybrid_big, "cpu_core" },
+ { hybrid_tiny, "cpu_lowpower" },
+};
+
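+/*
+ * Allocate one x86_hybrid_pmu per core type in @pmus and seed it with the
+ * common boot-CPU capabilities; type specific fixups are done by the caller.
+ */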
+static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
+{
+ unsigned long pmus_mask = pmus;
+ struct x86_hybrid_pmu *pmu;
+ int idx = 0, bit;
+
+ x86_pmu.num_hybrid_pmus = hweight_long(pmus_mask);
+ x86_pmu.hybrid_pmu = kcalloc(x86_pmu.num_hybrid_pmus,
+ sizeof(struct x86_hybrid_pmu),
+ GFP_KERNEL);
+ if (!x86_pmu.hybrid_pmu)
+ return -ENOMEM;
+
+ static_branch_enable(&perf_is_hybrid);
+ x86_pmu.filter = intel_pmu_filter;
+
+ for_each_set_bit(bit, &pmus_mask, ARRAY_SIZE(intel_hybrid_pmu_type_map)) {
+ pmu = &x86_pmu.hybrid_pmu[idx++];
+ pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id;
+ pmu->name = intel_hybrid_pmu_type_map[bit].name;
+
+ pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+ pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
+ pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
+ pmu->config_mask = X86_RAW_EVENT_MASK;
+ pmu->unconstrained = (struct event_constraint)
+ __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+ 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
+
+ pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
+ if (pmu->pmu_type & hybrid_small_tiny) {
+ pmu->intel_cap.perf_metrics = 0;
+ pmu->mid_ack = true;
+ } else if (pmu->pmu_type & hybrid_big) {
+ pmu->intel_cap.perf_metrics = 1;
+ pmu->late_ack = true;
+ }
+ }
+
+ return 0;
+}
+
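+/*
+ * If the fixed REF_CYCLES pseudo event is not enumerated, fall back to the
+ * CPU_CLK_UNHALTED.REF_TSC_P encoding (0x013c) for ref-cycles.
+ */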
+static __always_inline void intel_pmu_ref_cycles_ext(void)
+{
+ if (!(x86_pmu.events_maskl & (INTEL_PMC_MSK_FIXED_REF_CYCLES >> INTEL_PMC_IDX_FIXED)))
+ intel_perfmon_event_map[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x013c;
+}
+
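+/* Common setup for Golden Cove and derived P-core PMUs. */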
+static __always_inline void intel_pmu_init_glc(struct pmu *pmu)
+{
+ x86_pmu.late_ack = true;
+ x86_pmu.limit_period = glc_limit_period;
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.pebs_block = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+ x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+ x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.num_topdown_events = 8;
+ static_call_update(intel_pmu_update_topdown_event,
+ &icl_update_topdown_event);
+ static_call_update(intel_pmu_set_topdown_event_period,
+ &icl_set_topdown_event_period);
+
+ memcpy(hybrid_var(pmu, hw_cache_event_ids), glc_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hybrid_var(pmu, hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ hybrid(pmu, event_constraints) = intel_glc_event_constraints;
+ hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints;
+
+ intel_pmu_ref_cycles_ext();
+}
+
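+/* Common setup for Gracemont and derived E-core PMUs. */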
+static __always_inline void intel_pmu_init_grt(struct pmu *pmu)
+{
+ x86_pmu.mid_ack = true;
+ x86_pmu.limit_period = glc_limit_period;
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.pebs_block = true;
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+
+ memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hybrid_var(pmu, hw_cache_extra_regs), tnt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+ hybrid(pmu, event_constraints) = intel_grt_event_constraints;
+ hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints;
+ hybrid(pmu, extra_regs) = intel_grt_extra_regs;
+
+ intel_pmu_ref_cycles_ext();
+}
+
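+/* Lion Cove: Golden Cove setup plus LNC specific constraints and regs. */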
+static __always_inline void intel_pmu_init_lnc(struct pmu *pmu)
+{
+ intel_pmu_init_glc(pmu);
+ hybrid(pmu, event_constraints) = intel_lnc_event_constraints;
+ hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints;
+ hybrid(pmu, extra_regs) = intel_lnc_extra_regs;
+}
+
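+/* Skymont: Gracemont setup plus SKT constraints and ACR support. */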
+static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
+{
+ intel_pmu_init_grt(pmu);
+ hybrid(pmu, event_constraints) = intel_skt_event_constraints;
+ hybrid(pmu, extra_regs) = intel_cmt_extra_regs;
+ static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
+}
+
__init int intel_pmu_init(void)
{
+ struct attribute **extra_skl_attr = &empty_attrs;
+ struct attribute **extra_attr = &empty_attrs;
+ struct attribute **td_attr = &empty_attrs;
+ struct attribute **mem_attr = &empty_attrs;
+ struct attribute **tsx_attr = &empty_attrs;
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
- struct event_constraint *c;
- unsigned int unused;
- struct extra_reg *er;
+ unsigned int fixed_mask;
+ bool pmem = false;
int version, i;
+ char *name;
+ struct x86_hybrid_pmu *pmu;
+ /* Architectural Perfmon was introduced starting with Core "Yonah" */
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
switch (boot_cpu_data.x86) {
- case 0x6:
- return p6_pmu_init();
- case 0xb:
+ case 6:
+ if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH)
+ return p6_pmu_init();
+ break;
+ case 11:
return knc_pmu_init();
- case 0xf:
+ case 15:
return p4_pmu_init();
}
+
+ pr_cont("unsupported CPU family %d model %d ",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
}
@@ -3812,7 +7252,7 @@ __init int intel_pmu_init(void)
* Check whether the Architectural PerfMon supports
* Branch Misses Retired hw_event or not.
*/
- cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+ cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full);
if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
return -ENODEV;
@@ -3823,52 +7263,79 @@ __init int intel_pmu_init(void)
x86_pmu = intel_pmu;
x86_pmu.version = version;
- x86_pmu.num_counters = eax.split.num_counters;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0);
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length;
- x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
-
+ x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
+ x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
+ x86_pmu.config_mask = X86_RAW_EVENT_MASK;
- x86_pmu.attrs = intel_pmu_attrs;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events, when not running in a hypervisor:
*/
- if (version > 1) {
+ if (version > 1 && version < 5) {
int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
- x86_pmu.num_counters_fixed =
- max((int)edx.split.num_counters_fixed, assume);
- }
+ x86_pmu.fixed_cntr_mask64 =
+ GENMASK_ULL(max((int)edx.split.num_counters_fixed, assume) - 1, 0);
+ } else if (version >= 5)
+ x86_pmu.fixed_cntr_mask64 = fixed_mask;
if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities;
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, capabilities);
x86_pmu.intel_cap.capabilities = capabilities;
}
- intel_ds_init();
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) {
+ x86_pmu.lbr_reset = intel_pmu_lbr_reset_32;
+ x86_pmu.lbr_read = intel_pmu_lbr_read_32;
+ }
+
+ if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
+ intel_pmu_arch_lbr_init();
+
+ intel_pebs_init();
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
+ if (version >= 5) {
+ x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated;
+ if (x86_pmu.intel_cap.anythread_deprecated)
+ pr_cont(" AnyThread deprecated, ");
+ }
+
+ /*
+	 * Many features on and after v6 require dynamic constraints,
+ * e.g., Arch PEBS, ACR.
+ */
+ if (version >= 6) {
+ x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
+ x86_pmu.late_setup = intel_pmu_late_setup;
+ }
+
/*
* Install the hw-cache-events table:
*/
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_CORE_YONAH:
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_CORE_YONAH:
pr_cont("Core events, ");
+ name = "core";
break;
- case INTEL_FAM6_CORE2_MEROM:
+ case INTEL_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
- case INTEL_FAM6_CORE2_MEROM_L:
- case INTEL_FAM6_CORE2_PENRYN:
- case INTEL_FAM6_CORE2_DUNNINGTON:
+ fallthrough;
+
+ case INTEL_CORE2_MEROM_L:
+ case INTEL_CORE2_PENRYN:
+ case INTEL_CORE2_DUNNINGTON:
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3877,11 +7344,12 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_core2_event_constraints;
x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
pr_cont("Core2 events, ");
+ name = "core2";
break;
- case INTEL_FAM6_NEHALEM:
- case INTEL_FAM6_NEHALEM_EP:
- case INTEL_FAM6_NEHALEM_EX:
+ case INTEL_NEHALEM:
+ case INTEL_NEHALEM_EP:
+ case INTEL_NEHALEM_EX:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3893,8 +7361,9 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs;
+ x86_pmu.limit_period = nhm_limit_period;
- x86_pmu.cpu_events = nhm_events_attrs;
+ mem_attr = nhm_mem_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -3905,15 +7374,18 @@ __init int intel_pmu_init(void)
intel_pmu_pebs_data_source_nhm();
x86_add_quirk(intel_nehalem_quirk);
+ x86_pmu.pebs_no_tlb = 1;
+ extra_attr = nhm_format_attr;
pr_cont("Nehalem events, ");
+ name = "nehalem";
break;
- case INTEL_FAM6_ATOM_PINEVIEW:
- case INTEL_FAM6_ATOM_LINCROFT:
- case INTEL_FAM6_ATOM_PENWELL:
- case INTEL_FAM6_ATOM_CLOVERVIEW:
- case INTEL_FAM6_ATOM_CEDARVIEW:
+ case INTEL_ATOM_BONNELL:
+ case INTEL_ATOM_BONNELL_MID:
+ case INTEL_ATOM_SALTWELL:
+ case INTEL_ATOM_SALTWELL_MID:
+ case INTEL_ATOM_SALTWELL_TABLET:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3923,11 +7395,14 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
pr_cont("Atom events, ");
+ name = "bonnell";
break;
- case INTEL_FAM6_ATOM_SILVERMONT1:
- case INTEL_FAM6_ATOM_SILVERMONT2:
- case INTEL_FAM6_ATOM_AIRMONT:
+ case INTEL_ATOM_SILVERMONT:
+ case INTEL_ATOM_SILVERMONT_D:
+ case INTEL_ATOM_SILVERMONT_MID:
+ case INTEL_ATOM_AIRMONT:
+ case INTEL_ATOM_SILVERMONT_MID2:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -3939,12 +7414,14 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = slm_events_attrs;
+ td_attr = slm_events_attrs;
+ extra_attr = slm_format_attr;
pr_cont("Silvermont events, ");
+ name = "silvermont";
break;
- case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_DENVERTON:
+ case INTEL_ATOM_GOLDMONT:
+ case INTEL_ATOM_GOLDMONT_D:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -3964,11 +7441,13 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = glm_events_attrs;
+ td_attr = glm_events_attrs;
+ extra_attr = slm_format_attr;
pr_cont("Goldmont events, ");
+ name = "goldmont";
break;
- case INTEL_FAM6_ATOM_GEMINI_LAKE:
+ case INTEL_ATOM_GOLDMONT_PLUS:
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
@@ -3977,7 +7456,6 @@ __init int intel_pmu_init(void)
intel_pmu_lbr_init_skl();
x86_pmu.event_constraints = intel_slm_event_constraints;
- x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
x86_pmu.extra_regs = intel_glm_extra_regs;
/*
* It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
@@ -3986,17 +7464,88 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.pebs_capable = ~0ULL;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
- x86_pmu.cpu_events = glm_events_attrs;
+ td_attr = glm_events_attrs;
/* Goldmont Plus has 4-wide pipeline */
event_attr_td_total_slots_scale_glm.event_str = "4";
+ extra_attr = slm_format_attr;
pr_cont("Goldmont plus events, ");
+ name = "goldmont_plus";
+ break;
+
+ case INTEL_ATOM_TREMONT_D:
+ case INTEL_ATOM_TREMONT:
+ case INTEL_ATOM_TREMONT_L:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+ hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+ intel_pmu_lbr_init_skl();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.extra_regs = intel_tnt_extra_regs;
+ /*
+ * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+ * for precise cycles.
+ */
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.get_event_constraints = tnt_get_event_constraints;
+ td_attr = tnt_events_attrs;
+ extra_attr = slm_format_attr;
+ pr_cont("Tremont events, ");
+	name = "tremont";
break;
- case INTEL_FAM6_WESTMERE:
- case INTEL_FAM6_WESTMERE_EP:
- case INTEL_FAM6_WESTMERE_EX:
+ case INTEL_ATOM_GRACEMONT:
+ intel_pmu_init_grt(NULL);
+ intel_pmu_pebs_data_source_grt();
+ x86_pmu.pebs_latency_data = grt_latency_data;
+ x86_pmu.get_event_constraints = tnt_get_event_constraints;
+ td_attr = tnt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = nhm_format_attr;
+ pr_cont("Gracemont events, ");
+ name = "gracemont";
+ break;
+
+ case INTEL_ATOM_CRESTMONT:
+ case INTEL_ATOM_CRESTMONT_X:
+ intel_pmu_init_grt(NULL);
+ x86_pmu.extra_regs = intel_cmt_extra_regs;
+ intel_pmu_pebs_data_source_cmt();
+ x86_pmu.pebs_latency_data = cmt_latency_data;
+ x86_pmu.get_event_constraints = cmt_get_event_constraints;
+ td_attr = cmt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = cmt_format_attr;
+ pr_cont("Crestmont events, ");
+ name = "crestmont";
+ break;
+
+ case INTEL_ATOM_DARKMONT_X:
+ intel_pmu_init_skt(NULL);
+ intel_pmu_pebs_data_source_cmt();
+ x86_pmu.pebs_latency_data = cmt_latency_data;
+ x86_pmu.get_event_constraints = cmt_get_event_constraints;
+ td_attr = skt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = cmt_format_attr;
+ pr_cont("Darkmont events, ");
+ name = "darkmont";
+ break;
+
+ case INTEL_WESTMERE:
+ case INTEL_WESTMERE_EP:
+ case INTEL_WESTMERE_EX:
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -4010,7 +7559,7 @@ __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_westmere_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
- x86_pmu.cpu_events = nhm_events_attrs;
+ mem_attr = nhm_mem_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4020,11 +7569,13 @@ __init int intel_pmu_init(void)
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
intel_pmu_pebs_data_source_nhm();
+ extra_attr = nhm_format_attr;
pr_cont("Westmere events, ");
+ name = "westmere";
break;
- case INTEL_FAM6_SANDYBRIDGE:
- case INTEL_FAM6_SANDYBRIDGE_X:
+ case INTEL_SANDYBRIDGE:
+ case INTEL_SANDYBRIDGE_X:
x86_add_quirk(intel_sandybridge_quirk);
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
@@ -4037,7 +7588,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
+ if (boot_cpu_data.x86_vfm == INTEL_SANDYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -4047,7 +7598,8 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.cpu_events = snb_events_attrs;
+ td_attr = snb_events_attrs;
+ mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
@@ -4056,11 +7608,14 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
+ extra_attr = nhm_format_attr;
+
pr_cont("SandyBridge events, ");
+ name = "sandybridge";
break;
- case INTEL_FAM6_IVYBRIDGE:
- case INTEL_FAM6_IVYBRIDGE_X:
+ case INTEL_IVYBRIDGE:
+ case INTEL_IVYBRIDGE_X:
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -4076,7 +7631,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
- if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
+ if (boot_cpu_data.x86_vfm == INTEL_IVYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -4084,21 +7639,26 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
- x86_pmu.cpu_events = snb_events_attrs;
+ td_attr = snb_events_attrs;
+ mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+ extra_attr = nhm_format_attr;
+
pr_cont("IvyBridge events, ");
+ name = "ivybridge";
break;
- case INTEL_FAM6_HASWELL_CORE:
- case INTEL_FAM6_HASWELL_X:
- case INTEL_FAM6_HASWELL_ULT:
- case INTEL_FAM6_HASWELL_GT3E:
+ case INTEL_HASWELL:
+ case INTEL_HASWELL_X:
+ case INTEL_HASWELL_L:
+ case INTEL_HASWELL_G:
x86_add_quirk(intel_ht_bug);
+ x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4116,15 +7676,22 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.limit_period = hsw_limit_period;
x86_pmu.lbr_double_abort = true;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
+ td_attr = hsw_events_attrs;
+ mem_attr = hsw_mem_events_attrs;
+ tsx_attr = hsw_tsx_events_attrs;
pr_cont("Haswell events, ");
+ name = "haswell";
break;
- case INTEL_FAM6_BROADWELL_CORE:
- case INTEL_FAM6_BROADWELL_XEON_D:
- case INTEL_FAM6_BROADWELL_GT3E:
- case INTEL_FAM6_BROADWELL_X:
+ case INTEL_BROADWELL:
+ case INTEL_BROADWELL_D:
+ case INTEL_BROADWELL_G:
+ case INTEL_BROADWELL_X:
+ x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4152,13 +7719,18 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.limit_period = bdw_limit_period;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
+ td_attr = hsw_events_attrs;
+ mem_attr = hsw_mem_events_attrs;
+ tsx_attr = hsw_tsx_events_attrs;
pr_cont("Broadwell events, ");
+ name = "broadwell";
break;
- case INTEL_FAM6_XEON_PHI_KNL:
- case INTEL_FAM6_XEON_PHI_KNM:
+ case INTEL_XEON_PHI_KNL:
+ case INTEL_XEON_PHI_KNM:
memcpy(hw_cache_event_ids,
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
@@ -4172,15 +7744,21 @@ __init int intel_pmu_init(void)
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
+ extra_attr = slm_format_attr;
pr_cont("Knights Landing/Mill events, ");
+ name = "knights-landing";
break;
- case INTEL_FAM6_SKYLAKE_MOBILE:
- case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_SKYLAKE_X:
- case INTEL_FAM6_KABYLAKE_MOBILE:
- case INTEL_FAM6_KABYLAKE_DESKTOP:
+ case INTEL_SKYLAKE_X:
+ pmem = true;
+ fallthrough;
+ case INTEL_SKYLAKE_L:
+ case INTEL_SKYLAKE:
+ case INTEL_KABYLAKE_L:
+ case INTEL_KABYLAKE:
+ case INTEL_COMETLAKE_L:
+ case INTEL_COMETLAKE:
+ x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4203,11 +7781,262 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
- skl_format_attr);
- WARN_ON(!x86_pmu.format_attrs);
- x86_pmu.cpu_events = hsw_events_attrs;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
+ extra_skl_attr = skl_format_attr;
+ td_attr = hsw_events_attrs;
+ mem_attr = hsw_mem_events_attrs;
+ tsx_attr = hsw_tsx_events_attrs;
+ intel_pmu_pebs_data_source_skl(pmem);
+
+ /*
+ * Processors with CPUID.RTM_ALWAYS_ABORT have TSX deprecated by default.
+		 * TSX force abort hooks are not required on these systems. Only deploy
+		 * the workaround when microcode has not enabled X86_FEATURE_RTM_ALWAYS_ABORT.
+ */
+ if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) &&
+ !boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) {
+ x86_pmu.flags |= PMU_FL_TFA;
+ x86_pmu.get_event_constraints = tfa_get_event_constraints;
+ x86_pmu.enable_all = intel_tfa_pmu_enable_all;
+ x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
+ }
+
pr_cont("Skylake events, ");
+ name = "skylake";
+ break;
+
+ case INTEL_ICELAKE_X:
+ case INTEL_ICELAKE_D:
+ x86_pmu.pebs_ept = 1;
+ pmem = true;
+ fallthrough;
+ case INTEL_ICELAKE_L:
+ case INTEL_ICELAKE:
+ case INTEL_TIGERLAKE_L:
+ case INTEL_TIGERLAKE:
+ case INTEL_ROCKETLAKE:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+ intel_pmu_lbr_init_skl();
+
+ x86_pmu.event_constraints = intel_icl_event_constraints;
+ x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_icl_extra_regs;
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = icl_get_event_constraints;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
+ extra_skl_attr = skl_format_attr;
+ mem_attr = icl_events_attrs;
+ td_attr = icl_td_events_attrs;
+ tsx_attr = icl_tsx_events_attrs;
+ x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
+ x86_pmu.lbr_pt_coexist = true;
+ intel_pmu_pebs_data_source_skl(pmem);
+ x86_pmu.num_topdown_events = 4;
+ static_call_update(intel_pmu_update_topdown_event,
+ &icl_update_topdown_event);
+ static_call_update(intel_pmu_set_topdown_event_period,
+ &icl_set_topdown_event_period);
+ pr_cont("Icelake events, ");
+ name = "icelake";
+ break;
+
+ case INTEL_SAPPHIRERAPIDS_X:
+ case INTEL_EMERALDRAPIDS_X:
+ x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
+ x86_pmu.extra_regs = intel_glc_extra_regs;
+ pr_cont("Sapphire Rapids events, ");
+ name = "sapphire_rapids";
+ goto glc_common;
+
+ case INTEL_GRANITERAPIDS_X:
+ case INTEL_GRANITERAPIDS_D:
+ x86_pmu.extra_regs = intel_rwc_extra_regs;
+ pr_cont("Granite Rapids events, ");
+ name = "granite_rapids";
+
+ glc_common:
+ intel_pmu_init_glc(NULL);
+ x86_pmu.pebs_ept = 1;
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = glc_get_event_constraints;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
+ extra_skl_attr = skl_format_attr;
+ mem_attr = glc_events_attrs;
+ td_attr = glc_td_events_attrs;
+ tsx_attr = glc_tsx_events_attrs;
+ intel_pmu_pebs_data_source_skl(true);
+ break;
+
+ case INTEL_ALDERLAKE:
+ case INTEL_ALDERLAKE_L:
+ case INTEL_RAPTORLAKE:
+ case INTEL_RAPTORLAKE_P:
+ case INTEL_RAPTORLAKE_S:
+ /*
+		 * Alder Lake has two CPU types, core and atom.
+ *
+ * Initialize the common PerfMon capabilities here.
+ */
+ intel_pmu_init_hybrid(hybrid_big_small);
+
+ x86_pmu.pebs_latency_data = grt_latency_data;
+ x86_pmu.get_event_constraints = adl_get_event_constraints;
+ x86_pmu.hw_config = adl_hw_config;
+ x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type;
+
+ td_attr = adl_hybrid_events_attrs;
+ mem_attr = adl_hybrid_mem_attrs;
+ tsx_attr = adl_hybrid_tsx_attrs;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ adl_hybrid_extra_attr_rtm : adl_hybrid_extra_attr;
+
+		/* Initialize big core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
+ intel_pmu_init_glc(&pmu->pmu);
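+		/*
+		 * On hybrid parts leaf 0xA enumerates the common (Atom) counter
+		 * set; the big cores have two more GP and one more fixed counter.
+		 */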
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
+ pmu->cntr_mask64 <<= 2;
+ pmu->cntr_mask64 |= 0x3;
+ pmu->fixed_cntr_mask64 <<= 1;
+ pmu->fixed_cntr_mask64 |= 0x1;
+ } else {
+ pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+ pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
+ }
+
+ /*
+		 * Quirk: For some Alder Lake machines, when all E-cores are disabled
+		 * in the BIOS, leaf 0xA enumerates all counters of the P-cores.
+		 * However, X86_FEATURE_HYBRID_CPU is still set. The code above would
+		 * then mistakenly add extra counters for the P-cores. Correct the
+		 * number of counters here.
+ */
+ if ((x86_pmu_num_counters(&pmu->pmu) > 8) || (x86_pmu_num_counters_fixed(&pmu->pmu) > 4)) {
+ pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+ pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
+ }
+
+ pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
+ pmu->unconstrained = (struct event_constraint)
+ __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+ 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
+
+ pmu->extra_regs = intel_glc_extra_regs;
+
+		/* Initialize Atom core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
+ intel_pmu_init_grt(&pmu->pmu);
+
+ x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
+ intel_pmu_pebs_data_source_adl();
+ pr_cont("Alderlake Hybrid events, ");
+ name = "alderlake_hybrid";
+ break;
+
+ case INTEL_METEORLAKE:
+ case INTEL_METEORLAKE_L:
+ case INTEL_ARROWLAKE_U:
+ intel_pmu_init_hybrid(hybrid_big_small);
+
+ x86_pmu.pebs_latency_data = cmt_latency_data;
+ x86_pmu.get_event_constraints = mtl_get_event_constraints;
+ x86_pmu.hw_config = adl_hw_config;
+
+ td_attr = adl_hybrid_events_attrs;
+ mem_attr = mtl_hybrid_mem_attrs;
+ tsx_attr = adl_hybrid_tsx_attrs;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
+
+		/* Initialize big core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
+ intel_pmu_init_glc(&pmu->pmu);
+ pmu->extra_regs = intel_rwc_extra_regs;
+
+		/* Initialize Atom core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
+ intel_pmu_init_grt(&pmu->pmu);
+ pmu->extra_regs = intel_cmt_extra_regs;
+
+ intel_pmu_pebs_data_source_mtl();
+ pr_cont("Meteorlake Hybrid events, ");
+ name = "meteorlake_hybrid";
+ break;
+
+ case INTEL_PANTHERLAKE_L:
+ case INTEL_WILDCATLAKE_L:
+ pr_cont("Pantherlake Hybrid events, ");
+ name = "pantherlake_hybrid";
+ goto lnl_common;
+
+ case INTEL_LUNARLAKE_M:
+ case INTEL_ARROWLAKE:
+ pr_cont("Lunarlake Hybrid events, ");
+ name = "lunarlake_hybrid";
+
+ lnl_common:
+ intel_pmu_init_hybrid(hybrid_big_small);
+
+ x86_pmu.pebs_latency_data = lnl_latency_data;
+ x86_pmu.get_event_constraints = mtl_get_event_constraints;
+ x86_pmu.hw_config = adl_hw_config;
+
+ td_attr = lnl_hybrid_events_attrs;
+ mem_attr = mtl_hybrid_mem_attrs;
+ tsx_attr = adl_hybrid_tsx_attrs;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
+
+		/* Initialize big core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
+ intel_pmu_init_lnc(&pmu->pmu);
+
+		/* Initialize Atom core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
+ intel_pmu_init_skt(&pmu->pmu);
+
+ intel_pmu_pebs_data_source_lnl();
+ break;
+
+ case INTEL_ARROWLAKE_H:
+ intel_pmu_init_hybrid(hybrid_big_small_tiny);
+
+ x86_pmu.pebs_latency_data = arl_h_latency_data;
+ x86_pmu.get_event_constraints = arl_h_get_event_constraints;
+ x86_pmu.hw_config = arl_h_hw_config;
+
+ td_attr = arl_h_hybrid_events_attrs;
+ mem_attr = arl_h_hybrid_mem_attrs;
+ tsx_attr = adl_hybrid_tsx_attrs;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
+
+ /* Initialize big core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
+ intel_pmu_init_lnc(&pmu->pmu);
+
+ /* Initialize Atom core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
+ intel_pmu_init_skt(&pmu->pmu);
+
+ /* Initialize Lower Power Atom specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX];
+ intel_pmu_init_grt(&pmu->pmu);
+ pmu->extra_regs = intel_cmt_extra_regs;
+
+ intel_pmu_pebs_data_source_arl_h();
+ pr_cont("ArrowLake-H Hybrid events, ");
+ name = "arrowlake_h_hybrid";
break;
default:
@@ -4215,56 +8044,91 @@ __init int intel_pmu_init(void)
case 1:
x86_pmu.event_constraints = intel_v1_event_constraints;
pr_cont("generic architected perfmon v1, ");
+ name = "generic_arch_v1";
break;
- default:
+ case 2:
+ case 3:
+ case 4:
/*
* default constraints for v2 and up
*/
x86_pmu.event_constraints = intel_gen_event_constraints;
pr_cont("generic architected perfmon, ");
+ name = "generic_arch_v2+";
+ break;
+ default:
+ /*
+ * The default constraints for v5 and up can support up to
+			 * 16 fixed counters. For fixed counters 4 and later, the
+			 * pseudo-encoding is applied.
+			 * The constraints are cut to the CPUID enumeration by
+			 * inserting EVENT_CONSTRAINT_END.
+ */
+ if (fls64(x86_pmu.fixed_cntr_mask64) > INTEL_PMC_MAX_FIXED)
+ x86_pmu.fixed_cntr_mask64 &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
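+		/*
+		 * fls64() of the (possibly trimmed) mask indexes the first
+		 * unused table slot; writing weight = -1 there is how the
+		 * EVENT_CONSTRAINT_END terminator is inserted.
+		 */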
+ intel_v5_gen_event_constraints[fls64(x86_pmu.fixed_cntr_mask64)].weight = -1;
+ x86_pmu.event_constraints = intel_v5_gen_event_constraints;
+ pr_cont("generic architected perfmon, ");
+ name = "generic_arch_v5+";
break;
}
}
- if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
- WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
- x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
- x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
+ snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
+
+ if (!is_hybrid()) {
+ group_events_td.attrs = td_attr;
+ group_events_mem.attrs = mem_attr;
+ group_events_tsx.attrs = tsx_attr;
+ group_format_extra.attrs = extra_attr;
+ group_format_extra_skl.attrs = extra_skl_attr;
+
+ x86_pmu.attr_update = attr_update;
+ } else {
+ hybrid_group_events_td.attrs = td_attr;
+ hybrid_group_events_mem.attrs = mem_attr;
+ hybrid_group_events_tsx.attrs = tsx_attr;
+ hybrid_group_format_extra.attrs = extra_attr;
+
+ x86_pmu.attr_update = hybrid_attr_update;
}
- x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
- if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
- WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
- x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
- x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+ /*
+	 * The archPerfmonExt leaf (0x23) provides an enhanced, per-core
+	 * enumeration of the PMU architectural features. For non-hybrid,
+	 * every core has the same PMU capabilities, so updating x86_pmu
+	 * from the boot CPU is sufficient. For hybrid, x86_pmu holds only
+	 * the common capabilities, so keep the values from leaf 0xa here;
+	 * the core-type-specific update is done later, when a new core
+	 * type comes online.
+ */
+ if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
+ update_pmu_cap(NULL);
+
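+	/*
+	 * Wire the _ext enable/disable hooks up as static calls, so the
+	 * event enable/disable fast paths avoid an indirect call when
+	 * architectural PEBS is in use.
+	 */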
+ if (x86_pmu.arch_pebs) {
+ static_call_update(intel_pmu_disable_event_ext,
+ intel_pmu_disable_event_ext);
+ static_call_update(intel_pmu_enable_event_ext,
+ intel_pmu_enable_event_ext);
+ pr_cont("Architectural PEBS, ");
}
- x86_pmu.intel_ctrl |=
- ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
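+	/*
+	 * Consolidates the open-coded checks removed above: clip both
+	 * counter masks to the supported maxima and rebuild intel_ctrl
+	 * from the final masks.
+	 */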
+ intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
+ &x86_pmu.fixed_cntr_mask64,
+ &x86_pmu.intel_ctrl);
- if (x86_pmu.event_constraints) {
- /*
- * event on fixed counter2 (REF_CYCLES) only works on this
- * counter, so do not extend mask to generic counters
- */
- for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask == FIXED_EVENT_FLAGS
- && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
- c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
- }
- c->idxmsk64 &=
- ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
- c->weight = hweight64(c->idxmsk64);
- }
- }
+ /* AnyThread may be deprecated on arch perfmon v5 or later */
+ if (x86_pmu.intel_cap.anythread_deprecated)
+ x86_pmu.format_attrs = intel_arch_formats_attr;
+
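+	/*
+	 * Likewise replaces the removed fixup loop: extend fixed-counter
+	 * constraints to the generic counters where allowed and recompute
+	 * each constraint's weight against the final counter masks.
+	 */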
+ intel_pmu_check_event_constraints_all(NULL);
/*
* Access LBR MSR may cause #GP under certain circumstances.
- * E.g. KVM doesn't support LBR MSR
- * Check all LBT MSR here.
+	 * Check all LBR MSRs here.
* Disable LBR access if any LBR MSRs can not be accessed.
*/
- if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+ if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
x86_pmu.lbr_nr = 0;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
@@ -4272,22 +8136,25 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_nr = 0;
}
- if (x86_pmu.lbr_nr)
+ if (x86_pmu.lbr_nr) {
+ intel_pmu_lbr_init();
+
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
- /*
- * Access extra MSR may cause #GP under certain circumstances.
- * E.g. KVM doesn't support offcore event
- * Check all extra_regs here.
- */
- if (x86_pmu.extra_regs) {
- for (er = x86_pmu.extra_regs; er->msr; er++) {
- er->extra_msr_access = check_msr(er->msr, 0x11UL);
- /* Disable LBR select mapping */
- if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
- x86_pmu.lbr_sel_map = NULL;
+
+ /* only support branch_stack snapshot for perfmon >= v2 */
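+		/*
+		 * perf_snapshot_branch_stack is a static call, so the
+		 * arch-LBR or model-specific snapshot helper is selected
+		 * once here rather than on every snapshot.
+		 */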
+ if (x86_pmu.disable_all == intel_pmu_disable_all) {
+ if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) {
+ static_call_update(perf_snapshot_branch_stack,
+ intel_pmu_snapshot_arch_branch_stack);
+ } else {
+ static_call_update(perf_snapshot_branch_stack,
+ intel_pmu_snapshot_branch_stack);
+ }
}
}
+ intel_pmu_check_extra_regs(x86_pmu.extra_regs);
+
/* Support full width counters using alternative MSR range */
if (x86_pmu.intel_cap.full_width_write) {
x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
@@ -4295,6 +8162,22 @@ __init int intel_pmu_init(void)
pr_cont("full-width counters, ");
}
+ /* Support V6+ MSR Aliasing */
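+	/*
+	 * In the alias space each counter's CFG/CTR MSR pair sits at a
+	 * constant stride from the base (see intel_pmu_v6_addr_offset()),
+	 * so only the base MSRs and the offset callback change.
+	 */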
+ if (x86_pmu.version >= 6) {
+ x86_pmu.perfctr = MSR_IA32_PMC_V6_GP0_CTR;
+ x86_pmu.eventsel = MSR_IA32_PMC_V6_GP0_CFG_A;
+ x86_pmu.fixedctr = MSR_IA32_PMC_V6_FX0_CTR;
+ x86_pmu.addr_offset = intel_pmu_v6_addr_offset;
+ }
+
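+	/* Gate the TopDown PERF_METRICS logic via the global control mask. */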
+ if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
+ x86_pmu.intel_ctrl |= GLOBAL_CTRL_EN_PERF_METRICS;
+
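+	/* PEBS timing info carries per-record retire latency; flag support. */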
+ if (x86_pmu.intel_cap.pebs_timing_info)
+ x86_pmu.flags |= PMU_FL_RETIRE_LATENCY;
+
+ intel_aux_output_init();
+
return 0;
}
@@ -4318,10 +8201,9 @@ static __init int fixup_ht_bug(void)
return 0;
}
- if (lockup_detector_suspend() != 0) {
- pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
- return 0;
- }
+ cpus_read_lock();
+
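+	/*
+	 * Park the perf-based hardlockup detector so its counter is
+	 * rescheduled once the exclusive-counter flags are cleared; it is
+	 * restarted below.
+	 */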
+ hardlockup_detector_perf_stop();
x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
@@ -4329,12 +8211,10 @@ static __init int fixup_ht_bug(void)
x86_pmu.commit_scheduling = NULL;
x86_pmu.stop_scheduling = NULL;
- lockup_detector_resume();
-
- cpus_read_lock();
+ hardlockup_detector_perf_restart();
for_each_online_cpu(c)
- free_excl_cntrs(c);
+ free_excl_cntrs(&per_cpu(cpu_hw_events, c));
cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");