summaryrefslogtreecommitdiff
path: root/arch/x86/events
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events')
-rw-r--r--arch/x86/events/amd/brs.c3
-rw-r--r--arch/x86/events/amd/core.c117
-rw-r--r--arch/x86/events/amd/ibs.c344
-rw-r--r--arch/x86/events/amd/iommu.c2
-rw-r--r--arch/x86/events/amd/lbr.c22
-rw-r--r--arch/x86/events/amd/uncore.c41
-rw-r--r--arch/x86/events/core.c291
-rw-r--r--arch/x86/events/intel/bts.c48
-rw-r--r--arch/x86/events/intel/core.c1096
-rw-r--r--arch/x86/events/intel/cstate.c335
-rw-r--r--arch/x86/events/intel/ds.c606
-rw-r--r--arch/x86/events/intel/knc.c2
-rw-r--r--arch/x86/events/intel/lbr.c77
-rw-r--r--arch/x86/events/intel/p4.c19
-rw-r--r--arch/x86/events/intel/p6.c28
-rw-r--r--arch/x86/events/intel/pt.c132
-rw-r--r--arch/x86/events/intel/pt.h13
-rw-r--r--arch/x86/events/intel/uncore.c231
-rw-r--r--arch/x86/events/intel/uncore.h11
-rw-r--r--arch/x86/events/intel/uncore_discovery.c306
-rw-r--r--arch/x86/events/intel/uncore_discovery.h22
-rw-r--r--arch/x86/events/intel/uncore_nhmex.c3
-rw-r--r--arch/x86/events/intel/uncore_snb.c187
-rw-r--r--arch/x86/events/intel/uncore_snbep.c187
-rw-r--r--arch/x86/events/msr.c132
-rw-r--r--arch/x86/events/perf_event.h185
-rw-r--r--arch/x86/events/perf_event_flags.h2
-rw-r--r--arch/x86/events/rapl.c601
-rw-r--r--arch/x86/events/zhaoxin/core.c12
29 files changed, 3422 insertions, 1633 deletions
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index 780acd3dff22..ec3427463382 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -381,7 +381,8 @@ static void amd_brs_poison_buffer(void)
* On ctxswin, sched_in = true, called after the PMU has started
* On ctxswout, sched_in = false, called before the PMU is stopped
*/
-void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b..30d6ceb4c8ad 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,39 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
+static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a)
+ return amd_zen4_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
@@ -403,8 +432,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
* be removed on one CPU at a time AND PMU is disabled
* when we come here
*/
- for (i = 0; i < x86_pmu.num_counters; i++) {
- if (cmpxchg(nb->owners + i, event, NULL) == event)
+ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
+ struct perf_event *tmp = event;
+
+ if (try_cmpxchg(nb->owners + i, &tmp, NULL))
break;
}
}
@@ -470,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+ for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
if (new == -1 || hwc->idx == idx)
/* assign free slot, prefer hwc->idx */
old = cmpxchg(nb->owners + idx, NULL, event);
@@ -513,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
/*
* initialize all possible NB constraints
*/
- for (i = 0; i < x86_pmu.num_counters; i++) {
+ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
__set_bit(i, nb->event_constraints[i].idxmsk);
nb->event_constraints[i].weight = 1;
}
@@ -618,7 +649,7 @@ static void amd_pmu_cpu_dead(int cpu)
}
}
-static inline void amd_pmu_set_global_ctl(u64 ctl)
+static __always_inline void amd_pmu_set_global_ctl(u64 ctl)
{
wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
}
@@ -706,7 +737,7 @@ static void amd_pmu_check_overflow(void)
* counters are always enabled when this function is called and
* ARCH_PERFMON_EVENTSEL_INT is always set.
*/
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -726,7 +757,7 @@ static void amd_pmu_enable_all(int added)
amd_brs_enable_all();
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
/* only activate events which are marked as active */
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -878,14 +909,46 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
return amd_pmu_adjust_nmi_window(handled);
}
+/*
+ * AMD-specific callback invoked through perf_snapshot_branch_stack static
+ * call, defined in include/linux/perf_event.h. See its definition for API
+ * details. It's up to caller to provide enough space in *entries* to fit all
+ * LBR records, otherwise returned result will be truncated to *cnt* entries.
+ */
+static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+ struct cpu_hw_events *cpuc;
+ unsigned long flags;
+
+ /*
+ * The sequence of steps to freeze LBR should be completely inlined
+ * and contain no branches to minimize contamination of LBR snapshot
+ */
+ local_irq_save(flags);
+ amd_pmu_core_disable_all();
+ __amd_pmu_lbr_disable();
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ amd_pmu_lbr_read();
+ cnt = min(cnt, x86_pmu.lbr_nr);
+ memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
+
+ amd_pmu_v2_enable_all(0);
+ local_irq_restore(flags);
+
+ return cnt;
+}
+
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ static atomic64_t status_warned = ATOMIC64_INIT(0);
+ u64 reserved, status, mask, new_bits, prev_bits;
struct perf_sample_data data;
struct hw_perf_event *hwc;
struct perf_event *event;
int handled = 0, idx;
- u64 reserved, status, mask;
bool pmu_enabled;
/*
@@ -904,8 +967,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
@@ -918,7 +981,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
/* Clear any reserved bits set by buggy microcode */
status &= amd_pmu_global_cntr_mask;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -938,8 +1001,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!x86_perf_event_set_period(event))
continue;
- if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -950,7 +1012,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
* the corresponding PMCs are expected to be inactive according to the
* active_mask
*/
- WARN_ON(status > 0);
+ if (status > 0) {
+ prev_bits = atomic64_fetch_or(status, &status_warned);
+ // A new bit was set for the very first time.
+ new_bits = status & ~prev_bits;
+ WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
+ }
/* Clear overflow and freeze bits */
amd_pmu_ack_global_status(~status);
@@ -1253,7 +1320,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.addr_offset = amd_pmu_addr_offset,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = AMD64_NUM_COUNTERS,
+ .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
.add = amd_pmu_add_event,
.del = amd_pmu_del_event,
.cntval_bits = 48,
@@ -1352,7 +1419,7 @@ static int __init amd_core_pmu_init(void)
*/
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
- x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
/* Check for Performance Monitoring v2 support */
if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1362,9 +1429,9 @@ static int __init amd_core_pmu_init(void)
x86_pmu.version = 2;
/* Find the number of available Core PMCs */
- x86_pmu.num_counters = ebx.split.num_core_pmc;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
- amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+ amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
/* Update PMC handling functions */
x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1392,12 +1459,12 @@ static int __init amd_core_pmu_init(void)
* even numbered counter that has a consecutive adjacent odd
* numbered counter following it.
*/
- for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+ for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
even_ctr_mask |= BIT_ULL(i);
pair_constraint = (struct event_constraint)
__EVENT_CONSTRAINT(0, even_ctr_mask, 0,
- x86_pmu.num_counters / 2, 0,
+ x86_pmu_max_num_counters(NULL) / 2, 0,
PERF_X86_EVENT_PAIR);
x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
@@ -1414,6 +1481,10 @@ static int __init amd_core_pmu_init(void)
static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);
+
+ /* Only support branch_stack snapshot on perfmon v2 */
+ if (x86_pmu.handle_irq == amd_pmu_v2_handle_irq)
+ static_call_update(perf_snapshot_branch_stack, amd_pmu_v2_snapshot_branch_stack);
} else if (!amd_brs_init()) {
/*
* BRS requires special event constraints and flushing on ctxsw.
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index e91970b01d62..0252b7ea8bca 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -28,9 +28,8 @@ static u32 ibs_caps;
#include <asm/nmi.h>
#include <asm/amd-ibs.h>
-#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
-#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
-
+/* attr.config2 */
+#define IBS_SW_FILTER_MASK 1
/*
* IBS states:
@@ -87,6 +86,7 @@ struct perf_ibs {
u64 cnt_mask;
u64 enable_mask;
u64 valid_mask;
+ u16 min_period;
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
@@ -268,11 +268,19 @@ static int validate_group(struct perf_event *event)
return 0;
}
+static bool perf_ibs_ldlat_event(struct perf_ibs *perf_ibs,
+ struct perf_event *event)
+{
+ return perf_ibs == &perf_ibs_op &&
+ (ibs_caps & IBS_CAPS_OPLDLAT) &&
+ (event->attr.config1 & 0xFFF);
+}
+
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs;
- u64 max_cnt, config;
+ u64 config;
int ret;
perf_ibs = get_ibs_pmu(event->attr.type);
@@ -290,6 +298,16 @@ static int perf_ibs_init(struct perf_event *event)
if (has_branch_stack(event))
return -EOPNOTSUPP;
+ /* handle exclude_{user,kernel} in the IRQ handler */
+ if (event->attr.exclude_host || event->attr.exclude_guest ||
+ event->attr.exclude_idle)
+ return -EINVAL;
+
+ if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
+ (event->attr.exclude_kernel || event->attr.exclude_user ||
+ event->attr.exclude_hv))
+ return -EINVAL;
+
ret = validate_group(event);
if (ret)
return ret;
@@ -298,25 +316,47 @@ static int perf_ibs_init(struct perf_event *event)
if (config & perf_ibs->cnt_mask)
/* raw max_cnt may not be set */
return -EINVAL;
- if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
- /*
- * lower 4 bits can not be set in ibs max cnt,
- * but allowing it in case we adjust the
- * sample period to set a frequency.
- */
- return -EINVAL;
- hwc->sample_period &= ~0x0FULL;
- if (!hwc->sample_period)
- hwc->sample_period = 0x10;
+
+ if (event->attr.freq) {
+ hwc->sample_period = perf_ibs->min_period;
+ } else {
+ /* Silently mask off lower nibble. IBS hw mandates it. */
+ hwc->sample_period &= ~0x0FULL;
+ if (hwc->sample_period < perf_ibs->min_period)
+ return -EINVAL;
+ }
} else {
- max_cnt = config & perf_ibs->cnt_mask;
+ u64 period = 0;
+
+ if (event->attr.freq)
+ return -EINVAL;
+
+ if (perf_ibs == &perf_ibs_op) {
+ period = (config & IBS_OP_MAX_CNT) << 4;
+ if (ibs_caps & IBS_CAPS_OPCNTEXT)
+ period |= config & IBS_OP_MAX_CNT_EXT_MASK;
+ } else {
+ period = (config & IBS_FETCH_MAX_CNT) << 4;
+ }
+
config &= ~perf_ibs->cnt_mask;
- event->attr.sample_period = max_cnt << 4;
- hwc->sample_period = event->attr.sample_period;
+ event->attr.sample_period = period;
+ hwc->sample_period = period;
+
+ if (hwc->sample_period < perf_ibs->min_period)
+ return -EINVAL;
}
- if (!hwc->sample_period)
- return -EINVAL;
+ if (perf_ibs_ldlat_event(perf_ibs, event)) {
+ u64 ldlat = event->attr.config1 & 0xFFF;
+
+ if (ldlat < 128 || ldlat > 2048)
+ return -EINVAL;
+ ldlat >>= 7;
+
+ config |= (ldlat - 1) << 59;
+ config |= IBS_OP_L3MISSONLY | IBS_OP_LDLAT_EN;
+ }
/*
* If we modify hwc->sample_period, we also need to update
@@ -337,7 +377,8 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
int overflow;
/* ignore lower 4 bits in min count: */
- overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+ overflow = perf_event_set_period(hwc, perf_ibs->min_period,
+ perf_ibs->max_period, period);
local64_set(&hwc->prev_count, 0);
return overflow;
@@ -435,6 +476,9 @@ static void perf_ibs_start(struct perf_event *event, int flags)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
+ if (event->attr.freq && hwc->sample_period < perf_ibs->min_period)
+ hwc->sample_period = perf_ibs->min_period;
+
perf_ibs_set_period(perf_ibs, hwc, &period);
if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
config |= period & IBS_OP_MAX_CNT_EXT_MASK;
@@ -542,6 +586,28 @@ static void perf_ibs_del(struct perf_event *event, int flags)
static void perf_ibs_read(struct perf_event *event) { }
+static int perf_ibs_check_period(struct perf_event *event, u64 value)
+{
+ struct perf_ibs *perf_ibs;
+ u64 low_nibble;
+
+ if (event->attr.freq)
+ return 0;
+
+ perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ low_nibble = value & 0xFULL;
+
+ /*
+ * This contradicts with perf_ibs_init() which allows sample period
+ * with lower nibble bits set but silently masks them off. Whereas
+ * this returns error.
+ */
+ if (low_nibble || value < perf_ibs->min_period)
+ return -EINVAL;
+
+ return 0;
+}
+
/*
* We need to initialize with empty group if all attributes in the
* group are dynamic.
@@ -550,27 +616,20 @@ static struct attribute *attrs_empty[] = {
NULL,
};
-static struct attribute_group empty_format_group = {
- .name = "format",
- .attrs = attrs_empty,
-};
-
static struct attribute_group empty_caps_group = {
.name = "caps",
.attrs = attrs_empty,
};
-static const struct attribute_group *empty_attr_groups[] = {
- &empty_format_group,
- &empty_caps_group,
- NULL,
-};
-
PMU_FORMAT_ATTR(rand_en, "config:57");
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
+PMU_FORMAT_ATTR(swfilt, "config2:0");
PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
+PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_format, "config1:0-11");
PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");
+PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_cap, "1");
+PMU_EVENT_ATTR_STRING(dtlb_pgsize, ibs_op_dtlb_pgsize_cap, "1");
static umode_t
zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i)
@@ -578,8 +637,21 @@ zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int
return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
}
-static struct attribute *rand_en_attrs[] = {
+static umode_t
+ibs_op_ldlat_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_OPLDLAT ? attr->mode : 0;
+}
+
+static umode_t
+ibs_op_dtlb_pgsize_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_OPDTLBPGSIZE ? attr->mode : 0;
+}
+
+static struct attribute *fetch_attrs[] = {
&format_attr_rand_en.attr,
+ &format_attr_swfilt.attr,
NULL,
};
@@ -593,9 +665,19 @@ static struct attribute *zen4_ibs_extensions_attrs[] = {
NULL,
};
-static struct attribute_group group_rand_en = {
+static struct attribute *ibs_op_ldlat_cap_attrs[] = {
+ &ibs_op_ldlat_cap.attr.attr,
+ NULL,
+};
+
+static struct attribute *ibs_op_dtlb_pgsize_cap_attrs[] = {
+ &ibs_op_dtlb_pgsize_cap.attr.attr,
+ NULL,
+};
+
+static struct attribute_group group_fetch_formats = {
.name = "format",
- .attrs = rand_en_attrs,
+ .attrs = fetch_attrs,
};
static struct attribute_group group_fetch_l3missonly = {
@@ -610,8 +692,20 @@ static struct attribute_group group_zen4_ibs_extensions = {
.is_visible = zen4_ibs_extensions_is_visible,
};
+static struct attribute_group group_ibs_op_ldlat_cap = {
+ .name = "caps",
+ .attrs = ibs_op_ldlat_cap_attrs,
+ .is_visible = ibs_op_ldlat_is_visible,
+};
+
+static struct attribute_group group_ibs_op_dtlb_pgsize_cap = {
+ .name = "caps",
+ .attrs = ibs_op_dtlb_pgsize_cap_attrs,
+ .is_visible = ibs_op_dtlb_pgsize_is_visible,
+};
+
static const struct attribute_group *fetch_attr_groups[] = {
- &group_rand_en,
+ &group_fetch_formats,
&empty_caps_group,
NULL,
};
@@ -628,6 +722,11 @@ cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
}
+static struct attribute *op_attrs[] = {
+ &format_attr_swfilt.attr,
+ NULL,
+};
+
static struct attribute *cnt_ctl_attrs[] = {
&format_attr_cnt_ctl.attr,
NULL,
@@ -638,6 +737,16 @@ static struct attribute *op_l3missonly_attrs[] = {
NULL,
};
+static struct attribute_group group_op_formats = {
+ .name = "format",
+ .attrs = op_attrs,
+};
+
+static struct attribute *ibs_op_ldlat_format_attrs[] = {
+ &ibs_op_ldlat_format.attr.attr,
+ NULL,
+};
+
static struct attribute_group group_cnt_ctl = {
.name = "format",
.attrs = cnt_ctl_attrs,
@@ -650,10 +759,25 @@ static struct attribute_group group_op_l3missonly = {
.is_visible = zen4_ibs_extensions_is_visible,
};
+static const struct attribute_group *op_attr_groups[] = {
+ &group_op_formats,
+ &empty_caps_group,
+ NULL,
+};
+
+static struct attribute_group group_ibs_op_ldlat_format = {
+ .name = "format",
+ .attrs = ibs_op_ldlat_format_attrs,
+ .is_visible = ibs_op_ldlat_is_visible,
+};
+
static const struct attribute_group *op_attr_update[] = {
&group_cnt_ctl,
&group_op_l3missonly,
&group_zen4_ibs_extensions,
+ &group_ibs_op_ldlat_cap,
+ &group_ibs_op_ldlat_format,
+ &group_ibs_op_dtlb_pgsize_cap,
NULL,
};
@@ -667,13 +791,14 @@ static struct perf_ibs perf_ibs_fetch = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .check_period = perf_ibs_check_period,
},
.msr = MSR_AMD64_IBSFETCHCTL,
- .config_mask = IBS_FETCH_CONFIG_MASK,
+ .config_mask = IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN,
.cnt_mask = IBS_FETCH_MAX_CNT,
.enable_mask = IBS_FETCH_ENABLE,
.valid_mask = IBS_FETCH_VAL,
+ .min_period = 0x10,
.max_period = IBS_FETCH_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
@@ -691,14 +816,15 @@ static struct perf_ibs perf_ibs_op = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .check_period = perf_ibs_check_period,
},
.msr = MSR_AMD64_IBSOPCTL,
- .config_mask = IBS_OP_CONFIG_MASK,
+ .config_mask = IBS_OP_MAX_CNT,
.cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
IBS_OP_CUR_CNT_RAND,
.enable_mask = IBS_OP_ENABLE,
.valid_mask = IBS_OP_VAL,
+ .min_period = 0x90,
.max_period = IBS_OP_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
@@ -900,6 +1026,10 @@ static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3,
if (!op_data3->dc_lin_addr_valid)
return;
+ if ((ibs_caps & IBS_CAPS_OPDTLBPGSIZE) &&
+ !op_data3->dc_phy_addr_valid)
+ return;
+
if (!op_data3->dc_l1tlb_miss) {
data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
return;
@@ -924,6 +1054,8 @@ static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3,
data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
}
+/* Be careful. Works only for contiguous MSRs. */
+#define ibs_fetch_msr_idx(msr) (msr - MSR_AMD64_IBSFETCHCTL)
#define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL)
static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
@@ -1004,21 +1136,92 @@ static void perf_ibs_parse_ld_st_data(__u64 sample_type,
}
}
-static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type,
+static bool perf_ibs_is_mem_sample_type(struct perf_ibs *perf_ibs,
+ struct perf_event *event)
+{
+ u64 sample_type = event->attr.sample_type;
+
+ return perf_ibs == &perf_ibs_op &&
+ sample_type & (PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_WEIGHT_TYPE |
+ PERF_SAMPLE_ADDR |
+ PERF_SAMPLE_PHYS_ADDR);
+}
+
+static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs,
+ struct perf_event *event,
int check_rip)
{
- if (sample_type & PERF_SAMPLE_RAW ||
- (perf_ibs == &perf_ibs_op &&
- (sample_type & PERF_SAMPLE_DATA_SRC ||
- sample_type & PERF_SAMPLE_WEIGHT_TYPE ||
- sample_type & PERF_SAMPLE_ADDR ||
- sample_type & PERF_SAMPLE_PHYS_ADDR)))
+ if (event->attr.sample_type & PERF_SAMPLE_RAW ||
+ perf_ibs_is_mem_sample_type(perf_ibs, event) ||
+ perf_ibs_ldlat_event(perf_ibs, event))
return perf_ibs->offset_max;
else if (check_rip)
return 3;
return 1;
}
+static bool perf_ibs_is_kernel_data_addr(struct perf_event *event,
+ struct perf_ibs_data *ibs_data)
+{
+ u64 sample_type_mask = PERF_SAMPLE_ADDR | PERF_SAMPLE_RAW;
+ union ibs_op_data3 op_data3;
+ u64 dc_lin_addr;
+
+ op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
+ dc_lin_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];
+
+ return unlikely((event->attr.sample_type & sample_type_mask) &&
+ op_data3.dc_lin_addr_valid && kernel_ip(dc_lin_addr));
+}
+
+static bool perf_ibs_is_kernel_br_target(struct perf_event *event,
+ struct perf_ibs_data *ibs_data,
+ int br_target_idx)
+{
+ union ibs_op_data op_data;
+ u64 br_target;
+
+ op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];
+ br_target = ibs_data->regs[br_target_idx];
+
+ return unlikely((event->attr.sample_type & PERF_SAMPLE_RAW) &&
+ op_data.op_brn_ret && kernel_ip(br_target));
+}
+
+static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event,
+ struct pt_regs *regs, struct perf_ibs_data *ibs_data,
+ int br_target_idx)
+{
+ if (perf_exclude_event(event, regs))
+ return true;
+
+ if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel)
+ return false;
+
+ if (perf_ibs_is_kernel_data_addr(event, ibs_data))
+ return true;
+
+ if (br_target_idx != -1 &&
+ perf_ibs_is_kernel_br_target(event, ibs_data, br_target_idx))
+ return true;
+
+ return false;
+}
+
+static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs,
+ struct perf_ibs_data *ibs_data)
+{
+ if (perf_ibs == &perf_ibs_op) {
+ ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18);
+ ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0;
+ return;
+ }
+
+ ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52);
+ ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0;
+}
+
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
@@ -1031,6 +1234,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
int offset, size, check_rip, offset_max, throttle = 0;
unsigned int msr;
u64 *buf, *config, period, new_config = 0;
+ int br_target_idx = -1;
if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
@@ -1067,7 +1271,7 @@ fail:
offset = 1;
check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
- offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);
+ offset_max = perf_ibs_get_offset_max(perf_ibs, event, check_rip);
do {
rdmsrl(msr + offset, *buf++);
@@ -1076,6 +1280,22 @@ fail:
perf_ibs->offset_max,
offset + 1);
} while (offset < offset_max);
+
+ if (perf_ibs_ldlat_event(perf_ibs, event)) {
+ union ibs_op_data3 op_data3;
+
+ op_data3.val = ibs_data.regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
+ /*
+ * Opening event is errored out if load latency threshold is
+ * outside of [128, 2048] range. Since the event has reached
+ * interrupt handler, we can safely assume the threshold is
+ * within [128, 2048] range.
+ */
+ if (!op_data3.ld_op || !op_data3.dc_miss ||
+ op_data3.dc_miss_lat <= (event->attr.config1 & 0xFFF))
+ goto out;
+ }
+
/*
* Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
* depending on their availability.
@@ -1085,6 +1305,7 @@ fail:
if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_BRNTRGT) {
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
+ br_target_idx = size;
size++;
}
if (ibs_caps & IBS_CAPS_OPDATA4) {
@@ -1111,6 +1332,21 @@ fail:
regs.flags |= PERF_EFLAGS_EXACT;
}
+ if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
+ perf_ibs_swfilt_discard(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
+ throttle = perf_event_account_interrupt(event);
+ goto out;
+ }
+ /*
+ * Prevent leaking physical addresses to unprivileged users. Skip
+ * PERF_SAMPLE_PHYS_ADDR check since generic code prevents it for
+ * unprivileged users.
+ */
+ if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
+ perf_allow_kernel()) {
+ perf_ibs_phyaddr_clear(perf_ibs, &ibs_data);
+ }
+
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw = (struct perf_raw_record){
.frag = {
@@ -1118,7 +1354,7 @@ fail:
.data = ibs_data.data,
},
};
- perf_sample_save_raw_data(&data, &raw);
+ perf_sample_save_raw_data(&data, event, &raw);
}
if (perf_ibs == &perf_ibs_op)
@@ -1129,10 +1365,13 @@ fail:
* recorded as part of interrupt regs. Thus we need to use rip from
* interrupt regs while unwinding call stack.
*/
- if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
- perf_sample_save_callchain(&data, event, iregs);
+ perf_sample_save_callchain(&data, event, iregs);
throttle = perf_event_overflow(event, &data, &regs);
+
+ if (event->attr.freq && hwc->sample_period < perf_ibs->min_period)
+ hwc->sample_period = perf_ibs->min_period;
+
out:
if (throttle) {
perf_ibs_stop(event, 0);
@@ -1222,13 +1461,14 @@ static __init int perf_ibs_op_init(void)
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
- perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.cnt_mask |= (IBS_OP_MAX_CNT_EXT_MASK |
+ IBS_OP_CUR_CNT_EXT_MASK);
}
if (ibs_caps & IBS_CAPS_ZEN4)
perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
- perf_ibs_op.pmu.attr_groups = empty_attr_groups;
+ perf_ibs_op.pmu.attr_groups = op_attr_groups;
perf_ibs_op.pmu.attr_update = op_attr_update;
return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index b15f7b950d2e..f8228d8243f7 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -30,7 +30,7 @@
#define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL)
#define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL)
-#define IOMMU_NAME_SIZE 16
+#define IOMMU_NAME_SIZE 24
struct perf_amd_iommu {
struct list_head list;
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5..c06ccca96851 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -310,10 +310,6 @@ int amd_pmu_lbr_hw_config(struct perf_event *event)
{
int ret = 0;
- /* LBR is not recommended in counting mode */
- if (!is_sampling_event(event))
- return -EINVAL;
-
ret = amd_pmu_lbr_setup_filter(event);
if (!ret)
event->attach_state |= PERF_ATTACH_SCHED_CB;
@@ -375,7 +371,8 @@ void amd_pmu_lbr_del(struct perf_event *event)
perf_sched_cb_dec(event->pmu);
}
-void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -402,26 +399,23 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
void amd_pmu_lbr_disable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- u64 dbg_ctl, dbg_extn_cfg;
if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
return;
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
- wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ __amd_pmu_lbr_disable();
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 4ccb8fa483e6..49c26ce2b115 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -162,7 +162,9 @@ static int amd_uncore_add(struct perf_event *event, int flags)
/* if not, take the first available counter */
hwc->idx = -1;
for (i = 0; i < pmu->num_counters; i++) {
- if (cmpxchg(&ctx->events[i], NULL, event) == NULL) {
+ struct perf_event *tmp = NULL;
+
+ if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
hwc->idx = i;
break;
}
@@ -196,7 +198,9 @@ static void amd_uncore_del(struct perf_event *event, int flags)
event->pmu->stop(event, PERF_EF_UPDATE);
for (i = 0; i < pmu->num_counters; i++) {
- if (cmpxchg(&ctx->events[i], event, NULL) == event)
+ struct perf_event *tmp = event;
+
+ if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
break;
}
@@ -639,7 +643,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
info.split.aux_data = 0;
info.split.num_pmcs = NUM_COUNTERS_NB;
info.split.gid = 0;
- info.split.cid = topology_die_id(cpu);
+ info.split.cid = topology_logical_package_id(cpu);
if (pmu_version >= 2) {
ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
@@ -654,17 +658,20 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
struct attribute **df_attr = amd_uncore_df_format_attr;
struct amd_uncore_pmu *pmu;
+ int num_counters;
/* Run just once */
if (uncore->init_done)
return amd_uncore_ctx_init(uncore, cpu);
+ num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+ if (!num_counters)
+ goto done;
+
/* No grouping, single instance for a system */
uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
- if (!uncore->pmus) {
- uncore->num_pmus = 0;
+ if (!uncore->pmus)
goto done;
- }
/*
* For Family 17h and above, the Northbridge counters are repurposed
@@ -674,7 +681,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
pmu = &uncore->pmus[0];
strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
sizeof(pmu->name));
- pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+ pmu->num_counters = num_counters;
pmu->msr_base = MSR_F15H_NB_PERF_CTL;
pmu->rdpmc_base = RDPMC_BASE_NB;
pmu->group = amd_uncore_ctx_gid(uncore, cpu);
@@ -785,17 +792,20 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
struct attribute **l3_attr = amd_uncore_l3_format_attr;
struct amd_uncore_pmu *pmu;
+ int num_counters;
/* Run just once */
if (uncore->init_done)
return amd_uncore_ctx_init(uncore, cpu);
+ num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+ if (!num_counters)
+ goto done;
+
/* No grouping, single instance for a system */
uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
- if (!uncore->pmus) {
- uncore->num_pmus = 0;
+ if (!uncore->pmus)
goto done;
- }
/*
* For Family 17h and above, L3 cache counters are available instead
@@ -805,7 +815,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
pmu = &uncore->pmus[0];
strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
sizeof(pmu->name));
- pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+ pmu->num_counters = num_counters;
pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
pmu->rdpmc_base = RDPMC_BASE_LLC;
pmu->group = amd_uncore_ctx_gid(uncore, cpu);
@@ -893,8 +903,8 @@ void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
info.split.aux_data = ecx; /* stash active mask */
info.split.num_pmcs = ebx.split.num_umc_pmc;
- info.split.gid = topology_die_id(cpu);
- info.split.cid = topology_die_id(cpu);
+ info.split.gid = topology_logical_package_id(cpu);
+ info.split.cid = topology_logical_package_id(cpu);
*per_cpu_ptr(uncore->info, cpu) = info;
}
@@ -906,7 +916,8 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
union amd_uncore_info info;
struct amd_uncore_pmu *pmu;
- int index = 0, gid, i;
+ int gid, i;
+ u16 index = 0;
if (pmu_version < 2)
return 0;
@@ -938,7 +949,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
for (i = 0; i < group_num_pmus[gid]; i++) {
pmu = &uncore->pmus[index];
- snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
+ snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index);
pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
pmu->rdpmc_base = -1;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 09050641ce5d..6866cc5acb0b 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -41,6 +41,8 @@
#include <asm/desc.h>
#include <asm/ldt.h>
#include <asm/unwind.h>
+#include <asm/uprobes.h>
+#include <asm/ibt.h>
#include "perf_event.h"
@@ -85,13 +87,14 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling);
DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling);
DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task);
-DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
+DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup);
+
/*
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
@@ -189,29 +192,31 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
#ifdef CONFIG_X86_LOCAL_APIC
-static inline int get_possible_num_counters(void)
+static inline u64 get_possible_counter_mask(void)
{
- int i, num_counters = x86_pmu.num_counters;
+ u64 cntr_mask = x86_pmu.cntr_mask64;
+ int i;
if (!is_hybrid())
- return num_counters;
+ return cntr_mask;
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++)
- num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters);
+ cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64;
- return num_counters;
+ return cntr_mask;
}
static bool reserve_pmc_hardware(void)
{
- int i, num_counters = get_possible_num_counters();
+ u64 cntr_mask = get_possible_counter_mask();
+ int i, end;
- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail;
}
- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail;
}
@@ -219,13 +224,14 @@ static bool reserve_pmc_hardware(void)
return true;
eventsel_fail:
- for (i--; i >= 0; i--)
+ end = i;
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
release_evntsel_nmi(x86_pmu_config_addr(i));
-
- i = num_counters;
+ i = X86_PMC_IDX_MAX;
perfctr_fail:
- for (i--; i >= 0; i--)
+ end = i;
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
release_perfctr_nmi(x86_pmu_event_addr(i));
return false;
@@ -233,9 +239,10 @@ perfctr_fail:
static void release_pmc_hardware(void)
{
- int i, num_counters = get_possible_num_counters();
+ u64 cntr_mask = get_possible_counter_mask();
+ int i;
- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
release_perfctr_nmi(x86_pmu_event_addr(i));
release_evntsel_nmi(x86_pmu_config_addr(i));
}
@@ -248,7 +255,8 @@ static void release_pmc_hardware(void) {}
#endif
-bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
+bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+ unsigned long *fixed_cntr_mask)
{
u64 val, val_fail = -1, val_new= ~0;
int i, reg, reg_fail = -1, ret = 0;
@@ -259,7 +267,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
* Check to see if the BIOS enabled any of the counters, if so
* complain and bail.
*/
- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
if (ret)
@@ -273,12 +281,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
}
}
- if (num_counters_fixed) {
+ if (*(u64 *)fixed_cntr_mask) {
reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
- for (i = 0; i < num_counters_fixed; i++) {
+ for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(i, pmu))
continue;
if (val & (0x03ULL << i*4)) {
@@ -619,9 +627,9 @@ int x86_pmu_hw_config(struct perf_event *event)
event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
if (event->attr.type == event->pmu->type)
- event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+ event->hw.config |= x86_pmu_get_event_config(event);
- if (event->attr.sample_period && x86_pmu.limit_period) {
+ if (!event->attr.freq && x86_pmu.limit_period) {
s64 left = event->attr.sample_period;
x86_pmu.limit_period(event, &left);
if (left > event->attr.sample_period)
@@ -679,7 +687,7 @@ void x86_pmu_disable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
u64 val;
@@ -736,7 +744,7 @@ void x86_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask))
@@ -975,7 +983,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
- int num_counters = hybrid(cpuc->pmu, num_counters);
struct event_constraint *c;
struct perf_event *e;
int n0, i, wmin, wmax, unsched = 0;
@@ -1051,7 +1058,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/* slow path */
if (i != n) {
- int gpmax = num_counters;
+ int gpmax = x86_pmu_max_num_counters(cpuc->pmu);
/*
* Do not allow scheduling of more than half the available
@@ -1072,7 +1079,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
* the extra Merge events needed by large increment events.
*/
if (x86_pmu.flags & PMU_FL_PAIR) {
- gpmax = num_counters - cpuc->n_pair;
+ gpmax -= cpuc->n_pair;
WARN_ON(gpmax <= 0);
}
@@ -1157,12 +1164,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
*/
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
- int num_counters = hybrid(cpuc->pmu, num_counters);
- int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
struct perf_event *event;
int n, max_count;
- max_count = num_counters + num_counters_fixed;
+ max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu);
/* current number of events already accepted */
n = cpuc->n_events;
@@ -1234,8 +1239,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
fallthrough;
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
- (idx - INTEL_PMC_IDX_FIXED);
+ hwc->event_base = x86_pmu_fixed_ctr_addr(idx - INTEL_PMC_IDX_FIXED);
hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
INTEL_PMC_FIXED_RDPMC_BASE;
break;
@@ -1295,6 +1299,15 @@ static void x86_pmu_enable(struct pmu *pmu)
if (cpuc->n_added) {
int n_running = cpuc->n_events - cpuc->n_added;
+
+ /*
+ * The late setup (after counters are scheduled)
+ * is required for some cases, e.g., PEBS counters
+ * snapshotting. Because an accurate counter index
+ * is needed.
+ */
+ static_call_cond(x86_pmu_late_setup)();
+
/*
* apply assignment obtained either from
* hw_perf_group_sched_in() or x86_pmu_enable()
@@ -1519,19 +1532,22 @@ static void x86_pmu_start(struct perf_event *event, int flags)
void perf_event_print_debug(void)
{
u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+ unsigned long *cntr_mask, *fixed_cntr_mask;
+ struct event_constraint *pebs_constraints;
+ struct cpu_hw_events *cpuc;
u64 pebs, debugctl;
- int cpu = smp_processor_id();
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- int num_counters = hybrid(cpuc->pmu, num_counters);
- int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
- struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
- unsigned long flags;
- int idx;
+ int cpu, idx;
- if (!num_counters)
- return;
+ guard(irqsave)();
- local_irq_save(flags);
+ cpu = smp_processor_id();
+ cpuc = &per_cpu(cpu_hw_events, cpu);
+ cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+ fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
+ pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
+
+ if (!*(u64 *)cntr_mask)
+ return;
if (x86_pmu.version >= 2) {
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
@@ -1555,7 +1571,7 @@ void perf_event_print_debug(void)
}
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
- for (idx = 0; idx < num_counters; idx++) {
+ for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
rdmsrl(x86_pmu_event_addr(idx), pmc_count);
@@ -1568,15 +1584,14 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
cpu, idx, prev_left);
}
- for (idx = 0; idx < num_counters_fixed; idx++) {
+ for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(idx, cpuc->pmu))
continue;
- rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+ rdmsrl(x86_pmu_fixed_ctr_addr(idx), pmc_count);
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
cpu, idx, pmc_count);
}
- local_irq_restore(flags);
}
void x86_pmu_stop(struct perf_event *event, int flags)
@@ -1644,6 +1659,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ cpuc->assign[i-1] = cpuc->assign[i];
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
@@ -1681,7 +1697,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -1701,8 +1717,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
perf_sample_data_init(&data, 0, event->hw.last_period);
- if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -2023,13 +2038,14 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
- static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx);
static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
static_call_update(x86_pmu_filter, x86_pmu.filter);
+
+ static_call_update(x86_pmu_late_setup, x86_pmu.late_setup);
}
static void _x86_pmu_read(struct perf_event *event)
@@ -2037,18 +2053,15 @@ static void _x86_pmu_read(struct perf_event *event)
static_call(x86_pmu_update)(event);
}
-void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
- u64 intel_ctrl)
+void x86_pmu_show_pmu_cap(struct pmu *pmu)
{
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
- pr_info("... generic registers: %d\n", num_counters);
+ pr_info("... generic registers: %d\n", x86_pmu_num_counters(pmu));
pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
pr_info("... max period: %016Lx\n", x86_pmu.max_period);
- pr_info("... fixed-purpose events: %lu\n",
- hweight64((((1ULL << num_counters_fixed) - 1)
- << INTEL_PMC_IDX_FIXED) & intel_ctrl));
- pr_info("... event mask: %016Lx\n", intel_ctrl);
+ pr_info("... fixed-purpose events: %d\n", x86_pmu_num_counters_fixed(pmu));
+ pr_info("... event mask: %016Lx\n", hybrid(pmu, intel_ctrl));
}
static int __init init_hw_perf_events(void)
@@ -2085,7 +2098,7 @@ static int __init init_hw_perf_events(void)
pmu_check_apic();
/* sanity check that the hardware exists or is emulated */
- if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
+ if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask))
goto out_bad_pmu;
pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -2096,14 +2109,17 @@ static int __init init_hw_perf_events(void)
quirk->func();
if (!x86_pmu.intel_ctrl)
- x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+ x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
+
+ if (!x86_pmu.config_mask)
+ x86_pmu.config_mask = X86_RAW_EVENT_MASK;
perf_events_lapic_init();
register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
unconstrained = (struct event_constraint)
- __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters, 0, 0);
+ __EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64,
+ 0, x86_pmu_num_counters(NULL), 0, 0);
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
@@ -2112,11 +2128,8 @@ static int __init init_hw_perf_events(void)
pmu.attr_update = x86_pmu.attr_update;
- if (!is_hybrid()) {
- x86_pmu_show_pmu_cap(x86_pmu.num_counters,
- x86_pmu.num_counters_fixed,
- x86_pmu.intel_ctrl);
- }
+ if (!is_hybrid())
+ x86_pmu_show_pmu_cap(NULL);
if (!x86_pmu.read)
x86_pmu.read = _x86_pmu_read;
@@ -2480,10 +2493,10 @@ void perf_clear_dirty_counters(void)
for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
if (i >= INTEL_PMC_IDX_FIXED) {
/* Metrics and fake events don't have corresponding HW counters. */
- if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+ if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
continue;
- wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+ wrmsrl(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0);
} else {
wrmsrl(x86_pmu_event_addr(i), 0);
}
@@ -2546,6 +2559,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
struct device_attribute *attr,
const char *buf, size_t count)
{
+ static DEFINE_MUTEX(rdpmc_mutex);
unsigned long val;
ssize_t ret;
@@ -2559,6 +2573,8 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
if (x86_pmu.attr_rdpmc_broken)
return -ENOTSUPP;
+ guard(mutex)(&rdpmc_mutex);
+
if (val != x86_pmu.attr_rdpmc) {
/*
* Changing into or out of never available or always available,
@@ -2620,15 +2636,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
};
-static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
- static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in);
-}
-
-static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
- struct perf_event_pmu_context *next_epc)
-{
- static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc);
+ static_call_cond(x86_pmu_sched_task)(pmu_ctx, task, sched_in);
}
void perf_check_microcode(void)
@@ -2695,7 +2706,6 @@ static struct pmu pmu = {
.event_idx = x86_pmu_event_idx,
.sched_task = x86_pmu_sched_task,
- .swap_task_ctx = x86_pmu_swap_task_ctx,
.check_period = x86_pmu_check_period,
.aux_output_match = x86_pmu_aux_output_match,
@@ -2812,6 +2822,46 @@ static unsigned long get_segment_base(unsigned int segment)
return get_desc_base(desc);
}
+#ifdef CONFIG_UPROBES
+/*
+ * Heuristic-based check if uprobe is installed at the function entry.
+ *
+ * Under assumption of user code being compiled with frame pointers,
+ * `push %rbp/%ebp` is a good indicator that we indeed are.
+ *
+ * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
+ * If we get this wrong, captured stack trace might have one extra bogus
+ * entry, but the rest of stack trace will still be meaningful.
+ */
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+ struct arch_uprobe *auprobe;
+
+ if (!current->utask)
+ return false;
+
+ auprobe = current->utask->auprobe;
+ if (!auprobe)
+ return false;
+
+ /* push %rbp/%ebp */
+ if (auprobe->insn[0] == 0x55)
+ return true;
+
+ /* endbr64 (64-bit only) */
+ if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
+ return true;
+
+ return false;
+}
+
+#else
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* CONFIG_UPROBES */
+
#ifdef CONFIG_IA32_EMULATION
#include <linux/compat.h>
@@ -2823,6 +2873,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
unsigned long ss_base, cs_base;
struct stack_frame_ia32 frame;
const struct stack_frame_ia32 __user *fp;
+ u32 ret_addr;
if (user_64bit_mode(regs))
return 0;
@@ -2832,6 +2883,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
fp = compat_ptr(ss_base + regs->bp);
pagefault_disable();
+
+ /* see perf_callchain_user() below for why we do this */
+ if (is_uprobe_at_func_entry(regs) &&
+ !get_user(ret_addr, (const u32 __user *)regs->sp))
+ perf_callchain_store(entry, ret_addr);
+
while (entry->nr < entry->max_stack) {
if (!valid_user_frame(fp, sizeof(frame)))
break;
@@ -2860,6 +2917,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
{
struct stack_frame frame;
const struct stack_frame __user *fp;
+ unsigned long ret_addr;
if (perf_guest_state()) {
/* TODO: We don't support guest os callchain now */
@@ -2883,6 +2941,19 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
return;
pagefault_disable();
+
+ /*
+ * If we are called from uprobe handler, and we are indeed at the very
+ * entry to user function (which is normally a `push %rbp` instruction,
+ * under assumption of application being compiled with frame pointers),
+ * we should read return address from *regs->sp before proceeding
+ * to follow frame pointers, otherwise we'll skip immediate caller
+ * as %rbp is not yet setup.
+ */
+ if (is_uprobe_at_func_entry(regs) &&
+ !get_user(ret_addr, (const unsigned long __user *)regs->sp))
+ perf_callchain_store(entry, ret_addr);
+
while (entry->nr < entry->max_stack) {
if (!valid_user_frame(fp, sizeof(frame)))
break;
@@ -2936,35 +3007,57 @@ static unsigned long code_segment_base(struct pt_regs *regs)
return 0;
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{
- if (perf_guest_state())
- return perf_guest_get_ip();
-
return regs->ip + code_segment_base(regs);
}
-unsigned long perf_misc_flags(struct pt_regs *regs)
+static unsigned long common_misc_flags(struct pt_regs *regs)
{
- unsigned int guest_state = perf_guest_state();
- int misc = 0;
+ if (regs->flags & PERF_EFLAGS_EXACT)
+ return PERF_RECORD_MISC_EXACT_IP;
- if (guest_state) {
- if (guest_state & PERF_GUEST_USER)
- misc |= PERF_RECORD_MISC_GUEST_USER;
- else
- misc |= PERF_RECORD_MISC_GUEST_KERNEL;
- } else {
- if (user_mode(regs))
- misc |= PERF_RECORD_MISC_USER;
- else
- misc |= PERF_RECORD_MISC_KERNEL;
- }
+ return 0;
+}
- if (regs->flags & PERF_EFLAGS_EXACT)
- misc |= PERF_RECORD_MISC_EXACT_IP;
+static unsigned long guest_misc_flags(struct pt_regs *regs)
+{
+ unsigned long guest_state = perf_guest_state();
+
+ if (!(guest_state & PERF_GUEST_ACTIVE))
+ return 0;
+
+ if (guest_state & PERF_GUEST_USER)
+ return PERF_RECORD_MISC_GUEST_USER;
+ else
+ return PERF_RECORD_MISC_GUEST_KERNEL;
+
+}
+
+static unsigned long host_misc_flags(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return PERF_RECORD_MISC_USER;
+ else
+ return PERF_RECORD_MISC_KERNEL;
+}
+
+unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
+{
+ unsigned long flags = common_misc_flags(regs);
+
+ flags |= guest_misc_flags(regs);
+
+ return flags;
+}
+
+unsigned long perf_arch_misc_flags(struct pt_regs *regs)
+{
+ unsigned long flags = common_misc_flags(regs);
+
+ flags |= host_misc_flags(regs);
- return misc;
+ return flags;
}
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
@@ -2982,8 +3075,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
* base PMU holds the correct number of counters for P-cores.
*/
cap->version = x86_pmu.version;
- cap->num_counters_gp = x86_pmu.num_counters;
- cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+ cap->num_counters_gp = x86_pmu_num_counters(NULL);
+ cap->num_counters_fixed = x86_pmu_num_counters_fixed(NULL);
cap->bit_width_gp = x86_pmu.cntval_bits;
cap->bit_width_fixed = x86_pmu.cntval_bits;
cap->events_mask = (unsigned int)x86_pmu.events_maskl;
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 974e917e65b2..a95e6c91c4d7 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -36,7 +36,7 @@ enum {
BTS_STATE_ACTIVE,
};
-static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
+static struct bts_ctx __percpu *bts_ctx;
#define BTS_RECORD_SIZE 24
#define BTS_SAFETY_MARGIN 4080
@@ -58,7 +58,7 @@ struct bts_buffer {
local_t head;
unsigned long end;
void **data_pages;
- struct bts_phys buf[];
+ struct bts_phys buf[] __counted_by(nr_bufs);
};
static struct pmu bts_pmu;
@@ -231,7 +231,7 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
static void __bts_event_start(struct perf_event *event)
{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
struct bts_buffer *buf = perf_get_aux(&bts->handle);
u64 config = 0;
@@ -260,7 +260,7 @@ static void __bts_event_start(struct perf_event *event)
static void bts_event_start(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
struct bts_buffer *buf;
buf = perf_aux_output_begin(&bts->handle, event);
@@ -290,7 +290,7 @@ fail_stop:
static void __bts_event_stop(struct perf_event *event, int state)
{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
/* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
WRITE_ONCE(bts->state, state);
@@ -305,7 +305,7 @@ static void __bts_event_stop(struct perf_event *event, int state)
static void bts_event_stop(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
struct bts_buffer *buf = NULL;
int state = READ_ONCE(bts->state);
@@ -338,9 +338,14 @@ static void bts_event_stop(struct perf_event *event, int flags)
void intel_bts_enable_local(void)
{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
- int state = READ_ONCE(bts->state);
+ struct bts_ctx *bts;
+ int state;
+
+ if (!bts_ctx)
+ return;
+ bts = this_cpu_ptr(bts_ctx);
+ state = READ_ONCE(bts->state);
/*
* Here we transition from INACTIVE to ACTIVE;
* if we instead are STOPPED from the interrupt handler,
@@ -358,7 +363,12 @@ void intel_bts_enable_local(void)
void intel_bts_disable_local(void)
{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_ctx *bts;
+
+ if (!bts_ctx)
+ return;
+
+ bts = this_cpu_ptr(bts_ctx);
/*
* Here we transition from ACTIVE to INACTIVE;
@@ -450,12 +460,17 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
int intel_bts_interrupt(void)
{
struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
- struct perf_event *event = bts->handle.event;
+ struct bts_ctx *bts;
+ struct perf_event *event;
struct bts_buffer *buf;
s64 old_head;
int err = -ENOSPC, handled = 0;
+ if (!bts_ctx)
+ return 0;
+
+ bts = this_cpu_ptr(bts_ctx);
+ event = bts->handle.event;
/*
* The only surefire way of knowing if this NMI is ours is by checking
* the write ptr against the PMI threshold.
@@ -518,7 +533,7 @@ static void bts_event_del(struct perf_event *event, int mode)
static int bts_event_add(struct perf_event *event, int mode)
{
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
@@ -557,12 +572,9 @@ static int bts_event_init(struct perf_event *event)
* disabled, so disallow intel_bts driver for unprivileged
* users on paranoid systems since it provides trace data
* to the user in a zero-copy fashion.
- *
- * Note that the default paranoia setting permits unprivileged
- * users to profile the kernel.
*/
if (event->attr.exclude_kernel) {
- ret = perf_allow_kernel(&event->attr);
+ ret = perf_allow_kernel();
if (ret)
return ret;
}
@@ -608,6 +620,10 @@ static __init int bts_init(void)
return -ENODEV;
}
+ bts_ctx = alloc_percpu(struct bts_ctx);
+ if (!bts_ctx)
+ return -ENOMEM;
+
bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
PERF_PMU_CAP_EXCLUSIVE;
bts_pmu.task_ctx_nr = perf_sw_context;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 768d1414897f..09d2d66c9f21 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -220,6 +220,17 @@ static struct event_constraint intel_grt_event_constraints[] __read_mostly = {
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_skt_event_constraints[] __read_mostly = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
+ FIXED_EVENT_CONSTRAINT(0x0073, 4), /* TOPDOWN_BAD_SPECULATION.ALL */
+ FIXED_EVENT_CONSTRAINT(0x019c, 5), /* TOPDOWN_FE_BOUND.ALL */
+ FIXED_EVENT_CONSTRAINT(0x02c2, 6), /* TOPDOWN_RETIRING.ALL */
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -370,6 +381,59 @@ static struct extra_reg intel_rwc_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
+static struct event_constraint intel_lnc_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),
+
+ INTEL_EVENT_CONSTRAINT(0x20, 0xf),
+
+ INTEL_UEVENT_CONSTRAINT(0x012a, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x012b, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x0148, 0x4),
+ INTEL_UEVENT_CONSTRAINT(0x0175, 0x4),
+
+ INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff),
+ INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff),
+
+ INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
+ INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
+ INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
+ INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
+ INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1),
+ INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8),
+ INTEL_UEVENT_CONSTRAINT(0x01cd, 0x3fc),
+ INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3),
+
+ INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
+
+ INTEL_UEVENT_CONSTRAINT(0x00e0, 0xf),
+
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_lnc_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0xfffffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0xfffffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE),
+ INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+ INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE),
+ INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+ EVENT_EXTRA_END
+};
+
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@@ -2650,7 +2714,7 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
* modify by a NMI. PMU has to be disabled before calling this function.
*/
-static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
+static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *other;
@@ -2658,13 +2722,24 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
bool reset = true;
int idx;
- /* read Fixed counter 3 */
- rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
- if (!slots)
- return 0;
+ if (!val) {
+ /* read Fixed counter 3 */
+ rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
+ if (!slots)
+ return 0;
- /* read PERF_METRICS */
- rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+ /* read PERF_METRICS */
+ rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+ } else {
+ slots = val[0];
+ metrics = val[1];
+ /*
+ * Don't reset the PERF_METRICS and Fixed counter 3
+ * for each PEBS record read. Utilize the RDPMC metrics
+ * clear mode.
+ */
+ reset = false;
+ }
for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
if (!is_topdown_idx(idx))
@@ -2707,36 +2782,47 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
return slots;
}
-static u64 icl_update_topdown_event(struct perf_event *event)
+static u64 icl_update_topdown_event(struct perf_event *event, u64 *val)
{
return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
- x86_pmu.num_topdown_events - 1);
+ x86_pmu.num_topdown_events - 1,
+ val);
}
-DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
+DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update);
-static void intel_pmu_read_topdown_event(struct perf_event *event)
+static void intel_pmu_read_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN) ||
+ is_pebs_counter_event_group(event)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ bool pmu_enabled = cpuc->enabled;
- /* Only need to call update_topdown_event() once for group read. */
- if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
- !is_slots_event(event))
- return;
+ /* Only need to call update_topdown_event() once for group read. */
+ if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ))
+ return;
- perf_pmu_disable(event->pmu);
- static_call(intel_pmu_update_topdown_event)(event);
- perf_pmu_enable(event->pmu);
-}
+ cpuc->enabled = 0;
+ if (pmu_enabled)
+ intel_pmu_disable_all();
-static void intel_pmu_read_event(struct perf_event *event)
-{
- if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
- intel_pmu_auto_reload_read(event);
- else if (is_topdown_count(event))
- intel_pmu_read_topdown_event(event);
- else
- x86_perf_event_update(event);
+ /*
+ * If the PEBS counters snapshotting is enabled,
+ * the topdown event is available in PEBS records.
+ */
+ if (is_topdown_event(event) && !is_pebs_counter_event_group(event))
+ static_call(intel_pmu_update_topdown_event)(event, NULL);
+ else
+ intel_pmu_drain_pebs_buffer();
+
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
+ intel_pmu_enable_all(0);
+
+ return;
+ }
+
+ x86_perf_event_update(event);
}
static void intel_pmu_enable_fixed(struct perf_event *event)
@@ -2756,6 +2842,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
return;
idx = INTEL_PMC_IDX_FIXED_SLOTS;
+
+ if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR)
+ bits |= INTEL_FIXED_3_METRICS_CLEAR;
}
intel_set_masks(event, idx);
@@ -2865,7 +2954,7 @@ static int intel_pmu_set_period(struct perf_event *event)
static u64 intel_pmu_update(struct perf_event *event)
{
if (unlikely(is_topdown_count(event)))
- return static_call(intel_pmu_update_topdown_event)(event);
+ return static_call(intel_pmu_update_topdown_event)(event, NULL);
return x86_perf_event_update(event);
}
@@ -2874,26 +2963,26 @@ static void intel_pmu_reset(void)
{
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
- int num_counters = hybrid(cpuc->pmu, num_counters);
+ unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+ unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
unsigned long flags;
int idx;
- if (!num_counters)
+ if (!*(u64 *)cntr_mask)
return;
local_irq_save(flags);
pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
- for (idx = 0; idx < num_counters; idx++) {
+ for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) {
wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
}
- for (idx = 0; idx < num_counters_fixed; idx++) {
+ for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
if (fixed_counter_disabled(idx, cpuc->pmu))
continue;
- wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+ wrmsrl_safe(x86_pmu_fixed_ctr_addr(idx), 0ull);
}
if (ds)
@@ -2940,8 +3029,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
!guest_pebs_idxs)
return;
- for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
- INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
+ for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) {
event = cpuc->events[bit];
if (!event->attr.precise_ip)
continue;
@@ -3004,7 +3092,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
handled++;
x86_pmu_handle_guest_pebs(regs, &data);
- x86_pmu.drain_pebs(regs, &data);
+ static_call(x86_pmu_drain_pebs)(regs, &data);
status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
/*
@@ -3032,7 +3120,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
handled++;
- static_call(intel_pmu_update_topdown_event)(NULL);
+ static_call(intel_pmu_update_topdown_event)(NULL, NULL);
}
/*
@@ -3050,6 +3138,27 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
if (!test_bit(bit, cpuc->active_mask))
continue;
+ /*
+ * There may be unprocessed PEBS records in the PEBS buffer,
+ * which still stores the previous values.
+ * Process those records first before handling the latest value.
+ * For example,
+ * A is a regular counter
+ * B is a PEBS event which reads A
+ * C is a PEBS event
+ *
+ * The following can happen:
+ * B-assist A=1
+ * C A=2
+ * B-assist A=3
+ * A-overflow-PMI A=4
+ * C-assist-PMI (PEBS buffer) A=5
+ *
+ * The PEBS buffer has to be drained before handling the A-PMI
+ */
+ if (is_pebs_counter_event_group(event))
+ x86_pmu.drain_pebs(regs, &data);
+
if (!intel_pmu_save_and_restart(event))
continue;
@@ -3886,6 +3995,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
}
+static u64 intel_pmu_freq_start_period(struct perf_event *event)
+{
+ int type = event->attr.type;
+ u64 config, factor;
+ s64 start;
+
+ /*
+ * The 127 is the lowest possible recommended SAV (sample after value)
+ * for a 4000 freq (default freq), according to the event list JSON file.
+ * Also, assume the workload is idle 50% time.
+ */
+ factor = 64 * 4000;
+ if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
+ goto end;
+
+ /*
+ * The estimation of the start period in the freq mode is
+ * based on the below assumption.
+ *
+ * For a cycles or an instructions event, 1GHZ of the
+ * underlying platform, 1 IPC. The workload is idle 50% time.
+ * The start period = 1,000,000,000 * 1 / freq / 2.
+ * = 500,000,000 / freq
+ *
+ * Usually, the branch-related events occur less than the
+ * instructions event. According to the Intel event list JSON
+ * file, the SAV (sample after value) of a branch-related event
+ * is usually 1/4 of an instruction event.
+ * The start period of branch-related events = 125,000,000 / freq.
+ *
+ * The cache-related events occurs even less. The SAV is usually
+ * 1/20 of an instruction event.
+ * The start period of cache-related events = 25,000,000 / freq.
+ */
+ config = event->attr.config & PERF_HW_EVENT_MASK;
+ if (type == PERF_TYPE_HARDWARE) {
+ switch (config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ case PERF_COUNT_HW_INSTRUCTIONS:
+ case PERF_COUNT_HW_BUS_CYCLES:
+ case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
+ case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
+ case PERF_COUNT_HW_REF_CPU_CYCLES:
+ factor = 500000000;
+ break;
+ case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
+ case PERF_COUNT_HW_BRANCH_MISSES:
+ factor = 125000000;
+ break;
+ case PERF_COUNT_HW_CACHE_REFERENCES:
+ case PERF_COUNT_HW_CACHE_MISSES:
+ factor = 25000000;
+ break;
+ default:
+ goto end;
+ }
+ }
+
+ if (type == PERF_TYPE_HW_CACHE)
+ factor = 25000000;
+end:
+ /*
+ * Usually, a prime or a number with less factors (close to prime)
+ * is chosen as an SAV, which makes it less likely that the sampling
+ * period synchronizes with some periodic event in the workload.
+ * Minus 1 to make it at least avoiding values near power of twos
+ * for the default freq.
+ */
+ start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1;
+
+ if (start > x86_pmu.max_period)
+ start = x86_pmu.max_period;
+
+ if (x86_pmu.limit_period)
+ x86_pmu.limit_period(event, &start);
+
+ return start;
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -3897,14 +4085,20 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ if (event->attr.freq && event->attr.sample_freq) {
+ event->hw.sample_period = intel_pmu_freq_start_period(event);
+ event->hw.last_period = event->hw.sample_period;
+ local64_set(&event->hw.period_left, event->hw.sample_period);
+ }
+
if (event->attr.precise_ip) {
if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
return -EINVAL;
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
- if (!(event->attr.sample_type &
- ~intel_pmu_large_pebs_flags(event))) {
+ if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event)) &&
+ !has_aux_action(event)) {
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
event->attach_state |= PERF_ATTACH_SCHED_CB;
}
@@ -3913,8 +4107,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
x86_pmu.pebs_aliases(event);
}
- if (needs_branch_stack(event) && is_sampling_event(event))
- event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+ if (needs_branch_stack(event)) {
+ /* Avoid branch stack setup for counting events in SAMPLE READ */
+ if (is_sampling_event(event) ||
+ !(event->attr.sample_type & PERF_SAMPLE_READ))
+ event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+ }
if (branch_sample_counters(event)) {
struct perf_event *leader, *sibling;
@@ -3993,6 +4191,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
}
+ if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
+ (x86_pmu.intel_cap.pebs_format >= 6) &&
+ x86_pmu.intel_cap.pebs_baseline &&
+ is_sampling_event(event) &&
+ event->attr.precise_ip)
+ event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+
if ((event->attr.type == PERF_TYPE_HARDWARE) ||
(event->attr.type == PERF_TYPE_HW_CACHE))
return 0;
@@ -4008,7 +4213,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
* is used in a metrics group, it too cannot support sampling.
*/
if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
- if (event->attr.config1 || event->attr.config2)
+ /* The metrics_clear can only be set for the slots event */
+ if (event->attr.config1 &&
+ (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR)))
+ return -EINVAL;
+
+ if (event->attr.config2)
return -EINVAL;
/*
@@ -4087,7 +4297,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (x86_pmu.version < 3)
return -EINVAL;
- ret = perf_allow_cpu(&event->attr);
+ ret = perf_allow_cpu();
if (ret)
return ret;
@@ -4199,7 +4409,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx];
arr[idx].msr = x86_pmu_config_addr(idx);
@@ -4217,7 +4427,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
}
- *nr = x86_pmu.num_counters;
+ *nr = x86_pmu_max_num_counters(cpuc->pmu);
return arr;
}
@@ -4232,7 +4442,7 @@ static void core_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask) ||
@@ -4525,9 +4735,50 @@ static int adl_hw_config(struct perf_event *event)
return -EOPNOTSUPP;
}
-static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void)
+static enum intel_cpu_type adl_get_hybrid_cpu_type(void)
{
- return HYBRID_INTEL_CORE;
+ return INTEL_CPU_TYPE_CORE;
+}
+
+static inline bool erratum_hsw11(struct perf_event *event)
+{
+ return (event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+ X86_CONFIG(.event=0xc0, .umask=0x01);
+}
+
+static struct event_constraint *
+arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_tiny)
+ return cmt_get_event_constraints(cpuc, idx, event);
+
+ return mtl_get_event_constraints(cpuc, idx, event);
+}
+
+static int arl_h_hw_config(struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_tiny)
+ return intel_pmu_hw_config(event);
+
+ return adl_hw_config(event);
+}
+
+/*
+ * The HSW11 requires a period larger than 100 which is the same as the BDM11.
+ * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
+ *
+ * The message 'interrupt took too long' can be observed on any counter which
+ * was armed with a period < 32 and two events expired in the same NMI.
+ * A minimum period of 32 is enforced for the rest of the events.
+ */
+static void hsw_limit_period(struct perf_event *event, s64 *left)
+{
+ *left = max(*left, erratum_hsw11(event) ? 128 : 32);
}
/*
@@ -4547,8 +4798,7 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void)
*/
static void bdw_limit_period(struct perf_event *event, s64 *left)
{
- if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
- X86_CONFIG(.event=0xc0, .umask=0x01)) {
+ if (erratum_hsw11(event)) {
if (*left < 128)
*left = 128;
*left &= ~0x3fULL;
@@ -4573,8 +4823,65 @@ PMU_FORMAT_ATTR(pc, "config:19" );
PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );
-PMU_FORMAT_ATTR(in_tx, "config:32");
-PMU_FORMAT_ATTR(in_tx_cp, "config:33");
+PMU_FORMAT_ATTR(in_tx, "config:32" );
+PMU_FORMAT_ATTR(in_tx_cp, "config:33" );
+PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */
+
+PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+
+static ssize_t umask2_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ u64 mask = hybrid(dev_get_drvdata(dev), config_mask) & ARCH_PERFMON_EVENTSEL_UMASK2;
+
+ if (mask == ARCH_PERFMON_EVENTSEL_UMASK2)
+ return sprintf(page, "config:8-15,40-47\n");
+
+ /* Roll back to the old format if umask2 is not supported. */
+ return sprintf(page, "config:8-15\n");
+}
+
+static struct device_attribute format_attr_umask2 =
+ __ATTR(umask, 0444, umask2_show, NULL);
+
+static struct attribute *format_evtsel_ext_attrs[] = {
+ &format_attr_umask2.attr,
+ &format_attr_eq.attr,
+ &format_attr_metrics_clear.attr,
+ NULL
+};
+
+static umode_t
+evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ u64 mask;
+
+ /*
+ * The umask and umask2 have different formats but share the
+ * same attr name. In update mode, the previous value of the
+ * umask is unconditionally removed before is_visible. If
+ * umask2 format is not enumerated, it's impossible to roll
+ * back to the old format.
+ * Does the check in umask2_show rather than is_visible.
+ */
+ if (i == 0)
+ return attr->mode;
+
+ mask = hybrid(dev_get_drvdata(dev), config_mask);
+ if (i == 1)
+ return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
+
+ /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+ if (i == 2) {
+ union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap);
+
+ return intel_cap.rdpmc_metrics_clear ? attr->mode : 0;
+ }
+
+ return 0;
+}
static struct attribute *intel_arch_formats_attr[] = {
&format_attr_event.attr,
@@ -4684,13 +4991,33 @@ static void flip_smm_bit(void *data)
}
}
-static void intel_pmu_check_num_counters(int *num_counters,
- int *num_counters_fixed,
- u64 *intel_ctrl, u64 fixed_mask);
+static void intel_pmu_check_counters_mask(u64 *cntr_mask,
+ u64 *fixed_cntr_mask,
+ u64 *intel_ctrl)
+{
+ unsigned int bit;
+
+ bit = fls64(*cntr_mask);
+ if (bit > INTEL_PMC_MAX_GENERIC) {
+ WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+ bit, INTEL_PMC_MAX_GENERIC);
+ *cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
+ }
+ *intel_ctrl = *cntr_mask;
+
+ bit = fls64(*fixed_cntr_mask);
+ if (bit > INTEL_PMC_MAX_FIXED) {
+ WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+ bit, INTEL_PMC_MAX_FIXED);
+ *fixed_cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
+ }
+
+ *intel_ctrl |= *fixed_cntr_mask << INTEL_PMC_IDX_FIXED;
+}
static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
- int num_counters,
- int num_counters_fixed,
+ u64 cntr_mask,
+ u64 fixed_cntr_mask,
u64 intel_ctrl);
static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
@@ -4698,8 +5025,8 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
static inline bool intel_pmu_broken_perf_cap(void)
{
/* The Perf Metric (Bit 15) is always cleared */
- if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) ||
- (boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L))
+ if (boot_cpu_data.x86_vfm == INTEL_METEORLAKE ||
+ boot_cpu_data.x86_vfm == INTEL_METEORLAKE_L)
return true;
return false;
@@ -4707,17 +5034,24 @@ static inline bool intel_pmu_broken_perf_cap(void)
static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
{
- unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);
- unsigned int eax, ebx, ecx, edx;
+ unsigned int cntr, fixed_cntr, ecx, edx;
+ union cpuid35_eax eax;
+ union cpuid35_ebx ebx;
+
+ cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);
- if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
+ if (ebx.split.umask2)
+ pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2;
+ if (ebx.split.eq)
+ pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ;
+
+ if (eax.split.cntr_subleaf) {
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
- &eax, &ebx, &ecx, &edx);
- pmu->num_counters = fls(eax);
- pmu->num_counters_fixed = fls(ebx);
+ &cntr, &fixed_cntr, &ecx, &edx);
+ pmu->cntr_mask64 = cntr;
+ pmu->fixed_cntr_mask64 = fixed_cntr;
}
-
if (!intel_pmu_broken_perf_cap()) {
/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
@@ -4726,26 +5060,21 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
{
- intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
- &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
- pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+ intel_pmu_check_counters_mask(&pmu->cntr_mask64, &pmu->fixed_cntr_mask64,
+ &pmu->intel_ctrl);
+ pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
pmu->unconstrained = (struct event_constraint)
- __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
- 0, pmu->num_counters, 0, 0);
+ __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+ 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
if (pmu->intel_cap.perf_metrics)
pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
else
pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
- if (pmu->intel_cap.pebs_output_pt_available)
- pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
- else
- pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;
-
intel_pmu_check_event_constraints(pmu->event_constraints,
- pmu->num_counters,
- pmu->num_counters_fixed,
+ pmu->cntr_mask64,
+ pmu->fixed_cntr_mask64,
pmu->intel_ctrl);
intel_pmu_check_extra_regs(pmu->extra_regs);
@@ -4753,7 +5082,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
{
- u8 cpu_type = get_this_hybrid_cpu_type();
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ enum intel_cpu_type cpu_type = c->topo.intel_type;
int i;
/*
@@ -4762,7 +5092,7 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
* on it. There should be a fixup function provided for these
* troublesome CPUs (->get_hybrid_cpu_type).
*/
- if (cpu_type == HYBRID_INTEL_NONE) {
+ if (cpu_type == INTEL_CPU_TYPE_UNKNOWN) {
if (x86_pmu.get_hybrid_cpu_type)
cpu_type = x86_pmu.get_hybrid_cpu_type();
else
@@ -4771,17 +5101,26 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
/*
* This essentially just maps between the 'hybrid_cpu_type'
- * and 'hybrid_pmu_type' enums:
+ * and 'hybrid_pmu_type' enums except for ARL-H processor
+ * which needs to compare atom uarch native id since ARL-H
+ * contains two different atom uarchs.
*/
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
+ u32 native_id;
- if (cpu_type == HYBRID_INTEL_CORE &&
- pmu_type == hybrid_big)
- return &x86_pmu.hybrid_pmu[i];
- if (cpu_type == HYBRID_INTEL_ATOM &&
- pmu_type == hybrid_small)
+ if (cpu_type == INTEL_CPU_TYPE_CORE && pmu_type == hybrid_big)
return &x86_pmu.hybrid_pmu[i];
+ if (cpu_type == INTEL_CPU_TYPE_ATOM) {
+ if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small)
+ return &x86_pmu.hybrid_pmu[i];
+
+ native_id = c->topo.intel_native_model_id;
+ if (native_id == INTEL_ATOM_SKT_NATIVE_ID && pmu_type == hybrid_small)
+ return &x86_pmu.hybrid_pmu[i];
+ if (native_id == INTEL_ATOM_CMT_NATIVE_ID && pmu_type == hybrid_tiny)
+ return &x86_pmu.hybrid_pmu[i];
+ }
}
return NULL;
@@ -4806,18 +5145,14 @@ static bool init_hybrid_pmu(int cpu)
intel_pmu_check_hybrid_pmus(pmu);
- if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
+ if (!check_hw_exists(&pmu->pmu, pmu->cntr_mask, pmu->fixed_cntr_mask))
return false;
pr_info("%s PMU driver: ", pmu->name);
- if (pmu->intel_cap.pebs_output_pt_available)
- pr_cont("PEBS-via-PT ");
-
pr_cont("\n");
- x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed,
- pmu->intel_ctrl);
+ x86_pmu_show_pmu_cap(&pmu->pmu);
end:
cpumask_set_cpu(cpu, &pmu->supported_cpus);
@@ -4837,8 +5172,11 @@ static void intel_pmu_cpu_starting(int cpu)
init_debug_store_on_cpu(cpu);
/*
- * Deal with CPUs that don't clear their LBRs on power-up.
+ * Deal with CPUs that don't clear their LBRs on power-up, and that may
+ * even boot with LBRs enabled.
*/
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr)
+ msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT);
intel_pmu_lbr_reset();
cpuc->lbr_sel = NULL;
@@ -4957,16 +5295,10 @@ static void intel_pmu_cpu_dead(int cpu)
}
static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
- bool sched_in)
+ struct task_struct *task, bool sched_in)
{
intel_pmu_pebs_sched_task(pmu_ctx, sched_in);
- intel_pmu_lbr_sched_task(pmu_ctx, sched_in);
-}
-
-static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
- struct perf_event_pmu_context *next_epc)
-{
- intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc);
+ intel_pmu_lbr_sched_task(pmu_ctx, task, sched_in);
}
static int intel_pmu_check_period(struct perf_event *event, u64 value)
@@ -5058,6 +5390,7 @@ static __initconst const struct x86_pmu core_pmu = {
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .fixedctr = MSR_ARCH_PERFMON_FIXED_CTR0,
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
@@ -5111,6 +5444,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+ .fixedctr = MSR_ARCH_PERFMON_FIXED_CTR0,
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
@@ -5135,7 +5469,6 @@ static __initconst const struct x86_pmu intel_pmu = {
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
- .swap_task_ctx = intel_pmu_swap_task_ctx,
.check_period = intel_pmu_check_period,
@@ -5186,42 +5519,32 @@ static __init void intel_clovertown_quirk(void)
x86_pmu.pebs_constraints = NULL;
}
-static const struct x86_cpu_desc isolation_ucodes[] = {
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL, 3, 0x0000001f),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L, 1, 0x0000001e),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G, 1, 0x00000015),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL, 4, 0x00000023),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G, 1, 0x00000014),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 2, 0x00000010),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 9, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 10, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 11, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 12, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 10, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 11, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 12, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 13, 0x0000004e),
+static const struct x86_cpu_id isolation_ucodes[] = {
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL, 3, 3, 0x0000001f),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_L, 1, 1, 0x0000001e),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_G, 1, 1, 0x00000015),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 2, 2, 0x00000037),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 4, 4, 0x0000000a),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL, 4, 4, 0x00000023),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_G, 1, 1, 0x00000014),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 2, 2, 0x00000010),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 3, 3, 0x07000009),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 4, 4, 0x0f000009),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 5, 5, 0x0e000002),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_X, 1, 1, 0x0b000014),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 3, 3, 0x00000021),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 4, 7, 0x00000000),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 11, 11, 0x00000000),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_L, 3, 3, 0x0000007c),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE, 3, 3, 0x0000007c),
+ X86_MATCH_VFM_STEPS(INTEL_KABYLAKE, 9, 13, 0x0000004e),
+ X86_MATCH_VFM_STEPS(INTEL_KABYLAKE_L, 9, 12, 0x0000004e),
{}
};
static void intel_check_pebs_isolation(void)
{
- x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes);
+ x86_pmu.pebs_no_isolation = !x86_match_min_microcode_rev(isolation_ucodes);
}
static __init void intel_pebs_isolation_quirk(void)
@@ -5231,16 +5554,16 @@ static __init void intel_pebs_isolation_quirk(void)
intel_check_pebs_isolation();
}
-static const struct x86_cpu_desc pebs_ucodes[] = {
- INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE, 7, 0x00000028),
- INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618),
- INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 7, 0x0000070c),
+static const struct x86_cpu_id pebs_ucodes[] = {
+ X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE, 7, 7, 0x00000028),
+ X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X, 6, 6, 0x00000618),
+ X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X, 7, 7, 0x0000070c),
{}
};
static bool intel_snb_pebs_broken(void)
{
- return !x86_cpu_has_min_microcode_rev(pebs_ucodes);
+ return !x86_match_min_microcode_rev(pebs_ucodes);
}
static void intel_snb_check_microcode(void)
@@ -5645,18 +5968,11 @@ lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
static char pmu_name_str[30];
-static ssize_t pmu_name_show(struct device *cdev,
- struct device_attribute *attr,
- char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str);
-}
-
-static DEVICE_ATTR_RO(pmu_name);
+static DEVICE_STRING_ATTR_RO(pmu_name, 0444, pmu_name_str);
static struct attribute *intel_pmu_caps_attrs[] = {
- &dev_attr_pmu_name.attr,
- NULL
+ &dev_attr_pmu_name.attr.attr,
+ NULL
};
static DEVICE_ATTR(allow_tsx_force_abort, 0644,
@@ -5705,8 +6021,22 @@ exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
return x86_pmu.version >= 2 ? attr->mode : 0;
}
+static umode_t
+td_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ /*
+ * Hide the perf metrics topdown events
+ * if the feature is not enumerated.
+ */
+ if (x86_pmu.num_topdown_events)
+ return x86_pmu.intel_cap.perf_metrics ? attr->mode : 0;
+
+ return attr->mode;
+}
+
static struct attribute_group group_events_td = {
.name = "events",
+ .is_visible = td_is_visible,
};
static struct attribute_group group_events_mem = {
@@ -5740,6 +6070,12 @@ static struct attribute_group group_format_extra_skl = {
.is_visible = exra_is_visible,
};
+static struct attribute_group group_format_evtsel_ext = {
+ .name = "format",
+ .attrs = format_evtsel_ext_attrs,
+ .is_visible = evtsel_ext_is_visible,
+};
+
static struct attribute_group group_default = {
.attrs = intel_pmu_attrs,
.is_visible = default_is_visible,
@@ -5753,6 +6089,7 @@ static const struct attribute_group *attr_update[] = {
&group_caps_lbr,
&group_format_extra,
&group_format_extra_skl,
+ &group_format_evtsel_ext,
&group_default,
NULL,
};
@@ -5780,6 +6117,54 @@ static struct attribute *adl_hybrid_events_attrs[] = {
NULL,
};
+EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_lnl, "event=0xc2,umask=0x02;event=0x00,umask=0x80", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_lnl, "event=0x9c,umask=0x01;event=0x00,umask=0x82", hybrid_big_small);
+EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_lnl, "event=0xa4,umask=0x02;event=0x00,umask=0x83", hybrid_big_small);
+
+static struct attribute *lnl_hybrid_events_attrs[] = {
+ EVENT_PTR(slots_adl),
+ EVENT_PTR(td_retiring_lnl),
+ EVENT_PTR(td_bad_spec_adl),
+ EVENT_PTR(td_fe_bound_lnl),
+ EVENT_PTR(td_be_bound_lnl),
+ EVENT_PTR(td_heavy_ops_adl),
+ EVENT_PTR(td_br_mis_adl),
+ EVENT_PTR(td_fetch_lat_adl),
+ EVENT_PTR(td_mem_bound_adl),
+ NULL
+};
+
+/* The event string must be in PMU IDX order. */
+EVENT_ATTR_STR_HYBRID(topdown-retiring,
+ td_retiring_arl_h,
+ "event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(topdown-bad-spec,
+ td_bad_spec_arl_h,
+ "event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(topdown-fe-bound,
+ td_fe_bound_arl_h,
+ "event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(topdown-be-bound,
+ td_be_bound_arl_h,
+ "event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0",
+ hybrid_big_small_tiny);
+
+static struct attribute *arl_h_hybrid_events_attrs[] = {
+ EVENT_PTR(slots_adl),
+ EVENT_PTR(td_retiring_arl_h),
+ EVENT_PTR(td_bad_spec_arl_h),
+ EVENT_PTR(td_fe_bound_arl_h),
+ EVENT_PTR(td_be_bound_arl_h),
+ EVENT_PTR(td_heavy_ops_adl),
+ EVENT_PTR(td_br_mis_adl),
+ EVENT_PTR(td_fetch_lat_adl),
+ EVENT_PTR(td_mem_bound_adl),
+ NULL,
+};
+
/* Must be in IDX order */
EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small);
@@ -5798,6 +6183,21 @@ static struct attribute *mtl_hybrid_mem_attrs[] = {
NULL
};
+EVENT_ATTR_STR_HYBRID(mem-loads,
+ mem_ld_arl_h,
+ "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(mem-stores,
+ mem_st_arl_h,
+ "event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6",
+ hybrid_big_small_tiny);
+
+static struct attribute *arl_h_hybrid_mem_attrs[] = {
+ EVENT_PTR(mem_ld_arl_h),
+ EVENT_PTR(mem_st_arl_h),
+ NULL,
+};
+
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
@@ -5821,8 +6221,8 @@ static struct attribute *adl_hybrid_tsx_attrs[] = {
FORMAT_ATTR_HYBRID(in_tx, hybrid_big);
FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big);
-FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
-FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small);
+FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny);
+FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny);
FORMAT_ATTR_HYBRID(frontend, hybrid_big);
#define ADL_HYBRID_RTM_FORMAT_ATTR \
@@ -5845,7 +6245,7 @@ static struct attribute *adl_hybrid_extra_attr[] = {
NULL
};
-FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
+FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny);
static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
ADL_HYBRID_RTM_FORMAT_ATTR,
@@ -5908,9 +6308,27 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj,
return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0;
}
+static umode_t hybrid_td_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct x86_hybrid_pmu *pmu =
+ container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
+
+ if (!is_attr_for_this_pmu(kobj, attr))
+ return 0;
+
+
+ /* Only the big core supports perf metrics */
+ if (pmu->pmu_type == hybrid_big)
+ return pmu->intel_cap.perf_metrics ? attr->mode : 0;
+
+ return attr->mode;
+}
+
static struct attribute_group hybrid_group_events_td = {
.name = "events",
- .is_visible = hybrid_events_is_visible,
+ .is_visible = hybrid_td_is_visible,
};
static struct attribute_group hybrid_group_events_mem = {
@@ -5955,6 +6373,7 @@ static const struct attribute_group *hybrid_attr_update[] = {
&group_caps_gen,
&group_caps_lbr,
&hybrid_group_format_extra,
+ &group_format_evtsel_ext,
&group_default,
&hybrid_group_cpus,
NULL,
@@ -5962,29 +6381,9 @@ static const struct attribute_group *hybrid_attr_update[] = {
static struct attribute *empty_attrs;
-static void intel_pmu_check_num_counters(int *num_counters,
- int *num_counters_fixed,
- u64 *intel_ctrl, u64 fixed_mask)
-{
- if (*num_counters > INTEL_PMC_MAX_GENERIC) {
- WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
- *num_counters, INTEL_PMC_MAX_GENERIC);
- *num_counters = INTEL_PMC_MAX_GENERIC;
- }
- *intel_ctrl = (1ULL << *num_counters) - 1;
-
- if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) {
- WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
- *num_counters_fixed, INTEL_PMC_MAX_FIXED);
- *num_counters_fixed = INTEL_PMC_MAX_FIXED;
- }
-
- *intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED;
-}
-
static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
- int num_counters,
- int num_counters_fixed,
+ u64 cntr_mask,
+ u64 fixed_cntr_mask,
u64 intel_ctrl)
{
struct event_constraint *c;
@@ -6021,10 +6420,9 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
* generic counters
*/
if (!use_fixed_pseudo_encoding(c->code))
- c->idxmsk64 |= (1ULL << num_counters) - 1;
+ c->idxmsk64 |= cntr_mask;
}
- c->idxmsk64 &=
- ~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed));
+ c->idxmsk64 &= cntr_mask | (fixed_cntr_mask << INTEL_PMC_IDX_FIXED);
c->weight = hweight64(c->idxmsk64);
}
}
@@ -6049,9 +6447,15 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs)
}
}
+static inline int intel_pmu_v6_addr_offset(int index, bool eventsel)
+{
+ return MSR_IA32_PMC_V6_STEP * index;
+}
+
static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
- { hybrid_small, "cpu_atom" },
- { hybrid_big, "cpu_core" },
+ { hybrid_small, "cpu_atom" },
+ { hybrid_big, "cpu_core" },
+ { hybrid_tiny, "cpu_lowpower" },
};
static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
@@ -6075,21 +6479,20 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id;
pmu->name = intel_hybrid_pmu_type_map[bit].name;
- pmu->num_counters = x86_pmu.num_counters;
- pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
- pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+ pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+ pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
+ pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
+ pmu->config_mask = X86_RAW_EVENT_MASK;
pmu->unconstrained = (struct event_constraint)
- __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
- 0, pmu->num_counters, 0, 0);
+ __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+ 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
- if (pmu->pmu_type & hybrid_small) {
+ if (pmu->pmu_type & hybrid_small_tiny) {
pmu->intel_cap.perf_metrics = 0;
- pmu->intel_cap.pebs_output_pt_available = 1;
pmu->mid_ack = true;
} else if (pmu->pmu_type & hybrid_big) {
pmu->intel_cap.perf_metrics = 1;
- pmu->intel_cap.pebs_output_pt_available = 0;
pmu->late_ack = true;
}
}
@@ -6150,6 +6553,21 @@ static __always_inline void intel_pmu_init_grt(struct pmu *pmu)
intel_pmu_ref_cycles_ext();
}
+static __always_inline void intel_pmu_init_lnc(struct pmu *pmu)
+{
+ intel_pmu_init_glc(pmu);
+ hybrid(pmu, event_constraints) = intel_lnc_event_constraints;
+ hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints;
+ hybrid(pmu, extra_regs) = intel_lnc_extra_regs;
+}
+
+static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
+{
+ intel_pmu_init_grt(pmu);
+ hybrid(pmu, event_constraints) = intel_skt_event_constraints;
+ hybrid(pmu, extra_regs) = intel_cmt_extra_regs;
+}
+
__init int intel_pmu_init(void)
{
struct attribute **extra_skl_attr = &empty_attrs;
@@ -6166,15 +6584,21 @@ __init int intel_pmu_init(void)
char *name;
struct x86_hybrid_pmu *pmu;
+ /* Architectural Perfmon was introduced starting with Core "Yonah" */
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
switch (boot_cpu_data.x86) {
- case 0x6:
- return p6_pmu_init();
- case 0xb:
+ case 6:
+ if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH)
+ return p6_pmu_init();
+ break;
+ case 11:
return knc_pmu_init();
- case 0xf:
+ case 15:
return p4_pmu_init();
}
+
+ pr_cont("unsupported CPU family %d model %d ",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
}
@@ -6193,14 +6617,14 @@ __init int intel_pmu_init(void)
x86_pmu = intel_pmu;
x86_pmu.version = version;
- x86_pmu.num_counters = eax.split.num_counters;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0);
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length;
- x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+ x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
/*
@@ -6210,12 +6634,10 @@ __init int intel_pmu_init(void)
if (version > 1 && version < 5) {
int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
- x86_pmu.num_counters_fixed =
- max((int)edx.split.num_counters_fixed, assume);
-
- fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
+ x86_pmu.fixed_cntr_mask64 =
+ GENMASK_ULL(max((int)edx.split.num_counters_fixed, assume) - 1, 0);
} else if (version >= 5)
- x86_pmu.num_counters_fixed = fls(fixed_mask);
+ x86_pmu.fixed_cntr_mask64 = fixed_mask;
if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities;
@@ -6245,19 +6667,19 @@ __init int intel_pmu_init(void)
/*
* Install the hw-cache-events table:
*/
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_CORE_YONAH:
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_CORE_YONAH:
pr_cont("Core events, ");
name = "core";
break;
- case INTEL_FAM6_CORE2_MEROM:
+ case INTEL_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
fallthrough;
- case INTEL_FAM6_CORE2_MEROM_L:
- case INTEL_FAM6_CORE2_PENRYN:
- case INTEL_FAM6_CORE2_DUNNINGTON:
+ case INTEL_CORE2_MEROM_L:
+ case INTEL_CORE2_PENRYN:
+ case INTEL_CORE2_DUNNINGTON:
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -6269,9 +6691,9 @@ __init int intel_pmu_init(void)
name = "core2";
break;
- case INTEL_FAM6_NEHALEM:
- case INTEL_FAM6_NEHALEM_EP:
- case INTEL_FAM6_NEHALEM_EX:
+ case INTEL_NEHALEM:
+ case INTEL_NEHALEM_EP:
+ case INTEL_NEHALEM_EX:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -6303,11 +6725,11 @@ __init int intel_pmu_init(void)
name = "nehalem";
break;
- case INTEL_FAM6_ATOM_BONNELL:
- case INTEL_FAM6_ATOM_BONNELL_MID:
- case INTEL_FAM6_ATOM_SALTWELL:
- case INTEL_FAM6_ATOM_SALTWELL_MID:
- case INTEL_FAM6_ATOM_SALTWELL_TABLET:
+ case INTEL_ATOM_BONNELL:
+ case INTEL_ATOM_BONNELL_MID:
+ case INTEL_ATOM_SALTWELL:
+ case INTEL_ATOM_SALTWELL_MID:
+ case INTEL_ATOM_SALTWELL_TABLET:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -6320,11 +6742,11 @@ __init int intel_pmu_init(void)
name = "bonnell";
break;
- case INTEL_FAM6_ATOM_SILVERMONT:
- case INTEL_FAM6_ATOM_SILVERMONT_D:
- case INTEL_FAM6_ATOM_SILVERMONT_MID:
- case INTEL_FAM6_ATOM_AIRMONT:
- case INTEL_FAM6_ATOM_AIRMONT_MID:
+ case INTEL_ATOM_SILVERMONT:
+ case INTEL_ATOM_SILVERMONT_D:
+ case INTEL_ATOM_SILVERMONT_MID:
+ case INTEL_ATOM_AIRMONT:
+ case INTEL_ATOM_SILVERMONT_MID2:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -6342,8 +6764,8 @@ __init int intel_pmu_init(void)
name = "silvermont";
break;
- case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GOLDMONT_D:
+ case INTEL_ATOM_GOLDMONT:
+ case INTEL_ATOM_GOLDMONT_D:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -6369,7 +6791,7 @@ __init int intel_pmu_init(void)
name = "goldmont";
break;
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ case INTEL_ATOM_GOLDMONT_PLUS:
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
@@ -6398,9 +6820,9 @@ __init int intel_pmu_init(void)
name = "goldmont_plus";
break;
- case INTEL_FAM6_ATOM_TREMONT_D:
- case INTEL_FAM6_ATOM_TREMONT:
- case INTEL_FAM6_ATOM_TREMONT_L:
+ case INTEL_ATOM_TREMONT_D:
+ case INTEL_ATOM_TREMONT:
+ case INTEL_ATOM_TREMONT_L:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -6427,10 +6849,10 @@ __init int intel_pmu_init(void)
name = "Tremont";
break;
- case INTEL_FAM6_ATOM_GRACEMONT:
+ case INTEL_ATOM_GRACEMONT:
intel_pmu_init_grt(NULL);
intel_pmu_pebs_data_source_grt();
- x86_pmu.pebs_latency_data = adl_latency_data_small;
+ x86_pmu.pebs_latency_data = grt_latency_data;
x86_pmu.get_event_constraints = tnt_get_event_constraints;
td_attr = tnt_events_attrs;
mem_attr = grt_mem_attrs;
@@ -6439,12 +6861,12 @@ __init int intel_pmu_init(void)
name = "gracemont";
break;
- case INTEL_FAM6_ATOM_CRESTMONT:
- case INTEL_FAM6_ATOM_CRESTMONT_X:
+ case INTEL_ATOM_CRESTMONT:
+ case INTEL_ATOM_CRESTMONT_X:
intel_pmu_init_grt(NULL);
x86_pmu.extra_regs = intel_cmt_extra_regs;
intel_pmu_pebs_data_source_cmt();
- x86_pmu.pebs_latency_data = mtl_latency_data_small;
+ x86_pmu.pebs_latency_data = cmt_latency_data;
x86_pmu.get_event_constraints = cmt_get_event_constraints;
td_attr = cmt_events_attrs;
mem_attr = grt_mem_attrs;
@@ -6453,9 +6875,9 @@ __init int intel_pmu_init(void)
name = "crestmont";
break;
- case INTEL_FAM6_WESTMERE:
- case INTEL_FAM6_WESTMERE_EP:
- case INTEL_FAM6_WESTMERE_EX:
+ case INTEL_WESTMERE:
+ case INTEL_WESTMERE_EP:
+ case INTEL_WESTMERE_EX:
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -6484,8 +6906,8 @@ __init int intel_pmu_init(void)
name = "westmere";
break;
- case INTEL_FAM6_SANDYBRIDGE:
- case INTEL_FAM6_SANDYBRIDGE_X:
+ case INTEL_SANDYBRIDGE:
+ case INTEL_SANDYBRIDGE_X:
x86_add_quirk(intel_sandybridge_quirk);
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
@@ -6498,7 +6920,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
+ if (boot_cpu_data.x86_vfm == INTEL_SANDYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -6524,8 +6946,8 @@ __init int intel_pmu_init(void)
name = "sandybridge";
break;
- case INTEL_FAM6_IVYBRIDGE:
- case INTEL_FAM6_IVYBRIDGE_X:
+ case INTEL_IVYBRIDGE:
+ case INTEL_IVYBRIDGE_X:
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -6541,7 +6963,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
- if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
+ if (boot_cpu_data.x86_vfm == INTEL_IVYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -6563,10 +6985,10 @@ __init int intel_pmu_init(void)
break;
- case INTEL_FAM6_HASWELL:
- case INTEL_FAM6_HASWELL_X:
- case INTEL_FAM6_HASWELL_L:
- case INTEL_FAM6_HASWELL_G:
+ case INTEL_HASWELL:
+ case INTEL_HASWELL_X:
+ case INTEL_HASWELL_L:
+ case INTEL_HASWELL_G:
x86_add_quirk(intel_ht_bug);
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
@@ -6586,6 +7008,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.limit_period = hsw_limit_period;
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
@@ -6596,10 +7019,10 @@ __init int intel_pmu_init(void)
name = "haswell";
break;
- case INTEL_FAM6_BROADWELL:
- case INTEL_FAM6_BROADWELL_D:
- case INTEL_FAM6_BROADWELL_G:
- case INTEL_FAM6_BROADWELL_X:
+ case INTEL_BROADWELL:
+ case INTEL_BROADWELL_D:
+ case INTEL_BROADWELL_G:
+ case INTEL_BROADWELL_X:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -6638,8 +7061,8 @@ __init int intel_pmu_init(void)
name = "broadwell";
break;
- case INTEL_FAM6_XEON_PHI_KNL:
- case INTEL_FAM6_XEON_PHI_KNM:
+ case INTEL_XEON_PHI_KNL:
+ case INTEL_XEON_PHI_KNM:
memcpy(hw_cache_event_ids,
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
@@ -6658,15 +7081,15 @@ __init int intel_pmu_init(void)
name = "knights-landing";
break;
- case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_SKYLAKE_X:
pmem = true;
fallthrough;
- case INTEL_FAM6_SKYLAKE_L:
- case INTEL_FAM6_SKYLAKE:
- case INTEL_FAM6_KABYLAKE_L:
- case INTEL_FAM6_KABYLAKE:
- case INTEL_FAM6_COMETLAKE_L:
- case INTEL_FAM6_COMETLAKE:
+ case INTEL_SKYLAKE_L:
+ case INTEL_SKYLAKE:
+ case INTEL_KABYLAKE_L:
+ case INTEL_KABYLAKE:
+ case INTEL_COMETLAKE_L:
+ case INTEL_COMETLAKE:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -6715,16 +7138,16 @@ __init int intel_pmu_init(void)
name = "skylake";
break;
- case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_ICELAKE_D:
+ case INTEL_ICELAKE_X:
+ case INTEL_ICELAKE_D:
x86_pmu.pebs_ept = 1;
pmem = true;
fallthrough;
- case INTEL_FAM6_ICELAKE_L:
- case INTEL_FAM6_ICELAKE:
- case INTEL_FAM6_TIGERLAKE_L:
- case INTEL_FAM6_TIGERLAKE:
- case INTEL_FAM6_ROCKETLAKE:
+ case INTEL_ICELAKE_L:
+ case INTEL_ICELAKE:
+ case INTEL_TIGERLAKE_L:
+ case INTEL_TIGERLAKE:
+ case INTEL_ROCKETLAKE:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -6759,16 +7182,22 @@ __init int intel_pmu_init(void)
name = "icelake";
break;
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- case INTEL_FAM6_EMERALDRAPIDS_X:
+ case INTEL_SAPPHIRERAPIDS_X:
+ case INTEL_EMERALDRAPIDS_X:
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.extra_regs = intel_glc_extra_regs;
- fallthrough;
- case INTEL_FAM6_GRANITERAPIDS_X:
- case INTEL_FAM6_GRANITERAPIDS_D:
+ pr_cont("Sapphire Rapids events, ");
+ name = "sapphire_rapids";
+ goto glc_common;
+
+ case INTEL_GRANITERAPIDS_X:
+ case INTEL_GRANITERAPIDS_D:
+ x86_pmu.extra_regs = intel_rwc_extra_regs;
+ pr_cont("Granite Rapids events, ");
+ name = "granite_rapids";
+
+ glc_common:
intel_pmu_init_glc(NULL);
- if (!x86_pmu.extra_regs)
- x86_pmu.extra_regs = intel_rwc_extra_regs;
x86_pmu.pebs_ept = 1;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = glc_get_event_constraints;
@@ -6779,15 +7208,13 @@ __init int intel_pmu_init(void)
td_attr = glc_td_events_attrs;
tsx_attr = glc_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(true);
- pr_cont("Sapphire Rapids events, ");
- name = "sapphire_rapids";
break;
- case INTEL_FAM6_ALDERLAKE:
- case INTEL_FAM6_ALDERLAKE_L:
- case INTEL_FAM6_RAPTORLAKE:
- case INTEL_FAM6_RAPTORLAKE_P:
- case INTEL_FAM6_RAPTORLAKE_S:
+ case INTEL_ALDERLAKE:
+ case INTEL_ALDERLAKE_L:
+ case INTEL_RAPTORLAKE:
+ case INTEL_RAPTORLAKE_P:
+ case INTEL_RAPTORLAKE_S:
/*
* Alder Lake has 2 types of CPU, core and atom.
*
@@ -6795,7 +7222,7 @@ __init int intel_pmu_init(void)
*/
intel_pmu_init_hybrid(hybrid_big_small);
- x86_pmu.pebs_latency_data = adl_latency_data_small;
+ x86_pmu.pebs_latency_data = grt_latency_data;
x86_pmu.get_event_constraints = adl_get_event_constraints;
x86_pmu.hw_config = adl_hw_config;
x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type;
@@ -6810,11 +7237,13 @@ __init int intel_pmu_init(void)
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
intel_pmu_init_glc(&pmu->pmu);
if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
- pmu->num_counters = x86_pmu.num_counters + 2;
- pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+ pmu->cntr_mask64 <<= 2;
+ pmu->cntr_mask64 |= 0x3;
+ pmu->fixed_cntr_mask64 <<= 1;
+ pmu->fixed_cntr_mask64 |= 0x1;
} else {
- pmu->num_counters = x86_pmu.num_counters;
- pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+ pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
}
/*
@@ -6824,15 +7253,16 @@ __init int intel_pmu_init(void)
* mistakenly add extra counters for P-cores. Correct the number of
* counters here.
*/
- if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
- pmu->num_counters = x86_pmu.num_counters;
- pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ if ((x86_pmu_num_counters(&pmu->pmu) > 8) || (x86_pmu_num_counters_fixed(&pmu->pmu) > 4)) {
+ pmu->cntr_mask64 = x86_pmu.cntr_mask64;
+ pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
}
- pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+ pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
pmu->unconstrained = (struct event_constraint)
- __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
- 0, pmu->num_counters, 0, 0);
+ __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
+ 0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
+
pmu->extra_regs = intel_glc_extra_regs;
/* Initialize Atom core specific PerfMon capabilities.*/
@@ -6845,11 +7275,12 @@ __init int intel_pmu_init(void)
name = "alderlake_hybrid";
break;
- case INTEL_FAM6_METEORLAKE:
- case INTEL_FAM6_METEORLAKE_L:
+ case INTEL_METEORLAKE:
+ case INTEL_METEORLAKE_L:
+ case INTEL_ARROWLAKE_U:
intel_pmu_init_hybrid(hybrid_big_small);
- x86_pmu.pebs_latency_data = mtl_latency_data_small;
+ x86_pmu.pebs_latency_data = cmt_latency_data;
x86_pmu.get_event_constraints = mtl_get_event_constraints;
x86_pmu.hw_config = adl_hw_config;
@@ -6874,6 +7305,64 @@ __init int intel_pmu_init(void)
name = "meteorlake_hybrid";
break;
+ case INTEL_LUNARLAKE_M:
+ case INTEL_ARROWLAKE:
+ intel_pmu_init_hybrid(hybrid_big_small);
+
+ x86_pmu.pebs_latency_data = lnl_latency_data;
+ x86_pmu.get_event_constraints = mtl_get_event_constraints;
+ x86_pmu.hw_config = adl_hw_config;
+
+ td_attr = lnl_hybrid_events_attrs;
+ mem_attr = mtl_hybrid_mem_attrs;
+ tsx_attr = adl_hybrid_tsx_attrs;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
+
+ /* Initialize big core specific PerfMon capabilities.*/
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
+ intel_pmu_init_lnc(&pmu->pmu);
+
+ /* Initialize Atom core specific PerfMon capabilities.*/
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
+ intel_pmu_init_skt(&pmu->pmu);
+
+ intel_pmu_pebs_data_source_lnl();
+ pr_cont("Lunarlake Hybrid events, ");
+ name = "lunarlake_hybrid";
+ break;
+
+ case INTEL_ARROWLAKE_H:
+ intel_pmu_init_hybrid(hybrid_big_small_tiny);
+
+ x86_pmu.pebs_latency_data = arl_h_latency_data;
+ x86_pmu.get_event_constraints = arl_h_get_event_constraints;
+ x86_pmu.hw_config = arl_h_hw_config;
+
+ td_attr = arl_h_hybrid_events_attrs;
+ mem_attr = arl_h_hybrid_mem_attrs;
+ tsx_attr = adl_hybrid_tsx_attrs;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
+
+ /* Initialize big core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
+ intel_pmu_init_lnc(&pmu->pmu);
+
+ /* Initialize Atom core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
+ intel_pmu_init_skt(&pmu->pmu);
+
+ /* Initialize Lower Power Atom specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX];
+ intel_pmu_init_grt(&pmu->pmu);
+ pmu->extra_regs = intel_cmt_extra_regs;
+
+ intel_pmu_pebs_data_source_arl_h();
+ pr_cont("ArrowLake-H Hybrid events, ");
+ name = "arrowlake_h_hybrid";
+ break;
+
default:
switch (x86_pmu.version) {
case 1:
@@ -6899,9 +7388,9 @@ __init int intel_pmu_init(void)
* The constraints may be cut according to the CPUID enumeration
* by inserting the EVENT_CONSTRAINT_END.
*/
- if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED)
- x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
- intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1;
+ if (fls64(x86_pmu.fixed_cntr_mask64) > INTEL_PMC_MAX_FIXED)
+ x86_pmu.fixed_cntr_mask64 &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
+ intel_v5_gen_event_constraints[fls64(x86_pmu.fixed_cntr_mask64)].weight = -1;
x86_pmu.event_constraints = intel_v5_gen_event_constraints;
pr_cont("generic architected perfmon, ");
name = "generic_arch_v5+";
@@ -6928,18 +7417,17 @@ __init int intel_pmu_init(void)
x86_pmu.attr_update = hybrid_attr_update;
}
- intel_pmu_check_num_counters(&x86_pmu.num_counters,
- &x86_pmu.num_counters_fixed,
- &x86_pmu.intel_ctrl,
- (u64)fixed_mask);
+ intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
+ &x86_pmu.fixed_cntr_mask64,
+ &x86_pmu.intel_ctrl);
/* AnyThread may be deprecated on arch perfmon v5 or later */
if (x86_pmu.intel_cap.anythread_deprecated)
x86_pmu.format_attrs = intel_arch_formats_attr;
intel_pmu_check_event_constraints(x86_pmu.event_constraints,
- x86_pmu.num_counters,
- x86_pmu.num_counters_fixed,
+ x86_pmu.cntr_mask64,
+ x86_pmu.fixed_cntr_mask64,
x86_pmu.intel_ctrl);
/*
* Access LBR MSR may cause #GP under certain circumstances.
@@ -6980,6 +7468,14 @@ __init int intel_pmu_init(void)
pr_cont("full-width counters, ");
}
+ /* Support V6+ MSR Aliasing */
+ if (x86_pmu.version >= 6) {
+ x86_pmu.perfctr = MSR_IA32_PMC_V6_GP0_CTR;
+ x86_pmu.eventsel = MSR_IA32_PMC_V6_GP0_CFG_A;
+ x86_pmu.fixedctr = MSR_IA32_PMC_V6_FX0_CTR;
+ x86_pmu.addr_offset = intel_pmu_v6_addr_offset;
+ }
+
if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 326c8cd5aa2d..ae4ec16156bb 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -41,7 +41,7 @@
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
* Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
- * MTL,SRF,GRR
+ * MTL,SRF,GRR,ARL,LNL
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
@@ -53,50 +53,50 @@
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
* TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
- * GRR
+ * GRR,ARL,LNL
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
- * ICL,TGL,RKL,ADL,RPL,MTL
+ * ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL
* Scope: Core
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
* KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
- * RPL,SPR,MTL
+ * RPL,SPR,MTL,ARL,LNL,SRF
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
- * ADL,RPL,MTL
+ * ADL,RPL,MTL,ARL,LNL
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF
+ * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
+ * ARL,LNL
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
- * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL
+ * KBL,CML,ICL,TGL,RKL
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- * ADL,RPL,MTL
+ * ADL,RPL,MTL,ARL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
- * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- * ADL,RPL,MTL
+ * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
- * TNT,RKL,ADL,RPL,MTL
+ * TNT,RKL,ADL,RPL,MTL,ARL,LNL
* Scope: Package (physical package)
* MSR_MODULE_C6_RES_MS: Module C6 Residency Counter.
* perf code: 0x00
@@ -114,6 +114,7 @@
#include "../perf_event.h"
#include "../probe.h"
+MODULE_DESCRIPTION("Support for Intel cstate performance events");
MODULE_LICENSE("GPL");
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
@@ -127,10 +128,6 @@ static ssize_t __cstate_##_var##_show(struct device *dev, \
static struct device_attribute format_attr_##_var = \
__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr,
- char *buf);
-
/* Model -> events mapping */
struct cstate_model {
unsigned long core_events;
@@ -143,12 +140,6 @@ struct cstate_model {
#define SLM_PKG_C6_USE_C7_MSR (1UL << 0)
#define KNL_CORE_C6_MSR (1UL << 1)
-struct perf_cstate_msr {
- u64 msr;
- struct perf_pmu_events_attr *attr;
-};
-
-
/* cstate_core PMU */
static struct pmu cstate_core_pmu;
static bool has_cstate_core;
@@ -211,22 +202,9 @@ static struct attribute_group cstate_format_attr_group = {
.attrs = cstate_format_attrs,
};
-static cpumask_t cstate_core_cpu_mask;
-static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
-
-static struct attribute *cstate_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group cpumask_attr_group = {
- .attrs = cstate_cpumask_attrs,
-};
-
static const struct attribute_group *cstate_attr_groups[] = {
&cstate_events_attr_group,
&cstate_format_attr_group,
- &cpumask_attr_group,
NULL,
};
@@ -274,8 +252,6 @@ static struct perf_msr pkg_msr[] = {
[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr },
};
-static cpumask_t cstate_pkg_cpu_mask;
-
/* cstate_module PMU */
static struct pmu cstate_module_pmu;
static bool has_cstate_module;
@@ -296,28 +272,9 @@ static struct perf_msr module_msr[] = {
[PERF_CSTATE_MODULE_C6_RES] = { MSR_MODULE_C6_RES_MS, &group_cstate_module_c6, test_msr },
};
-static cpumask_t cstate_module_cpu_mask;
-
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct pmu *pmu = dev_get_drvdata(dev);
-
- if (pmu == &cstate_core_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
- else if (pmu == &cstate_pkg_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
- else if (pmu == &cstate_module_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask);
- else
- return 0;
-}
-
static int cstate_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config;
- int cpu;
if (event->attr.type != event->pmu->type)
return -ENOENT;
@@ -336,20 +293,13 @@ static int cstate_pmu_event_init(struct perf_event *event)
if (!(core_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = core_msr[cfg].msr;
- cpu = cpumask_any_and(&cstate_core_cpu_mask,
- topology_sibling_cpumask(event->cpu));
} else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
if (!(pkg_msr_mask & (1 << cfg)))
return -EINVAL;
-
- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
-
event->hw.event_base = pkg_msr[cfg].msr;
- cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_die_cpumask(event->cpu));
} else if (event->pmu == &cstate_module_pmu) {
if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX)
return -EINVAL;
@@ -357,16 +307,10 @@ static int cstate_pmu_event_init(struct perf_event *event)
if (!(module_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = module_msr[cfg].msr;
- cpu = cpumask_any_and(&cstate_module_cpu_mask,
- topology_cluster_cpumask(event->cpu));
} else {
return -ENOENT;
}
- if (cpu >= nr_cpu_ids)
- return -ENODEV;
-
- event->cpu = cpu;
event->hw.config = cfg;
event->hw.idx = -1;
return 0;
@@ -417,84 +361,6 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
return 0;
}
-/*
- * Check if exiting cpu is the designated reader. If so migrate the
- * events when there is a valid target available
- */
-static int cstate_cpu_exit(unsigned int cpu)
-{
- unsigned int target;
-
- if (has_cstate_core &&
- cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
-
- target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
- /* Migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &cstate_core_cpu_mask);
- perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
- }
- }
-
- if (has_cstate_pkg &&
- cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
-
- target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
- /* Migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
- perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
- }
- }
-
- if (has_cstate_module &&
- cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) {
-
- target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu);
- /* Migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &cstate_module_cpu_mask);
- perf_pmu_migrate_context(&cstate_module_pmu, cpu, target);
- }
- }
- return 0;
-}
-
-static int cstate_cpu_init(unsigned int cpu)
-{
- unsigned int target;
-
- /*
- * If this is the first online thread of that core, set it in
- * the core cpu mask as the designated reader.
- */
- target = cpumask_any_and(&cstate_core_cpu_mask,
- topology_sibling_cpumask(cpu));
-
- if (has_cstate_core && target >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-
- /*
- * If this is the first online thread of that package, set it
- * in the package cpu mask as the designated reader.
- */
- target = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_die_cpumask(cpu));
- if (has_cstate_pkg && target >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-
- /*
- * If this is the first online thread of that cluster, set it
- * in the cluster cpu mask as the designated reader.
- */
- target = cpumask_any_and(&cstate_module_cpu_mask,
- topology_cluster_cpumask(cpu));
- if (has_cstate_module && target >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_module_cpu_mask);
-
- return 0;
-}
-
static const struct attribute_group *core_attr_update[] = {
&group_cstate_core_c1,
&group_cstate_core_c3,
@@ -531,6 +397,7 @@ static struct pmu cstate_core_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .scope = PERF_PMU_SCOPE_CORE,
.module = THIS_MODULE,
};
@@ -546,6 +413,7 @@ static struct pmu cstate_pkg_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .scope = PERF_PMU_SCOPE_PKG,
.module = THIS_MODULE,
};
@@ -561,6 +429,7 @@ static struct pmu cstate_module_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .scope = PERF_PMU_SCOPE_CLUSTER,
.module = THIS_MODULE,
};
@@ -642,9 +511,18 @@ static const struct cstate_model adl_cstates __initconst = {
.pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
BIT(PERF_CSTATE_PKG_C3_RES) |
BIT(PERF_CSTATE_PKG_C6_RES) |
- BIT(PERF_CSTATE_PKG_C7_RES) |
BIT(PERF_CSTATE_PKG_C8_RES) |
- BIT(PERF_CSTATE_PKG_C9_RES) |
+ BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
+static const struct cstate_model lnl_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES) |
+ BIT(PERF_CSTATE_CORE_C7_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+ BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
BIT(PERF_CSTATE_PKG_C10_RES),
};
@@ -689,85 +567,90 @@ static const struct cstate_model srf_cstates __initconst = {
.core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
BIT(PERF_CSTATE_CORE_C6_RES),
- .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES),
+ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES),
.module_events = BIT(PERF_CSTATE_MODULE_C6_RES),
};
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
- X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhm_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhm_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snb_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &snb_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &snb_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &snb_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &snb_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &snb_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hswult_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &slm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &slm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &slm_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &snb_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &snb_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &snb_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &snb_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &snb_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &snb_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &snb_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &hswult_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &hswult_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &hswult_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &hswult_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &cnl_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &glm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &glm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &glm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &glm_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &srf_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &grr_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &icx_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, &icx_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, &icx_cstates),
-
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &adl_cstates),
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &adl_cstates),
+ X86_MATCH_VFM(INTEL_NEHALEM, &nhm_cstates),
+ X86_MATCH_VFM(INTEL_NEHALEM_EP, &nhm_cstates),
+ X86_MATCH_VFM(INTEL_NEHALEM_EX, &nhm_cstates),
+
+ X86_MATCH_VFM(INTEL_WESTMERE, &nhm_cstates),
+ X86_MATCH_VFM(INTEL_WESTMERE_EP, &nhm_cstates),
+ X86_MATCH_VFM(INTEL_WESTMERE_EX, &nhm_cstates),
+
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE, &snb_cstates),
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &snb_cstates),
+
+ X86_MATCH_VFM(INTEL_IVYBRIDGE, &snb_cstates),
+ X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &snb_cstates),
+
+ X86_MATCH_VFM(INTEL_HASWELL, &snb_cstates),
+ X86_MATCH_VFM(INTEL_HASWELL_X, &snb_cstates),
+ X86_MATCH_VFM(INTEL_HASWELL_G, &snb_cstates),
+
+ X86_MATCH_VFM(INTEL_HASWELL_L, &hswult_cstates),
+
+ X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &slm_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_D, &slm_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &slm_cstates),
+
+ X86_MATCH_VFM(INTEL_BROADWELL, &snb_cstates),
+ X86_MATCH_VFM(INTEL_BROADWELL_D, &snb_cstates),
+ X86_MATCH_VFM(INTEL_BROADWELL_G, &snb_cstates),
+ X86_MATCH_VFM(INTEL_BROADWELL_X, &snb_cstates),
+
+ X86_MATCH_VFM(INTEL_SKYLAKE_L, &snb_cstates),
+ X86_MATCH_VFM(INTEL_SKYLAKE, &snb_cstates),
+ X86_MATCH_VFM(INTEL_SKYLAKE_X, &snb_cstates),
+
+ X86_MATCH_VFM(INTEL_KABYLAKE_L, &hswult_cstates),
+ X86_MATCH_VFM(INTEL_KABYLAKE, &hswult_cstates),
+ X86_MATCH_VFM(INTEL_COMETLAKE_L, &hswult_cstates),
+ X86_MATCH_VFM(INTEL_COMETLAKE, &hswult_cstates),
+
+ X86_MATCH_VFM(INTEL_CANNONLAKE_L, &cnl_cstates),
+
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &knl_cstates),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &knl_cstates),
+
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &glm_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &glm_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &glm_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &glm_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_TREMONT, &glm_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_TREMONT_L, &glm_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &srf_cstates),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &grr_cstates),
+
+ X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_cstates),
+ X86_MATCH_VFM(INTEL_ICELAKE, &icl_cstates),
+ X86_MATCH_VFM(INTEL_ICELAKE_X, &icx_cstates),
+ X86_MATCH_VFM(INTEL_ICELAKE_D, &icx_cstates),
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &icx_cstates),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &icx_cstates),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &icx_cstates),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &icx_cstates),
+
+ X86_MATCH_VFM(INTEL_TIGERLAKE_L, &icl_cstates),
+ X86_MATCH_VFM(INTEL_TIGERLAKE, &icl_cstates),
+ X86_MATCH_VFM(INTEL_ROCKETLAKE, &icl_cstates),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, &adl_cstates),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, &adl_cstates),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_cstates),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &adl_cstates),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_cstates),
+ X86_MATCH_VFM(INTEL_METEORLAKE, &adl_cstates),
+ X86_MATCH_VFM(INTEL_METEORLAKE_L, &adl_cstates),
+ X86_MATCH_VFM(INTEL_ARROWLAKE, &adl_cstates),
+ X86_MATCH_VFM(INTEL_ARROWLAKE_H, &adl_cstates),
+ X86_MATCH_VFM(INTEL_ARROWLAKE_U, &adl_cstates),
+ X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
@@ -801,9 +684,6 @@ static int __init cstate_probe(const struct cstate_model *cm)
static inline void cstate_cleanup(void)
{
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
-
if (has_cstate_core)
perf_pmu_unregister(&cstate_core_pmu);
@@ -818,11 +698,6 @@ static int __init cstate_init(void)
{
int err;
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
- "perf/x86/cstate:starting", cstate_cpu_init, NULL);
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
- "perf/x86/cstate:online", NULL, cstate_cpu_exit);
-
if (has_cstate_core) {
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
if (err) {
@@ -835,6 +710,8 @@ static int __init cstate_init(void)
if (has_cstate_pkg) {
if (topology_max_dies_per_package() > 1) {
+ /* CLX-AP is multi-die and the cstate is die-scope */
+ cstate_pkg_pmu.scope = PERF_PMU_SCOPE_DIE;
err = perf_pmu_register(&cstate_pkg_pmu,
"cstate_die", -1);
} else {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2641ba620f12..1f7e1a692a7a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -63,6 +63,15 @@ union intel_x86_pebs_dse {
unsigned int mtl_fwd_blk:1;
unsigned int ld_reserved4:24;
};
+ struct {
+ unsigned int lnc_dse:8;
+ unsigned int ld_reserved5:2;
+ unsigned int lnc_stlb_miss:1;
+ unsigned int lnc_locked:1;
+ unsigned int lnc_data_blk:1;
+ unsigned int lnc_addr_blk:1;
+ unsigned int ld_reserved6:18;
+ };
};
@@ -77,7 +86,7 @@ union intel_x86_pebs_dse {
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
/* Version for Sandy Bridge and later */
-static u64 pebs_data_source[] = {
+static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
@@ -168,11 +177,56 @@ void __init intel_pmu_pebs_data_source_mtl(void)
__intel_pmu_pebs_data_source_cmt(data_source);
}
+void __init intel_pmu_pebs_data_source_arl_h(void)
+{
+ u64 *data_source;
+
+ intel_pmu_pebs_data_source_lnl();
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ __intel_pmu_pebs_data_source_cmt(data_source);
+}
+
void __init intel_pmu_pebs_data_source_cmt(void)
{
__intel_pmu_pebs_data_source_cmt(pebs_data_source);
}
+/* Version for Lion Cove and later */
+static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: LFB/L1 Miss Handling Buffer hit */
+ 0, /* 0x04: Reserved */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x05: L2 Hit */
+ OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE), /* 0x06: L2 Miss Handling Buffer Hit */
+ 0, /* 0x07: Reserved */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x08: L3 Hit */
+ 0, /* 0x09: Reserved */
+ 0, /* 0x0a: Reserved */
+ 0, /* 0x0b: Reserved */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* 0x0c: L3 Hit Snoop Fwd */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0d: L3 Hit Snoop HitM */
+ 0, /* 0x0e: Reserved */
+ P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0f: L3 Miss Snoop HitM */
+ OP_LH | LEVEL(MSC) | P(SNOOP, NONE), /* 0x10: Memory-side Cache Hit */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */
+};
+
+void __init intel_pmu_pebs_data_source_lnl(void)
+{
+ u64 *data_source;
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
+ memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ __intel_pmu_pebs_data_source_cmt(data_source);
+}
+
static u64 precise_store_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -257,14 +311,14 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
}
/* Retrieve the latency data for e-core of ADL */
-static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
- u8 dse, bool tlb, bool lock, bool blk)
+static u64 __grt_latency_data(struct perf_event *event, u64 status,
+ u8 dse, bool tlb, bool lock, bool blk)
{
u64 val;
WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
- dse &= PERF_PEBS_DATA_SOURCE_MASK;
+ dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
val = hybrid_var(event->pmu, pebs_data_source)[dse];
pebs_set_tlb_lock(&val, tlb, lock);
@@ -277,27 +331,82 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
return val;
}
-u64 adl_latency_data_small(struct perf_event *event, u64 status)
+u64 grt_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
dse.val = status;
- return __adl_latency_data_small(event, status, dse.ld_dse,
- dse.ld_locked, dse.ld_stlb_miss,
- dse.ld_data_blk);
+ return __grt_latency_data(event, status, dse.ld_dse,
+ dse.ld_locked, dse.ld_stlb_miss,
+ dse.ld_data_blk);
}
/* Retrieve the latency data for e-core of MTL */
-u64 mtl_latency_data_small(struct perf_event *event, u64 status)
+u64 cmt_latency_data(struct perf_event *event, u64 status)
+{
+ union intel_x86_pebs_dse dse;
+
+ dse.val = status;
+
+ return __grt_latency_data(event, status, dse.mtl_dse,
+ dse.mtl_stlb_miss, dse.mtl_locked,
+ dse.mtl_fwd_blk);
+}
+
+static u64 lnc_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
+ union perf_mem_data_src src;
+ u64 val;
dse.val = status;
- return __adl_latency_data_small(event, status, dse.mtl_dse,
- dse.mtl_stlb_miss, dse.mtl_locked,
- dse.mtl_fwd_blk);
+ /* LNC core latency data */
+ val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
+ if (!val)
+ val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
+
+ if (dse.lnc_stlb_miss)
+ val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ if (dse.lnc_locked)
+ val |= P(LOCK, LOCKED);
+
+ if (dse.lnc_data_blk)
+ val |= P(BLK, DATA);
+ if (dse.lnc_addr_blk)
+ val |= P(BLK, ADDR);
+ if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
+ val |= P(BLK, NA);
+
+ src.val = val;
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+ src.mem_op = P(OP, STORE);
+
+ return src.val;
+}
+
+u64 lnl_latency_data(struct perf_event *event, u64 status)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_small)
+ return cmt_latency_data(event, status);
+
+ return lnc_latency_data(event, status);
+}
+
+u64 arl_h_latency_data(struct perf_event *event, u64 status)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_tiny)
+ return cmt_latency_data(event, status);
+
+ return lnl_latency_data(event, status);
}
static u64 load_latency_data(struct perf_event *event, u64 status)
@@ -844,11 +953,11 @@ unlock:
return 1;
}
-static inline void intel_pmu_drain_pebs_buffer(void)
+void intel_pmu_drain_pebs_buffer(void)
{
struct perf_sample_data data;
- x86_pmu.drain_pebs(NULL, &data);
+ static_call(x86_pmu_drain_pebs)(NULL, &data);
}
/*
@@ -1086,6 +1195,32 @@ struct event_constraint intel_glc_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_lnc_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
+
+ INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc),
+ INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
+
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
+
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
+
+ /*
+ * Everything else is handled by PMU_FL_PEBS_ALL, because we
+ * need the full constraints from the main table.
+ */
+
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
@@ -1137,8 +1272,7 @@ void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sche
static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
struct debug_store *ds = cpuc->ds;
- int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
- int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
+ int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu);
u64 threshold;
int reserved;
@@ -1146,7 +1280,7 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
return;
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
- reserved = max_pebs_events + num_counters_fixed;
+ reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu);
else
reserved = max_pebs_events;
@@ -1160,6 +1294,19 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
ds->pebs_interrupt_threshold = threshold;
}
+#define PEBS_DATACFG_CNTRS(x) \
+ ((x >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK)
+
+#define PEBS_DATACFG_CNTR_BIT(x) \
+ (((1ULL << x) & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT)
+
+#define PEBS_DATACFG_FIX(x) \
+ ((x >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK)
+
+#define PEBS_DATACFG_FIX_BIT(x) \
+ (((1ULL << (x)) & PEBS_DATACFG_FIX_MASK) \
+ << PEBS_DATACFG_FIX_SHIFT)
+
static void adaptive_pebs_record_size_update(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1174,10 +1321,58 @@ static void adaptive_pebs_record_size_update(void)
sz += sizeof(struct pebs_xmm);
if (pebs_data_cfg & PEBS_DATACFG_LBRS)
sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
+ if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) {
+ sz += sizeof(struct pebs_cntr_header);
+
+ /* Metrics base and Metrics Data */
+ if (pebs_data_cfg & PEBS_DATACFG_METRICS)
+ sz += 2 * sizeof(u64);
+
+ if (pebs_data_cfg & PEBS_DATACFG_CNTR) {
+ sz += (hweight64(PEBS_DATACFG_CNTRS(pebs_data_cfg)) +
+ hweight64(PEBS_DATACFG_FIX(pebs_data_cfg))) *
+ sizeof(u64);
+ }
+ }
cpuc->pebs_record_size = sz;
}
+static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
+ int idx, u64 *pebs_data_cfg)
+{
+ if (is_metric_event(event)) {
+ *pebs_data_cfg |= PEBS_DATACFG_METRICS;
+ return;
+ }
+
+ *pebs_data_cfg |= PEBS_DATACFG_CNTR;
+
+ if (idx >= INTEL_PMC_IDX_FIXED)
+ *pebs_data_cfg |= PEBS_DATACFG_FIX_BIT(idx - INTEL_PMC_IDX_FIXED);
+ else
+ *pebs_data_cfg |= PEBS_DATACFG_CNTR_BIT(idx);
+}
+
+
+static void intel_pmu_late_setup(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *event;
+ u64 pebs_data_cfg = 0;
+ int i;
+
+ for (i = 0; i < cpuc->n_events; i++) {
+ event = cpuc->event_list[i];
+ if (!is_pebs_counter_event_group(event))
+ continue;
+ __intel_pmu_pebs_update_cfg(event, cpuc->assign[i], &pebs_data_cfg);
+ }
+
+ if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
+ cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
+}
+
#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_WEIGHT_TYPE | \
@@ -1237,11 +1432,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct pmu *pmu = event->pmu;
/*
- * Make sure we get updated with the first PEBS
- * event. It will trigger also during removal, but
- * that does not hurt:
+ * Make sure we get updated with the first PEBS event.
+ * During removal, ->pebs_data_cfg is still valid for
+ * the last PEBS event. Don't clear it.
*/
- if (cpuc->n_pebs == 1)
+ if ((cpuc->n_pebs == 1) && add)
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
@@ -1355,7 +1550,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
* hence we need to drain when changing said
* size.
*/
- intel_pmu_drain_large_pebs(cpuc);
+ intel_pmu_drain_pebs_buffer();
adaptive_pebs_record_size_update();
wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
cpuc->active_pebs_data_cfg = pebs_data_cfg;
@@ -1655,8 +1850,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
- if (sample_type & PERF_SAMPLE_CALLCHAIN)
- perf_sample_save_callchain(data, event, iregs);
+ perf_sample_save_callchain(data, event, iregs);
/*
* We use the interrupt regs as a base because the PEBS record does not
@@ -1755,8 +1949,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
if (x86_pmu.intel_cap.pebs_format >= 3)
setup_pebs_time(event, data, pebs->tsc);
- if (has_branch_stack(event))
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1782,14 +1975,89 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
#endif
}
+static void intel_perf_event_update_pmc(struct perf_event *event, u64 pmc)
+{
+ int shift = 64 - x86_pmu.cntval_bits;
+ struct hw_perf_event *hwc;
+ u64 delta, prev_pmc;
+
+ /*
+ * A recorded counter may not have an assigned event in the
+ * following cases. The value should be dropped.
+ * - An event is deleted. There is still an active PEBS event.
+ * The PEBS record doesn't shrink on pmu::del().
+ * If the counter of the deleted event once occurred in a PEBS
+ * record, PEBS still records the counter until the counter is
+ * reassigned.
+ * - An event is stopped for some reason, e.g., throttled.
+ * During this period, another event is added and takes the
+ * counter of the stopped event. The stopped event is assigned
+ * to another new and uninitialized counter, since the
+ * x86_pmu_start(RELOAD) is not invoked for a stopped event.
+ * The PEBS__DATA_CFG is updated regardless of the event state.
+ * The uninitialized counter can be recorded in a PEBS record.
+ * But the cpuc->events[uninitialized_counter] is always NULL,
+ * because the event is stopped. The uninitialized value is
+ * safely dropped.
+ */
+ if (!event)
+ return;
+
+ hwc = &event->hw;
+ prev_pmc = local64_read(&hwc->prev_count);
+
+ /* Only update the count when the PMU is disabled */
+ WARN_ON(this_cpu_read(cpu_hw_events.enabled));
+ local64_set(&hwc->prev_count, pmc);
+
+ delta = (pmc << shift) - (prev_pmc << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+}
+
+static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
+ struct perf_event *event,
+ struct pebs_cntr_header *cntr,
+ void *next_record)
+{
+ int bit;
+
+ for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) {
+ intel_perf_event_update_pmc(cpuc->events[bit], *(u64 *)next_record);
+ next_record += sizeof(u64);
+ }
+
+ for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) {
+ /* The slots event will be handled with perf_metric later */
+ if ((cntr->metrics == INTEL_CNTR_METRICS) &&
+ (bit + INTEL_PMC_IDX_FIXED == INTEL_PMC_IDX_FIXED_SLOTS)) {
+ next_record += sizeof(u64);
+ continue;
+ }
+ intel_perf_event_update_pmc(cpuc->events[bit + INTEL_PMC_IDX_FIXED],
+ *(u64 *)next_record);
+ next_record += sizeof(u64);
+ }
+
+ /* HW will reload the value right after the overflow. */
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period);
+
+ if (cntr->metrics == INTEL_CNTR_METRICS) {
+ static_call(intel_pmu_update_topdown_event)
+ (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS],
+ (u64 *)next_record);
+ next_record += 2 * sizeof(u64);
+ }
+}
+
#define PEBS_LATENCY_MASK 0xffff
-#define PEBS_CACHE_LATENCY_OFFSET 32
-#define PEBS_RETIRE_LATENCY_OFFSET 32
/*
* With adaptive PEBS the layout depends on what fields are configured.
*/
-
static void setup_pebs_adaptive_sample_data(struct perf_event *event,
struct pt_regs *iregs, void *__pebs,
struct perf_sample_data *data,
@@ -1798,8 +2066,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct pebs_basic *basic = __pebs;
void *next_record = basic + 1;
- u64 sample_type;
- u64 format_size;
+ u64 sample_type, format_group;
struct pebs_meminfo *meminfo = NULL;
struct pebs_gprs *gprs = NULL;
struct x86_perf_regs *perf_regs;
@@ -1811,7 +2078,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
perf_regs->xmm_regs = NULL;
sample_type = event->attr.sample_type;
- format_size = basic->format_size;
+ format_group = basic->format_group;
perf_sample_data_init(data, 0, event->hw.last_period);
data->period = event->hw.last_period;
@@ -1823,28 +2090,31 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
- if (sample_type & PERF_SAMPLE_CALLCHAIN)
- perf_sample_save_callchain(data, event, iregs);
+ perf_sample_save_callchain(data, event, iregs);
*regs = *iregs;
/* The ip in basic is EventingIP */
set_linear_ip(regs, basic->ip);
regs->flags = PERF_EFLAGS_EXACT;
- if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY))
- data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
+ data->weight.var3_w = basic->retire_latency;
+ else
+ data->weight.var3_w = 0;
+ }
/*
* The record for MEMINFO is in front of GP
* But PERF_SAMPLE_TRANSACTION needs gprs->ax.
* Save the pointer here but process later.
*/
- if (format_size & PEBS_DATACFG_MEMINFO) {
+ if (format_group & PEBS_DATACFG_MEMINFO) {
meminfo = next_record;
next_record = meminfo + 1;
}
- if (format_size & PEBS_DATACFG_GP) {
+ if (format_group & PEBS_DATACFG_GP) {
gprs = next_record;
next_record = gprs + 1;
@@ -1857,14 +2127,13 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
adaptive_pebs_save_regs(regs, gprs);
}
- if (format_size & PEBS_DATACFG_MEMINFO) {
+ if (format_group & PEBS_DATACFG_MEMINFO) {
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
- u64 weight = meminfo->latency;
+ u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+ meminfo->cache_latency : meminfo->mem_latency;
- if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
- data->weight.var2_w = weight & PEBS_LATENCY_MASK;
- weight >>= PEBS_CACHE_LATENCY_OFFSET;
- }
+ if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
+ data->weight.var2_w = meminfo->instr_latency;
/*
* Although meminfo::latency is defined as a u64,
@@ -1872,12 +2141,13 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
* in practice on Ice Lake and earlier platforms.
*/
if (sample_type & PERF_SAMPLE_WEIGHT) {
- data->weight.full = weight ?:
+ data->weight.full = latency ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
} else {
- data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
+ data->weight.var1_dw = (u32)latency ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
}
+
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
}
@@ -1898,16 +2168,16 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
}
}
- if (format_size & PEBS_DATACFG_XMMS) {
+ if (format_group & PEBS_DATACFG_XMMS) {
struct pebs_xmm *xmm = next_record;
next_record = xmm + 1;
perf_regs->xmm_regs = xmm->xmm;
}
- if (format_size & PEBS_DATACFG_LBRS) {
+ if (format_group & PEBS_DATACFG_LBRS) {
struct lbr_entry *lbr = next_record;
- int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
+ int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT)
& 0xff) + 1;
next_record = next_record + num_lbr * sizeof(struct lbr_entry);
@@ -1917,11 +2187,33 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
}
}
- WARN_ONCE(next_record != __pebs + (format_size >> 48),
- "PEBS record size %llu, expected %llu, config %llx\n",
- format_size >> 48,
+ if (format_group & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) {
+ struct pebs_cntr_header *cntr = next_record;
+ unsigned int nr;
+
+ next_record += sizeof(struct pebs_cntr_header);
+ /*
+ * The PEBS_DATA_CFG is a global register, which is the
+ * superset configuration for all PEBS events.
+ * For the PEBS record of non-sample-read group, ignore
+ * the counter snapshot fields.
+ */
+ if (is_pebs_counter_event_group(event)) {
+ __setup_pebs_counter_group(cpuc, event, cntr, next_record);
+ data->sample_flags |= PERF_SAMPLE_READ;
+ }
+
+ nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
+ if (cntr->metrics == INTEL_CNTR_METRICS)
+ nr += 2;
+ next_record += nr * sizeof(u64);
+ }
+
+ WARN_ONCE(next_record != __pebs + basic->format_size,
+ "PEBS record size %u, expected %llu, config %llx\n",
+ basic->format_size,
(u64)(next_record - __pebs),
- basic->format_size);
+ format_group);
}
static inline void *
@@ -1962,15 +2254,6 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
return NULL;
}
-void intel_pmu_auto_reload_read(struct perf_event *event)
-{
- WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
-
- perf_pmu_disable(event->pmu);
- intel_pmu_drain_pebs_buffer();
- perf_pmu_enable(event->pmu);
-}
-
/*
* Special variant of intel_pmu_save_and_restart() for auto-reload.
*/
@@ -2032,46 +2315,33 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
return 0;
}
+typedef void (*setup_fn)(struct perf_event *, struct pt_regs *, void *,
+ struct perf_sample_data *, struct pt_regs *);
+
+static struct pt_regs dummy_iregs;
+
static __always_inline void
__intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs,
+ struct pt_regs *regs,
struct perf_sample_data *data,
- void *base, void *top,
- int bit, int count,
- void (*setup_sample)(struct perf_event *,
- struct pt_regs *,
- void *,
- struct perf_sample_data *,
- struct pt_regs *))
+ void *at,
+ setup_fn setup_sample)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- struct x86_perf_regs perf_regs;
- struct pt_regs *regs = &perf_regs.regs;
- void *at = get_next_pebs_record_by_bit(base, top, bit);
- static struct pt_regs dummy_iregs;
-
- if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
- /*
- * Now, auto-reload is only enabled in fixed period mode.
- * The reload value is always hwc->sample_period.
- * May need to change it, if auto-reload is enabled in
- * freq mode later.
- */
- intel_pmu_save_and_restart_reload(event, count);
- } else if (!intel_pmu_save_and_restart(event))
- return;
-
- if (!iregs)
- iregs = &dummy_iregs;
+ setup_sample(event, iregs, at, data, regs);
+ perf_event_output(event, data, regs);
+}
- while (count > 1) {
- setup_sample(event, iregs, at, data, regs);
- perf_event_output(event, data, regs);
- at += cpuc->pebs_record_size;
- at = get_next_pebs_record_by_bit(at, top, bit);
- count--;
- }
+static __always_inline void
+__intel_pmu_pebs_last_event(struct perf_event *event,
+ struct pt_regs *iregs,
+ struct pt_regs *regs,
+ struct perf_sample_data *data,
+ void *at,
+ int count,
+ setup_fn setup_sample)
+{
+ struct hw_perf_event *hwc = &event->hw;
setup_sample(event, iregs, at, data, regs);
if (iregs == &dummy_iregs) {
@@ -2090,6 +2360,52 @@ __intel_pmu_pebs_event(struct perf_event *event,
if (perf_event_overflow(event, data, regs))
x86_pmu_stop(event, 0);
}
+
+ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+ if ((is_pebs_counter_event_group(event))) {
+ /*
+ * The value of each sample has been updated when setup
+ * the corresponding sample data.
+ */
+ perf_event_update_userpage(event);
+ } else {
+ /*
+ * Now, auto-reload is only enabled in fixed period mode.
+ * The reload value is always hwc->sample_period.
+ * May need to change it, if auto-reload is enabled in
+ * freq mode later.
+ */
+ intel_pmu_save_and_restart_reload(event, count);
+ }
+ } else
+ intel_pmu_save_and_restart(event);
+}
+
+static __always_inline void
+__intel_pmu_pebs_events(struct perf_event *event,
+ struct pt_regs *iregs,
+ struct perf_sample_data *data,
+ void *base, void *top,
+ int bit, int count,
+ setup_fn setup_sample)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
+ void *at = get_next_pebs_record_by_bit(base, top, bit);
+ int cnt = count;
+
+ if (!iregs)
+ iregs = &dummy_iregs;
+
+ while (cnt > 1) {
+ __intel_pmu_pebs_event(event, iregs, regs, data, at, setup_sample);
+ at += cpuc->pebs_record_size;
+ at = get_next_pebs_record_by_bit(at, top, bit);
+ cnt--;
+ }
+
+ __intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample);
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
@@ -2126,8 +2442,8 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_
return;
}
- __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
- setup_pebs_fixed_sample_data);
+ __intel_pmu_pebs_events(event, iregs, data, at, top, 0, n,
+ setup_pebs_fixed_sample_data);
}
static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
@@ -2157,6 +2473,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
void *base, *at, *top;
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ int max_pebs_events = intel_pmu_max_num_pebs(NULL);
int bit, i, size;
u64 mask;
@@ -2168,11 +2485,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
ds->pebs_index = ds->pebs_buffer_base;
- mask = (1ULL << x86_pmu.max_pebs_events) - 1;
- size = x86_pmu.max_pebs_events;
+ mask = x86_pmu.pebs_events_mask;
+ size = max_pebs_events;
if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
- mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
- size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
+ mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
+ size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL);
}
if (unlikely(base >= top)) {
@@ -2208,8 +2525,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
pebs_status = p->status = cpuc->pebs_enabled;
bit = find_first_bit((unsigned long *)&pebs_status,
- x86_pmu.max_pebs_events);
- if (bit >= x86_pmu.max_pebs_events)
+ max_pebs_events);
+
+ if (!(x86_pmu.pebs_events_mask & (1 << bit)))
continue;
/*
@@ -2256,9 +2574,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
}
if (counts[bit]) {
- __intel_pmu_pebs_event(event, iregs, data, base,
- top, bit, counts[bit],
- setup_pebs_fixed_sample_data);
+ __intel_pmu_pebs_events(event, iregs, data, base,
+ top, bit, counts[bit],
+ setup_pebs_fixed_sample_data);
}
}
}
@@ -2266,13 +2584,15 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
{
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
- int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
struct debug_store *ds = cpuc->ds;
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
+ struct pebs_basic *basic;
struct perf_event *event;
void *base, *at, *top;
- int bit, size;
+ int bit;
u64 mask;
if (!x86_pmu.pebs_active)
@@ -2283,39 +2603,49 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
ds->pebs_index = ds->pebs_buffer_base;
- mask = ((1ULL << max_pebs_events) - 1) |
- (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
- size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
+ mask = hybrid(cpuc->pmu, pebs_events_mask) |
+ (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
if (unlikely(base >= top)) {
- intel_pmu_pebs_event_update_no_drain(cpuc, size);
+ intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
return;
}
- for (at = base; at < top; at += cpuc->pebs_record_size) {
+ if (!iregs)
+ iregs = &dummy_iregs;
+
+ /* Process all but the last event for each counter. */
+ for (at = base; at < top; at += basic->format_size) {
u64 pebs_status;
- pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
- pebs_status &= mask;
+ basic = at;
+ if (basic->format_size != cpuc->pebs_record_size)
+ continue;
+
+ pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask;
+ for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
+ event = cpuc->events[bit];
+
+ if (WARN_ON_ONCE(!event) ||
+ WARN_ON_ONCE(!event->attr.precise_ip))
+ continue;
- for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
- counts[bit]++;
+ if (counts[bit]++) {
+ __intel_pmu_pebs_event(event, iregs, regs, data, last[bit],
+ setup_pebs_adaptive_sample_data);
+ }
+ last[bit] = at;
+ }
}
- for_each_set_bit(bit, (unsigned long *)&mask, size) {
- if (counts[bit] == 0)
+ for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
+ if (!counts[bit])
continue;
event = cpuc->events[bit];
- if (WARN_ON_ONCE(!event))
- continue;
- if (WARN_ON_ONCE(!event->attr.precise_ip))
- continue;
-
- __intel_pmu_pebs_event(event, iregs, data, base,
- top, bit, counts[bit],
- setup_pebs_adaptive_sample_data);
+ __intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
+ counts[bit], setup_pebs_adaptive_sample_data);
}
}
@@ -2380,6 +2710,12 @@ void __init intel_ds_init(void)
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
break;
+ case 6:
+ if (x86_pmu.intel_cap.pebs_baseline) {
+ x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
+ x86_pmu.late_setup = intel_pmu_late_setup;
+ }
+ fallthrough;
case 5:
x86_pmu.pebs_ept = 1;
fallthrough;
@@ -2404,9 +2740,17 @@ void __init intel_ds_init(void)
PERF_SAMPLE_REGS_USER |
PERF_SAMPLE_REGS_INTR);
}
- pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
+ pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual);
- if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
+ /*
+ * The PEBS-via-PT is not supported on hybrid platforms,
+ * because not all CPUs of a hybrid machine support it.
+ * The global x86_pmu.intel_cap, which only contains the
+ * common capabilities, is used to check the availability
+ * of the feature. The per-PMU pebs_output_pt_available
+ * in a hybrid machine should be ignored.
+ */
+ if (x86_pmu.intel_cap.pebs_output_pt_available) {
pr_cont("PEBS-via-PT, ");
x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
}
diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
index 618001c208e8..034a1f6a457c 100644
--- a/arch/x86/events/intel/knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -303,7 +303,7 @@ static const struct x86_pmu knc_pmu __initconst = {
.apic = 1,
.max_period = (1ULL << 39) - 1,
.version = 0,
- .num_counters = 2,
+ .cntr_mask64 = 0x3,
.cntval_bits = 40,
.cntval_mask = (1ULL << 40) - 1,
.get_event_constraints = x86_get_event_constraints,
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 78cd5084104e..f44c3d866f24 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -2,6 +2,7 @@
#include <linux/perf_event.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>
#include <asm/perf_event.h>
#include <asm/msr.h>
@@ -421,11 +422,17 @@ static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
}
+static inline bool has_lbr_callstack_users(void *ctx)
+{
+ return task_context_opt(ctx)->lbr_callstack_users ||
+ x86_pmu.lbr_callstack_users;
+}
+
static void __intel_pmu_lbr_restore(void *ctx)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
+ if (!has_lbr_callstack_users(ctx) ||
task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
intel_pmu_lbr_reset();
return;
@@ -502,7 +509,7 @@ static void __intel_pmu_lbr_save(void *ctx)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- if (task_context_opt(ctx)->lbr_callstack_users == 0) {
+ if (!has_lbr_callstack_users(ctx)) {
task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
return;
}
@@ -515,32 +522,11 @@ static void __intel_pmu_lbr_save(void *ctx)
cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
}
-void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
- struct perf_event_pmu_context *next_epc)
-{
- void *prev_ctx_data, *next_ctx_data;
-
- swap(prev_epc->task_ctx_data, next_epc->task_ctx_data);
-
- /*
- * Architecture specific synchronization makes sense in case
- * both prev_epc->task_ctx_data and next_epc->task_ctx_data
- * pointers are allocated.
- */
-
- prev_ctx_data = next_epc->task_ctx_data;
- next_ctx_data = prev_epc->task_ctx_data;
-
- if (!prev_ctx_data || !next_ctx_data)
- return;
-
- swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
- task_context_opt(next_ctx_data)->lbr_callstack_users);
-}
-
-void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_ctx_data *ctx_data;
void *task_ctx;
if (!cpuc->lbr_users)
@@ -551,14 +537,18 @@ void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched
* the task was scheduled out, restore the stack. Otherwise flush
* the LBR stack.
*/
- task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL;
+ rcu_read_lock();
+ ctx_data = rcu_dereference(task->perf_ctx_data);
+ task_ctx = ctx_data ? ctx_data->data : NULL;
if (task_ctx) {
if (sched_in)
__intel_pmu_lbr_restore(task_ctx);
else
__intel_pmu_lbr_save(task_ctx);
+ rcu_read_unlock();
return;
}
+ rcu_read_unlock();
/*
* Since a context switch can flip the address space and LBR entries
@@ -587,9 +577,19 @@ void intel_pmu_lbr_add(struct perf_event *event)
cpuc->br_sel = event->hw.branch_reg.reg;
- if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data)
- task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++;
+ if (branch_user_callstack(cpuc->br_sel)) {
+ if (event->attach_state & PERF_ATTACH_TASK) {
+ struct task_struct *task = event->hw.target;
+ struct perf_ctx_data *ctx_data;
+ rcu_read_lock();
+ ctx_data = rcu_dereference(task->perf_ctx_data);
+ if (ctx_data)
+ task_context_opt(ctx_data->data)->lbr_callstack_users++;
+ rcu_read_unlock();
+ } else
+ x86_pmu.lbr_callstack_users++;
+ }
/*
* Request pmu::sched_task() callback, which will fire inside the
* regular perf event scheduling, so that call will:
@@ -663,9 +663,19 @@ void intel_pmu_lbr_del(struct perf_event *event)
if (!x86_pmu.lbr_nr)
return;
- if (branch_user_callstack(cpuc->br_sel) &&
- event->pmu_ctx->task_ctx_data)
- task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--;
+ if (branch_user_callstack(cpuc->br_sel)) {
+ if (event->attach_state & PERF_ATTACH_TASK) {
+ struct task_struct *task = event->hw.target;
+ struct perf_ctx_data *ctx_data;
+
+ rcu_read_lock();
+ ctx_data = rcu_dereference(task->perf_ctx_data);
+ if (ctx_data)
+ task_context_opt(ctx_data->data)->lbr_callstack_users--;
+ rcu_read_unlock();
+ } else
+ x86_pmu.lbr_callstack_users--;
+ }
if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
cpuc->lbr_select = 0;
@@ -1457,7 +1467,7 @@ void __init intel_pmu_lbr_init_atom(void)
* to have an operational LBR which can freeze
* on PMU interrupt
*/
- if (boot_cpu_data.x86_model == 28
+ if (boot_cpu_data.x86_vfm == INTEL_ATOM_BONNELL
&& boot_cpu_data.x86_stepping < 10) {
pr_cont("LBR disabled due to erratum");
return;
@@ -1693,6 +1703,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
lbr->from = x86_pmu.lbr_from;
lbr->to = x86_pmu.lbr_to;
lbr->info = x86_pmu.lbr_info;
+ lbr->has_callstack = x86_pmu_has_lbr_callstack();
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 35936188db01..c85a9fc44355 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -10,6 +10,7 @@
#include <linux/perf_event.h>
#include <asm/perf_event_p4.h>
+#include <asm/cpu_device_id.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
@@ -732,9 +733,9 @@ static bool p4_event_match_cpu_model(unsigned int event_idx)
{
/* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
if (event_idx == P4_EVENT_INSTR_COMPLETED) {
- if (boot_cpu_data.x86_model != 3 &&
- boot_cpu_data.x86_model != 4 &&
- boot_cpu_data.x86_model != 6)
+ if (boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT &&
+ boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT_2M &&
+ boot_cpu_data.x86_vfm != INTEL_P4_CEDARMILL)
return false;
}
@@ -776,7 +777,7 @@ static int p4_validate_raw_event(struct perf_event *event)
* the user needs special permissions to be able to use it
*/
if (p4_ht_active() && p4_event_bind_map[v].shared) {
- v = perf_allow_cpu(&event->attr);
+ v = perf_allow_cpu();
if (v)
return v;
}
@@ -919,7 +920,7 @@ static void p4_pmu_disable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx];
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -998,7 +999,7 @@ static void p4_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[idx];
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -1040,7 +1041,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
cpuc = this_cpu_ptr(&cpu_hw_events);
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
int overflow;
if (!test_bit(idx, cpuc->active_mask)) {
@@ -1353,7 +1354,7 @@ static __initconst const struct x86_pmu p4_pmu = {
* though leave it restricted at moment assuming
* HT is on
*/
- .num_counters = ARCH_P4_MAX_CCCR,
+ .cntr_mask64 = GENMASK_ULL(ARCH_P4_MAX_CCCR - 1, 0),
.apic = 1,
.cntval_bits = ARCH_P4_CNTRVAL_BITS,
.cntval_mask = ARCH_P4_CNTRVAL_MASK,
@@ -1395,7 +1396,7 @@ __init int p4_pmu_init(void)
*
* Solve this by zero'ing out the registers to mimic a reset.
*/
- for (i = 0; i < x86_pmu.num_counters; i++) {
+ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i);
wrmsrl_safe(reg, 0ULL);
}
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
index 408879b0c0d4..65b45e9d7016 100644
--- a/arch/x86/events/intel/p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -2,6 +2,8 @@
#include <linux/perf_event.h>
#include <linux/types.h>
+#include <asm/cpu_device_id.h>
+
#include "../perf_event.h"
/*
@@ -214,7 +216,7 @@ static __initconst const struct x86_pmu p6_pmu = {
.apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
- .num_counters = 2,
+ .cntr_mask64 = 0x3,
/*
* Events have 40 bits implemented. However they are designed such
* that bits [32-39] are sign extensions of bit 31. As such the
@@ -248,30 +250,8 @@ __init int p6_pmu_init(void)
{
x86_pmu = p6_pmu;
- switch (boot_cpu_data.x86_model) {
- case 1: /* Pentium Pro */
+ if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO)
x86_add_quirk(p6_pmu_rdpmc_quirk);
- break;
-
- case 3: /* Pentium II - Klamath */
- case 5: /* Pentium II - Deschutes */
- case 6: /* Pentium II - Mendocino */
- break;
-
- case 7: /* Pentium III - Katmai */
- case 8: /* Pentium III - Coppermine */
- case 10: /* Pentium III Xeon */
- case 11: /* Pentium III - Tualatin */
- break;
-
- case 9: /* Pentium M - Banias */
- case 13: /* Pentium M - Dothan */
- break;
-
- default:
- pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
- return -ENODEV;
- }
memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 8e2a12235e62..fa37565f6418 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -18,11 +18,12 @@
#include <linux/slab.h>
#include <linux/device.h>
+#include <asm/cpuid.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/intel_pt.h>
-#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#include "../perf_event.h"
#include "pt.h"
@@ -201,21 +202,21 @@ static int __init pt_pmu_hw_init(void)
* otherwise, zero for numerator stands for "not enumerated"
* as per SDM
*/
- if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
+ if (boot_cpu_data.cpuid_level >= CPUID_LEAF_TSC) {
u32 eax, ebx, ecx, edx;
- cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
+ cpuid(CPUID_LEAF_TSC, &eax, &ebx, &ecx, &edx);
pt_pmu.tsc_art_num = ebx;
pt_pmu.tsc_art_den = eax;
}
/* model-specific quirks */
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_BROADWELL:
- case INTEL_FAM6_BROADWELL_D:
- case INTEL_FAM6_BROADWELL_G:
- case INTEL_FAM6_BROADWELL_X:
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_BROADWELL:
+ case INTEL_BROADWELL_D:
+ case INTEL_BROADWELL_G:
+ case INTEL_BROADWELL_X:
/* not setting BRANCH_EN will #GP, erratum BDM106 */
pt_pmu.branch_en_always_on = true;
break;
@@ -416,7 +417,10 @@ static bool pt_event_valid(struct perf_event *event)
static void pt_config_start(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
- u64 ctl = event->hw.config;
+ u64 ctl = event->hw.aux_config;
+
+ if (READ_ONCE(event->hw.aux_paused))
+ return;
ctl |= RTIT_CTL_TRACEEN;
if (READ_ONCE(pt->vmx_on))
@@ -424,7 +428,7 @@ static void pt_config_start(struct perf_event *event)
else
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
- WRITE_ONCE(event->hw.config, ctl);
+ WRITE_ONCE(event->hw.aux_config, ctl);
}
/* Address ranges and their corresponding msr configuration registers */
@@ -503,7 +507,7 @@ static void pt_config(struct perf_event *event)
u64 reg;
/* First round: clear STATUS, in particular the PSB byte counter. */
- if (!event->hw.config) {
+ if (!event->hw.aux_config) {
perf_event_itrace_started(event);
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
}
@@ -533,14 +537,31 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
- event->hw.config = reg;
+ event->hw.aux_config = reg;
+
+ /*
+ * Allow resume before starting so as not to overwrite a value set by a
+ * PMI.
+ */
+ barrier();
+ WRITE_ONCE(pt->resume_allowed, 1);
+ /* Configuration is complete, it is now OK to handle an NMI */
+ barrier();
+ WRITE_ONCE(pt->handle_nmi, 1);
+ barrier();
pt_config_start(event);
+ barrier();
+ /*
+ * Allow pause after starting so its pt_config_stop() doesn't race with
+ * pt_config_start().
+ */
+ WRITE_ONCE(pt->pause_allowed, 1);
}
static void pt_config_stop(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
- u64 ctl = READ_ONCE(event->hw.config);
+ u64 ctl = READ_ONCE(event->hw.aux_config);
/* may be already stopped by a PMI */
if (!(ctl & RTIT_CTL_TRACEEN))
@@ -550,7 +571,7 @@ static void pt_config_stop(struct perf_event *event)
if (!READ_ONCE(pt->vmx_on))
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
- WRITE_ONCE(event->hw.config, ctl);
+ WRITE_ONCE(event->hw.aux_config, ctl);
/*
* A wrmsr that disables trace generation serializes other PT
@@ -828,11 +849,13 @@ static void pt_buffer_advance(struct pt_buffer *buf)
buf->cur_idx++;
if (buf->cur_idx == buf->cur->last) {
- if (buf->cur == buf->last)
+ if (buf->cur == buf->last) {
buf->cur = buf->first;
- else
+ buf->wrapped = true;
+ } else {
buf->cur = list_entry(buf->cur->list.next, struct topa,
list);
+ }
buf->cur_idx = 0;
}
}
@@ -846,8 +869,11 @@ static void pt_buffer_advance(struct pt_buffer *buf)
static void pt_update_head(struct pt *pt)
{
struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ bool wrapped = buf->wrapped;
u64 topa_idx, base, old;
+ buf->wrapped = false;
+
if (buf->single) {
local_set(&buf->data_size, buf->output_off);
return;
@@ -865,7 +891,7 @@ static void pt_update_head(struct pt *pt)
} else {
old = (local64_xchg(&buf->head, base) &
((buf->nr_pages << PAGE_SHIFT) - 1));
- if (base < old)
+ if (base < old || (base == old && wrapped))
base += buf->nr_pages << PAGE_SHIFT;
local_add(base - old, &buf->data_size);
@@ -878,7 +904,7 @@ static void pt_update_head(struct pt *pt)
*/
static void *pt_buffer_region(struct pt_buffer *buf)
{
- return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
+ return phys_to_virt((phys_addr_t)TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
}
/**
@@ -990,7 +1016,7 @@ pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
* order allocations, there shouldn't be many of these.
*/
list_for_each_entry(topa, &buf->tables, list) {
- if (topa->offset + topa->size > pg << PAGE_SHIFT)
+ if (topa->offset + topa->size > (unsigned long)pg << PAGE_SHIFT)
goto found;
}
@@ -1511,6 +1537,7 @@ void intel_pt_interrupt(void)
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf) {
event->hw.state = PERF_HES_STOPPED;
+ WRITE_ONCE(pt->resume_allowed, 0);
return;
}
@@ -1519,6 +1546,7 @@ void intel_pt_interrupt(void)
ret = pt_buffer_reset_markers(buf, &pt->handle);
if (ret) {
perf_aux_output_end(&pt->handle, 0);
+ WRITE_ONCE(pt->resume_allowed, 0);
return;
}
@@ -1557,7 +1585,7 @@ void intel_pt_handle_vmx(int on)
/* Turn PTs back on */
if (!on && event)
- wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config);
+ wrmsrl(MSR_IA32_RTIT_CTL, event->hw.aux_config);
local_irq_restore(flags);
}
@@ -1573,6 +1601,26 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;
+ if (mode & PERF_EF_RESUME) {
+ if (READ_ONCE(pt->resume_allowed)) {
+ u64 status;
+
+ /*
+ * Only if the trace is not active and the error and
+ * stopped bits are clear, is it safe to start, but a
+ * PMI might have just cleared these, so resume_allowed
+ * must be checked again also.
+ */
+ rdmsrl(MSR_IA32_RTIT_STATUS, status);
+ if (!(status & (RTIT_STATUS_TRIGGEREN |
+ RTIT_STATUS_ERROR |
+ RTIT_STATUS_STOPPED)) &&
+ READ_ONCE(pt->resume_allowed))
+ pt_config_start(event);
+ }
+ return;
+ }
+
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;
@@ -1583,7 +1631,6 @@ static void pt_event_start(struct perf_event *event, int mode)
goto fail_end_stop;
}
- WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;
pt_config_buffer(buf);
@@ -1601,11 +1648,27 @@ static void pt_event_stop(struct perf_event *event, int mode)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
+ if (mode & PERF_EF_PAUSE) {
+ if (READ_ONCE(pt->pause_allowed))
+ pt_config_stop(event);
+ return;
+ }
+
/*
* Protect against the PMI racing with disabling wrmsr,
* see comment in intel_pt_interrupt().
*/
WRITE_ONCE(pt->handle_nmi, 0);
+ barrier();
+
+ /*
+ * Prevent a resume from attempting to restart tracing, or a pause
+ * during a subsequent start. Do this after clearing handle_nmi so that
+ * pt_event_snapshot_aux() will not re-allow them.
+ */
+ WRITE_ONCE(pt->pause_allowed, 0);
+ WRITE_ONCE(pt->resume_allowed, 0);
+ barrier();
pt_config_stop(event);
@@ -1656,12 +1719,15 @@ static long pt_event_snapshot_aux(struct perf_event *event,
if (WARN_ON_ONCE(!buf->snapshot))
return 0;
+ /* Prevent pause/resume from attempting to start/stop tracing */
+ WRITE_ONCE(pt->pause_allowed, 0);
+ WRITE_ONCE(pt->resume_allowed, 0);
+ barrier();
/*
- * Here, handle_nmi tells us if the tracing is on
+ * There is no PT interrupt in this mode, so stop the trace and it will
+ * remain stopped while the buffer is copied.
*/
- if (READ_ONCE(pt->handle_nmi))
- pt_config_stop(event);
-
+ pt_config_stop(event);
pt_read_offset(buf);
pt_update_head(pt);
@@ -1673,12 +1739,16 @@ static long pt_event_snapshot_aux(struct perf_event *event,
ret = perf_output_copy_aux(&pt->handle, handle, from, to);
/*
- * If the tracing was on when we turned up, restart it.
- * Compiler barrier not needed as we couldn't have been
- * preempted by anything that touches pt->handle_nmi.
+ * Here, handle_nmi tells us if the tracing was on.
+ * If the tracing was on, restart it.
*/
- if (pt->handle_nmi)
+ if (READ_ONCE(pt->handle_nmi)) {
+ WRITE_ONCE(pt->resume_allowed, 1);
+ barrier();
pt_config_start(event);
+ barrier();
+ WRITE_ONCE(pt->pause_allowed, 1);
+ }
return ret;
}
@@ -1794,7 +1864,9 @@ static __init int pt_init(void)
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
- pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+ pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
+ PERF_PMU_CAP_ITRACE |
+ PERF_PMU_CAP_AUX_PAUSE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
pt_pmu.pmu.event_init = pt_event_init;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 96906a62aacd..2ac36250b656 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -33,13 +33,10 @@ struct topa_entry {
u64 rsvd2 : 1;
u64 size : 4;
u64 rsvd3 : 2;
- u64 base : 36;
- u64 rsvd4 : 16;
+ u64 base : 40;
+ u64 rsvd4 : 12;
};
-/* TSC to Core Crystal Clock Ratio */
-#define CPUID_TSC_LEAF 0x15
-
struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
@@ -65,6 +62,7 @@ struct pt_pmu {
* @head: logical write offset inside the buffer
* @snapshot: if this is for a snapshot/overwrite counter
* @single: use Single Range Output instead of ToPA
+ * @wrapped: buffer advance wrapped back to the first topa table
* @stop_pos: STOP topa entry index
* @intr_pos: INT topa entry index
* @stop_te: STOP topa entry pointer
@@ -82,6 +80,7 @@ struct pt_buffer {
local64_t head;
bool snapshot;
bool single;
+ bool wrapped;
long stop_pos, intr_pos;
struct topa_entry *stop_te, *intr_te;
void **data_pages;
@@ -117,6 +116,8 @@ struct pt_filters {
* @filters: last configured filters
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
* @vmx_on: 1 if VMX is ON on this cpu
+ * @pause_allowed: PERF_EF_PAUSE is allowed to stop tracing
+ * @resume_allowed: PERF_EF_RESUME is allowed to start tracing
* @output_base: cached RTIT_OUTPUT_BASE MSR value
* @output_mask: cached RTIT_OUTPUT_MASK MSR value
*/
@@ -125,6 +126,8 @@ struct pt {
struct pt_filters filters;
int handle_nmi;
int vmx_on;
+ int pause_allowed;
+ int resume_allowed;
u64 output_base;
u64 output_mask;
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 258e2cdf28fa..a34e50fc4a8f 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -34,6 +34,7 @@ static struct event_constraint uncore_constraint_fixed =
struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
+MODULE_DESCRIPTION("Support for Intel uncore performance events");
MODULE_LICENSE("GPL");
int uncore_pcibus_to_dieid(struct pci_bus *bus)
@@ -263,6 +264,9 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box,
return;
}
+ if (intel_generic_uncore_assign_hw_event(event, box))
+ return;
+
hwc->config_base = uncore_event_ctl(box, hwc->idx);
hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}
@@ -343,8 +347,7 @@ void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
- hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- box->hrtimer.function = uncore_pmu_hrtimer;
+ hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
}
static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
@@ -741,7 +744,7 @@ static int uncore_pmu_event_init(struct perf_event *event)
pmu = uncore_event_to_pmu(event);
/* no device found for this pmu */
- if (pmu->func_id < 0)
+ if (!pmu->registered)
return -ENOENT;
/* Sampling not supported yet */
@@ -843,7 +846,9 @@ static void uncore_pmu_disable(struct pmu *pmu)
static ssize_t uncore_get_attr_cpumask(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
+ struct intel_uncore_pmu *pmu = container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu);
+
+ return cpumap_print_to_pagebuf(true, buf, &pmu->cpu_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
@@ -860,7 +865,10 @@ static const struct attribute_group uncore_pmu_attr_group = {
static inline int uncore_get_box_id(struct intel_uncore_type *type,
struct intel_uncore_pmu *pmu)
{
- return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx;
+ if (type->boxes)
+ return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx);
+
+ return pmu->pmu_idx;
}
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
@@ -961,6 +969,9 @@ static void uncore_type_exit(struct intel_uncore_type *type)
if (type->cleanup_mapping)
type->cleanup_mapping(type);
+ if (type->cleanup_extra_boxes)
+ type->cleanup_extra_boxes(type);
+
if (pmu) {
for (i = 0; i < type->num_boxes; i++, pmu++) {
uncore_pmu_unregister(pmu);
@@ -969,10 +980,7 @@ static void uncore_type_exit(struct intel_uncore_type *type)
kfree(type->pmus);
type->pmus = NULL;
}
- if (type->box_ids) {
- kfree(type->box_ids);
- type->box_ids = NULL;
- }
+
kfree(type->events_group);
type->events_group = NULL;
}
@@ -983,7 +991,7 @@ static void uncore_types_exit(struct intel_uncore_type **types)
uncore_type_exit(*types);
}
-static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
+static int __init uncore_type_init(struct intel_uncore_type *type)
{
struct intel_uncore_pmu *pmus;
size_t size;
@@ -996,7 +1004,6 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
for (i = 0; i < type->num_boxes; i++) {
- pmus[i].func_id = setid ? i : -1;
pmus[i].pmu_idx = i;
pmus[i].type = type;
pmus[i].boxes = kzalloc(size, GFP_KERNEL);
@@ -1046,12 +1053,12 @@ err:
}
static int __init
-uncore_types_init(struct intel_uncore_type **types, bool setid)
+uncore_types_init(struct intel_uncore_type **types)
{
int ret;
for (; *types; types++) {
- ret = uncore_type_init(*types, setid);
+ ret = uncore_type_init(*types);
if (ret)
return ret;
}
@@ -1076,22 +1083,19 @@ static struct intel_uncore_pmu *
uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
{
struct intel_uncore_type **types = uncore_pci_uncores;
+ struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type;
- u64 box_ctl;
- int i, die;
+ struct rb_node *node;
for (; *types; types++) {
type = *types;
- for (die = 0; die < __uncore_max_dies; die++) {
- for (i = 0; i < type->num_boxes; i++) {
- if (!type->box_ctls[die])
- continue;
- box_ctl = type->box_ctls[die] + type->pci_offsets[i];
- if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
- pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
- pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
- return &type->pmus[i];
- }
+
+ for (node = rb_first(type->boxes); node; node = rb_next(node)) {
+ unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
+ if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(unit->addr) &&
+ pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(unit->addr) &&
+ pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr))
+ return &type->pmus[unit->pmu_idx];
}
}
@@ -1154,11 +1158,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev,
if (!box)
return -ENOMEM;
- if (pmu->func_id < 0)
- pmu->func_id = pdev->devfn;
- else
- WARN_ON_ONCE(pmu->func_id != pdev->devfn);
-
atomic_inc(&box->refcnt);
box->dieid = die;
box->pci_dev = pdev;
@@ -1367,28 +1366,25 @@ static struct notifier_block uncore_pci_notifier = {
static void uncore_pci_pmus_register(void)
{
struct intel_uncore_type **types = uncore_pci_uncores;
+ struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
+ struct rb_node *node;
struct pci_dev *pdev;
- u64 box_ctl;
- int i, die;
for (; *types; types++) {
type = *types;
- for (die = 0; die < __uncore_max_dies; die++) {
- for (i = 0; i < type->num_boxes; i++) {
- if (!type->box_ctls[die])
- continue;
- box_ctl = type->box_ctls[die] + type->pci_offsets[i];
- pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
- UNCORE_DISCOVERY_PCI_BUS(box_ctl),
- UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
- if (!pdev)
- continue;
- pmu = &type->pmus[i];
-
- uncore_pci_pmu_register(pdev, type, pmu, die);
- }
+
+ for (node = rb_first(type->boxes); node; node = rb_next(node)) {
+ unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
+ pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr),
+ UNCORE_DISCOVERY_PCI_BUS(unit->addr),
+ UNCORE_DISCOVERY_PCI_DEVFN(unit->addr));
+
+ if (!pdev)
+ continue;
+ pmu = &type->pmus[unit->pmu_idx];
+ uncore_pci_pmu_register(pdev, type, pmu, unit->die);
}
}
@@ -1407,7 +1403,7 @@ static int __init uncore_pci_init(void)
goto err;
}
- ret = uncore_types_init(uncore_pci_uncores, false);
+ ret = uncore_types_init(uncore_pci_uncores);
if (ret)
goto errtype;
@@ -1453,6 +1449,18 @@ static void uncore_pci_exit(void)
}
}
+static bool uncore_die_has_box(struct intel_uncore_type *type,
+ int die, unsigned int pmu_idx)
+{
+ if (!type->boxes)
+ return true;
+
+ if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0)
+ return false;
+
+ return true;
+}
+
static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
int new_cpu)
{
@@ -1468,18 +1476,25 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
if (old_cpu < 0) {
WARN_ON_ONCE(box->cpu != -1);
- box->cpu = new_cpu;
+ if (uncore_die_has_box(type, die, pmu->pmu_idx)) {
+ box->cpu = new_cpu;
+ cpumask_set_cpu(new_cpu, &pmu->cpu_mask);
+ }
continue;
}
- WARN_ON_ONCE(box->cpu != old_cpu);
+ WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu);
box->cpu = -1;
+ cpumask_clear_cpu(old_cpu, &pmu->cpu_mask);
if (new_cpu < 0)
continue;
+ if (!uncore_die_has_box(type, die, pmu->pmu_idx))
+ continue;
uncore_pmu_cancel_hrtimer(box);
perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
box->cpu = new_cpu;
+ cpumask_set_cpu(new_cpu, &pmu->cpu_mask);
}
}
@@ -1502,7 +1517,7 @@ static void uncore_box_unref(struct intel_uncore_type **types, int id)
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[id];
- if (box && atomic_dec_return(&box->refcnt) == 0)
+ if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0)
uncore_box_exit(box);
}
}
@@ -1592,7 +1607,7 @@ static int uncore_box_ref(struct intel_uncore_type **types,
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[id];
- if (box && atomic_inc_return(&box->refcnt) == 1)
+ if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
}
@@ -1656,7 +1671,7 @@ static int __init uncore_cpu_init(void)
{
int ret;
- ret = uncore_types_init(uncore_msr_uncores, true);
+ ret = uncore_types_init(uncore_msr_uncores);
if (ret)
goto err;
@@ -1675,7 +1690,7 @@ static int __init uncore_mmio_init(void)
struct intel_uncore_type **types = uncore_mmio_uncores;
int ret;
- ret = uncore_types_init(types, true);
+ ret = uncore_types_init(types);
if (ret)
goto err;
@@ -1794,6 +1809,11 @@ static const struct intel_uncore_init_fun mtl_uncore_init __initconst = {
.mmio_init = adl_uncore_mmio_init,
};
+static const struct intel_uncore_init_fun lnl_uncore_init __initconst = {
+ .cpu_init = lnl_uncore_cpu_init,
+ .mmio_init = lnl_uncore_mmio_init,
+};
+
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
.cpu_init = icx_uncore_cpu_init,
.pci_init = icx_uncore_pci_init,
@@ -1829,56 +1849,61 @@ static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
};
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
- X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &ivb_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &hsw_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hsw_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &hsw_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &bdw_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &bdw_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snbep_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhmex_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhmex_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ivbep_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &hswep_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &bdx_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &bdx_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &skl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &skl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &skl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &skl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &mtl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &mtl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, &gnr_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, &gnr_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &gnr_uncore_init),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &gnr_uncore_init),
+ X86_MATCH_VFM(INTEL_NEHALEM_EP, &nhm_uncore_init),
+ X86_MATCH_VFM(INTEL_NEHALEM, &nhm_uncore_init),
+ X86_MATCH_VFM(INTEL_WESTMERE, &nhm_uncore_init),
+ X86_MATCH_VFM(INTEL_WESTMERE_EP, &nhm_uncore_init),
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE, &snb_uncore_init),
+ X86_MATCH_VFM(INTEL_IVYBRIDGE, &ivb_uncore_init),
+ X86_MATCH_VFM(INTEL_HASWELL, &hsw_uncore_init),
+ X86_MATCH_VFM(INTEL_HASWELL_L, &hsw_uncore_init),
+ X86_MATCH_VFM(INTEL_HASWELL_G, &hsw_uncore_init),
+ X86_MATCH_VFM(INTEL_BROADWELL, &bdw_uncore_init),
+ X86_MATCH_VFM(INTEL_BROADWELL_G, &bdw_uncore_init),
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &snbep_uncore_init),
+ X86_MATCH_VFM(INTEL_NEHALEM_EX, &nhmex_uncore_init),
+ X86_MATCH_VFM(INTEL_WESTMERE_EX, &nhmex_uncore_init),
+ X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ivbep_uncore_init),
+ X86_MATCH_VFM(INTEL_HASWELL_X, &hswep_uncore_init),
+ X86_MATCH_VFM(INTEL_BROADWELL_X, &bdx_uncore_init),
+ X86_MATCH_VFM(INTEL_BROADWELL_D, &bdx_uncore_init),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &knl_uncore_init),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &knl_uncore_init),
+ X86_MATCH_VFM(INTEL_SKYLAKE, &skl_uncore_init),
+ X86_MATCH_VFM(INTEL_SKYLAKE_L, &skl_uncore_init),
+ X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_uncore_init),
+ X86_MATCH_VFM(INTEL_KABYLAKE_L, &skl_uncore_init),
+ X86_MATCH_VFM(INTEL_KABYLAKE, &skl_uncore_init),
+ X86_MATCH_VFM(INTEL_COMETLAKE_L, &skl_uncore_init),
+ X86_MATCH_VFM(INTEL_COMETLAKE, &skl_uncore_init),
+ X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_uncore_init),
+ X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &icl_uncore_init),
+ X86_MATCH_VFM(INTEL_ICELAKE, &icl_uncore_init),
+ X86_MATCH_VFM(INTEL_ICELAKE_D, &icx_uncore_init),
+ X86_MATCH_VFM(INTEL_ICELAKE_X, &icx_uncore_init),
+ X86_MATCH_VFM(INTEL_TIGERLAKE_L, &tgl_l_uncore_init),
+ X86_MATCH_VFM(INTEL_TIGERLAKE, &tgl_uncore_init),
+ X86_MATCH_VFM(INTEL_ROCKETLAKE, &rkl_uncore_init),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, &adl_uncore_init),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, &adl_uncore_init),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_uncore_init),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &adl_uncore_init),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init),
+ X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init),
+ X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init),
+ X86_MATCH_VFM(INTEL_ARROWLAKE, &mtl_uncore_init),
+ X86_MATCH_VFM(INTEL_ARROWLAKE_U, &mtl_uncore_init),
+ X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init),
+ X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init),
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_uncore_init),
+ X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &snr_uncore_init),
+ X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init),
+ X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_uncore_init),
{},
};
MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 4838502d89ae..3dcb88c0ecfa 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -62,7 +62,6 @@ struct intel_uncore_type {
unsigned fixed_ctr;
unsigned fixed_ctl;
unsigned box_ctl;
- u64 *box_ctls; /* Unit ctrl addr of the first box of each die */
union {
unsigned msr_offset;
unsigned mmio_offset;
@@ -76,7 +75,6 @@ struct intel_uncore_type {
u64 *pci_offsets;
u64 *mmio_offsets;
};
- unsigned *box_ids;
struct event_constraint unconstrainted;
struct event_constraint *constraints;
struct intel_uncore_pmu *pmus;
@@ -86,6 +84,7 @@ struct intel_uncore_type {
const struct attribute_group *attr_groups[4];
const struct attribute_group **attr_update;
struct pmu *pmu; /* for custom pmu ops */
+ struct rb_root *boxes;
/*
* Uncore PMU would store relevant platform topology configuration here
* to identify which platform component each PMON block of that type is
@@ -98,6 +97,10 @@ struct intel_uncore_type {
int (*get_topology)(struct intel_uncore_type *type);
void (*set_mapping)(struct intel_uncore_type *type);
void (*cleanup_mapping)(struct intel_uncore_type *type);
+ /*
+ * Optional callbacks for extra uncore units cleanup
+ */
+ void (*cleanup_extra_boxes)(struct intel_uncore_type *type);
};
#define pmu_group attr_groups[0]
@@ -122,9 +125,9 @@ struct intel_uncore_pmu {
struct pmu pmu;
char name[UNCORE_PMU_NAME_LEN];
int pmu_idx;
- int func_id;
bool registered;
atomic_t activeboxes;
+ cpumask_t cpu_mask;
struct intel_uncore_type *type;
struct intel_uncore_box **boxes;
};
@@ -607,10 +610,12 @@ void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
void adl_uncore_cpu_init(void);
+void lnl_uncore_cpu_init(void);
void mtl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
void adl_uncore_mmio_init(void);
+void lnl_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 9a698a92962a..571e44b49691 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -89,9 +89,7 @@ add_uncore_discovery_type(struct uncore_unit_discovery *unit)
if (!type)
return NULL;
- type->box_ctrl_die = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL);
- if (!type->box_ctrl_die)
- goto free_type;
+ type->units = RB_ROOT;
type->access_type = unit->access_type;
num_discovered_types[type->access_type]++;
@@ -100,12 +98,6 @@ add_uncore_discovery_type(struct uncore_unit_discovery *unit)
rb_add(&type->node, &discovery_tables, __type_less);
return type;
-
-free_type:
- kfree(type);
-
- return NULL;
-
}
static struct intel_uncore_discovery_type *
@@ -120,14 +112,118 @@ get_uncore_discovery_type(struct uncore_unit_discovery *unit)
return add_uncore_discovery_type(unit);
}
+static inline int pmu_idx_cmp(const void *key, const struct rb_node *b)
+{
+ struct intel_uncore_discovery_unit *unit;
+ const unsigned int *id = key;
+
+ unit = rb_entry(b, struct intel_uncore_discovery_unit, node);
+
+ if (unit->pmu_idx > *id)
+ return -1;
+ else if (unit->pmu_idx < *id)
+ return 1;
+
+ return 0;
+}
+
+static struct intel_uncore_discovery_unit *
+intel_uncore_find_discovery_unit(struct rb_root *units, int die,
+ unsigned int pmu_idx)
+{
+ struct intel_uncore_discovery_unit *unit;
+ struct rb_node *pos;
+
+ if (!units)
+ return NULL;
+
+ pos = rb_find_first(&pmu_idx, units, pmu_idx_cmp);
+ if (!pos)
+ return NULL;
+ unit = rb_entry(pos, struct intel_uncore_discovery_unit, node);
+
+ if (die < 0)
+ return unit;
+
+ for (; pos; pos = rb_next(pos)) {
+ unit = rb_entry(pos, struct intel_uncore_discovery_unit, node);
+
+ if (unit->pmu_idx != pmu_idx)
+ break;
+
+ if (unit->die == die)
+ return unit;
+ }
+
+ return NULL;
+}
+
+int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die,
+ unsigned int pmu_idx)
+{
+ struct intel_uncore_discovery_unit *unit;
+
+ unit = intel_uncore_find_discovery_unit(units, die, pmu_idx);
+ if (unit)
+ return unit->id;
+
+ return -1;
+}
+
+static inline bool unit_less(struct rb_node *a, const struct rb_node *b)
+{
+ struct intel_uncore_discovery_unit *a_node, *b_node;
+
+ a_node = rb_entry(a, struct intel_uncore_discovery_unit, node);
+ b_node = rb_entry(b, struct intel_uncore_discovery_unit, node);
+
+ if (a_node->pmu_idx < b_node->pmu_idx)
+ return true;
+ if (a_node->pmu_idx > b_node->pmu_idx)
+ return false;
+
+ if (a_node->die < b_node->die)
+ return true;
+ if (a_node->die > b_node->die)
+ return false;
+
+ return 0;
+}
+
+static inline struct intel_uncore_discovery_unit *
+uncore_find_unit(struct rb_root *root, unsigned int id)
+{
+ struct intel_uncore_discovery_unit *unit;
+ struct rb_node *node;
+
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
+ if (unit->id == id)
+ return unit;
+ }
+
+ return NULL;
+}
+
+void uncore_find_add_unit(struct intel_uncore_discovery_unit *node,
+ struct rb_root *root, u16 *num_units)
+{
+ struct intel_uncore_discovery_unit *unit = uncore_find_unit(root, node->id);
+
+ if (unit)
+ node->pmu_idx = unit->pmu_idx;
+ else if (num_units)
+ node->pmu_idx = (*num_units)++;
+
+ rb_add(&node->node, root, unit_less);
+}
+
static void
uncore_insert_box_info(struct uncore_unit_discovery *unit,
- int die, bool parsed)
+ int die)
{
+ struct intel_uncore_discovery_unit *node;
struct intel_uncore_discovery_type *type;
- unsigned int *ids;
- u64 *box_offset;
- int i;
if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) {
pr_info("Invalid address is detected for uncore type %d box %d, "
@@ -136,71 +232,29 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
return;
}
- if (parsed) {
- type = search_uncore_discovery_type(unit->box_type);
- if (!type) {
- pr_info("A spurious uncore type %d is detected, "
- "Disable the uncore type.\n",
- unit->box_type);
- return;
- }
- /* Store the first box of each die */
- if (!type->box_ctrl_die[die])
- type->box_ctrl_die[die] = unit->ctl;
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
return;
- }
- type = get_uncore_discovery_type(unit);
- if (!type)
- return;
+ node->die = die;
+ node->id = unit->box_id;
+ node->addr = unit->ctl;
- box_offset = kcalloc(type->num_boxes + 1, sizeof(u64), GFP_KERNEL);
- if (!box_offset)
+ type = get_uncore_discovery_type(unit);
+ if (!type) {
+ kfree(node);
return;
+ }
- ids = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL);
- if (!ids)
- goto free_box_offset;
+ uncore_find_add_unit(node, &type->units, &type->num_units);
/* Store generic information for the first box */
- if (!type->num_boxes) {
- type->box_ctrl = unit->ctl;
- type->box_ctrl_die[die] = unit->ctl;
+ if (type->num_units == 1) {
type->num_counters = unit->num_regs;
type->counter_width = unit->bit_width;
type->ctl_offset = unit->ctl_offset;
type->ctr_offset = unit->ctr_offset;
- *ids = unit->box_id;
- goto end;
- }
-
- for (i = 0; i < type->num_boxes; i++) {
- ids[i] = type->ids[i];
- box_offset[i] = type->box_offset[i];
-
- if (unit->box_id == ids[i]) {
- pr_info("Duplicate uncore type %d box ID %d is detected, "
- "Drop the duplicate uncore unit.\n",
- unit->box_type, unit->box_id);
- goto free_ids;
- }
}
- ids[i] = unit->box_id;
- box_offset[i] = unit->ctl - type->box_ctrl;
- kfree(type->ids);
- kfree(type->box_offset);
-end:
- type->ids = ids;
- type->box_offset = box_offset;
- type->num_boxes++;
- return;
-
-free_ids:
- kfree(ids);
-
-free_box_offset:
- kfree(box_offset);
-
}
static bool
@@ -279,7 +333,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
if (uncore_ignore_unit(&unit, ignore))
continue;
- uncore_insert_box_info(&unit, die, *parsed);
+ uncore_insert_box_info(&unit, die);
}
*parsed = true;
@@ -339,9 +393,16 @@ err:
void intel_uncore_clear_discovery_tables(void)
{
struct intel_uncore_discovery_type *type, *next;
+ struct intel_uncore_discovery_unit *pos;
+ struct rb_node *node;
rbtree_postorder_for_each_entry_safe(type, next, &discovery_tables, node) {
- kfree(type->box_ctrl_die);
+ while (!RB_EMPTY_ROOT(&type->units)) {
+ node = rb_first(&type->units);
+ pos = rb_entry(node, struct intel_uncore_discovery_unit, node);
+ rb_erase(node, &type->units);
+ kfree(pos);
+ }
kfree(type);
}
}
@@ -366,19 +427,31 @@ static const struct attribute_group generic_uncore_format_group = {
.attrs = generic_uncore_formats_attr,
};
+static u64 intel_generic_uncore_box_ctl(struct intel_uncore_box *box)
+{
+ struct intel_uncore_discovery_unit *unit;
+
+ unit = intel_uncore_find_discovery_unit(box->pmu->type->boxes,
+ -1, box->pmu->pmu_idx);
+ if (WARN_ON_ONCE(!unit))
+ return 0;
+
+ return unit->addr;
+}
+
void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box)
{
- wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT);
+ wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_INT);
}
void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box)
{
- wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ);
+ wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ);
}
void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box)
{
- wrmsrl(uncore_msr_box_ctl(box), 0);
+ wrmsrl(intel_generic_uncore_box_ctl(box), 0);
}
static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box,
@@ -406,10 +479,47 @@ static struct intel_uncore_ops generic_uncore_msr_ops = {
.read_counter = uncore_msr_read_counter,
};
+bool intel_generic_uncore_assign_hw_event(struct perf_event *event,
+ struct intel_uncore_box *box)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 box_ctl;
+
+ if (!box->pmu->type->boxes)
+ return false;
+
+ if (box->io_addr) {
+ hwc->config_base = uncore_pci_event_ctl(box, hwc->idx);
+ hwc->event_base = uncore_pci_perf_ctr(box, hwc->idx);
+ return true;
+ }
+
+ box_ctl = intel_generic_uncore_box_ctl(box);
+ if (!box_ctl)
+ return false;
+
+ if (box->pci_dev) {
+ box_ctl = UNCORE_DISCOVERY_PCI_BOX_CTRL(box_ctl);
+ hwc->config_base = box_ctl + uncore_pci_event_ctl(box, hwc->idx);
+ hwc->event_base = box_ctl + uncore_pci_perf_ctr(box, hwc->idx);
+ return true;
+ }
+
+ hwc->config_base = box_ctl + box->pmu->type->event_ctl + hwc->idx;
+ hwc->event_base = box_ctl + box->pmu->type->perf_ctr + hwc->idx;
+
+ return true;
+}
+
+static inline int intel_pci_uncore_box_ctl(struct intel_uncore_box *box)
+{
+ return UNCORE_DISCOVERY_PCI_BOX_CTRL(intel_generic_uncore_box_ctl(box));
+}
+
void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
- int box_ctl = uncore_pci_box_ctl(box);
+ int box_ctl = intel_pci_uncore_box_ctl(box);
__set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT);
@@ -418,7 +528,7 @@ void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
- int box_ctl = uncore_pci_box_ctl(box);
+ int box_ctl = intel_pci_uncore_box_ctl(box);
pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ);
}
@@ -426,7 +536,7 @@ void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
- int box_ctl = uncore_pci_box_ctl(box);
+ int box_ctl = intel_pci_uncore_box_ctl(box);
pci_write_config_dword(pdev, box_ctl, 0);
}
@@ -473,34 +583,30 @@ static struct intel_uncore_ops generic_uncore_pci_ops = {
#define UNCORE_GENERIC_MMIO_SIZE 0x4000
-static u64 generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
-{
- struct intel_uncore_type *type = box->pmu->type;
-
- if (!type->box_ctls || !type->box_ctls[box->dieid] || !type->mmio_offsets)
- return 0;
-
- return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx];
-}
-
void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
{
- u64 box_ctl = generic_uncore_mmio_box_ctl(box);
+ static struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type = box->pmu->type;
resource_size_t addr;
- if (!box_ctl) {
+ unit = intel_uncore_find_discovery_unit(type->boxes, box->dieid, box->pmu->pmu_idx);
+ if (!unit) {
+ pr_warn("Uncore type %d id %d: Cannot find box control address.\n",
+ type->type_id, box->pmu->pmu_idx);
+ return;
+ }
+
+ if (!unit->addr) {
pr_warn("Uncore type %d box %d: Invalid box control address.\n",
- type->type_id, type->box_ids[box->pmu->pmu_idx]);
+ type->type_id, unit->id);
return;
}
- addr = box_ctl;
+ addr = unit->addr;
box->io_addr = ioremap(addr, UNCORE_GENERIC_MMIO_SIZE);
if (!box->io_addr) {
pr_warn("Uncore type %d box %d: ioremap error for 0x%llx.\n",
- type->type_id, type->box_ids[box->pmu->pmu_idx],
- (unsigned long long)addr);
+ type->type_id, unit->id, (unsigned long long)addr);
return;
}
@@ -560,34 +666,22 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id,
struct intel_uncore_discovery_type *type)
{
uncore->type_id = type->type;
- uncore->num_boxes = type->num_boxes;
uncore->num_counters = type->num_counters;
uncore->perf_ctr_bits = type->counter_width;
- uncore->box_ids = type->ids;
+ uncore->perf_ctr = (unsigned int)type->ctr_offset;
+ uncore->event_ctl = (unsigned int)type->ctl_offset;
+ uncore->boxes = &type->units;
+ uncore->num_boxes = type->num_units;
switch (type_id) {
case UNCORE_ACCESS_MSR:
uncore->ops = &generic_uncore_msr_ops;
- uncore->perf_ctr = (unsigned int)type->box_ctrl + type->ctr_offset;
- uncore->event_ctl = (unsigned int)type->box_ctrl + type->ctl_offset;
- uncore->box_ctl = (unsigned int)type->box_ctrl;
- uncore->msr_offsets = type->box_offset;
break;
case UNCORE_ACCESS_PCI:
uncore->ops = &generic_uncore_pci_ops;
- uncore->perf_ctr = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctr_offset;
- uncore->event_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctl_offset;
- uncore->box_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl);
- uncore->box_ctls = type->box_ctrl_die;
- uncore->pci_offsets = type->box_offset;
break;
case UNCORE_ACCESS_MMIO:
uncore->ops = &generic_uncore_mmio_ops;
- uncore->perf_ctr = (unsigned int)type->ctr_offset;
- uncore->event_ctl = (unsigned int)type->ctl_offset;
- uncore->box_ctl = (unsigned int)type->box_ctrl;
- uncore->box_ctls = type->box_ctrl_die;
- uncore->mmio_offsets = type->box_offset;
uncore->mmio_map_size = UNCORE_GENERIC_MMIO_SIZE;
break;
default:
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 22e769a81103..0e94aa7db8e7 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -113,19 +113,24 @@ struct uncore_unit_discovery {
};
};
+struct intel_uncore_discovery_unit {
+ struct rb_node node;
+ unsigned int pmu_idx; /* The idx of the corresponding PMU */
+ unsigned int id; /* Unit ID */
+ unsigned int die; /* Die ID */
+ u64 addr; /* Unit Control Address */
+};
+
struct intel_uncore_discovery_type {
struct rb_node node;
enum uncore_access_type access_type;
- u64 box_ctrl; /* Unit ctrl addr of the first box */
- u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */
+ struct rb_root units; /* Unit ctrl addr for all units */
u16 type; /* Type ID of the uncore block */
u8 num_counters;
u8 counter_width;
u8 ctl_offset; /* Counter Control 0 offset */
u8 ctr_offset; /* Counter 0 offset */
- u16 num_boxes; /* number of boxes for the uncore block */
- unsigned int *ids; /* Box IDs */
- u64 *box_offset; /* Box offset */
+ u16 num_units; /* number of units */
};
bool intel_uncore_has_discovery_tables(int *ignore);
@@ -156,3 +161,10 @@ u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
struct intel_uncore_type **
intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra);
+
+int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die,
+ unsigned int pmu_idx);
+bool intel_generic_uncore_assign_hw_event(struct perf_event *event,
+ struct intel_uncore_box *box);
+void uncore_find_add_unit(struct intel_uncore_discovery_unit *node,
+ struct rb_root *root, u16 *num_units);
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index 92da8aaa5966..466833478e81 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Nehalem-EX/Westmere-EX uncore support */
+#include <asm/cpu_device_id.h>
#include "uncore.h"
/* NHM-EX event control */
@@ -1217,7 +1218,7 @@ static struct intel_uncore_type *nhmex_msr_uncores[] = {
void nhmex_uncore_cpu_init(void)
{
- if (boot_cpu_data.x86_model == 46)
+ if (boot_cpu_data.x86_vfm == INTEL_NEHALEM_EX)
uncore_nhmex = true;
else
nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 9462fd9f3b7a..edb7fd50efe0 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -252,6 +252,7 @@ DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
DEFINE_UNCORE_FORMAT_ATTR(threshold, threshold, "config:24-29");
+DEFINE_UNCORE_FORMAT_ATTR(threshold2, threshold, "config:24-31");
/* Sandy Bridge uncore support */
static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
@@ -746,6 +747,34 @@ void mtl_uncore_cpu_init(void)
uncore_msr_uncores = mtl_msr_uncores;
}
+static struct intel_uncore_type *lnl_msr_uncores[] = {
+ &mtl_uncore_cbox,
+ &mtl_uncore_arb,
+ NULL
+};
+
+#define LNL_UNC_MSR_GLOBAL_CTL 0x240e
+
+static void lnl_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0)
+ wrmsrl(LNL_UNC_MSR_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN);
+}
+
+static struct intel_uncore_ops lnl_uncore_msr_ops = {
+ .init_box = lnl_uncore_msr_init_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+void lnl_uncore_cpu_init(void)
+{
+ mtl_uncore_cbox.num_boxes = 4;
+ mtl_uncore_cbox.ops = &lnl_uncore_msr_ops;
+ uncore_msr_uncores = lnl_msr_uncores;
+}
+
enum {
SNB_PCI_UNCORE_IMC,
};
@@ -881,7 +910,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
pmu = uncore_event_to_pmu(event);
/* no device found for this pmu */
- if (pmu->func_id < 0)
+ if (!pmu->registered)
return -ENOENT;
/* Sampling not supported yet */
@@ -1475,39 +1504,45 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void)
ids++;
}
+ /* Just try to grab 00:00.0 device */
+ if (!mc_dev)
+ mc_dev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
+
return mc_dev;
}
#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000
-static void __uncore_imc_init_box(struct intel_uncore_box *box,
- unsigned int base_offset)
+static void
+uncore_get_box_mmio_addr(struct intel_uncore_box *box,
+ unsigned int base_offset,
+ int bar_offset, int step)
{
struct pci_dev *pdev = tgl_uncore_get_mc_dev();
struct intel_uncore_pmu *pmu = box->pmu;
struct intel_uncore_type *type = pmu->type;
resource_size_t addr;
- u32 mch_bar;
+ u32 bar;
if (!pdev) {
pr_warn("perf uncore: Cannot find matched IMC device.\n");
return;
}
- pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar);
- /* MCHBAR is disabled */
- if (!(mch_bar & BIT(0))) {
- pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n");
+ pci_read_config_dword(pdev, bar_offset, &bar);
+ if (!(bar & BIT(0))) {
+ pr_warn("perf uncore: BAR 0x%x is disabled. Failed to map %s counters.\n",
+ bar_offset, type->name);
pci_dev_put(pdev);
return;
}
- mch_bar &= ~BIT(0);
- addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx);
+ bar &= ~BIT(0);
+ addr = (resource_size_t)(bar + step * pmu->pmu_idx);
#ifdef CONFIG_PHYS_ADDR_T_64BIT
- pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar);
- addr |= ((resource_size_t)mch_bar << 32);
+ pci_read_config_dword(pdev, bar_offset + 4, &bar);
+ addr |= ((resource_size_t)bar << 32);
#endif
addr += base_offset;
@@ -1518,6 +1553,14 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box,
pci_dev_put(pdev);
}
+static void __uncore_imc_init_box(struct intel_uncore_box *box,
+ unsigned int base_offset)
+{
+ uncore_get_box_mmio_addr(box, base_offset,
+ SNB_UNCORE_PCI_IMC_BAR_OFFSET,
+ TGL_UNCORE_MMIO_IMC_MEM_OFFSET);
+}
+
static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
{
__uncore_imc_init_box(box, 0);
@@ -1612,14 +1655,17 @@ static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box)
writel(0, box->io_addr + uncore_mmio_box_ctl(box));
}
+#define MMIO_UNCORE_COMMON_OPS() \
+ .exit_box = uncore_mmio_exit_box, \
+ .disable_box = adl_uncore_mmio_disable_box, \
+ .enable_box = adl_uncore_mmio_enable_box, \
+ .disable_event = intel_generic_uncore_mmio_disable_event, \
+ .enable_event = intel_generic_uncore_mmio_enable_event, \
+ .read_counter = uncore_mmio_read_counter,
+
static struct intel_uncore_ops adl_uncore_mmio_ops = {
.init_box = adl_uncore_imc_init_box,
- .exit_box = uncore_mmio_exit_box,
- .disable_box = adl_uncore_mmio_disable_box,
- .enable_box = adl_uncore_mmio_enable_box,
- .disable_event = intel_generic_uncore_mmio_disable_event,
- .enable_event = intel_generic_uncore_mmio_enable_event,
- .read_counter = uncore_mmio_read_counter,
+ MMIO_UNCORE_COMMON_OPS()
};
#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00
@@ -1703,3 +1749,108 @@ void adl_uncore_mmio_init(void)
}
/* end of Alder Lake MMIO uncore support */
+
+/* Lunar Lake MMIO uncore support */
+#define LNL_UNCORE_PCI_SAFBAR_OFFSET 0x68
+#define LNL_UNCORE_MAP_SIZE 0x1000
+#define LNL_UNCORE_SNCU_BASE 0xE4B000
+#define LNL_UNCORE_SNCU_CTR 0x390
+#define LNL_UNCORE_SNCU_CTRL 0x398
+#define LNL_UNCORE_SNCU_BOX_CTL 0x380
+#define LNL_UNCORE_GLOBAL_CTL 0x700
+#define LNL_UNCORE_HBO_BASE 0xE54000
+#define LNL_UNCORE_HBO_OFFSET -4096
+#define LNL_UNCORE_HBO_CTR 0x570
+#define LNL_UNCORE_HBO_CTRL 0x550
+#define LNL_UNCORE_HBO_BOX_CTL 0x548
+
+#define LNL_UNC_CTL_THRESHOLD 0xff000000
+#define LNL_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ SNB_UNC_CTL_UMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET | \
+ SNB_UNC_CTL_INVERT | \
+ LNL_UNC_CTL_THRESHOLD)
+
+static struct attribute *lnl_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_threshold2.attr,
+ NULL
+};
+
+static const struct attribute_group lnl_uncore_format_group = {
+ .name = "format",
+ .attrs = lnl_uncore_formats_attr,
+};
+
+static void lnl_uncore_hbo_init_box(struct intel_uncore_box *box)
+{
+ uncore_get_box_mmio_addr(box, LNL_UNCORE_HBO_BASE,
+ LNL_UNCORE_PCI_SAFBAR_OFFSET,
+ LNL_UNCORE_HBO_OFFSET);
+}
+
+static struct intel_uncore_ops lnl_uncore_hbo_ops = {
+ .init_box = lnl_uncore_hbo_init_box,
+ MMIO_UNCORE_COMMON_OPS()
+};
+
+static struct intel_uncore_type lnl_uncore_hbo = {
+ .name = "hbo",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 64,
+ .perf_ctr = LNL_UNCORE_HBO_CTR,
+ .event_ctl = LNL_UNCORE_HBO_CTRL,
+ .event_mask = LNL_UNC_RAW_EVENT_MASK,
+ .box_ctl = LNL_UNCORE_HBO_BOX_CTL,
+ .mmio_map_size = LNL_UNCORE_MAP_SIZE,
+ .ops = &lnl_uncore_hbo_ops,
+ .format_group = &lnl_uncore_format_group,
+};
+
+static void lnl_uncore_sncu_init_box(struct intel_uncore_box *box)
+{
+ uncore_get_box_mmio_addr(box, LNL_UNCORE_SNCU_BASE,
+ LNL_UNCORE_PCI_SAFBAR_OFFSET,
+ 0);
+
+ if (box->io_addr)
+ writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + LNL_UNCORE_GLOBAL_CTL);
+}
+
+static struct intel_uncore_ops lnl_uncore_sncu_ops = {
+ .init_box = lnl_uncore_sncu_init_box,
+ MMIO_UNCORE_COMMON_OPS()
+};
+
+static struct intel_uncore_type lnl_uncore_sncu = {
+ .name = "sncu",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 64,
+ .perf_ctr = LNL_UNCORE_SNCU_CTR,
+ .event_ctl = LNL_UNCORE_SNCU_CTRL,
+ .event_mask = LNL_UNC_RAW_EVENT_MASK,
+ .box_ctl = LNL_UNCORE_SNCU_BOX_CTL,
+ .mmio_map_size = LNL_UNCORE_MAP_SIZE,
+ .ops = &lnl_uncore_sncu_ops,
+ .format_group = &lnl_uncore_format_group,
+};
+
+static struct intel_uncore_type *lnl_mmio_uncores[] = {
+ &adl_uncore_imc,
+ &lnl_uncore_hbo,
+ &lnl_uncore_sncu,
+ &adl_uncore_imc_free_running,
+ NULL
+};
+
+void lnl_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = lnl_mmio_uncores;
+}
+
+/* end of Lunar Lake MMIO uncore support */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 2eaf0f339849..60973c209c0e 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* SandyBridge-EP/IvyTown uncore support */
+#include <asm/cpu_device_id.h>
#include "uncore.h"
#include "uncore_discovery.h"
@@ -461,6 +462,7 @@
#define SPR_UBOX_DID 0x3250
/* SPR CHA */
+#define SPR_CHA_EVENT_MASK_EXT 0xffffffff
#define SPR_CHA_PMON_CTL_TID_EN (1 << 16)
#define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
SPR_CHA_PMON_CTL_TID_EN)
@@ -477,6 +479,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39");
DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55");
+DEFINE_UNCORE_FORMAT_ATTR(umask_ext5, umask, "config:8-15,32-63");
DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
@@ -3285,7 +3288,7 @@ void bdx_uncore_cpu_init(void)
uncore_msr_uncores = bdx_msr_uncores;
/* Detect systems with no SBOXes */
- if ((boot_cpu_data.x86_model == 86) || hswep_has_limit_sbox(BDX_PCU_DID))
+ if (boot_cpu_data.x86_vfm == INTEL_BROADWELL_D || hswep_has_limit_sbox(BDX_PCU_DID))
uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints;
@@ -5394,7 +5397,7 @@ static int icx_iio_get_topology(struct intel_uncore_type *type)
static void icx_iio_set_mapping(struct intel_uncore_type *type)
{
/* Detect ICX-D system. This case is not supported */
- if (boot_cpu_data.x86_model == INTEL_FAM6_ICELAKE_D) {
+ if (boot_cpu_data.x86_vfm == INTEL_ICELAKE_D) {
pmu_clear_mapping_attr(type->attr_update, &icx_iio_mapping_group);
return;
}
@@ -5932,10 +5935,11 @@ static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev
struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN);
struct intel_uncore_type *type = box->pmu->type;
+ int id = intel_uncore_find_discovery_unit_id(type->boxes, -1, box->pmu->pmu_idx);
if (tie_en) {
reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 +
- HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx];
+ HSWEP_CBO_MSR_OFFSET * id;
reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID;
reg1->idx = 0;
}
@@ -5957,7 +5961,7 @@ static struct intel_uncore_ops spr_uncore_chabox_ops = {
static struct attribute *spr_uncore_cha_formats_attr[] = {
&format_attr_event.attr,
- &format_attr_umask_ext4.attr,
+ &format_attr_umask_ext5.attr,
&format_attr_tid_en2.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
@@ -5993,7 +5997,7 @@ ATTRIBUTE_GROUPS(uncore_alias);
static struct intel_uncore_type spr_uncore_chabox = {
.name = "cha",
.event_mask = SPR_CHA_PMON_EVENT_MASK,
- .event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
+ .event_mask_ext = SPR_CHA_EVENT_MASK_EXT,
.num_shared_regs = 1,
.constraints = skx_uncore_chabox_constraints,
.ops = &spr_uncore_chabox_ops,
@@ -6161,7 +6165,55 @@ static struct intel_uncore_type spr_uncore_mdf = {
.name = "mdf",
};
-#define UNCORE_SPR_NUM_UNCORE_TYPES 12
+static void spr_uncore_mmio_offs8_init_box(struct intel_uncore_box *box)
+{
+ __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
+ intel_generic_uncore_mmio_init_box(box);
+}
+
+static struct intel_uncore_ops spr_uncore_mmio_offs8_ops = {
+ .init_box = spr_uncore_mmio_offs8_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = intel_generic_uncore_mmio_disable_box,
+ .enable_box = intel_generic_uncore_mmio_enable_box,
+ .disable_event = intel_generic_uncore_mmio_disable_event,
+ .enable_event = spr_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+#define SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT() \
+ SPR_UNCORE_COMMON_FORMAT(), \
+ .ops = &spr_uncore_mmio_offs8_ops
+
+static struct event_constraint spr_uncore_cxlcm_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x02, 0x0f),
+ UNCORE_EVENT_CONSTRAINT(0x05, 0x0f),
+ UNCORE_EVENT_CONSTRAINT(0x40, 0xf0),
+ UNCORE_EVENT_CONSTRAINT(0x41, 0xf0),
+ UNCORE_EVENT_CONSTRAINT(0x42, 0xf0),
+ UNCORE_EVENT_CONSTRAINT(0x43, 0xf0),
+ UNCORE_EVENT_CONSTRAINT(0x4b, 0xf0),
+ UNCORE_EVENT_CONSTRAINT(0x52, 0xf0),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type spr_uncore_cxlcm = {
+ SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(),
+ .name = "cxlcm",
+ .constraints = spr_uncore_cxlcm_constraints,
+};
+
+static struct intel_uncore_type spr_uncore_cxldp = {
+ SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(),
+ .name = "cxldp",
+};
+
+static struct intel_uncore_type spr_uncore_hbm = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "hbm",
+};
+
+#define UNCORE_SPR_NUM_UNCORE_TYPES 15
#define UNCORE_SPR_CHA 0
#define UNCORE_SPR_IIO 1
#define UNCORE_SPR_IMC 6
@@ -6185,6 +6237,9 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
NULL,
NULL,
&spr_uncore_mdf,
+ &spr_uncore_cxlcm,
+ &spr_uncore_cxldp,
+ &spr_uncore_hbm,
};
/*
@@ -6197,6 +6252,24 @@ static u64 spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
0, 0x8000, 0x10000, 0x18000
};
+static void spr_extra_boxes_cleanup(struct intel_uncore_type *type)
+{
+ struct intel_uncore_discovery_unit *pos;
+ struct rb_node *node;
+
+ if (!type->boxes)
+ return;
+
+ while (!RB_EMPTY_ROOT(type->boxes)) {
+ node = rb_first(type->boxes);
+ pos = rb_entry(node, struct intel_uncore_discovery_unit, node);
+ rb_erase(node, type->boxes);
+ kfree(pos);
+ }
+ kfree(type->boxes);
+ type->boxes = NULL;
+}
+
static struct intel_uncore_type spr_uncore_upi = {
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
@@ -6211,10 +6284,11 @@ static struct intel_uncore_type spr_uncore_upi = {
.num_counters = 4,
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
.perf_ctr_bits = 48,
- .perf_ctr = ICX_UPI_PCI_PMON_CTR0,
- .event_ctl = ICX_UPI_PCI_PMON_CTL0,
+ .perf_ctr = ICX_UPI_PCI_PMON_CTR0 - ICX_UPI_PCI_PMON_BOX_CTL,
+ .event_ctl = ICX_UPI_PCI_PMON_CTL0 - ICX_UPI_PCI_PMON_BOX_CTL,
.box_ctl = ICX_UPI_PCI_PMON_BOX_CTL,
.pci_offsets = spr_upi_pci_offsets,
+ .cleanup_extra_boxes = spr_extra_boxes_cleanup,
};
static struct intel_uncore_type spr_uncore_m3upi = {
@@ -6224,11 +6298,12 @@ static struct intel_uncore_type spr_uncore_m3upi = {
.num_counters = 4,
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
.perf_ctr_bits = 48,
- .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0,
- .event_ctl = ICX_M3UPI_PCI_PMON_CTL0,
+ .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0 - ICX_M3UPI_PCI_PMON_BOX_CTL,
+ .event_ctl = ICX_M3UPI_PCI_PMON_CTL0 - ICX_M3UPI_PCI_PMON_BOX_CTL,
.box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL,
.pci_offsets = spr_upi_pci_offsets,
.constraints = icx_uncore_m3upi_constraints,
+ .cleanup_extra_boxes = spr_extra_boxes_cleanup,
};
enum perf_uncore_spr_iio_freerunning_type_id {
@@ -6459,18 +6534,21 @@ uncore_find_type_by_id(struct intel_uncore_type **types, int type_id)
static int uncore_type_max_boxes(struct intel_uncore_type **types,
int type_id)
{
+ struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type;
- int i, max = 0;
+ struct rb_node *node;
+ int max = 0;
type = uncore_find_type_by_id(types, type_id);
if (!type)
return 0;
- for (i = 0; i < type->num_boxes; i++) {
- if (type->box_ids[i] > max)
- max = type->box_ids[i];
- }
+ for (node = rb_first(type->boxes); node; node = rb_next(node)) {
+ unit = rb_entry(node, struct intel_uncore_discovery_unit, node);
+ if (unit->id > max)
+ max = unit->id;
+ }
return max + 1;
}
@@ -6512,10 +6590,11 @@ void spr_uncore_cpu_init(void)
static void spr_update_device_location(int type_id)
{
+ struct intel_uncore_discovery_unit *unit;
struct intel_uncore_type *type;
struct pci_dev *dev = NULL;
+ struct rb_root *root;
u32 device, devfn;
- u64 *ctls;
int die;
if (type_id == UNCORE_SPR_UPI) {
@@ -6529,27 +6608,35 @@ static void spr_update_device_location(int type_id)
} else
return;
- ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL);
- if (!ctls) {
+ root = kzalloc(sizeof(struct rb_root), GFP_KERNEL);
+ if (!root) {
type->num_boxes = 0;
return;
}
+ *root = RB_ROOT;
while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) {
- if (devfn != dev->devfn)
- continue;
die = uncore_device_to_die(dev);
if (die < 0)
continue;
- ctls[die] = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET |
- dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET |
- devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET |
- type->box_ctl;
+ unit = kzalloc(sizeof(*unit), GFP_KERNEL);
+ if (!unit)
+ continue;
+ unit->die = die;
+ unit->id = PCI_SLOT(dev->devfn) - PCI_SLOT(devfn);
+ unit->addr = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET |
+ dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET |
+ devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET |
+ type->box_ctl;
+
+ unit->pmu_idx = unit->id;
+
+ uncore_find_add_unit(unit, root, NULL);
}
- type->box_ctls = ctls;
+ type->boxes = root;
}
int spr_uncore_pci_init(void)
@@ -6597,17 +6684,8 @@ void spr_uncore_mmio_init(void)
/* GNR uncore support */
#define UNCORE_GNR_NUM_UNCORE_TYPES 23
-#define UNCORE_GNR_TYPE_15 15
-#define UNCORE_GNR_B2UPI 18
-#define UNCORE_GNR_TYPE_21 21
-#define UNCORE_GNR_TYPE_22 22
int gnr_uncore_units_ignore[] = {
- UNCORE_SPR_UPI,
- UNCORE_GNR_TYPE_15,
- UNCORE_GNR_B2UPI,
- UNCORE_GNR_TYPE_21,
- UNCORE_GNR_TYPE_22,
UNCORE_IGNORE_END
};
@@ -6616,13 +6694,38 @@ static struct intel_uncore_type gnr_uncore_ubox = {
.attr_update = uncore_alias_groups,
};
+static struct intel_uncore_type gnr_uncore_pciex8 = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "pciex8",
+};
+
+static struct intel_uncore_type gnr_uncore_pciex16 = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "pciex16",
+};
+
+static struct intel_uncore_type gnr_uncore_upi = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "upi",
+};
+
+static struct intel_uncore_type gnr_uncore_b2upi = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "b2upi",
+};
+
+static struct intel_uncore_type gnr_uncore_b2hot = {
+ .name = "b2hot",
+ .attr_update = uncore_alias_groups,
+};
+
static struct intel_uncore_type gnr_uncore_b2cmi = {
SPR_UNCORE_PCI_COMMON_FORMAT(),
.name = "b2cmi",
};
static struct intel_uncore_type gnr_uncore_b2cxl = {
- SPR_UNCORE_MMIO_COMMON_FORMAT(),
+ SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(),
.name = "b2cxl",
};
@@ -6640,21 +6743,21 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = {
&gnr_uncore_ubox,
&spr_uncore_imc,
NULL,
+ &gnr_uncore_upi,
NULL,
NULL,
NULL,
+ &spr_uncore_cxlcm,
+ &spr_uncore_cxldp,
NULL,
- NULL,
- NULL,
- NULL,
- NULL,
+ &gnr_uncore_b2hot,
&gnr_uncore_b2cmi,
&gnr_uncore_b2cxl,
- NULL,
+ &gnr_uncore_b2upi,
NULL,
&gnr_uncore_mdf_sbo,
- NULL,
- NULL,
+ &gnr_uncore_pciex16,
+ &gnr_uncore_pciex8,
};
static struct freerunning_counters gnr_iio_freerunning[] = {
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 9e237b30f017..45b1866ff051 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -2,7 +2,7 @@
#include <linux/perf_event.h>
#include <linux/sysfs.h>
#include <linux/nospec.h>
-#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#include "probe.h"
enum perf_msr_id {
@@ -43,75 +43,75 @@ static bool test_intel(int idx, void *data)
boot_cpu_data.x86 != 6)
return false;
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_NEHALEM:
- case INTEL_FAM6_NEHALEM_G:
- case INTEL_FAM6_NEHALEM_EP:
- case INTEL_FAM6_NEHALEM_EX:
-
- case INTEL_FAM6_WESTMERE:
- case INTEL_FAM6_WESTMERE_EP:
- case INTEL_FAM6_WESTMERE_EX:
-
- case INTEL_FAM6_SANDYBRIDGE:
- case INTEL_FAM6_SANDYBRIDGE_X:
-
- case INTEL_FAM6_IVYBRIDGE:
- case INTEL_FAM6_IVYBRIDGE_X:
-
- case INTEL_FAM6_HASWELL:
- case INTEL_FAM6_HASWELL_X:
- case INTEL_FAM6_HASWELL_L:
- case INTEL_FAM6_HASWELL_G:
-
- case INTEL_FAM6_BROADWELL:
- case INTEL_FAM6_BROADWELL_D:
- case INTEL_FAM6_BROADWELL_G:
- case INTEL_FAM6_BROADWELL_X:
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- case INTEL_FAM6_EMERALDRAPIDS_X:
- case INTEL_FAM6_GRANITERAPIDS_X:
- case INTEL_FAM6_GRANITERAPIDS_D:
-
- case INTEL_FAM6_ATOM_SILVERMONT:
- case INTEL_FAM6_ATOM_SILVERMONT_D:
- case INTEL_FAM6_ATOM_AIRMONT:
-
- case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GOLDMONT_D:
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- case INTEL_FAM6_ATOM_TREMONT_D:
- case INTEL_FAM6_ATOM_TREMONT:
- case INTEL_FAM6_ATOM_TREMONT_L:
-
- case INTEL_FAM6_XEON_PHI_KNL:
- case INTEL_FAM6_XEON_PHI_KNM:
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_NEHALEM:
+ case INTEL_NEHALEM_G:
+ case INTEL_NEHALEM_EP:
+ case INTEL_NEHALEM_EX:
+
+ case INTEL_WESTMERE:
+ case INTEL_WESTMERE_EP:
+ case INTEL_WESTMERE_EX:
+
+ case INTEL_SANDYBRIDGE:
+ case INTEL_SANDYBRIDGE_X:
+
+ case INTEL_IVYBRIDGE:
+ case INTEL_IVYBRIDGE_X:
+
+ case INTEL_HASWELL:
+ case INTEL_HASWELL_X:
+ case INTEL_HASWELL_L:
+ case INTEL_HASWELL_G:
+
+ case INTEL_BROADWELL:
+ case INTEL_BROADWELL_D:
+ case INTEL_BROADWELL_G:
+ case INTEL_BROADWELL_X:
+ case INTEL_SAPPHIRERAPIDS_X:
+ case INTEL_EMERALDRAPIDS_X:
+ case INTEL_GRANITERAPIDS_X:
+ case INTEL_GRANITERAPIDS_D:
+
+ case INTEL_ATOM_SILVERMONT:
+ case INTEL_ATOM_SILVERMONT_D:
+ case INTEL_ATOM_AIRMONT:
+
+ case INTEL_ATOM_GOLDMONT:
+ case INTEL_ATOM_GOLDMONT_D:
+ case INTEL_ATOM_GOLDMONT_PLUS:
+ case INTEL_ATOM_TREMONT_D:
+ case INTEL_ATOM_TREMONT:
+ case INTEL_ATOM_TREMONT_L:
+
+ case INTEL_XEON_PHI_KNL:
+ case INTEL_XEON_PHI_KNM:
if (idx == PERF_MSR_SMI)
return true;
break;
- case INTEL_FAM6_SKYLAKE_L:
- case INTEL_FAM6_SKYLAKE:
- case INTEL_FAM6_SKYLAKE_X:
- case INTEL_FAM6_KABYLAKE_L:
- case INTEL_FAM6_KABYLAKE:
- case INTEL_FAM6_COMETLAKE_L:
- case INTEL_FAM6_COMETLAKE:
- case INTEL_FAM6_ICELAKE_L:
- case INTEL_FAM6_ICELAKE:
- case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_ICELAKE_D:
- case INTEL_FAM6_TIGERLAKE_L:
- case INTEL_FAM6_TIGERLAKE:
- case INTEL_FAM6_ROCKETLAKE:
- case INTEL_FAM6_ALDERLAKE:
- case INTEL_FAM6_ALDERLAKE_L:
- case INTEL_FAM6_ATOM_GRACEMONT:
- case INTEL_FAM6_RAPTORLAKE:
- case INTEL_FAM6_RAPTORLAKE_P:
- case INTEL_FAM6_RAPTORLAKE_S:
- case INTEL_FAM6_METEORLAKE:
- case INTEL_FAM6_METEORLAKE_L:
+ case INTEL_SKYLAKE_L:
+ case INTEL_SKYLAKE:
+ case INTEL_SKYLAKE_X:
+ case INTEL_KABYLAKE_L:
+ case INTEL_KABYLAKE:
+ case INTEL_COMETLAKE_L:
+ case INTEL_COMETLAKE:
+ case INTEL_ICELAKE_L:
+ case INTEL_ICELAKE:
+ case INTEL_ICELAKE_X:
+ case INTEL_ICELAKE_D:
+ case INTEL_TIGERLAKE_L:
+ case INTEL_TIGERLAKE:
+ case INTEL_ROCKETLAKE:
+ case INTEL_ALDERLAKE:
+ case INTEL_ALDERLAKE_L:
+ case INTEL_ATOM_GRACEMONT:
+ case INTEL_RAPTORLAKE:
+ case INTEL_RAPTORLAKE_P:
+ case INTEL_RAPTORLAKE_S:
+ case INTEL_METEORLAKE:
+ case INTEL_METEORLAKE_L:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index fb56518356ec..2c0ce0e9545e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -115,6 +115,11 @@ static inline bool is_branch_counters_group(struct perf_event *event)
return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS;
}
+static inline bool is_pebs_counter_event_group(struct perf_event *event)
+{
+ return event->group_leader->hw.flags & PERF_X86_EVENT_PEBS_CNTR;
+}
+
struct amd_nb {
int nb_id; /* NorthBridge id */
int refcnt; /* reference count */
@@ -476,6 +481,14 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
+#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW)
+
/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -616,6 +629,7 @@ union perf_capabilities {
u64 pebs_output_pt_available:1;
u64 pebs_timing_info:1;
u64 anythread_deprecated:1;
+ u64 rdpmc_metrics_clear:1;
};
u64 capabilities;
};
@@ -655,28 +669,27 @@ enum {
x86_lbr_exclusive_max,
};
-#define PERF_PEBS_DATA_SOURCE_MAX 0x10
+#define PERF_PEBS_DATA_SOURCE_MAX 0x100
#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1)
+#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
+#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
-enum hybrid_cpu_type {
- HYBRID_INTEL_NONE,
- HYBRID_INTEL_ATOM = 0x20,
- HYBRID_INTEL_CORE = 0x40,
-};
+#define X86_HYBRID_PMU_ATOM_IDX 0
+#define X86_HYBRID_PMU_CORE_IDX 1
+#define X86_HYBRID_PMU_TINY_IDX 2
enum hybrid_pmu_type {
not_hybrid,
- hybrid_small = BIT(0),
- hybrid_big = BIT(1),
-
- hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */
+ hybrid_small = BIT(X86_HYBRID_PMU_ATOM_IDX),
+ hybrid_big = BIT(X86_HYBRID_PMU_CORE_IDX),
+ hybrid_tiny = BIT(X86_HYBRID_PMU_TINY_IDX),
+
+ /* The belows are only used for matching */
+ hybrid_big_small = hybrid_big | hybrid_small,
+ hybrid_small_tiny = hybrid_small | hybrid_tiny,
+ hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
};
-#define X86_HYBRID_PMU_ATOM_IDX 0
-#define X86_HYBRID_PMU_CORE_IDX 1
-
-#define X86_HYBRID_NUM_PMUS 2
-
struct x86_hybrid_pmu {
struct pmu pmu;
const char *name;
@@ -684,9 +697,16 @@ struct x86_hybrid_pmu {
cpumask_t supported_cpus;
union perf_capabilities intel_cap;
u64 intel_ctrl;
- int max_pebs_events;
- int num_counters;
- int num_counters_fixed;
+ u64 pebs_events_mask;
+ u64 config_mask;
+ union {
+ u64 cntr_mask64;
+ unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ union {
+ u64 fixed_cntr_mask64;
+ unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
struct event_constraint unconstrained;
u64 hw_cache_event_ids
@@ -768,14 +788,23 @@ struct x86_pmu {
u64 (*update)(struct perf_event *event);
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
+ void (*late_setup)(void);
unsigned eventsel;
unsigned perfctr;
+ unsigned fixedctr;
int (*addr_offset)(int index, bool eventsel);
int (*rdpmc_index)(int index);
u64 (*event_map)(int);
int max_events;
- int num_counters;
- int num_counters_fixed;
+ u64 config_mask;
+ union {
+ u64 cntr_mask64;
+ unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ union {
+ u64 fixed_cntr_mask64;
+ unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
int cntval_bits;
u64 cntval_mask;
union {
@@ -829,7 +858,7 @@ struct x86_pmu {
void (*check_microcode)(void);
void (*sched_task)(struct perf_event_pmu_context *pmu_ctx,
- bool sched_in);
+ struct task_struct *task, bool sched_in);
/*
* Intel Arch Perfmon v2+
@@ -852,7 +881,7 @@ struct x86_pmu {
pebs_ept :1;
int pebs_record_size;
int pebs_buffer_size;
- int max_pebs_events;
+ u64 pebs_events_mask;
void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
@@ -874,6 +903,7 @@ struct x86_pmu {
const int *lbr_sel_map; /* lbr_select mappings */
int *lbr_ctl_map; /* LBR_CTL mappings */
};
+ u64 lbr_callstack_users; /* lbr callstack system wide users */
bool lbr_double_abort; /* duplicated lbr aborts */
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
@@ -912,14 +942,6 @@ struct x86_pmu {
int num_topdown_events;
/*
- * perf task context (i.e. struct perf_event_pmu_context::task_ctx_data)
- * switch helper to bridge calls from perf/core to perf/x86.
- * See struct pmu::swap_task_ctx() usage for examples;
- */
- void (*swap_task_ctx)(struct perf_event_pmu_context *prev_epc,
- struct perf_event_pmu_context *next_epc);
-
- /*
* AMD bits
*/
unsigned int amd_nb_constraints : 1;
@@ -954,7 +976,7 @@ struct x86_pmu {
*/
int num_hybrid_pmus;
struct x86_hybrid_pmu *hybrid_pmu;
- enum hybrid_cpu_type (*get_hybrid_cpu_type) (void);
+ enum intel_cpu_type (*get_hybrid_cpu_type) (void);
};
struct x86_perf_task_context_opt {
@@ -1067,6 +1089,8 @@ extern struct x86_pmu x86_pmu __read_mostly;
DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
+DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
+DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup);
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
{
@@ -1108,6 +1132,12 @@ extern u64 __read_mostly hw_cache_extra_regs
u64 x86_perf_event_update(struct perf_event *event);
+static inline u64 intel_pmu_topdown_event_update(struct perf_event *event, u64 *val)
+{
+ return x86_perf_event_update(event);
+}
+DECLARE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update);
+
static inline unsigned int x86_pmu_config_addr(int index)
{
return x86_pmu.eventsel + (x86_pmu.addr_offset ?
@@ -1120,13 +1150,19 @@ static inline unsigned int x86_pmu_event_addr(int index)
x86_pmu.addr_offset(index, false) : index);
}
+static inline unsigned int x86_pmu_fixed_ctr_addr(int index)
+{
+ return x86_pmu.fixedctr + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, false) : index);
+}
+
static inline int x86_pmu_rdpmc_index(int index)
{
return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}
-bool check_hw_exists(struct pmu *pmu, int num_counters,
- int num_counters_fixed);
+bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+ unsigned long *fixed_cntr_mask);
int x86_add_exclusive(unsigned int what);
@@ -1197,8 +1233,32 @@ void x86_pmu_enable_event(struct perf_event *event);
int x86_pmu_handle_irq(struct pt_regs *regs);
-void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
- u64 intel_ctrl);
+void x86_pmu_show_pmu_cap(struct pmu *pmu);
+
+static inline int x86_pmu_num_counters(struct pmu *pmu)
+{
+ return hweight64(hybrid(pmu, cntr_mask64));
+}
+
+static inline int x86_pmu_max_num_counters(struct pmu *pmu)
+{
+ return fls64(hybrid(pmu, cntr_mask64));
+}
+
+static inline int x86_pmu_num_counters_fixed(struct pmu *pmu)
+{
+ return hweight64(hybrid(pmu, fixed_cntr_mask64));
+}
+
+static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu)
+{
+ return fls64(hybrid(pmu, fixed_cntr_mask64));
+}
+
+static inline u64 x86_pmu_get_event_config(struct perf_event *event)
+{
+ return event->attr.config & hybrid(event->pmu, config_mask);
+}
extern struct event_constraint emptyconstraint;
@@ -1324,11 +1384,25 @@ void amd_pmu_lbr_reset(void);
void amd_pmu_lbr_read(void);
void amd_pmu_lbr_add(struct perf_event *event);
void amd_pmu_lbr_del(struct perf_event *event);
-void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in);
void amd_pmu_lbr_enable_all(void);
void amd_pmu_lbr_disable_all(void);
int amd_pmu_lbr_hw_config(struct perf_event *event);
+static __always_inline void __amd_pmu_lbr_disable(void)
+{
+ u64 dbg_ctl, dbg_extn_cfg;
+
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
+}
+
#ifdef CONFIG_PERF_EVENTS_AMD_BRS
#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
@@ -1365,7 +1439,8 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
perf_sched_cb_dec(event->pmu);
}
-void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in);
#else
static inline int amd_brs_init(void)
{
@@ -1390,7 +1465,8 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
{
}
-static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
}
@@ -1504,9 +1580,13 @@ void intel_pmu_disable_bts(void);
int intel_pmu_drain_bts_buffer(void);
-u64 adl_latency_data_small(struct perf_event *event, u64 status);
+u64 grt_latency_data(struct perf_event *event, u64 status);
-u64 mtl_latency_data_small(struct perf_event *event, u64 status);
+u64 cmt_latency_data(struct perf_event *event, u64 status);
+
+u64 lnl_latency_data(struct perf_event *event, u64 status);
+
+u64 arl_h_latency_data(struct perf_event *event, u64 status);
extern struct event_constraint intel_core2_pebs_event_constraints[];
@@ -1538,6 +1618,8 @@ extern struct event_constraint intel_icl_pebs_event_constraints[];
extern struct event_constraint intel_glc_pebs_event_constraints[];
+extern struct event_constraint intel_lnc_pebs_event_constraints[];
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_add(struct perf_event *event);
@@ -1554,7 +1636,7 @@ void intel_pmu_pebs_disable_all(void);
void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
-void intel_pmu_auto_reload_read(struct perf_event *event);
+void intel_pmu_drain_pebs_buffer(void);
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
@@ -1564,10 +1646,8 @@ void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
struct cpu_hw_events *cpuc,
struct perf_event *event);
-void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
- struct perf_event_pmu_context *next_epc);
-
-void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in);
u64 lbr_from_signext_quirk_wr(u64 val);
@@ -1625,8 +1705,12 @@ void intel_pmu_pebs_data_source_grt(void);
void intel_pmu_pebs_data_source_mtl(void);
+void intel_pmu_pebs_data_source_arl_h(void);
+
void intel_pmu_pebs_data_source_cmt(void);
+void intel_pmu_pebs_data_source_lnl(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
@@ -1648,6 +1732,17 @@ static inline int is_ht_workaround_enabled(void)
return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
}
+static inline u64 intel_pmu_pebs_mask(u64 cntr_mask)
+{
+ return MAX_PEBS_EVENTS_MASK & cntr_mask;
+}
+
+static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
+{
+ static_assert(MAX_PEBS_EVENTS == 32);
+ return fls((u32)hybrid(pmu, pebs_events_mask));
+}
+
#else /* CONFIG_CPU_SUP_INTEL */
static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 6c977c19f2cd..1d9e385649b5 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -9,7 +9,7 @@ PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */
PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */
PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */
PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */
- /* 0x00080 */
+PERF_ARCH(PEBS_CNTR, 0x00080) /* PEBS counters snapshot */
PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */
PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */
PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index fb2b1961e5a3..8ddace8cea96 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -39,6 +39,10 @@
* event: rapl_energy_psys
* perf code: 0x5
*
+ * core counter: consumption of a single physical core
+ * event: rapl_energy_core (power_core PMU)
+ * perf code: 0x1
+ *
* We manage those counters as free running (read-only). They may be
* use simultaneously by other tools, such as turbostat.
*
@@ -64,23 +68,28 @@
#include "perf_event.h"
#include "probe.h"
+MODULE_DESCRIPTION("Support Intel/AMD RAPL energy consumption counters");
MODULE_LICENSE("GPL");
/*
* RAPL energy status counters
*/
-enum perf_rapl_events {
+enum perf_rapl_pkg_events {
PERF_RAPL_PP0 = 0, /* all cores */
PERF_RAPL_PKG, /* entire package */
PERF_RAPL_RAM, /* DRAM */
PERF_RAPL_PP1, /* gpu */
PERF_RAPL_PSYS, /* psys */
- PERF_RAPL_MAX,
- NR_RAPL_DOMAINS = PERF_RAPL_MAX,
+ PERF_RAPL_PKG_EVENTS_MAX,
+ NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX,
};
-static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+#define PERF_RAPL_CORE 0 /* single core */
+#define PERF_RAPL_CORE_EVENTS_MAX 1
+#define NR_RAPL_CORE_DOMAINS PERF_RAPL_CORE_EVENTS_MAX
+
+static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = {
"pp0-core",
"package",
"dram",
@@ -88,6 +97,8 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"psys",
};
+static const char *const rapl_core_domain_name __initconst = "core";
+
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
@@ -102,6 +113,19 @@ static struct perf_pmu_events_attr event_attr_##v = { \
.event_str = str, \
};
+/*
+ * RAPL Package energy counter scope:
+ * 1. AMD/HYGON platforms have a per-PKG package energy counter
+ * 2. For Intel platforms
+ * 2.1. CLX-AP is multi-die and its RAPL MSRs are die-scope
+ * 2.2. Other Intel platforms are single die systems so the scope can be
+ * considered as either pkg-scope or die-scope, and we are considering
+ * them as die-scope.
+ */
+#define rapl_pkg_pmu_is_pkg_scope() \
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+
struct rapl_pmu {
raw_spinlock_t lock;
int n_active;
@@ -114,8 +138,9 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
- unsigned int maxdie;
- struct rapl_pmu *pmus[] __counted_by(maxdie);
+ unsigned int nr_rapl_pmu;
+ unsigned int cntr_mask;
+ struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu);
};
enum rapl_unit_quirk {
@@ -125,29 +150,43 @@ enum rapl_unit_quirk {
};
struct rapl_model {
- struct perf_msr *rapl_msrs;
- unsigned long events;
+ struct perf_msr *rapl_pkg_msrs;
+ struct perf_msr *rapl_core_msrs;
+ unsigned long pkg_events;
+ unsigned long core_events;
unsigned int msr_power_unit;
enum rapl_unit_quirk unit_quirk;
};
/* 1/2^hw_unit Joule */
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
-static struct rapl_pmus *rapl_pmus;
-static cpumask_t rapl_cpu_mask;
-static unsigned int rapl_cntr_mask;
+static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
+static int rapl_core_hw_unit __read_mostly;
+static struct rapl_pmus *rapl_pmus_pkg;
+static struct rapl_pmus *rapl_pmus_core;
static u64 rapl_timer_ms;
-static struct perf_msr *rapl_msrs;
+static struct rapl_model *rapl_model;
-static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+/*
+ * Helper function to get the correct topology id according to the
+ * RAPL PMU scope.
+ */
+static inline unsigned int get_rapl_pmu_idx(int cpu, int scope)
{
- unsigned int dieid = topology_logical_die_id(cpu);
-
/*
- * The unsigned check also catches the '-1' return value for non
- * existent mappings in the topology map.
+ * Returns unsigned int, which converts the '-1' return value
+ * (for non-existent mappings in topology map) to UINT_MAX, so
+ * the error check in the caller is simplified.
*/
- return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL;
+ switch (scope) {
+ case PERF_PMU_SCOPE_PKG:
+ return topology_logical_package_id(cpu);
+ case PERF_PMU_SCOPE_DIE:
+ return topology_logical_die_id(cpu);
+ case PERF_PMU_SCOPE_CORE:
+ return topology_logical_core_id(cpu);
+ default:
+ return -EINVAL;
+ }
}
static inline u64 rapl_read_counter(struct perf_event *event)
@@ -157,19 +196,20 @@ static inline u64 rapl_read_counter(struct perf_event *event)
return raw;
}
-static inline u64 rapl_scale(u64 v, int cfg)
+static inline u64 rapl_scale(u64 v, struct perf_event *event)
{
- if (cfg > NR_RAPL_DOMAINS) {
- pr_warn("Invalid domain %d, failed to scale data\n", cfg);
- return v;
- }
+ int hw_unit = rapl_pkg_hw_unit[event->hw.config - 1];
+
+ if (event->pmu->scope == PERF_PMU_SCOPE_CORE)
+ hw_unit = rapl_core_hw_unit;
+
/*
* scale delta to smallest unit (1/2^32)
* users must then scale back: count * 1/(1e9*2^32) to get Joules
* or use ldexp(count, -32).
* Watts = Joules/Time delta
*/
- return v << (32 - rapl_hw_unit[cfg - 1]);
+ return v << (32 - hw_unit);
}
static u64 rapl_event_update(struct perf_event *event)
@@ -196,7 +236,7 @@ static u64 rapl_event_update(struct perf_event *event)
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- sdelta = rapl_scale(delta, event->hw.config);
+ sdelta = rapl_scale(delta, event);
local64_add(sdelta, &event->count);
@@ -211,34 +251,33 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu)
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
{
- struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
+ struct rapl_pmu *rapl_pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
struct perf_event *event;
unsigned long flags;
- if (!pmu->n_active)
+ if (!rapl_pmu->n_active)
return HRTIMER_NORESTART;
- raw_spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
- list_for_each_entry(event, &pmu->active_list, active_entry)
+ list_for_each_entry(event, &rapl_pmu->active_list, active_entry)
rapl_event_update(event);
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
- hrtimer_forward_now(hrtimer, pmu->timer_interval);
+ hrtimer_forward_now(hrtimer, rapl_pmu->timer_interval);
return HRTIMER_RESTART;
}
-static void rapl_hrtimer_init(struct rapl_pmu *pmu)
+static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu)
{
- struct hrtimer *hr = &pmu->hrtimer;
+ struct hrtimer *hr = &rapl_pmu->hrtimer;
- hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hr->function = rapl_hrtimer_handle;
+ hrtimer_setup(hr, rapl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
}
-static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
+static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu,
struct perf_event *event)
{
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
@@ -246,39 +285,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
event->hw.state = 0;
- list_add_tail(&event->active_entry, &pmu->active_list);
+ list_add_tail(&event->active_entry, &rapl_pmu->active_list);
local64_set(&event->hw.prev_count, rapl_read_counter(event));
- pmu->n_active++;
- if (pmu->n_active == 1)
- rapl_start_hrtimer(pmu);
+ rapl_pmu->n_active++;
+ if (rapl_pmu->n_active == 1)
+ rapl_start_hrtimer(rapl_pmu);
}
static void rapl_pmu_event_start(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = event->pmu_private;
+ struct rapl_pmu *rapl_pmu = event->pmu_private;
unsigned long flags;
- raw_spin_lock_irqsave(&pmu->lock, flags);
- __rapl_pmu_event_start(pmu, event);
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
+ __rapl_pmu_event_start(rapl_pmu, event);
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
}
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = event->pmu_private;
+ struct rapl_pmu *rapl_pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
- raw_spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
/* mark event as deactivated and stopped */
if (!(hwc->state & PERF_HES_STOPPED)) {
- WARN_ON_ONCE(pmu->n_active <= 0);
- pmu->n_active--;
- if (pmu->n_active == 0)
- hrtimer_cancel(&pmu->hrtimer);
+ WARN_ON_ONCE(rapl_pmu->n_active <= 0);
+ rapl_pmu->n_active--;
+ if (rapl_pmu->n_active == 0)
+ hrtimer_cancel(&rapl_pmu->hrtimer);
list_del(&event->active_entry);
@@ -296,23 +335,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
hwc->state |= PERF_HES_UPTODATE;
}
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
}
static int rapl_pmu_event_add(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = event->pmu_private;
+ struct rapl_pmu *rapl_pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
- raw_spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&rapl_pmu->lock, flags);
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (mode & PERF_EF_START)
- __rapl_pmu_event_start(pmu, event);
+ __rapl_pmu_event_start(rapl_pmu, event);
- raw_spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags);
return 0;
}
@@ -325,13 +364,19 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags)
static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- int bit, ret = 0;
- struct rapl_pmu *pmu;
+ int bit, rapl_pmus_scope, ret = 0;
+ struct rapl_pmu *rapl_pmu;
+ unsigned int rapl_pmu_idx;
+ struct rapl_pmus *rapl_pmus;
/* only look at RAPL events */
- if (event->attr.type != rapl_pmus->pmu.type)
+ if (event->attr.type != event->pmu->type)
return -ENOENT;
+ /* unsupported modes and filters */
+ if (event->attr.sample_period) /* no sampling */
+ return -EINVAL;
+
/* check only supported bits are set */
if (event->attr.config & ~RAPL_EVENT_MASK)
return -EINVAL;
@@ -339,29 +384,41 @@ static int rapl_pmu_event_init(struct perf_event *event)
if (event->cpu < 0)
return -EINVAL;
- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
-
- if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
+ rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu);
+ if (!rapl_pmus)
+ return -EINVAL;
+ rapl_pmus_scope = rapl_pmus->pmu.scope;
+
+ if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) {
+ cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1);
+ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
+ return -EINVAL;
+
+ bit = cfg - 1;
+ event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
+ } else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) {
+ cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1);
+ if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1)
+ return -EINVAL;
+
+ bit = cfg - 1;
+ event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr;
+ } else
return -EINVAL;
-
- cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
- bit = cfg - 1;
/* check event supported */
- if (!(rapl_cntr_mask & (1 << bit)))
+ if (!(rapl_pmus->cntr_mask & (1 << bit)))
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.sample_period) /* no sampling */
+ rapl_pmu_idx = get_rapl_pmu_idx(event->cpu, rapl_pmus_scope);
+ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
return -EINVAL;
-
/* must be done before validate_group */
- pmu = cpu_to_rapl_pmu(event->cpu);
- if (!pmu)
+ rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
+ if (!rapl_pmu)
return -EINVAL;
- event->cpu = pmu->cpu;
- event->pmu_private = pmu;
- event->hw.event_base = rapl_msrs[bit].msr;
+
+ event->pmu_private = rapl_pmu;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -373,34 +430,19 @@ static void rapl_pmu_event_read(struct perf_event *event)
rapl_event_update(event);
}
-static ssize_t rapl_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
-}
-
-static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
-
-static struct attribute *rapl_pmu_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group rapl_pmu_attr_group = {
- .attrs = rapl_pmu_attrs,
-};
-
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");
+RAPL_EVENT_ATTR_STR(energy-core, rapl_core, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-core.unit, rapl_core_unit, "Joules");
/*
* we compute in 0.23 nJ increments regardless of MSR
@@ -410,6 +452,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890
RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-core.scale, rapl_core_scale, "2.3283064365386962890625e-10");
/*
* There are no default events, but we need to create
@@ -437,7 +480,12 @@ static struct attribute_group rapl_pmu_format_group = {
};
static const struct attribute_group *rapl_attr_groups[] = {
- &rapl_pmu_attr_group,
+ &rapl_pmu_format_group,
+ &rapl_pmu_events_group,
+ NULL,
+};
+
+static const struct attribute_group *rapl_core_attr_groups[] = {
&rapl_pmu_format_group,
&rapl_pmu_events_group,
NULL,
@@ -503,6 +551,18 @@ static struct attribute_group rapl_events_psys_group = {
.attrs = rapl_events_psys,
};
+static struct attribute *rapl_events_core[] = {
+ EVENT_PTR(rapl_core),
+ EVENT_PTR(rapl_core_unit),
+ EVENT_PTR(rapl_core_scale),
+ NULL,
+};
+
+static struct attribute_group rapl_events_core_group = {
+ .name = "events",
+ .attrs = rapl_events_core,
+};
+
static bool test_msr(int idx, void *data)
{
return test_bit(idx, (unsigned long *) data);
@@ -528,11 +588,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
};
/*
- * Force to PERF_RAPL_MAX size due to:
- * - perf_msr_probe(PERF_RAPL_MAX)
+ * Force to PERF_RAPL_PKG_EVENTS_MAX size due to:
+ * - perf_msr_probe(PERF_RAPL_PKG_EVENTS_MAX)
* - want to use same event codes across both architectures
*/
-static struct perf_msr amd_rapl_msrs[] = {
+static struct perf_msr amd_rapl_pkg_msrs[] = {
[PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 },
[PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 },
@@ -540,72 +600,25 @@ static struct perf_msr amd_rapl_msrs[] = {
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
};
-static int rapl_cpu_offline(unsigned int cpu)
-{
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
- int target;
-
- /* Check if exiting cpu is used for collecting rapl events */
- if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
- return 0;
-
- pmu->cpu = -1;
- /* Find a new cpu to collect rapl events */
- target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
-
- /* Migrate rapl events to the new target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &rapl_cpu_mask);
- pmu->cpu = target;
- perf_pmu_migrate_context(pmu->pmu, cpu, target);
- }
- return 0;
-}
-
-static int rapl_cpu_online(unsigned int cpu)
-{
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
- int target;
-
- if (!pmu) {
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
- if (!pmu)
- return -ENOMEM;
-
- raw_spin_lock_init(&pmu->lock);
- INIT_LIST_HEAD(&pmu->active_list);
- pmu->pmu = &rapl_pmus->pmu;
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
- rapl_hrtimer_init(pmu);
-
- rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
- }
-
- /*
- * Check if there is an online cpu in the package which collects rapl
- * events already.
- */
- target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
- if (target < nr_cpu_ids)
- return 0;
-
- cpumask_set_cpu(cpu, &rapl_cpu_mask);
- pmu->cpu = cpu;
- return 0;
-}
+static struct perf_msr amd_rapl_core_msrs[] = {
+ [PERF_RAPL_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_core_group,
+ test_msr, false, RAPL_MSR_MASK },
+};
-static int rapl_check_hw_unit(struct rapl_model *rm)
+static int rapl_check_hw_unit(void)
{
u64 msr_rapl_power_unit_bits;
int i;
/* protect rdmsrl() to handle virtualization */
- if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits))
+ if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
return -1;
- for (i = 0; i < NR_RAPL_DOMAINS; i++)
- rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
+ rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+ rapl_core_hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
- switch (rm->unit_quirk) {
+ switch (rapl_model->unit_quirk) {
/*
* DRAM domain on HSW server and KNL has fixed energy unit which can be
* different than the unit from power unit MSR. See
@@ -613,17 +626,16 @@ static int rapl_check_hw_unit(struct rapl_model *rm)
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
*/
case RAPL_UNIT_QUIRK_INTEL_HSW:
- rapl_hw_unit[PERF_RAPL_RAM] = 16;
+ rapl_pkg_hw_unit[PERF_RAPL_RAM] = 16;
break;
/* SPR uses a fixed energy unit for Psys domain. */
case RAPL_UNIT_QUIRK_INTEL_SPR:
- rapl_hw_unit[PERF_RAPL_PSYS] = 0;
+ rapl_pkg_hw_unit[PERF_RAPL_PSYS] = 0;
break;
default:
break;
}
-
/*
* Calculate the timer rate:
* Use reference of 200W for scaling the timeout to avoid counter
@@ -632,9 +644,9 @@ static int rapl_check_hw_unit(struct rapl_model *rm)
* if hw unit is 32, then we use 2 ms 1/200/2
*/
rapl_timer_ms = 2;
- if (rapl_hw_unit[0] < 32) {
+ if (rapl_pkg_hw_unit[0] < 32) {
rapl_timer_ms = (1000 / (2 * 100));
- rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
+ rapl_timer_ms *= (1ULL << (32 - rapl_pkg_hw_unit[0] - 1));
}
return 0;
}
@@ -642,24 +654,32 @@ static int rapl_check_hw_unit(struct rapl_model *rm)
static void __init rapl_advertise(void)
{
int i;
+ int num_counters = hweight32(rapl_pmus_pkg->cntr_mask);
+
+ if (rapl_pmus_core)
+ num_counters += hweight32(rapl_pmus_core->cntr_mask);
pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
- hweight32(rapl_cntr_mask), rapl_timer_ms);
+ num_counters, rapl_timer_ms);
- for (i = 0; i < NR_RAPL_DOMAINS; i++) {
- if (rapl_cntr_mask & (1 << i)) {
+ for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) {
+ if (rapl_pmus_pkg->cntr_mask & (1 << i)) {
pr_info("hw unit of domain %s 2^-%d Joules\n",
- rapl_domain_names[i], rapl_hw_unit[i]);
+ rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]);
}
}
+
+ if (rapl_pmus_core && (rapl_pmus_core->cntr_mask & (1 << PERF_RAPL_CORE)))
+ pr_info("hw unit of domain %s 2^-%d Joules\n",
+ rapl_core_domain_name, rapl_core_hw_unit);
}
-static void cleanup_rapl_pmus(void)
+static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus)
{
int i;
- for (i = 0; i < rapl_pmus->maxdie; i++)
- kfree(rapl_pmus->pmus[i]);
+ for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++)
+ kfree(rapl_pmus->rapl_pmu[i]);
kfree(rapl_pmus);
}
@@ -672,17 +692,62 @@ static const struct attribute_group *rapl_attr_update[] = {
NULL,
};
-static int __init init_rapl_pmus(void)
+static const struct attribute_group *rapl_core_attr_update[] = {
+ &rapl_events_core_group,
+ NULL,
+};
+
+static int __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
+{
+ struct rapl_pmu *rapl_pmu;
+ int idx;
+
+ for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) {
+ rapl_pmu = kzalloc(sizeof(*rapl_pmu), GFP_KERNEL);
+ if (!rapl_pmu)
+ goto free;
+
+ raw_spin_lock_init(&rapl_pmu->lock);
+ INIT_LIST_HEAD(&rapl_pmu->active_list);
+ rapl_pmu->pmu = &rapl_pmus->pmu;
+ rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ rapl_hrtimer_init(rapl_pmu);
+
+ rapl_pmus->rapl_pmu[idx] = rapl_pmu;
+ }
+
+ return 0;
+free:
+ for (; idx > 0; idx--)
+ kfree(rapl_pmus->rapl_pmu[idx - 1]);
+ return -ENOMEM;
+}
+
+static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope,
+ const struct attribute_group **rapl_attr_groups,
+ const struct attribute_group **rapl_attr_update)
{
- int maxdie = topology_max_packages() * topology_max_dies_per_package();
- size_t size;
+ int nr_rapl_pmu = topology_max_packages();
+ struct rapl_pmus *rapl_pmus;
+ int ret;
+
+ /*
+ * rapl_pmu_scope must be either PKG, DIE or CORE
+ */
+ if (rapl_pmu_scope == PERF_PMU_SCOPE_DIE)
+ nr_rapl_pmu *= topology_max_dies_per_package();
+ else if (rapl_pmu_scope == PERF_PMU_SCOPE_CORE)
+ nr_rapl_pmu *= topology_num_cores_per_package();
+ else if (rapl_pmu_scope != PERF_PMU_SCOPE_PKG)
+ return -EINVAL;
- size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
- rapl_pmus = kzalloc(size, GFP_KERNEL);
+ rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
return -ENOMEM;
- rapl_pmus->maxdie = maxdie;
+ *rapl_pmus_ptr = rapl_pmus;
+
+ rapl_pmus->nr_rapl_pmu = nr_rapl_pmu;
rapl_pmus->pmu.attr_groups = rapl_attr_groups;
rapl_pmus->pmu.attr_update = rapl_attr_update;
rapl_pmus->pmu.task_ctx_nr = perf_invalid_context;
@@ -692,122 +757,134 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.start = rapl_pmu_event_start;
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read;
+ rapl_pmus->pmu.scope = rapl_pmu_scope;
rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
- return 0;
+
+ ret = init_rapl_pmu(rapl_pmus);
+ if (ret)
+ kfree(rapl_pmus);
+
+ return ret;
}
static struct rapl_model model_snb = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_PP1),
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_snbep = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_hsw = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PP1),
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_hsx = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_knl = {
- .events = BIT(PERF_RAPL_PKG) |
+ .pkg_events = BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM),
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_skl = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PP1) |
BIT(PERF_RAPL_PSYS),
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_msrs,
+ .rapl_pkg_msrs = intel_rapl_msrs,
};
static struct rapl_model model_spr = {
- .events = BIT(PERF_RAPL_PP0) |
+ .pkg_events = BIT(PERF_RAPL_PP0) |
BIT(PERF_RAPL_PKG) |
BIT(PERF_RAPL_RAM) |
BIT(PERF_RAPL_PSYS),
.unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR,
.msr_power_unit = MSR_RAPL_POWER_UNIT,
- .rapl_msrs = intel_rapl_spr_msrs,
+ .rapl_pkg_msrs = intel_rapl_spr_msrs,
};
static struct rapl_model model_amd_hygon = {
- .events = BIT(PERF_RAPL_PKG),
+ .pkg_events = BIT(PERF_RAPL_PKG),
+ .core_events = BIT(PERF_RAPL_CORE),
.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
- .rapl_msrs = amd_rapl_msrs,
+ .rapl_pkg_msrs = amd_rapl_pkg_msrs,
+ .rapl_core_msrs = amd_rapl_core_msrs,
};
static const struct x86_cpu_id rapl_model_match[] __initconst = {
- X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon),
- X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb),
- X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep),
- X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb),
- X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &model_snbep),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &model_knl),
- X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &model_knl),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx),
- X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
- X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl),
- X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl),
+ X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon),
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE, &model_snb),
+ X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &model_snbep),
+ X86_MATCH_VFM(INTEL_IVYBRIDGE, &model_snb),
+ X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &model_snbep),
+ X86_MATCH_VFM(INTEL_HASWELL, &model_hsw),
+ X86_MATCH_VFM(INTEL_HASWELL_X, &model_hsx),
+ X86_MATCH_VFM(INTEL_HASWELL_L, &model_hsw),
+ X86_MATCH_VFM(INTEL_HASWELL_G, &model_hsw),
+ X86_MATCH_VFM(INTEL_BROADWELL, &model_hsw),
+ X86_MATCH_VFM(INTEL_BROADWELL_G, &model_hsw),
+ X86_MATCH_VFM(INTEL_BROADWELL_X, &model_hsx),
+ X86_MATCH_VFM(INTEL_BROADWELL_D, &model_hsx),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &model_knl),
+ X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &model_knl),
+ X86_MATCH_VFM(INTEL_SKYLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_SKYLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_SKYLAKE_X, &model_hsx),
+ X86_MATCH_VFM(INTEL_KABYLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_KABYLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_CANNONLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &model_hsw),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &model_hsw),
+ X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &model_hsw),
+ X86_MATCH_VFM(INTEL_ICELAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_ICELAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_ICELAKE_D, &model_hsx),
+ X86_MATCH_VFM(INTEL_ICELAKE_X, &model_hsx),
+ X86_MATCH_VFM(INTEL_COMETLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_COMETLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_TIGERLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_TIGERLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &model_skl),
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &model_spr),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &model_spr),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &model_skl),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &model_skl),
+ X86_MATCH_VFM(INTEL_METEORLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl),
+ X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl),
+ X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl),
+ X86_MATCH_VFM(INTEL_ARROWLAKE_U, &model_skl),
+ X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
@@ -815,57 +892,73 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
static int __init rapl_pmu_init(void)
{
const struct x86_cpu_id *id;
- struct rapl_model *rm;
+ int rapl_pkg_pmu_scope = PERF_PMU_SCOPE_DIE;
int ret;
+ if (rapl_pkg_pmu_is_pkg_scope())
+ rapl_pkg_pmu_scope = PERF_PMU_SCOPE_PKG;
+
id = x86_match_cpu(rapl_model_match);
if (!id)
return -ENODEV;
- rm = (struct rapl_model *) id->driver_data;
-
- rapl_msrs = rm->rapl_msrs;
+ rapl_model = (struct rapl_model *) id->driver_data;
- rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
- false, (void *) &rm->events);
-
- ret = rapl_check_hw_unit(rm);
+ ret = rapl_check_hw_unit();
if (ret)
return ret;
- ret = init_rapl_pmus();
+ ret = init_rapl_pmus(&rapl_pmus_pkg, rapl_pkg_pmu_scope, rapl_attr_groups,
+ rapl_attr_update);
if (ret)
return ret;
- /*
- * Install callbacks. Core will call them for each online cpu.
- */
- ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
- "perf/x86/rapl:online",
- rapl_cpu_online, rapl_cpu_offline);
+ rapl_pmus_pkg->cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs,
+ PERF_RAPL_PKG_EVENTS_MAX, false,
+ (void *) &rapl_model->pkg_events);
+
+ ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1);
if (ret)
goto out;
- ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
- if (ret)
- goto out1;
+ if (rapl_model->core_events) {
+ ret = init_rapl_pmus(&rapl_pmus_core, PERF_PMU_SCOPE_CORE,
+ rapl_core_attr_groups,
+ rapl_core_attr_update);
+ if (ret) {
+ pr_warn("power-core PMU initialization failed (%d)\n", ret);
+ goto core_init_failed;
+ }
+
+ rapl_pmus_core->cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs,
+ PERF_RAPL_CORE_EVENTS_MAX, false,
+ (void *) &rapl_model->core_events);
+
+ ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_core", -1);
+ if (ret) {
+ pr_warn("power-core PMU registration failed (%d)\n", ret);
+ cleanup_rapl_pmus(rapl_pmus_core);
+ }
+ }
+core_init_failed:
rapl_advertise();
return 0;
-out1:
- cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
- cleanup_rapl_pmus();
+ cleanup_rapl_pmus(rapl_pmus_pkg);
return ret;
}
module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
- perf_pmu_unregister(&rapl_pmus->pmu);
- cleanup_rapl_pmus();
+ if (rapl_pmus_core) {
+ perf_pmu_unregister(&rapl_pmus_core->pmu);
+ cleanup_rapl_pmus(rapl_pmus_core);
+ }
+ perf_pmu_unregister(&rapl_pmus_pkg->pmu);
+ cleanup_rapl_pmus(rapl_pmus_pkg);
}
module_exit(intel_rapl_exit);
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index 3e9acdaeed1e..2fd9b0cf9a5e 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -530,13 +530,13 @@ __init int zhaoxin_pmu_init(void)
pr_info("Version check pass!\n");
x86_pmu.version = version;
- x86_pmu.num_counters = eax.split.num_counters;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0);
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length;
- x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
+ x86_pmu.fixed_cntr_mask64 = GENMASK_ULL(edx.split.num_counters_fixed - 1, 0);
x86_add_quirk(zhaoxin_arch_events_quirk);
switch (boot_cpu_data.x86) {
@@ -604,13 +604,13 @@ __init int zhaoxin_pmu_init(void)
return -ENODEV;
}
- x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
- x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+ x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
+ x86_pmu.intel_ctrl |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
- c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
- c->weight += x86_pmu.num_counters;
+ c->idxmsk64 |= x86_pmu.cntr_mask64;
+ c->weight += x86_pmu_num_counters(NULL);
}
}