Diffstat (limited to 'arch/x86/events/amd')
-rw-r--r--  arch/x86/events/amd/brs.c       3
-rw-r--r--  arch/x86/events/amd/core.c    117
-rw-r--r--  arch/x86/events/amd/ibs.c     344
-rw-r--r--  arch/x86/events/amd/iommu.c     2
-rw-r--r--  arch/x86/events/amd/lbr.c      22
-rw-r--r--  arch/x86/events/amd/uncore.c   41
6 files changed, 423 insertions(+), 106 deletions(-)
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index 780acd3dff22..ec3427463382 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -381,7 +381,8 @@ static void amd_brs_poison_buffer(void)
* On ctxswin, sched_in = true, called after the PMU has started
* On ctxswout, sched_in = false, called before the PMU is stopped
*/
-void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b..30d6ceb4c8ad 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,39 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
+static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a)
+ return amd_zen4_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
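
With the per-uarch tables above, a generic hardware event resolves to the right raw encoding at event_map() time. A minimal userspace sketch (hypothetical, error handling elided) of opening the newly supported ref-cycles event, which amd_pmu_event_map() translates to 0x100000120 on Zen 4:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int open_ref_cycles(void)
    {
            struct perf_event_attr attr = {
                    .type = PERF_TYPE_HARDWARE,
                    .size = sizeof(attr),
                    .config = PERF_COUNT_HW_REF_CPU_CYCLES,
            };

            /* pid 0 = calling task, cpu -1 = any CPU, no group, no flags */
            return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
    }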
@@ -403,8 +432,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
* be removed on one CPU at a time AND PMU is disabled
* when we come here
*/
- for (i = 0; i < x86_pmu.num_counters; i++) {
- if (cmpxchg(nb->owners + i, event, NULL) == event)
+ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
+ struct perf_event *tmp = event;
+
+ if (try_cmpxchg(nb->owners + i, &tmp, NULL))
break;
}
}
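
The cmpxchg() -> try_cmpxchg() conversions in this patch (here and in uncore.c below) are behavior-preserving; a sketch of the equivalence, with the surrounding loop elided:

    /* Old form: success iff the slot still held 'event'. */
    if (cmpxchg(nb->owners + i, event, NULL) == event)
            break;

    /*
     * New form: 'tmp' must be a local copy because try_cmpxchg()
     * writes the observed value back into it on failure, which lets
     * the compiler reuse that value instead of re-reading the slot.
     */
    struct perf_event *tmp = event;

    if (try_cmpxchg(nb->owners + i, &tmp, NULL))
            break;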
@@ -470,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+ for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
if (new == -1 || hwc->idx == idx)
/* assign free slot, prefer hwc->idx */
old = cmpxchg(nb->owners + idx, NULL, event);
@@ -513,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
/*
* initialize all possible NB constraints
*/
- for (i = 0; i < x86_pmu.num_counters; i++) {
+ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
__set_bit(i, nb->event_constraints[i].idxmsk);
nb->event_constraints[i].weight = 1;
}
@@ -618,7 +649,7 @@ static void amd_pmu_cpu_dead(int cpu)
}
}
-static inline void amd_pmu_set_global_ctl(u64 ctl)
+static __always_inline void amd_pmu_set_global_ctl(u64 ctl)
{
wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
}
@@ -706,7 +737,7 @@ static void amd_pmu_check_overflow(void)
* counters are always enabled when this function is called and
* ARCH_PERFMON_EVENTSEL_INT is always set.
*/
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -726,7 +757,7 @@ static void amd_pmu_enable_all(int added)
amd_brs_enable_all();
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
/* only activate events which are marked as active */
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -878,14 +909,46 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
return amd_pmu_adjust_nmi_window(handled);
}
+/*
+ * AMD-specific callback invoked through perf_snapshot_branch_stack static
+ * call, defined in include/linux/perf_event.h. See its definition for API
+ * details. It is up to the caller to provide enough space in *entries* to
+ * fit all LBR records; otherwise, the result is truncated to *cnt* entries.
+ */
+static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+ struct cpu_hw_events *cpuc;
+ unsigned long flags;
+
+ /*
+ * The sequence of steps to freeze LBR should be completely inlined
+ * and contain no branches to minimize contamination of LBR snapshot
+ */
+ local_irq_save(flags);
+ amd_pmu_core_disable_all();
+ __amd_pmu_lbr_disable();
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ amd_pmu_lbr_read();
+ cnt = min(cnt, x86_pmu.lbr_nr);
+ memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
+
+ amd_pmu_v2_enable_all(0);
+ local_irq_restore(flags);
+
+ return cnt;
+}
+
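
For context, perf_snapshot_branch_stack is the static call behind the bpf_get_branch_snapshot() helper. A hedged BPF-side sketch of a consumer (assumes libbpf tracing headers and a kernel with this patch; the traced function is arbitrary):

    SEC("fentry/do_sys_openat2")
    int BPF_PROG(lbr_snap)
    {
            static struct perf_branch_entry ents[16];
            long bytes;

            /* Returns bytes written; divide by entry size for records. */
            bytes = bpf_get_branch_snapshot(ents, sizeof(ents), 0);
            return 0;
    }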
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ static atomic64_t status_warned = ATOMIC64_INIT(0);
+ u64 reserved, status, mask, new_bits, prev_bits;
struct perf_sample_data data;
struct hw_perf_event *hwc;
struct perf_event *event;
int handled = 0, idx;
- u64 reserved, status, mask;
bool pmu_enabled;
/*
@@ -904,8 +967,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
@@ -918,7 +981,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
/* Clear any reserved bits set by buggy microcode */
status &= amd_pmu_global_cntr_mask;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -938,8 +1001,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!x86_perf_event_set_period(event))
continue;
- if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -950,7 +1012,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
* the corresponding PMCs are expected to be inactive according to the
* active_mask
*/
- WARN_ON(status > 0);
+ if (status > 0) {
+ prev_bits = atomic64_fetch_or(status, &status_warned);
+ // A new bit was set for the very first time.
+ new_bits = status & ~prev_bits;
+ WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
+ }
/* Clear overflow and freeze bits */
amd_pmu_ack_global_status(~status);
@@ -1253,7 +1320,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.addr_offset = amd_pmu_addr_offset,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = AMD64_NUM_COUNTERS,
+ .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
.add = amd_pmu_add_event,
.del = amd_pmu_del_event,
.cntval_bits = 48,
@@ -1352,7 +1419,7 @@ static int __init amd_core_pmu_init(void)
*/
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
- x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
/* Check for Performance Monitoring v2 support */
if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1362,9 +1429,9 @@ static int __init amd_core_pmu_init(void)
x86_pmu.version = 2;
/* Find the number of available Core PMCs */
- x86_pmu.num_counters = ebx.split.num_core_pmc;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
- amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+ amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
/* Update PMC handling functions */
x86_pmu.enable_all = amd_pmu_v2_enable_all;
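
The num_counters -> cntr_mask64 switch trades a plain count for a bitmap; a short note on the semantics (illustrative, not part of the patch):

    /*
     * With six core PMCs, GENMASK_ULL(5, 0) == 0x3f. Iterating via
     * for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX)
     * visits indices 0..5 exactly as the old
     * 'for (idx = 0; idx < num_counters; idx++)' loop did, while
     * also coping with sparse masks if a future CPU exposes a
     * non-contiguous counter layout.
     */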
@@ -1392,12 +1459,12 @@ static int __init amd_core_pmu_init(void)
* even numbered counter that has a consecutive adjacent odd
* numbered counter following it.
*/
- for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+ for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
even_ctr_mask |= BIT_ULL(i);
pair_constraint = (struct event_constraint)
__EVENT_CONSTRAINT(0, even_ctr_mask, 0,
- x86_pmu.num_counters / 2, 0,
+ x86_pmu_max_num_counters(NULL) / 2, 0,
PERF_X86_EVENT_PAIR);
x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
@@ -1414,6 +1481,10 @@ static int __init amd_core_pmu_init(void)
static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);
+
+ /* Only support branch_stack snapshot on perfmon v2 */
+ if (x86_pmu.handle_irq == amd_pmu_v2_handle_irq)
+ static_call_update(perf_snapshot_branch_stack, amd_pmu_v2_snapshot_branch_stack);
} else if (!amd_brs_init()) {
/*
* BRS requires special event constraints and flushing on ctxsw.
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index e91970b01d62..0252b7ea8bca 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -28,9 +28,8 @@ static u32 ibs_caps;
#include <asm/nmi.h>
#include <asm/amd-ibs.h>
-#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
-#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
-
+/* attr.config2 */
+#define IBS_SW_FILTER_MASK 1
/*
* IBS states:
@@ -87,6 +86,7 @@ struct perf_ibs {
u64 cnt_mask;
u64 enable_mask;
u64 valid_mask;
+ u16 min_period;
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
@@ -268,11 +268,19 @@ static int validate_group(struct perf_event *event)
return 0;
}
+static bool perf_ibs_ldlat_event(struct perf_ibs *perf_ibs,
+ struct perf_event *event)
+{
+ return perf_ibs == &perf_ibs_op &&
+ (ibs_caps & IBS_CAPS_OPLDLAT) &&
+ (event->attr.config1 & 0xFFF);
+}
+
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs;
- u64 max_cnt, config;
+ u64 config;
int ret;
perf_ibs = get_ibs_pmu(event->attr.type);
@@ -290,6 +298,16 @@ static int perf_ibs_init(struct perf_event *event)
if (has_branch_stack(event))
return -EOPNOTSUPP;
+ /* handle exclude_{user,kernel} in the IRQ handler */
+ if (event->attr.exclude_host || event->attr.exclude_guest ||
+ event->attr.exclude_idle)
+ return -EINVAL;
+
+ if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
+ (event->attr.exclude_kernel || event->attr.exclude_user ||
+ event->attr.exclude_hv))
+ return -EINVAL;
+
ret = validate_group(event);
if (ret)
return ret;
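
A hypothetical userspace sketch of the resulting contract: exclude_kernel and friends are only accepted when the swfilt bit (config2 bit 0) is set, since IBS hardware cannot filter by privilege level itself. The PMU type here is an assumption, read from sysfs at runtime:

    struct perf_event_attr attr = {
            .type = ibs_op_pmu_type,   /* /sys/.../ibs_op/type */
            .size = sizeof(attr),
            .config2 = 1,              /* IBS_SW_FILTER_MASK */
            .sample_period = 0x1000,   /* >= min_period, low nibble clear */
            .sample_type = PERF_SAMPLE_IP,
            .exclude_kernel = 1,       /* enforced in the IRQ handler */
    };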
@@ -298,25 +316,47 @@ static int perf_ibs_init(struct perf_event *event)
if (config & perf_ibs->cnt_mask)
/* raw max_cnt may not be set */
return -EINVAL;
- if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
- /*
- * lower 4 bits can not be set in ibs max cnt,
- * but allowing it in case we adjust the
- * sample period to set a frequency.
- */
- return -EINVAL;
- hwc->sample_period &= ~0x0FULL;
- if (!hwc->sample_period)
- hwc->sample_period = 0x10;
+
+ if (event->attr.freq) {
+ hwc->sample_period = perf_ibs->min_period;
+ } else {
+ /* Silently mask off lower nibble. IBS hw mandates it. */
+ hwc->sample_period &= ~0x0FULL;
+ if (hwc->sample_period < perf_ibs->min_period)
+ return -EINVAL;
+ }
} else {
- max_cnt = config & perf_ibs->cnt_mask;
+ u64 period = 0;
+
+ if (event->attr.freq)
+ return -EINVAL;
+
+ if (perf_ibs == &perf_ibs_op) {
+ period = (config & IBS_OP_MAX_CNT) << 4;
+ if (ibs_caps & IBS_CAPS_OPCNTEXT)
+ period |= config & IBS_OP_MAX_CNT_EXT_MASK;
+ } else {
+ period = (config & IBS_FETCH_MAX_CNT) << 4;
+ }
+
config &= ~perf_ibs->cnt_mask;
- event->attr.sample_period = max_cnt << 4;
- hwc->sample_period = event->attr.sample_period;
+ event->attr.sample_period = period;
+ hwc->sample_period = period;
+
+ if (hwc->sample_period < perf_ibs->min_period)
+ return -EINVAL;
}
- if (!hwc->sample_period)
- return -EINVAL;
+ if (perf_ibs_ldlat_event(perf_ibs, event)) {
+ u64 ldlat = event->attr.config1 & 0xFFF;
+
+ if (ldlat < 128 || ldlat > 2048)
+ return -EINVAL;
+ ldlat >>= 7;
+
+ config |= (ldlat - 1) << 59;
+ config |= IBS_OP_L3MISSONLY | IBS_OP_LDLAT_EN;
+ }
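
A worked example of the encoding above, derived directly from the code: config1 = 256 passes the range check, 256 >> 7 == 2, and (2 - 1) << 59 sets bit 59 of the op control register. Because the shift truncates, config1 = 200 also passes the check but behaves as a 128-cycle threshold; effective thresholds are multiples of 128 within [128, 2048].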
/*
* If we modify hwc->sample_period, we also need to update
@@ -337,7 +377,8 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
int overflow;
/* ignore lower 4 bits in min count: */
- overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+ overflow = perf_event_set_period(hwc, perf_ibs->min_period,
+ perf_ibs->max_period, period);
local64_set(&hwc->prev_count, 0);
return overflow;
@@ -435,6 +476,9 @@ static void perf_ibs_start(struct perf_event *event, int flags)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
+ if (event->attr.freq && hwc->sample_period < perf_ibs->min_period)
+ hwc->sample_period = perf_ibs->min_period;
+
perf_ibs_set_period(perf_ibs, hwc, &period);
if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
config |= period & IBS_OP_MAX_CNT_EXT_MASK;
@@ -542,6 +586,28 @@ static void perf_ibs_del(struct perf_event *event, int flags)
static void perf_ibs_read(struct perf_event *event) { }
+static int perf_ibs_check_period(struct perf_event *event, u64 value)
+{
+ struct perf_ibs *perf_ibs;
+ u64 low_nibble;
+
+ if (event->attr.freq)
+ return 0;
+
+ perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ low_nibble = value & 0xFULL;
+
+ /*
+	 * This contradicts perf_ibs_init(), which allows a sample period
+	 * with lower nibble bits set but silently masks them off, whereas
+	 * this returns an error.
+ */
+ if (low_nibble || value < perf_ibs->min_period)
+ return -EINVAL;
+
+ return 0;
+}
+
/*
* We need to initialize with empty group if all attributes in the
* group are dynamic.
@@ -550,27 +616,20 @@ static struct attribute *attrs_empty[] = {
NULL,
};
-static struct attribute_group empty_format_group = {
- .name = "format",
- .attrs = attrs_empty,
-};
-
static struct attribute_group empty_caps_group = {
.name = "caps",
.attrs = attrs_empty,
};
-static const struct attribute_group *empty_attr_groups[] = {
- &empty_format_group,
- &empty_caps_group,
- NULL,
-};
-
PMU_FORMAT_ATTR(rand_en, "config:57");
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
+PMU_FORMAT_ATTR(swfilt, "config2:0");
PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
+PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_format, "config1:0-11");
PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");
+PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_cap, "1");
+PMU_EVENT_ATTR_STRING(dtlb_pgsize, ibs_op_dtlb_pgsize_cap, "1");
static umode_t
zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i)
@@ -578,8 +637,21 @@ zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int
return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
}
-static struct attribute *rand_en_attrs[] = {
+static umode_t
+ibs_op_ldlat_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_OPLDLAT ? attr->mode : 0;
+}
+
+static umode_t
+ibs_op_dtlb_pgsize_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ return ibs_caps & IBS_CAPS_OPDTLBPGSIZE ? attr->mode : 0;
+}
+
+static struct attribute *fetch_attrs[] = {
&format_attr_rand_en.attr,
+ &format_attr_swfilt.attr,
NULL,
};
@@ -593,9 +665,19 @@ static struct attribute *zen4_ibs_extensions_attrs[] = {
NULL,
};
-static struct attribute_group group_rand_en = {
+static struct attribute *ibs_op_ldlat_cap_attrs[] = {
+ &ibs_op_ldlat_cap.attr.attr,
+ NULL,
+};
+
+static struct attribute *ibs_op_dtlb_pgsize_cap_attrs[] = {
+ &ibs_op_dtlb_pgsize_cap.attr.attr,
+ NULL,
+};
+
+static struct attribute_group group_fetch_formats = {
.name = "format",
- .attrs = rand_en_attrs,
+ .attrs = fetch_attrs,
};
static struct attribute_group group_fetch_l3missonly = {
@@ -610,8 +692,20 @@ static struct attribute_group group_zen4_ibs_extensions = {
.is_visible = zen4_ibs_extensions_is_visible,
};
+static struct attribute_group group_ibs_op_ldlat_cap = {
+ .name = "caps",
+ .attrs = ibs_op_ldlat_cap_attrs,
+ .is_visible = ibs_op_ldlat_is_visible,
+};
+
+static struct attribute_group group_ibs_op_dtlb_pgsize_cap = {
+ .name = "caps",
+ .attrs = ibs_op_dtlb_pgsize_cap_attrs,
+ .is_visible = ibs_op_dtlb_pgsize_is_visible,
+};
+
static const struct attribute_group *fetch_attr_groups[] = {
- &group_rand_en,
+ &group_fetch_formats,
&empty_caps_group,
NULL,
};
@@ -628,6 +722,11 @@ cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
}
+static struct attribute *op_attrs[] = {
+ &format_attr_swfilt.attr,
+ NULL,
+};
+
static struct attribute *cnt_ctl_attrs[] = {
&format_attr_cnt_ctl.attr,
NULL,
@@ -638,6 +737,16 @@ static struct attribute *op_l3missonly_attrs[] = {
NULL,
};
+static struct attribute_group group_op_formats = {
+ .name = "format",
+ .attrs = op_attrs,
+};
+
+static struct attribute *ibs_op_ldlat_format_attrs[] = {
+ &ibs_op_ldlat_format.attr.attr,
+ NULL,
+};
+
static struct attribute_group group_cnt_ctl = {
.name = "format",
.attrs = cnt_ctl_attrs,
@@ -650,10 +759,25 @@ static struct attribute_group group_op_l3missonly = {
.is_visible = zen4_ibs_extensions_is_visible,
};
+static const struct attribute_group *op_attr_groups[] = {
+ &group_op_formats,
+ &empty_caps_group,
+ NULL,
+};
+
+static struct attribute_group group_ibs_op_ldlat_format = {
+ .name = "format",
+ .attrs = ibs_op_ldlat_format_attrs,
+ .is_visible = ibs_op_ldlat_is_visible,
+};
+
static const struct attribute_group *op_attr_update[] = {
&group_cnt_ctl,
&group_op_l3missonly,
&group_zen4_ibs_extensions,
+ &group_ibs_op_ldlat_cap,
+ &group_ibs_op_ldlat_format,
+ &group_ibs_op_dtlb_pgsize_cap,
NULL,
};
@@ -667,13 +791,14 @@ static struct perf_ibs perf_ibs_fetch = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .check_period = perf_ibs_check_period,
},
.msr = MSR_AMD64_IBSFETCHCTL,
- .config_mask = IBS_FETCH_CONFIG_MASK,
+ .config_mask = IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN,
.cnt_mask = IBS_FETCH_MAX_CNT,
.enable_mask = IBS_FETCH_ENABLE,
.valid_mask = IBS_FETCH_VAL,
+ .min_period = 0x10,
.max_period = IBS_FETCH_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
@@ -691,14 +816,15 @@ static struct perf_ibs perf_ibs_op = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
- .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .check_period = perf_ibs_check_period,
},
.msr = MSR_AMD64_IBSOPCTL,
- .config_mask = IBS_OP_CONFIG_MASK,
+ .config_mask = IBS_OP_MAX_CNT,
.cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
IBS_OP_CUR_CNT_RAND,
.enable_mask = IBS_OP_ENABLE,
.valid_mask = IBS_OP_VAL,
+ .min_period = 0x90,
.max_period = IBS_OP_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
@@ -900,6 +1026,10 @@ static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3,
if (!op_data3->dc_lin_addr_valid)
return;
+ if ((ibs_caps & IBS_CAPS_OPDTLBPGSIZE) &&
+ !op_data3->dc_phy_addr_valid)
+ return;
+
if (!op_data3->dc_l1tlb_miss) {
data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
return;
@@ -924,6 +1054,8 @@ static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3,
data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
}
+/* Be careful. Works only for contiguous MSRs. */
+#define ibs_fetch_msr_idx(msr) (msr - MSR_AMD64_IBSFETCHCTL)
#define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL)
static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
@@ -1004,21 +1136,92 @@ static void perf_ibs_parse_ld_st_data(__u64 sample_type,
}
}
-static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type,
+static bool perf_ibs_is_mem_sample_type(struct perf_ibs *perf_ibs,
+ struct perf_event *event)
+{
+ u64 sample_type = event->attr.sample_type;
+
+ return perf_ibs == &perf_ibs_op &&
+ sample_type & (PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_WEIGHT_TYPE |
+ PERF_SAMPLE_ADDR |
+ PERF_SAMPLE_PHYS_ADDR);
+}
+
+static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs,
+ struct perf_event *event,
int check_rip)
{
- if (sample_type & PERF_SAMPLE_RAW ||
- (perf_ibs == &perf_ibs_op &&
- (sample_type & PERF_SAMPLE_DATA_SRC ||
- sample_type & PERF_SAMPLE_WEIGHT_TYPE ||
- sample_type & PERF_SAMPLE_ADDR ||
- sample_type & PERF_SAMPLE_PHYS_ADDR)))
+ if (event->attr.sample_type & PERF_SAMPLE_RAW ||
+ perf_ibs_is_mem_sample_type(perf_ibs, event) ||
+ perf_ibs_ldlat_event(perf_ibs, event))
return perf_ibs->offset_max;
else if (check_rip)
return 3;
return 1;
}
+static bool perf_ibs_is_kernel_data_addr(struct perf_event *event,
+ struct perf_ibs_data *ibs_data)
+{
+ u64 sample_type_mask = PERF_SAMPLE_ADDR | PERF_SAMPLE_RAW;
+ union ibs_op_data3 op_data3;
+ u64 dc_lin_addr;
+
+ op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
+ dc_lin_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];
+
+ return unlikely((event->attr.sample_type & sample_type_mask) &&
+ op_data3.dc_lin_addr_valid && kernel_ip(dc_lin_addr));
+}
+
+static bool perf_ibs_is_kernel_br_target(struct perf_event *event,
+ struct perf_ibs_data *ibs_data,
+ int br_target_idx)
+{
+ union ibs_op_data op_data;
+ u64 br_target;
+
+ op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];
+ br_target = ibs_data->regs[br_target_idx];
+
+ return unlikely((event->attr.sample_type & PERF_SAMPLE_RAW) &&
+ op_data.op_brn_ret && kernel_ip(br_target));
+}
+
+static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event,
+ struct pt_regs *regs, struct perf_ibs_data *ibs_data,
+ int br_target_idx)
+{
+ if (perf_exclude_event(event, regs))
+ return true;
+
+ if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel)
+ return false;
+
+ if (perf_ibs_is_kernel_data_addr(event, ibs_data))
+ return true;
+
+ if (br_target_idx != -1 &&
+ perf_ibs_is_kernel_br_target(event, ibs_data, br_target_idx))
+ return true;
+
+ return false;
+}
+
+static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs,
+ struct perf_ibs_data *ibs_data)
+{
+ if (perf_ibs == &perf_ibs_op) {
+ ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18);
+ ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0;
+ return;
+ }
+
+ ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52);
+ ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0;
+}
+
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
@@ -1031,6 +1234,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
int offset, size, check_rip, offset_max, throttle = 0;
unsigned int msr;
u64 *buf, *config, period, new_config = 0;
+ int br_target_idx = -1;
if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
@@ -1067,7 +1271,7 @@ fail:
offset = 1;
check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
- offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);
+ offset_max = perf_ibs_get_offset_max(perf_ibs, event, check_rip);
do {
rdmsrl(msr + offset, *buf++);
@@ -1076,6 +1280,22 @@ fail:
perf_ibs->offset_max,
offset + 1);
} while (offset < offset_max);
+
+ if (perf_ibs_ldlat_event(perf_ibs, event)) {
+ union ibs_op_data3 op_data3;
+
+ op_data3.val = ibs_data.regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
+ /*
+		 * Opening the event fails if the load latency threshold is
+		 * outside the [128, 2048] range. Since the event has reached
+		 * the interrupt handler, we can safely assume the threshold
+		 * is within the [128, 2048] range.
+ */
+ if (!op_data3.ld_op || !op_data3.dc_miss ||
+ op_data3.dc_miss_lat <= (event->attr.config1 & 0xFFF))
+ goto out;
+ }
+
/*
* Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
* depending on their availability.
@@ -1085,6 +1305,7 @@ fail:
if (perf_ibs == &perf_ibs_op) {
if (ibs_caps & IBS_CAPS_BRNTRGT) {
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
+ br_target_idx = size;
size++;
}
if (ibs_caps & IBS_CAPS_OPDATA4) {
@@ -1111,6 +1332,21 @@ fail:
regs.flags |= PERF_EFLAGS_EXACT;
}
+ if ((event->attr.config2 & IBS_SW_FILTER_MASK) &&
+ perf_ibs_swfilt_discard(perf_ibs, event, &regs, &ibs_data, br_target_idx)) {
+ throttle = perf_event_account_interrupt(event);
+ goto out;
+ }
+ /*
+ * Prevent leaking physical addresses to unprivileged users. Skip
+ * PERF_SAMPLE_PHYS_ADDR check since generic code prevents it for
+ * unprivileged users.
+ */
+ if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
+ perf_allow_kernel()) {
+ perf_ibs_phyaddr_clear(perf_ibs, &ibs_data);
+ }
+
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw = (struct perf_raw_record){
.frag = {
@@ -1118,7 +1354,7 @@ fail:
.data = ibs_data.data,
},
};
- perf_sample_save_raw_data(&data, &raw);
+ perf_sample_save_raw_data(&data, event, &raw);
}
if (perf_ibs == &perf_ibs_op)
@@ -1129,10 +1365,13 @@ fail:
* recorded as part of interrupt regs. Thus we need to use rip from
* interrupt regs while unwinding call stack.
*/
- if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
- perf_sample_save_callchain(&data, event, iregs);
+ perf_sample_save_callchain(&data, event, iregs);
throttle = perf_event_overflow(event, &data, &regs);
+
+ if (event->attr.freq && hwc->sample_period < perf_ibs->min_period)
+ hwc->sample_period = perf_ibs->min_period;
+
out:
if (throttle) {
perf_ibs_stop(event, 0);
@@ -1222,13 +1461,14 @@ static __init int perf_ibs_op_init(void)
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
- perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.cnt_mask |= (IBS_OP_MAX_CNT_EXT_MASK |
+ IBS_OP_CUR_CNT_EXT_MASK);
}
if (ibs_caps & IBS_CAPS_ZEN4)
perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
- perf_ibs_op.pmu.attr_groups = empty_attr_groups;
+ perf_ibs_op.pmu.attr_groups = op_attr_groups;
perf_ibs_op.pmu.attr_update = op_attr_update;
return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index b15f7b950d2e..f8228d8243f7 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -30,7 +30,7 @@
#define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL)
#define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL)
-#define IOMMU_NAME_SIZE 16
+#define IOMMU_NAME_SIZE 24
struct perf_amd_iommu {
struct list_head list;
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5..c06ccca96851 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -310,10 +310,6 @@ int amd_pmu_lbr_hw_config(struct perf_event *event)
{
int ret = 0;
- /* LBR is not recommended in counting mode */
- if (!is_sampling_event(event))
- return -EINVAL;
-
ret = amd_pmu_lbr_setup_filter(event);
if (!ret)
event->attach_state |= PERF_ATTACH_SCHED_CB;
@@ -375,7 +371,8 @@ void amd_pmu_lbr_del(struct perf_event *event)
perf_sched_cb_dec(event->pmu);
}
-void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -402,26 +399,23 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
void amd_pmu_lbr_disable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- u64 dbg_ctl, dbg_extn_cfg;
if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
return;
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
- wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ __amd_pmu_lbr_disable();
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 4ccb8fa483e6..49c26ce2b115 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -162,7 +162,9 @@ static int amd_uncore_add(struct perf_event *event, int flags)
/* if not, take the first available counter */
hwc->idx = -1;
for (i = 0; i < pmu->num_counters; i++) {
- if (cmpxchg(&ctx->events[i], NULL, event) == NULL) {
+ struct perf_event *tmp = NULL;
+
+ if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
hwc->idx = i;
break;
}
@@ -196,7 +198,9 @@ static void amd_uncore_del(struct perf_event *event, int flags)
event->pmu->stop(event, PERF_EF_UPDATE);
for (i = 0; i < pmu->num_counters; i++) {
- if (cmpxchg(&ctx->events[i], event, NULL) == event)
+ struct perf_event *tmp = event;
+
+ if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
break;
}
@@ -639,7 +643,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
info.split.aux_data = 0;
info.split.num_pmcs = NUM_COUNTERS_NB;
info.split.gid = 0;
- info.split.cid = topology_die_id(cpu);
+ info.split.cid = topology_logical_package_id(cpu);
if (pmu_version >= 2) {
ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
@@ -654,17 +658,20 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
struct attribute **df_attr = amd_uncore_df_format_attr;
struct amd_uncore_pmu *pmu;
+ int num_counters;
/* Run just once */
if (uncore->init_done)
return amd_uncore_ctx_init(uncore, cpu);
+ num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+ if (!num_counters)
+ goto done;
+
/* No grouping, single instance for a system */
uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
- if (!uncore->pmus) {
- uncore->num_pmus = 0;
+ if (!uncore->pmus)
goto done;
- }
/*
* For Family 17h and above, the Northbridge counters are repurposed
@@ -674,7 +681,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
pmu = &uncore->pmus[0];
strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
sizeof(pmu->name));
- pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+ pmu->num_counters = num_counters;
pmu->msr_base = MSR_F15H_NB_PERF_CTL;
pmu->rdpmc_base = RDPMC_BASE_NB;
pmu->group = amd_uncore_ctx_gid(uncore, cpu);
@@ -785,17 +792,20 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
struct attribute **l3_attr = amd_uncore_l3_format_attr;
struct amd_uncore_pmu *pmu;
+ int num_counters;
/* Run just once */
if (uncore->init_done)
return amd_uncore_ctx_init(uncore, cpu);
+ num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+ if (!num_counters)
+ goto done;
+
/* No grouping, single instance for a system */
uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
- if (!uncore->pmus) {
- uncore->num_pmus = 0;
+ if (!uncore->pmus)
goto done;
- }
/*
* For Family 17h and above, L3 cache counters are available instead
@@ -805,7 +815,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
pmu = &uncore->pmus[0];
strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
sizeof(pmu->name));
- pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+ pmu->num_counters = num_counters;
pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
pmu->rdpmc_base = RDPMC_BASE_LLC;
pmu->group = amd_uncore_ctx_gid(uncore, cpu);
@@ -893,8 +903,8 @@ void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
info.split.aux_data = ecx; /* stash active mask */
info.split.num_pmcs = ebx.split.num_umc_pmc;
- info.split.gid = topology_die_id(cpu);
- info.split.cid = topology_die_id(cpu);
+ info.split.gid = topology_logical_package_id(cpu);
+ info.split.cid = topology_logical_package_id(cpu);
*per_cpu_ptr(uncore->info, cpu) = info;
}
@@ -906,7 +916,8 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
union amd_uncore_info info;
struct amd_uncore_pmu *pmu;
- int index = 0, gid, i;
+ int gid, i;
+ u16 index = 0;
if (pmu_version < 2)
return 0;
@@ -938,7 +949,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
for (i = 0; i < group_num_pmus[gid]; i++) {
pmu = &uncore->pmus[index];
- snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
+ snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index);
pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
pmu->rdpmc_base = -1;