summaryrefslogtreecommitdiff
path: root/drivers/perf/arm_pmuv3.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/perf/arm_pmuv3.c')
-rw-r--r--drivers/perf/arm_pmuv3.c174
1 files changed, 161 insertions, 13 deletions
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index b5cc11abc962..8014ff766cff 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -25,6 +25,8 @@
#include <linux/smp.h>
#include <linux/nmi.h>
+#include "arm_brbe.h"
+
/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
@@ -438,7 +440,19 @@ static ssize_t threshold_max_show(struct device *dev,
static DEVICE_ATTR_RO(threshold_max);
+static ssize_t branches_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+ return sysfs_emit(page, "%d\n", brbe_num_branch_records(cpu_pmu));
+}
+
+static DEVICE_ATTR_RO(branches);
+
static struct attribute *armv8_pmuv3_caps_attrs[] = {
+ &dev_attr_branches.attr,
&dev_attr_slots.attr,
&dev_attr_bus_slots.attr,
&dev_attr_bus_width.attr,
@@ -446,9 +460,22 @@ static struct attribute *armv8_pmuv3_caps_attrs[] = {
NULL,
};
+static umode_t caps_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+ if (i == 0)
+ return brbe_num_branch_records(cpu_pmu) ? attr->mode : 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group armv8_pmuv3_caps_attr_group = {
.name = "caps",
.attrs = armv8_pmuv3_caps_attrs,
+ .is_visible = caps_is_visible,
};
/*
@@ -795,11 +822,6 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
static void armv8pmu_enable_event(struct perf_event *event)
{
- /*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
- armv8pmu_disable_event_counter(event);
armv8pmu_write_event_type(event);
armv8pmu_enable_event_irq(event);
armv8pmu_enable_event_counter(event);
@@ -814,6 +836,7 @@ static void armv8pmu_disable_event(struct perf_event *event)
static void armv8pmu_start(struct arm_pmu *cpu_pmu)
{
struct perf_event_context *ctx;
+ struct pmu_hw_events *hw_events = this_cpu_ptr(cpu_pmu->hw_events);
int nr_user = 0;
ctx = perf_cpu_task_ctx();
@@ -825,18 +848,36 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu)
else
armv8pmu_disable_user_access();
+ kvm_vcpu_pmu_resync_el0();
+
+ if (hw_events->branch_users)
+ brbe_enable(cpu_pmu);
+
/* Enable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
-
- kvm_vcpu_pmu_resync_el0();
}
static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
{
+ struct pmu_hw_events *hw_events = this_cpu_ptr(cpu_pmu->hw_events);
+
+ if (hw_events->branch_users)
+ brbe_disable();
+
/* Disable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
}
+static void read_branch_records(struct pmu_hw_events *cpuc,
+ struct perf_event *event,
+ struct perf_sample_data *data)
+{
+ struct perf_branch_stack *branch_stack = cpuc->branch_stack;
+
+ brbe_read_filtered_entries(branch_stack, event);
+ perf_sample_save_brstack(data, event, branch_stack, NULL);
+}
+
static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
u64 pmovsr;
@@ -887,13 +928,15 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
if (!armpmu_event_set_period(event))
continue;
+ if (has_branch_stack(event))
+ read_branch_records(cpuc, event, &data);
+
/*
* Perf event overflow will queue the processing of the event as
* an irq_work which will be taken care of in the handling of
* IPI_IRQ_WORK.
*/
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
}
armv8pmu_start(cpu_pmu);
@@ -935,6 +978,42 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
return -EAGAIN;
}
+static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
+
+ if (evtype != ARMV8_PMUV3_PERFCTR_CPU_CYCLES)
+ return false;
+
+ /*
+ * A CPU_CYCLES event with threshold counting cannot use PMCCNTR_EL0
+ * since it lacks threshold support.
+ */
+ if (armv8pmu_event_get_threshold(&event->attr))
+ return false;
+
+ /*
+ * PMCCNTR_EL0 is not affected by BRBE controls like BRBCR_ELx.FZP.
+ * So don't use it for branch events.
+ */
+ if (has_branch_stack(event))
+ return false;
+
+ /*
+ * The PMCCNTR_EL0 increments from the processor clock rather than
+ * the PE clock (ARM DDI0487 L.b D13.1.3) which means it'll continue
+ * counting on a WFI PE if one of its SMT sibling is not idle on a
+ * multi-threaded implementation. So don't use it on SMT cores.
+ */
+ if (cpu_pmu->has_smt)
+ return false;
+
+ return true;
+}
+
static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
struct perf_event *event)
{
@@ -943,8 +1022,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
/* Always prefer to place a cycle counter into the cycle counter. */
- if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
- !armv8pmu_event_get_threshold(&event->attr)) {
+ if (armv8pmu_can_use_pmccntr(cpuc, event)) {
if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask))
return ARMV8_PMU_CYCLE_IDX;
else if (armv8pmu_event_is_64bit(event) &&
@@ -993,6 +1071,19 @@ static int armv8pmu_user_event_idx(struct perf_event *event)
return event->hw.idx + 1;
}
+static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
+{
+ struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
+ struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+
+ if (!hw_events->branch_users)
+ return;
+
+ if (sched_in)
+ brbe_invalidate();
+}
+
/*
* Add an event filter to a given event.
*/
@@ -1010,6 +1101,13 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
return -EOPNOTSUPP;
}
+ if (has_branch_stack(perf_event)) {
+ if (!brbe_num_branch_records(cpu_pmu) || !brbe_branch_attr_valid(perf_event))
+ return -EOPNOTSUPP;
+
+ perf_event->attach_state |= PERF_ATTACH_SCHED_CB;
+ }
+
/*
* If we're running in hyp mode, then we *are* the hypervisor.
* Therefore we ignore exclude_hv in this configuration, since
@@ -1076,6 +1174,11 @@ static void armv8pmu_reset(void *info)
/* Clear the counters we flip at guest entry/exit */
kvm_clr_pmu_events(mask);
+ if (brbe_num_branch_records(cpu_pmu)) {
+ brbe_disable();
+ brbe_invalidate();
+ }
+
/*
* Initialize & Reset PMNC. Request overflow interrupt for
* 64 bit cycle counter but cheat in armv8pmu_write_counter().
@@ -1244,6 +1347,25 @@ static void __armv8pmu_probe_pmu(void *info)
cpu_pmu->reg_pmmir = read_pmmir();
else
cpu_pmu->reg_pmmir = 0;
+
+ brbe_probe(cpu_pmu);
+}
+
+static int branch_records_alloc(struct arm_pmu *armpmu)
+{
+ size_t size = struct_size_t(struct perf_branch_stack, entries,
+ brbe_num_branch_records(armpmu));
+ int cpu;
+
+ for_each_cpu(cpu, &armpmu->supported_cpus) {
+ struct pmu_hw_events *events_cpu;
+
+ events_cpu = per_cpu_ptr(armpmu->hw_events, cpu);
+ events_cpu->branch_stack = kmalloc(size, GFP_KERNEL);
+ if (!events_cpu->branch_stack)
+ return -ENOMEM;
+ }
+ return 0;
}
static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1260,7 +1382,15 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
if (ret)
return ret;
- return probe.present ? 0 : -ENODEV;
+ if (!probe.present)
+ return -ENODEV;
+
+ if (brbe_num_branch_records(cpu_pmu)) {
+ ret = branch_records_alloc(cpu_pmu);
+ if (ret)
+ return ret;
+ }
+ return 0;
}
static void armv8pmu_disable_user_access_ipi(void *unused)
@@ -1279,7 +1409,7 @@ static int armv8pmu_proc_user_access_handler(const struct ctl_table *table, int
return 0;
}
-static struct ctl_table armv8_pmu_sysctl_table[] = {
+static const struct ctl_table armv8_pmu_sysctl_table[] = {
{
.procname = "perf_user_access",
.data = &sysctl_perf_user_access,
@@ -1319,6 +1449,8 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;
+ if (brbe_num_branch_records(cpu_pmu))
+ cpu_pmu->pmu.sched_task = armv8pmu_sched_task;
cpu_pmu->name = name;
cpu_pmu->map_event = map_event;
@@ -1343,6 +1475,10 @@ static int name##_pmu_init(struct arm_pmu *cpu_pmu) \
PMUV3_INIT_SIMPLE(armv8_pmuv3)
+PMUV3_INIT_SIMPLE(armv8_c1_nano)
+PMUV3_INIT_SIMPLE(armv8_c1_premium)
+PMUV3_INIT_SIMPLE(armv8_c1_pro)
+PMUV3_INIT_SIMPLE(armv8_c1_ultra)
PMUV3_INIT_SIMPLE(armv8_cortex_a34)
PMUV3_INIT_SIMPLE(armv8_cortex_a55)
PMUV3_INIT_SIMPLE(armv8_cortex_a65)
@@ -1350,11 +1486,14 @@ PMUV3_INIT_SIMPLE(armv8_cortex_a75)
PMUV3_INIT_SIMPLE(armv8_cortex_a76)
PMUV3_INIT_SIMPLE(armv8_cortex_a77)
PMUV3_INIT_SIMPLE(armv8_cortex_a78)
+PMUV3_INIT_SIMPLE(armv9_cortex_a320)
PMUV3_INIT_SIMPLE(armv9_cortex_a510)
PMUV3_INIT_SIMPLE(armv9_cortex_a520)
+PMUV3_INIT_SIMPLE(armv9_cortex_a520ae)
PMUV3_INIT_SIMPLE(armv9_cortex_a710)
PMUV3_INIT_SIMPLE(armv9_cortex_a715)
PMUV3_INIT_SIMPLE(armv9_cortex_a720)
+PMUV3_INIT_SIMPLE(armv9_cortex_a720ae)
PMUV3_INIT_SIMPLE(armv9_cortex_a725)
PMUV3_INIT_SIMPLE(armv8_cortex_x1)
PMUV3_INIT_SIMPLE(armv9_cortex_x2)
@@ -1369,6 +1508,7 @@ PMUV3_INIT_SIMPLE(armv8_neoverse_v1)
PMUV3_INIT_SIMPLE(armv8_neoverse_v2)
PMUV3_INIT_SIMPLE(armv8_neoverse_v3)
PMUV3_INIT_SIMPLE(armv8_neoverse_v3ae)
+PMUV3_INIT_SIMPLE(armv8_rainier)
PMUV3_INIT_SIMPLE(armv8_nvidia_carmel)
PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
@@ -1385,6 +1525,10 @@ PMUV3_INIT_MAP_EVENT(armv8_brcm_vulcan, armv8_vulcan_map_event)
static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init},
+ {.compatible = "arm,c1-nano-pmu", .data = armv8_c1_nano_pmu_init},
+ {.compatible = "arm,c1-premium-pmu", .data = armv8_c1_premium_pmu_init},
+ {.compatible = "arm,c1-pro-pmu", .data = armv8_c1_pro_pmu_init},
+ {.compatible = "arm,c1-ultra-pmu", .data = armv8_c1_ultra_pmu_init},
{.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init},
{.compatible = "arm,cortex-a35-pmu", .data = armv8_cortex_a35_pmu_init},
{.compatible = "arm,cortex-a53-pmu", .data = armv8_cortex_a53_pmu_init},
@@ -1397,11 +1541,14 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init},
{.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init},
{.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init},
+ {.compatible = "arm,cortex-a320-pmu", .data = armv9_cortex_a320_pmu_init},
{.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init},
{.compatible = "arm,cortex-a520-pmu", .data = armv9_cortex_a520_pmu_init},
+ {.compatible = "arm,cortex-a520ae-pmu", .data = armv9_cortex_a520ae_pmu_init},
{.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init},
{.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init},
{.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init},
+ {.compatible = "arm,cortex-a720ae-pmu", .data = armv9_cortex_a720ae_pmu_init},
{.compatible = "arm,cortex-a725-pmu", .data = armv9_cortex_a725_pmu_init},
{.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init},
{.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init},
@@ -1416,6 +1563,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,neoverse-v2-pmu", .data = armv8_neoverse_v2_pmu_init},
{.compatible = "arm,neoverse-v3-pmu", .data = armv8_neoverse_v3_pmu_init},
{.compatible = "arm,neoverse-v3ae-pmu", .data = armv8_neoverse_v3ae_pmu_init},
+ {.compatible = "arm,rainier-pmu", .data = armv8_rainier_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init},
{.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init},
{.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init},