summaryrefslogtreecommitdiff
path: root/drivers/perf/arm_pmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/perf/arm_pmu.c')
-rw-r--r--drivers/perf/arm_pmu.c359
1 files changed, 215 insertions, 144 deletions
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index d0b7dd8fb184..973a027d9063 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#undef DEBUG
/*
@@ -25,13 +26,97 @@
#include <asm/irq_regs.h>
-static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
+static int armpmu_count_irq_users(const struct cpumask *affinity,
+ const int irq);
+
+struct pmu_irq_ops {
+ void (*enable_pmuirq)(unsigned int irq);
+ void (*disable_pmuirq)(unsigned int irq);
+ void (*free_pmuirq)(unsigned int irq, int cpu, void __percpu *devid);
+};
+
+static void armpmu_free_pmuirq(unsigned int irq, int cpu, void __percpu *devid)
+{
+ free_irq(irq, per_cpu_ptr(devid, cpu));
+}
+
+static const struct pmu_irq_ops pmuirq_ops = {
+ .enable_pmuirq = enable_irq,
+ .disable_pmuirq = disable_irq_nosync,
+ .free_pmuirq = armpmu_free_pmuirq
+};
+
+static void armpmu_free_pmunmi(unsigned int irq, int cpu, void __percpu *devid)
+{
+ free_nmi(irq, per_cpu_ptr(devid, cpu));
+}
+
+static const struct pmu_irq_ops pmunmi_ops = {
+ .enable_pmuirq = enable_nmi,
+ .disable_pmuirq = disable_nmi_nosync,
+ .free_pmuirq = armpmu_free_pmunmi
+};
+
+static void armpmu_enable_percpu_pmuirq(unsigned int irq)
+{
+ enable_percpu_irq(irq, IRQ_TYPE_NONE);
+}
+
+static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu,
+ void __percpu *devid)
+{
+ struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu);
+
+ if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1)
+ free_percpu_irq(irq, devid);
+}
+
+static const struct pmu_irq_ops percpu_pmuirq_ops = {
+ .enable_pmuirq = armpmu_enable_percpu_pmuirq,
+ .disable_pmuirq = disable_percpu_irq,
+ .free_pmuirq = armpmu_free_percpu_pmuirq
+};
+
+static void armpmu_enable_percpu_pmunmi(unsigned int irq)
+{
+ if (!prepare_percpu_nmi(irq))
+ enable_percpu_nmi(irq, IRQ_TYPE_NONE);
+}
+
+static void armpmu_disable_percpu_pmunmi(unsigned int irq)
+{
+ disable_percpu_nmi(irq);
+ teardown_percpu_nmi(irq);
+}
+
+static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu,
+ void __percpu *devid)
+{
+ struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu);
+
+ if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1)
+ free_percpu_nmi(irq, devid);
+}
+
+static const struct pmu_irq_ops percpu_pmunmi_ops = {
+ .enable_pmuirq = armpmu_enable_percpu_pmunmi,
+ .disable_pmuirq = armpmu_disable_percpu_pmunmi,
+ .free_pmuirq = armpmu_free_percpu_pmunmi
+};
+
static DEFINE_PER_CPU(int, cpu_irq);
+static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops);
+
+static bool has_nmi;
static inline u64 arm_pmu_event_max_period(struct perf_event *event)
{
if (event->hw.flags & ARMPMU_EVT_64BIT)
return GENMASK_ULL(63, 0);
+ else if (event->hw.flags & ARMPMU_EVT_63BIT)
+ return GENMASK_ULL(62, 0);
+ else if (event->hw.flags & ARMPMU_EVT_47BIT)
+ return GENMASK_ULL(46, 0);
else
return GENMASK_ULL(31, 0);
}
@@ -237,6 +322,12 @@ armpmu_del(struct perf_event *event, int flags)
int idx = hwc->idx;
armpmu_stop(event, PERF_EF_UPDATE);
+
+ if (has_branch_stack(event)) {
+ hw_events->branch_users--;
+ perf_sched_cb_dec(event->pmu);
+ }
+
hw_events->events[idx] = NULL;
armpmu->clear_event_idx(hw_events, event);
perf_event_update_userpage(event);
@@ -261,12 +352,15 @@ armpmu_add(struct perf_event *event, int flags)
if (idx < 0)
return idx;
- /*
- * If there is an event in the counter we are going to use then make
- * sure it is disabled.
- */
+ /* The newly-allocated counter should be empty */
+ WARN_ON_ONCE(hw_events->events[idx]);
+
+ if (has_branch_stack(event)) {
+ hw_events->branch_users++;
+ perf_sched_cb_inc(event->pmu);
+ }
+
event->hw.idx = idx;
- armpmu->disable(event);
hw_events->events[idx] = event;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
@@ -321,6 +415,9 @@ validate_group(struct perf_event *event)
if (!validate_event(event->pmu, &fake_pmu, leader))
return -EINVAL;
+ if (event == leader)
+ return 0;
+
for_each_sibling_event(sibling, leader) {
if (!validate_event(event->pmu, &fake_pmu, sibling))
return -EINVAL;
@@ -357,18 +454,11 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
}
static int
-event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
- return attr->exclude_idle || attr->exclude_user ||
- attr->exclude_kernel || attr->exclude_hv;
-}
-
-static int
__hw_perf_event_init(struct perf_event *event)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- int mapping;
+ int mapping, ret;
hwc->flags = 0;
mapping = armpmu->map_event(event);
@@ -393,12 +483,10 @@ __hw_perf_event_init(struct perf_event *event)
/*
* Check whether we need to exclude the counter from certain modes.
*/
- if ((!armpmu->set_event_filter ||
- armpmu->set_event_filter(hwc, &event->attr)) &&
- event_requires_mode_exclusion(&event->attr)) {
- pr_debug("ARM performance counters do not support "
- "mode exclusion\n");
- return -EOPNOTSUPP;
+ if (armpmu->set_event_filter) {
+ ret = armpmu->set_event_filter(hwc, &event->attr);
+ if (ret)
+ return ret;
}
/*
@@ -418,12 +506,7 @@ __hw_perf_event_init(struct perf_event *event)
local64_set(&hwc->period_left, hwc->sample_period);
}
- if (event->group_leader != event) {
- if (validate_group(event) != 0)
- return -EINVAL;
- }
-
- return 0;
+ return validate_group(event);
}
static int armpmu_event_init(struct perf_event *event)
@@ -441,13 +524,9 @@ static int armpmu_event_init(struct perf_event *event)
!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
return -ENOENT;
- /* does not support taken branch sampling */
- if (has_branch_stack(event))
+ if (has_branch_stack(event) && !armpmu->reg_brbidr)
return -EOPNOTSUPP;
- if (armpmu->map_event(event) == -ENOENT)
- return -ENOENT;
-
return __hw_perf_event_init(event);
}
@@ -455,7 +534,7 @@ static void armpmu_enable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
- int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+ bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS);
/* For task-bound events we may be called on other CPUs */
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
@@ -481,95 +560,82 @@ static void armpmu_disable(struct pmu *pmu)
* microarchitecture, and aren't suitable for another. Thus, only match CPUs of
* the same microarchitecture.
*/
-static int armpmu_filter_match(struct perf_event *event)
+static bool armpmu_filter(struct pmu *pmu, int cpu)
{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- unsigned int cpu = smp_processor_id();
- int ret;
-
- ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
- if (ret && armpmu->filter_match)
- return armpmu->filter_match(event);
-
- return ret;
+ struct arm_pmu *armpmu = to_arm_pmu(pmu);
+ return !cpumask_test_cpu(cpu, &armpmu->supported_cpus);
}
-static ssize_t armpmu_cpumask_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t cpus_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev));
return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus);
}
-static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL);
+static DEVICE_ATTR_RO(cpus);
static struct attribute *armpmu_common_attrs[] = {
&dev_attr_cpus.attr,
NULL,
};
-static struct attribute_group armpmu_common_attr_group = {
+static const struct attribute_group armpmu_common_attr_group = {
.attrs = armpmu_common_attrs,
};
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *__oprofile_cpu_pmu;
-
-/*
- * Despite the names, these two functions are CPU-specific and are used
- * by the OProfile/perf code.
- */
-const char *perf_pmu_name(void)
-{
- if (!__oprofile_cpu_pmu)
- return NULL;
-
- return __oprofile_cpu_pmu->name;
-}
-EXPORT_SYMBOL_GPL(perf_pmu_name);
-
-int perf_num_counters(void)
+static int armpmu_count_irq_users(const struct cpumask *affinity, const int irq)
{
- int max_events = 0;
+ int cpu, count = 0;
- if (__oprofile_cpu_pmu != NULL)
- max_events = __oprofile_cpu_pmu->num_events;
+ for_each_cpu(cpu, affinity) {
+ if (per_cpu(cpu_irq, cpu) == irq)
+ count++;
+ }
- return max_events;
+ return count;
}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-static int armpmu_count_irq_users(const int irq)
+static const struct pmu_irq_ops *
+armpmu_find_irq_ops(const struct cpumask *affinity, int irq)
{
- int cpu, count = 0;
+ const struct pmu_irq_ops *ops = NULL;
+ int cpu;
- for_each_possible_cpu(cpu) {
- if (per_cpu(cpu_irq, cpu) == irq)
- count++;
+ for_each_cpu(cpu, affinity) {
+ if (per_cpu(cpu_irq, cpu) != irq)
+ continue;
+
+ ops = per_cpu(cpu_irq_ops, cpu);
+ if (ops)
+ break;
}
- return count;
+ return ops;
}
-void armpmu_free_irq(int irq, int cpu)
+void armpmu_free_irq(struct arm_pmu * __percpu *armpmu, int irq, int cpu)
{
if (per_cpu(cpu_irq, cpu) == 0)
return;
if (WARN_ON(irq != per_cpu(cpu_irq, cpu)))
return;
- if (!irq_is_percpu_devid(irq))
- free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu));
- else if (armpmu_count_irq_users(irq) == 1)
- free_percpu_irq(irq, &cpu_armpmu);
+ per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, armpmu);
per_cpu(cpu_irq, cpu) = 0;
+ per_cpu(cpu_irq_ops, cpu) = NULL;
}
-int armpmu_request_irq(int irq, int cpu)
+int armpmu_request_irq(struct arm_pmu * __percpu *pcpu_armpmu, int irq, int cpu)
{
int err = 0;
+ struct arm_pmu **armpmu = per_cpu_ptr(pcpu_armpmu, cpu);
+ const struct cpumask *affinity = *armpmu ? &(*armpmu)->supported_cpus :
+ cpu_possible_mask; /* ACPI */
const irq_handler_t handler = armpmu_dispatch_irq;
+ const struct pmu_irq_ops *irq_ops;
+
if (!irq)
return 0;
@@ -585,21 +651,45 @@ int armpmu_request_irq(int irq, int cpu)
}
irq_flags = IRQF_PERCPU |
- IRQF_NOBALANCING |
+ IRQF_NOBALANCING | IRQF_NO_AUTOEN |
IRQF_NO_THREAD;
- irq_set_status_flags(irq, IRQ_NOAUTOEN);
- err = request_irq(irq, handler, irq_flags, "arm-pmu",
- per_cpu_ptr(&cpu_armpmu, cpu));
- } else if (armpmu_count_irq_users(irq) == 0) {
- err = request_percpu_irq(irq, handler, "arm-pmu",
- &cpu_armpmu);
+ err = request_nmi(irq, handler, irq_flags, "arm-pmu", armpmu);
+
+ /* If cannot get an NMI, get a normal interrupt */
+ if (err) {
+ err = request_irq(irq, handler, irq_flags, "arm-pmu",
+ armpmu);
+ irq_ops = &pmuirq_ops;
+ } else {
+ has_nmi = true;
+ irq_ops = &pmunmi_ops;
+ }
+ } else if (armpmu_count_irq_users(affinity, irq) == 0) {
+ err = request_percpu_nmi(irq, handler, "arm-pmu", affinity, pcpu_armpmu);
+
+ /* If cannot get an NMI, get a normal interrupt */
+ if (err) {
+ err = request_percpu_irq_affinity(irq, handler, "arm-pmu",
+ affinity, pcpu_armpmu);
+ irq_ops = &percpu_pmuirq_ops;
+ } else {
+ has_nmi = true;
+ irq_ops = &percpu_pmunmi_ops;
+ }
+ } else {
+ /* Per cpudevid irq was already requested by another CPU */
+ irq_ops = armpmu_find_irq_ops(affinity, irq);
+
+ if (WARN_ON(!irq_ops))
+ err = -EINVAL;
}
if (err)
goto err_out;
per_cpu(cpu_irq, cpu) = irq;
+ per_cpu(cpu_irq_ops, cpu) = irq_ops;
return 0;
err_out:
@@ -613,6 +703,11 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
return per_cpu(hw_events->irq, cpu);
}
+bool arm_pmu_irq_is_nmi(void)
+{
+ return has_nmi;
+}
+
/*
* PMU hardware loses all context when a CPU goes offline.
* When a CPU is hotplugged back in, since some hardware registers are
@@ -629,15 +724,9 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
if (pmu->reset)
pmu->reset(pmu);
- per_cpu(cpu_armpmu, cpu) = pmu;
-
irq = armpmu_get_cpu_irq(pmu, cpu);
- if (irq) {
- if (irq_is_percpu_devid(irq))
- enable_percpu_irq(irq, IRQ_TYPE_NONE);
- else
- enable_irq(irq);
- }
+ if (irq)
+ per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq);
return 0;
}
@@ -651,14 +740,8 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
return 0;
irq = armpmu_get_cpu_irq(pmu, cpu);
- if (irq) {
- if (irq_is_percpu_devid(irq))
- disable_percpu_irq(irq);
- else
- disable_irq_nosync(irq);
- }
-
- per_cpu(cpu_armpmu, cpu) = NULL;
+ if (irq)
+ per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq);
return 0;
}
@@ -670,7 +753,7 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
struct perf_event *event;
int idx;
- for (idx = 0; idx < armpmu->num_events; idx++) {
+ for_each_set_bit(idx, armpmu->cntr_mask, ARMPMU_MAX_HWEVENTS) {
event = hw_events->events[idx];
if (!event)
continue;
@@ -686,17 +769,8 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
case CPU_PM_ENTER_FAILED:
/*
* Restore and enable the counter.
- * armpmu_start() indirectly calls
- *
- * perf_event_update_userpage()
- *
- * that requires RCU read locking to be functional,
- * wrap the call within RCU_NONIDLE to make the
- * RCU subsystem aware this cpu is not idle from
- * an RCU perspective for the armpmu_start() call
- * duration.
*/
- RCU_NONIDLE(armpmu_start(event, PERF_EF_RELOAD));
+ armpmu_start(event, PERF_EF_RELOAD);
break;
default:
break;
@@ -709,7 +783,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
{
struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
- int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+ bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS);
if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
return NOTIFY_DONE;
@@ -730,8 +804,8 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
cpu_pm_pmu_setup(armpmu, cmd);
break;
case CPU_PM_EXIT:
- cpu_pm_pmu_setup(armpmu, cmd);
case CPU_PM_ENTER_FAILED:
+ cpu_pm_pmu_setup(armpmu, cmd);
armpmu->start(armpmu);
break;
default:
@@ -785,18 +859,16 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
&cpu_pmu->node);
}
-static struct arm_pmu *__armpmu_alloc(gfp_t flags)
+struct arm_pmu *armpmu_alloc(void)
{
struct arm_pmu *pmu;
int cpu;
- pmu = kzalloc(sizeof(*pmu), flags);
- if (!pmu) {
- pr_info("failed to allocate PMU device!\n");
+ pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
goto out;
- }
- pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
+ pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, GFP_KERNEL);
if (!pmu->hw_events) {
pr_info("failed to allocate per-cpu PMU data.\n");
goto out_free_pmu;
@@ -811,16 +883,17 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
.start = armpmu_start,
.stop = armpmu_stop,
.read = armpmu_read,
- .filter_match = armpmu_filter_match,
+ .filter = armpmu_filter,
.attr_groups = pmu->attr_groups,
/*
* This is a CPU PMU potentially in a heterogeneous
- * configuration (e.g. big.LITTLE). This is not an uncore PMU,
- * and we have taken ctx sharing into account (e.g. with our
- * pmu::filter_match callback and pmu::event_init group
- * validation).
+ * configuration (e.g. big.LITTLE) so
+ * PERF_PMU_CAP_EXTENDED_HW_TYPE is required to open
+ * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a
+ * specific PMU.
*/
- .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS,
+ .capabilities = PERF_PMU_CAP_EXTENDED_REGS |
+ PERF_PMU_CAP_EXTENDED_HW_TYPE,
};
pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
@@ -830,7 +903,6 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
struct pmu_hw_events *events;
events = per_cpu_ptr(pmu->hw_events, cpu);
- raw_spin_lock_init(&events->pmu_lock);
events->percpu_pmu = pmu;
}
@@ -842,17 +914,6 @@ out:
return NULL;
}
-struct arm_pmu *armpmu_alloc(void)
-{
- return __armpmu_alloc(GFP_KERNEL);
-}
-
-struct arm_pmu *armpmu_alloc_atomic(void)
-{
- return __armpmu_alloc(GFP_ATOMIC);
-}
-
-
void armpmu_free(struct arm_pmu *pmu)
{
free_percpu(pmu->hw_events);
@@ -867,15 +928,25 @@ int armpmu_register(struct arm_pmu *pmu)
if (ret)
return ret;
+ /*
+ * By this stage we know our supported CPUs on either DT/ACPI platforms,
+ * detect the SMT implementation.
+ */
+ pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus));
+
+ if (!pmu->set_event_filter)
+ pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
+
ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
if (ret)
goto out_destroy;
- if (!__oprofile_cpu_pmu)
- __oprofile_cpu_pmu = pmu;
+ pr_info("enabled with %s PMU driver, %d (%*pb) counters available%s\n",
+ pmu->name, bitmap_weight(pmu->cntr_mask, ARMPMU_MAX_HWEVENTS),
+ ARMPMU_MAX_HWEVENTS, &pmu->cntr_mask,
+ has_nmi ? ", using NMIs" : "");
- pr_info("enabled with %s PMU driver, %d counters available\n",
- pmu->name, pmu->num_events);
+ kvm_host_pmu_init(pmu);
return 0;