diff options
Diffstat (limited to 'drivers/perf/arm_pmu.c')
| -rw-r--r-- | drivers/perf/arm_pmu.c | 359 |
1 files changed, 215 insertions, 144 deletions
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index d0b7dd8fb184..973a027d9063 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #undef DEBUG /* @@ -25,13 +26,97 @@ #include <asm/irq_regs.h> -static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); +static int armpmu_count_irq_users(const struct cpumask *affinity, + const int irq); + +struct pmu_irq_ops { + void (*enable_pmuirq)(unsigned int irq); + void (*disable_pmuirq)(unsigned int irq); + void (*free_pmuirq)(unsigned int irq, int cpu, void __percpu *devid); +}; + +static void armpmu_free_pmuirq(unsigned int irq, int cpu, void __percpu *devid) +{ + free_irq(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmuirq_ops = { + .enable_pmuirq = enable_irq, + .disable_pmuirq = disable_irq_nosync, + .free_pmuirq = armpmu_free_pmuirq +}; + +static void armpmu_free_pmunmi(unsigned int irq, int cpu, void __percpu *devid) +{ + free_nmi(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmunmi_ops = { + .enable_pmuirq = enable_nmi, + .disable_pmuirq = disable_nmi_nosync, + .free_pmuirq = armpmu_free_pmunmi +}; + +static void armpmu_enable_percpu_pmuirq(unsigned int irq) +{ + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu, + void __percpu *devid) +{ + struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu); + + if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1) + free_percpu_irq(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmuirq_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmuirq, + .disable_pmuirq = disable_percpu_irq, + .free_pmuirq = armpmu_free_percpu_pmuirq +}; + +static void armpmu_enable_percpu_pmunmi(unsigned int irq) +{ + if (!prepare_percpu_nmi(irq)) + enable_percpu_nmi(irq, IRQ_TYPE_NONE); +} + +static void armpmu_disable_percpu_pmunmi(unsigned int irq) +{ + disable_percpu_nmi(irq); + teardown_percpu_nmi(irq); +} + +static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu, + void __percpu *devid) +{ + struct arm_pmu *armpmu = *per_cpu_ptr((void * __percpu *)devid, cpu); + + if (armpmu_count_irq_users(&armpmu->supported_cpus, irq) == 1) + free_percpu_nmi(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmunmi_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmunmi, + .disable_pmuirq = armpmu_disable_percpu_pmunmi, + .free_pmuirq = armpmu_free_percpu_pmunmi +}; + static DEFINE_PER_CPU(int, cpu_irq); +static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops); + +static bool has_nmi; static inline u64 arm_pmu_event_max_period(struct perf_event *event) { if (event->hw.flags & ARMPMU_EVT_64BIT) return GENMASK_ULL(63, 0); + else if (event->hw.flags & ARMPMU_EVT_63BIT) + return GENMASK_ULL(62, 0); + else if (event->hw.flags & ARMPMU_EVT_47BIT) + return GENMASK_ULL(46, 0); else return GENMASK_ULL(31, 0); } @@ -237,6 +322,12 @@ armpmu_del(struct perf_event *event, int flags) int idx = hwc->idx; armpmu_stop(event, PERF_EF_UPDATE); + + if (has_branch_stack(event)) { + hw_events->branch_users--; + perf_sched_cb_dec(event->pmu); + } + hw_events->events[idx] = NULL; armpmu->clear_event_idx(hw_events, event); perf_event_update_userpage(event); @@ -261,12 +352,15 @@ armpmu_add(struct perf_event *event, int flags) if (idx < 0) return idx; - /* - * If there is an event in the counter we are going to use then make - * sure it is disabled. - */ + /* The newly-allocated counter should be empty */ + WARN_ON_ONCE(hw_events->events[idx]); + + if (has_branch_stack(event)) { + hw_events->branch_users++; + perf_sched_cb_inc(event->pmu); + } + event->hw.idx = idx; - armpmu->disable(event); hw_events->events[idx] = event; hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; @@ -321,6 +415,9 @@ validate_group(struct perf_event *event) if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; + if (event == leader) + return 0; + for_each_sibling_event(sibling, leader) { if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; @@ -357,18 +454,11 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) } static int -event_requires_mode_exclusion(struct perf_event_attr *attr) -{ - return attr->exclude_idle || attr->exclude_user || - attr->exclude_kernel || attr->exclude_hv; -} - -static int __hw_perf_event_init(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - int mapping; + int mapping, ret; hwc->flags = 0; mapping = armpmu->map_event(event); @@ -393,12 +483,10 @@ __hw_perf_event_init(struct perf_event *event) /* * Check whether we need to exclude the counter from certain modes. */ - if ((!armpmu->set_event_filter || - armpmu->set_event_filter(hwc, &event->attr)) && - event_requires_mode_exclusion(&event->attr)) { - pr_debug("ARM performance counters do not support " - "mode exclusion\n"); - return -EOPNOTSUPP; + if (armpmu->set_event_filter) { + ret = armpmu->set_event_filter(hwc, &event->attr); + if (ret) + return ret; } /* @@ -418,12 +506,7 @@ __hw_perf_event_init(struct perf_event *event) local64_set(&hwc->period_left, hwc->sample_period); } - if (event->group_leader != event) { - if (validate_group(event) != 0) - return -EINVAL; - } - - return 0; + return validate_group(event); } static int armpmu_event_init(struct perf_event *event) @@ -441,13 +524,9 @@ static int armpmu_event_init(struct perf_event *event) !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) return -ENOENT; - /* does not support taken branch sampling */ - if (has_branch_stack(event)) + if (has_branch_stack(event) && !armpmu->reg_brbidr) return -EOPNOTSUPP; - if (armpmu->map_event(event) == -ENOENT) - return -ENOENT; - return __hw_perf_event_init(event); } @@ -455,7 +534,7 @@ static void armpmu_enable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS); /* For task-bound events we may be called on other CPUs */ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) @@ -481,95 +560,82 @@ static void armpmu_disable(struct pmu *pmu) * microarchitecture, and aren't suitable for another. Thus, only match CPUs of * the same microarchitecture. */ -static int armpmu_filter_match(struct perf_event *event) +static bool armpmu_filter(struct pmu *pmu, int cpu) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - unsigned int cpu = smp_processor_id(); - int ret; - - ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus); - if (ret && armpmu->filter_match) - return armpmu->filter_match(event); - - return ret; + struct arm_pmu *armpmu = to_arm_pmu(pmu); + return !cpumask_test_cpu(cpu, &armpmu->supported_cpus); } -static ssize_t armpmu_cpumask_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t cpus_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev)); return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus); } -static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL); +static DEVICE_ATTR_RO(cpus); static struct attribute *armpmu_common_attrs[] = { &dev_attr_cpus.attr, NULL, }; -static struct attribute_group armpmu_common_attr_group = { +static const struct attribute_group armpmu_common_attr_group = { .attrs = armpmu_common_attrs, }; -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *__oprofile_cpu_pmu; - -/* - * Despite the names, these two functions are CPU-specific and are used - * by the OProfile/perf code. - */ -const char *perf_pmu_name(void) -{ - if (!__oprofile_cpu_pmu) - return NULL; - - return __oprofile_cpu_pmu->name; -} -EXPORT_SYMBOL_GPL(perf_pmu_name); - -int perf_num_counters(void) +static int armpmu_count_irq_users(const struct cpumask *affinity, const int irq) { - int max_events = 0; + int cpu, count = 0; - if (__oprofile_cpu_pmu != NULL) - max_events = __oprofile_cpu_pmu->num_events; + for_each_cpu(cpu, affinity) { + if (per_cpu(cpu_irq, cpu) == irq) + count++; + } - return max_events; + return count; } -EXPORT_SYMBOL_GPL(perf_num_counters); -static int armpmu_count_irq_users(const int irq) +static const struct pmu_irq_ops * +armpmu_find_irq_ops(const struct cpumask *affinity, int irq) { - int cpu, count = 0; + const struct pmu_irq_ops *ops = NULL; + int cpu; - for_each_possible_cpu(cpu) { - if (per_cpu(cpu_irq, cpu) == irq) - count++; + for_each_cpu(cpu, affinity) { + if (per_cpu(cpu_irq, cpu) != irq) + continue; + + ops = per_cpu(cpu_irq_ops, cpu); + if (ops) + break; } - return count; + return ops; } -void armpmu_free_irq(int irq, int cpu) +void armpmu_free_irq(struct arm_pmu * __percpu *armpmu, int irq, int cpu) { if (per_cpu(cpu_irq, cpu) == 0) return; if (WARN_ON(irq != per_cpu(cpu_irq, cpu))) return; - if (!irq_is_percpu_devid(irq)) - free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu)); - else if (armpmu_count_irq_users(irq) == 1) - free_percpu_irq(irq, &cpu_armpmu); + per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, armpmu); per_cpu(cpu_irq, cpu) = 0; + per_cpu(cpu_irq_ops, cpu) = NULL; } -int armpmu_request_irq(int irq, int cpu) +int armpmu_request_irq(struct arm_pmu * __percpu *pcpu_armpmu, int irq, int cpu) { int err = 0; + struct arm_pmu **armpmu = per_cpu_ptr(pcpu_armpmu, cpu); + const struct cpumask *affinity = *armpmu ? &(*armpmu)->supported_cpus : + cpu_possible_mask; /* ACPI */ const irq_handler_t handler = armpmu_dispatch_irq; + const struct pmu_irq_ops *irq_ops; + if (!irq) return 0; @@ -585,21 +651,45 @@ int armpmu_request_irq(int irq, int cpu) } irq_flags = IRQF_PERCPU | - IRQF_NOBALANCING | + IRQF_NOBALANCING | IRQF_NO_AUTOEN | IRQF_NO_THREAD; - irq_set_status_flags(irq, IRQ_NOAUTOEN); - err = request_irq(irq, handler, irq_flags, "arm-pmu", - per_cpu_ptr(&cpu_armpmu, cpu)); - } else if (armpmu_count_irq_users(irq) == 0) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &cpu_armpmu); + err = request_nmi(irq, handler, irq_flags, "arm-pmu", armpmu); + + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_irq(irq, handler, irq_flags, "arm-pmu", + armpmu); + irq_ops = &pmuirq_ops; + } else { + has_nmi = true; + irq_ops = &pmunmi_ops; + } + } else if (armpmu_count_irq_users(affinity, irq) == 0) { + err = request_percpu_nmi(irq, handler, "arm-pmu", affinity, pcpu_armpmu); + + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_percpu_irq_affinity(irq, handler, "arm-pmu", + affinity, pcpu_armpmu); + irq_ops = &percpu_pmuirq_ops; + } else { + has_nmi = true; + irq_ops = &percpu_pmunmi_ops; + } + } else { + /* Per cpudevid irq was already requested by another CPU */ + irq_ops = armpmu_find_irq_ops(affinity, irq); + + if (WARN_ON(!irq_ops)) + err = -EINVAL; } if (err) goto err_out; per_cpu(cpu_irq, cpu) = irq; + per_cpu(cpu_irq_ops, cpu) = irq_ops; return 0; err_out: @@ -613,6 +703,11 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) return per_cpu(hw_events->irq, cpu); } +bool arm_pmu_irq_is_nmi(void) +{ + return has_nmi; +} + /* * PMU hardware loses all context when a CPU goes offline. * When a CPU is hotplugged back in, since some hardware registers are @@ -629,15 +724,9 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) if (pmu->reset) pmu->reset(pmu); - per_cpu(cpu_armpmu, cpu) = pmu; - irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - enable_percpu_irq(irq, IRQ_TYPE_NONE); - else - enable_irq(irq); - } + if (irq) + per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); return 0; } @@ -651,14 +740,8 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - disable_percpu_irq(irq); - else - disable_irq_nosync(irq); - } - - per_cpu(cpu_armpmu, cpu) = NULL; + if (irq) + per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); return 0; } @@ -670,7 +753,7 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) struct perf_event *event; int idx; - for (idx = 0; idx < armpmu->num_events; idx++) { + for_each_set_bit(idx, armpmu->cntr_mask, ARMPMU_MAX_HWEVENTS) { event = hw_events->events[idx]; if (!event) continue; @@ -686,17 +769,8 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) case CPU_PM_ENTER_FAILED: /* * Restore and enable the counter. - * armpmu_start() indirectly calls - * - * perf_event_update_userpage() - * - * that requires RCU read locking to be functional, - * wrap the call within RCU_NONIDLE to make the - * RCU subsystem aware this cpu is not idle from - * an RCU perspective for the armpmu_start() call - * duration. */ - RCU_NONIDLE(armpmu_start(event, PERF_EF_RELOAD)); + armpmu_start(event, PERF_EF_RELOAD); break; default: break; @@ -709,7 +783,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, { struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb); struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + bool enabled = !bitmap_empty(hw_events->used_mask, ARMPMU_MAX_HWEVENTS); if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) return NOTIFY_DONE; @@ -730,8 +804,8 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, cpu_pm_pmu_setup(armpmu, cmd); break; case CPU_PM_EXIT: - cpu_pm_pmu_setup(armpmu, cmd); case CPU_PM_ENTER_FAILED: + cpu_pm_pmu_setup(armpmu, cmd); armpmu->start(armpmu); break; default: @@ -785,18 +859,16 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) &cpu_pmu->node); } -static struct arm_pmu *__armpmu_alloc(gfp_t flags) +struct arm_pmu *armpmu_alloc(void) { struct arm_pmu *pmu; int cpu; - pmu = kzalloc(sizeof(*pmu), flags); - if (!pmu) { - pr_info("failed to allocate PMU device!\n"); + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + if (!pmu) goto out; - } - pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags); + pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, GFP_KERNEL); if (!pmu->hw_events) { pr_info("failed to allocate per-cpu PMU data.\n"); goto out_free_pmu; @@ -811,16 +883,17 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags) .start = armpmu_start, .stop = armpmu_stop, .read = armpmu_read, - .filter_match = armpmu_filter_match, + .filter = armpmu_filter, .attr_groups = pmu->attr_groups, /* * This is a CPU PMU potentially in a heterogeneous - * configuration (e.g. big.LITTLE). This is not an uncore PMU, - * and we have taken ctx sharing into account (e.g. with our - * pmu::filter_match callback and pmu::event_init group - * validation). + * configuration (e.g. big.LITTLE) so + * PERF_PMU_CAP_EXTENDED_HW_TYPE is required to open + * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a + * specific PMU. */ - .capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS, + .capabilities = PERF_PMU_CAP_EXTENDED_REGS | + PERF_PMU_CAP_EXTENDED_HW_TYPE, }; pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] = @@ -830,7 +903,6 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags) struct pmu_hw_events *events; events = per_cpu_ptr(pmu->hw_events, cpu); - raw_spin_lock_init(&events->pmu_lock); events->percpu_pmu = pmu; } @@ -842,17 +914,6 @@ out: return NULL; } -struct arm_pmu *armpmu_alloc(void) -{ - return __armpmu_alloc(GFP_KERNEL); -} - -struct arm_pmu *armpmu_alloc_atomic(void) -{ - return __armpmu_alloc(GFP_ATOMIC); -} - - void armpmu_free(struct arm_pmu *pmu) { free_percpu(pmu->hw_events); @@ -867,15 +928,25 @@ int armpmu_register(struct arm_pmu *pmu) if (ret) return ret; + /* + * By this stage we know our supported CPUs on either DT/ACPI platforms, + * detect the SMT implementation. + */ + pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus)); + + if (!pmu->set_event_filter) + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; + ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); if (ret) goto out_destroy; - if (!__oprofile_cpu_pmu) - __oprofile_cpu_pmu = pmu; + pr_info("enabled with %s PMU driver, %d (%*pb) counters available%s\n", + pmu->name, bitmap_weight(pmu->cntr_mask, ARMPMU_MAX_HWEVENTS), + ARMPMU_MAX_HWEVENTS, &pmu->cntr_mask, + has_nmi ? ", using NMIs" : ""); - pr_info("enabled with %s PMU driver, %d counters available\n", - pmu->name, pmu->num_events); + kvm_host_pmu_init(pmu); return 0; |
