summaryrefslogtreecommitdiff
path: root/arch/powerpc/perf/imc-pmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/perf/imc-pmu.c')
-rw-r--r--arch/powerpc/perf/imc-pmu.c667
1 files changed, 570 insertions, 97 deletions
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index f292a3f284f1..8664a7d297ad 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1,15 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* In-Memory Collection (IMC) Performance Monitor counter support.
*
* Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
* (C) 2017 Anju T Sudhakar, IBM Corporation.
* (C) 2017 Hemant K Shaw, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or later version.
*/
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/slab.h>
#include <asm/opal.h>
@@ -17,6 +14,7 @@
#include <asm/cputhreads.h>
#include <asm/smp.h>
#include <linux/string.h>
+#include <linux/spinlock.h>
/* Nest IMC data structures and variables */
@@ -43,12 +41,27 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
+/* Trace IMC data structures */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem);
+static struct imc_pmu_ref *trace_imc_refc;
+static int trace_imc_mem_size;
+
+/*
+ * Global data structure used to avoid races between thread,
+ * core and trace-imc
+ */
+static struct imc_pmu_ref imc_global_refc = {
+ .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock),
+ .id = 0,
+ .refc = 0,
+};
+
static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct imc_pmu, pmu);
}
-PMU_FORMAT_ATTR(event, "config:0-40");
+PMU_FORMAT_ATTR(event, "config:0-61");
PMU_FORMAT_ATTR(offset, "config:0-31");
PMU_FORMAT_ATTR(rvalue, "config:32");
PMU_FORMAT_ATTR(mode, "config:33-40");
@@ -60,11 +73,30 @@ static struct attribute *imc_format_attrs[] = {
NULL,
};
-static struct attribute_group imc_format_group = {
+static const struct attribute_group imc_format_group = {
.name = "format",
.attrs = imc_format_attrs,
};
+/* Format attribute for imc trace-mode */
+PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
+PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
+PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
+PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
+static struct attribute *trace_imc_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_cpmc_reserved.attr,
+ &format_attr_cpmc_event.attr,
+ &format_attr_cpmc_samplesel.attr,
+ &format_attr_cpmc_load.attr,
+ NULL,
+};
+
+static const struct attribute_group trace_imc_format_group = {
+.name = "format",
+.attrs = trace_imc_format_attrs,
+};
+
/* Get the cpumask printed to a buffer "buf" */
static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
struct device_attribute *attr,
@@ -95,7 +127,7 @@ static struct attribute *imc_pmu_cpumask_attrs[] = {
NULL,
};
-static struct attribute_group imc_pmu_cpumask_attr_group = {
+static const struct attribute_group imc_pmu_cpumask_attr_group = {
.attrs = imc_pmu_cpumask_attrs,
};
@@ -209,8 +241,10 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
ct = of_get_child_count(pmu_events);
/* Get the event prefix */
- if (of_property_read_string(node, "events-prefix", &prefix))
+ if (of_property_read_string(node, "events-prefix", &prefix)) {
+ of_node_put(pmu_events);
return 0;
+ }
/* Get a global unit and scale data if available */
if (of_property_read_string(node, "scale", &g_scale))
@@ -224,8 +258,10 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
/* Allocate memory for the events */
pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL);
- if (!pmu->events)
+ if (!pmu->events) {
+ of_node_put(pmu_events);
return -ENOMEM;
+ }
ct = 0;
/* Parse the events and update the struct */
@@ -235,6 +271,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
ct++;
}
+ of_node_put(pmu_events);
+
/* Allocate memory for attribute group */
attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
if (!attr_group) {
@@ -261,6 +299,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
attr_group->attrs = attrs;
do {
ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
+ if (!ev_val_str)
+ continue;
dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
if (!dev_str)
continue;
@@ -268,6 +308,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
attrs[j++] = dev_str;
if (pmu->events[i].scale) {
ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
+ if (!ev_scale_str)
+ continue;
dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
if (!dev_str)
continue;
@@ -277,6 +319,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
if (pmu->events[i].unit) {
ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
+ if (!ev_unit_str)
+ continue;
dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
if (!dev_str)
continue;
@@ -342,7 +386,14 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
*/
nid = cpu_to_node(cpu);
l_cpumask = cpumask_of_node(nid);
- target = cpumask_any_but(l_cpumask, cpu);
+ target = cpumask_last(l_cpumask);
+
+ /*
+ * If this(target) is the last cpu in the cpumask for this chip,
+ * check for any possible online cpu in the chip.
+ */
+ if (unlikely(target == cpu))
+ target = cpumask_any_but(l_cpumask, cpu);
/*
* Update the cpumask with the target cpu and
@@ -356,7 +407,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
get_hard_smp_processor_id(cpu));
/*
* If this is the last cpu in this chip then, skip the reference
- * count mutex lock and make the reference count on this chip zero.
+ * count lock and make the reference count on this chip zero.
*/
ref = get_nest_pmu_ref(cpu);
if (!ref)
@@ -418,15 +469,15 @@ static void nest_imc_counters_release(struct perf_event *event)
/*
* See if we need to disable the nest PMU.
* If no events are currently in use, then we have to take a
- * mutex to ensure that we don't race with another task doing
+ * lock to ensure that we don't race with another task doing
* enable or disable the nest counters.
*/
ref = get_nest_pmu_ref(event->cpu);
if (!ref)
return;
- /* Take the mutex lock for this node and then decrement the reference count */
- mutex_lock(&ref->lock);
+ /* Take the lock for this node and then decrement the reference count */
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
/*
* The scenario where this is true is, when perf session is
@@ -438,7 +489,7 @@ static void nest_imc_counters_release(struct perf_event *event)
* an OPAL call to disable the engine in that node.
*
*/
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return;
}
ref->refc--;
@@ -446,7 +497,7 @@ static void nest_imc_counters_release(struct perf_event *event)
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id);
return;
}
@@ -454,7 +505,7 @@ static void nest_imc_counters_release(struct perf_event *event)
WARN(1, "nest-imc: Invalid event reference count\n");
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
}
static int nest_imc_event_init(struct perf_event *event)
@@ -473,15 +524,6 @@ static int nest_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;
@@ -493,9 +535,14 @@ static int nest_imc_event_init(struct perf_event *event)
/*
* Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
- * Get the base memory addresss for this cpu.
+ * Get the base memory address for this cpu.
*/
chip_id = cpu_to_chip_id(event->cpu);
+
+ /* Return, if chip_id is not valid */
+ if (chip_id < 0)
+ return -ENODEV;
+
pcni = pmu->mem_info;
do {
if (pcni->id == chip_id) {
@@ -503,7 +550,7 @@ static int nest_imc_event_init(struct perf_event *event)
break;
}
pcni++;
- } while (pcni);
+ } while (pcni->vbase);
if (!flag)
return -ENODEV;
@@ -517,26 +564,25 @@ static int nest_imc_event_init(struct perf_event *event)
/*
* Get the imc_pmu_ref struct for this node.
- * Take the mutex lock and then increment the count of nest pmu events
- * inited.
+ * Take the lock and then increment the count of nest pmu events inited.
*/
ref = get_nest_pmu_ref(event->cpu);
if (!ref)
return -EINVAL;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("nest-imc: Unable to start the counters for node %d\n",
node_id);
return rc;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
event->destroy = nest_imc_counters_release;
return 0;
@@ -554,6 +600,7 @@ static int core_imc_mem_init(int cpu, int size)
{
int nid, rc = 0, core_id = (cpu / threads_per_core);
struct imc_mem_info *mem_info;
+ struct page *page;
/*
* alloc_pages_node() will allocate memory for core in the
@@ -564,15 +611,15 @@ static int core_imc_mem_init(int cpu, int size)
mem_info->id = core_id;
/* We need only vbase for core counters */
- mem_info->vbase = page_address(alloc_pages_node(nid,
- GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!mem_info->vbase)
+ page = alloc_pages_node(nid,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ mem_info->vbase = page_address(page);
- /* Init the mutex */
core_imc_refc[core_id].id = core_id;
- mutex_init(&core_imc_refc[core_id].lock);
+ spin_lock_init(&core_imc_refc[core_id].lock);
rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
__pa((void *)mem_info->vbase),
@@ -639,7 +686,7 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
/*
* Check whether core_imc is registered. We could end up here
* if the cpuhotplug callback registration fails. i.e, callback
- * invokes the offline path for all sucessfully registered cpus.
+ * invokes the offline path for all successfully registered cpus.
* At this stage, core_imc pmu will not be registered and we
* should return here.
*
@@ -651,16 +698,18 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
return 0;
/* Find any online cpu in that core except the current "cpu" */
- ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+ ncpu = cpumask_last(cpu_sibling_mask(cpu));
+
+ if (unlikely(ncpu == cpu))
+ ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
if (ncpu >= 0 && ncpu < nr_cpu_ids) {
cpumask_set_cpu(ncpu, &core_imc_cpumask);
perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
} else {
/*
- * If this is the last cpu in this core then, skip taking refernce
- * count mutex lock for this core and directly zero "refc" for
- * this core.
+ * If this is the last cpu in this core then skip taking reference
+ * count lock for this core and directly zero "refc" for this core.
*/
opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(cpu));
@@ -670,6 +719,16 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
return -EINVAL;
ref->refc = 0;
+ /*
+ * Reduce the global reference count, if this is the
+ * last cpu in this core and core-imc event running
+ * in this cpu.
+ */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == IMC_DOMAIN_CORE)
+ imc_global_refc.refc--;
+
+ spin_unlock(&imc_global_refc.lock);
}
return 0;
}
@@ -682,6 +741,23 @@ static int core_imc_pmu_cpumask_init(void)
ppc_core_imc_cpu_offline);
}
+static void reset_global_refc(struct perf_event *event)
+{
+ spin_lock(&imc_global_refc.lock);
+ imc_global_refc.refc--;
+
+ /*
+ * If no other thread is running any
+ * event for this domain(thread/core/trace),
+ * set the global id to zero.
+ */
+ if (imc_global_refc.refc <= 0) {
+ imc_global_refc.refc = 0;
+ imc_global_refc.id = 0;
+ }
+ spin_unlock(&imc_global_refc.lock);
+}
+
static void core_imc_counters_release(struct perf_event *event)
{
int rc, core_id;
@@ -692,17 +768,17 @@ static void core_imc_counters_release(struct perf_event *event)
/*
* See if we need to disable the IMC PMU.
* If no events are currently in use, then we have to take a
- * mutex to ensure that we don't race with another task doing
+ * lock to ensure that we don't race with another task doing
* enable or disable the core counters.
*/
core_id = event->cpu / threads_per_core;
- /* Take the mutex lock and decrement the refernce count for this core */
+ /* Take the lock and decrement the refernce count for this core */
ref = &core_imc_refc[core_id];
if (!ref)
return;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
/*
* The scenario where this is true is, when perf session is
@@ -714,7 +790,7 @@ static void core_imc_counters_release(struct perf_event *event)
* an OPAL call to disable the engine in that core.
*
*/
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return;
}
ref->refc--;
@@ -722,7 +798,7 @@ static void core_imc_counters_release(struct perf_event *event)
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
return;
}
@@ -730,7 +806,9 @@ static void core_imc_counters_release(struct perf_event *event)
WARN(1, "core-imc: Invalid event reference count\n");
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
+
+ reset_global_refc(event);
}
static int core_imc_event_init(struct perf_event *event)
@@ -748,15 +826,6 @@ static int core_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;
@@ -775,7 +844,6 @@ static int core_imc_event_init(struct perf_event *event)
if ((!pcmi->vbase))
return -ENODEV;
- /* Get the core_imc mutex for this core */
ref = &core_imc_refc[core_id];
if (!ref)
return -EINVAL;
@@ -783,22 +851,45 @@ static int core_imc_event_init(struct perf_event *event)
/*
* Core pmu units are enabled only when it is used.
* See if this is triggered for the first time.
- * If yes, take the mutex lock and enable the core counters.
+ * If yes, take the lock and enable the core counters.
* If not, just increment the count in core_imc_refc struct.
*/
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("core-imc: Unable to start the counters for core %d\n",
core_id);
return rc;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
+
+ /*
+ * Since the system can run either in accumulation or trace-mode
+ * of IMC at a time, core-imc events are allowed only if no other
+ * trace/thread imc events are enabled/monitored.
+ *
+ * Take the global lock, and check the refc.id
+ * to know whether any other trace/thread imc
+ * events are running.
+ */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
+ /*
+ * No other trace/thread imc events are running in
+ * the system, so set the refc.id to core-imc.
+ */
+ imc_global_refc.id = IMC_DOMAIN_CORE;
+ imc_global_refc.refc++;
+ } else {
+ spin_unlock(&imc_global_refc.lock);
+ return -EBUSY;
+ }
+ spin_unlock(&imc_global_refc.lock);
event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
event->destroy = core_imc_counters_release;
@@ -806,8 +897,11 @@ static int core_imc_event_init(struct perf_event *event)
}
/*
- * Allocates a page of memory for each of the online cpus, and write the
- * physical base address of that page to the LDBAR for that cpu.
+ * Allocates a page of memory for each of the online cpus, and load
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
*
* LDBAR Register Layout:
*
@@ -825,26 +919,26 @@ static int core_imc_event_init(struct perf_event *event)
*/
static int thread_imc_mem_alloc(int cpu_id, int size)
{
- u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
+ u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
int nid = cpu_to_node(cpu_id);
if (!local_mem) {
+ struct page *page;
/*
* This case could happen only once at start, since we dont
* free the memory in cpu offline path.
*/
- local_mem = page_address(alloc_pages_node(nid,
+ page = alloc_pages_node(nid,
GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!local_mem)
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ local_mem = page_address(page);
per_cpu(thread_imc_mem, cpu_id) = local_mem;
}
- ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
-
- mtspr(SPRN_LDBAR, ldbar_value);
+ mtspr(SPRN_LDBAR, 0);
return 0;
}
@@ -855,7 +949,23 @@ static int ppc_thread_imc_cpu_online(unsigned int cpu)
static int ppc_thread_imc_cpu_offline(unsigned int cpu)
{
- mtspr(SPRN_LDBAR, 0);
+ /*
+ * Set the bit 0 of LDBAR to zero.
+ *
+ * If bit 0 of LDBAR is unset, it will stop posting
+ * the counter data to memory.
+ * For thread-imc, bit 0 of LDBAR will be set to 1 in the
+ * event_add function. So reset this bit here, to stop the updates
+ * to memory in the cpu_offline path.
+ */
+ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+
+ /* Reduce the refc if thread-imc event running on this cpu */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == IMC_DOMAIN_THREAD)
+ imc_global_refc.refc--;
+ spin_unlock(&imc_global_refc.lock);
+
return 0;
}
@@ -876,6 +986,9 @@ static int thread_imc_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
+ if (!perfmon_capable())
+ return -EACCES;
+
/* Sampling not supported */
if (event->hw.sample_period)
return -EINVAL;
@@ -891,7 +1004,22 @@ static int thread_imc_event_init(struct perf_event *event)
if (!target)
return -EINVAL;
+ spin_lock(&imc_global_refc.lock);
+ /*
+ * Check if any other trace/core imc events are running in the
+ * system, if not set the global id to thread-imc.
+ */
+ if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) {
+ imc_global_refc.id = IMC_DOMAIN_THREAD;
+ imc_global_refc.refc++;
+ } else {
+ spin_unlock(&imc_global_refc.lock);
+ return -EBUSY;
+ }
+ spin_unlock(&imc_global_refc.lock);
+
event->pmu->task_ctx_nr = perf_sw_context;
+ event->destroy = reset_global_refc;
return 0;
}
@@ -903,16 +1031,16 @@ static bool is_thread_imc_pmu(struct perf_event *event)
return false;
}
-static u64 * get_event_base_addr(struct perf_event *event)
+static __be64 *get_event_base_addr(struct perf_event *event)
{
u64 addr;
if (is_thread_imc_pmu(event)) {
addr = (u64)per_cpu(thread_imc_mem, smp_processor_id());
- return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK));
+ return (__be64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK));
}
- return (u64 *)event->hw.event_base;
+ return (__be64 *)event->hw.event_base;
}
static void thread_imc_pmu_start_txn(struct pmu *pmu,
@@ -936,7 +1064,8 @@ static int thread_imc_pmu_commit_txn(struct pmu *pmu)
static u64 imc_read_counter(struct perf_event *event)
{
- u64 *addr, data;
+ __be64 *addr;
+ u64 data;
/*
* In-Memory Collection (IMC) counters are free flowing counters.
@@ -995,6 +1124,7 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
{
int core_id;
struct imc_pmu_ref *ref;
+ u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
if (flags & PERF_EF_START)
imc_event_start(event, flags);
@@ -1003,28 +1133,31 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
return -EINVAL;
core_id = smp_processor_id() / threads_per_core;
+ ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
+ mtspr(SPRN_LDBAR, ldbar_value);
+
/*
* imc pmus are enabled only when it is used.
* See if this is triggered for the first time.
- * If yes, take the mutex lock and enable the counters.
+ * If yes, take the lock and enable the counters.
* If not, just increment the count in ref count struct.
*/
ref = &core_imc_refc[core_id];
if (!ref)
return -EINVAL;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("thread-imc: Unable to start the counter\
for core %d\n", core_id);
return -EINVAL;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return 0;
}
@@ -1034,21 +1167,19 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
int core_id;
struct imc_pmu_ref *ref;
- /*
- * Take a snapshot and calculate the delta and update
- * the event counter values.
- */
- imc_event_update(event);
-
core_id = smp_processor_id() / threads_per_core;
ref = &core_imc_refc[core_id];
+ if (!ref) {
+ pr_debug("imc: Failed to get event reference count\n");
+ return;
+ }
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
ref->refc--;
if (ref->refc == 0) {
if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("thread-imc: Unable to stop the counters\
for core %d\n", core_id);
return;
@@ -1056,7 +1187,293 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
} else if (ref->refc < 0) {
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
+
+ /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
+ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+
+ /*
+ * Take a snapshot and calculate the delta and update
+ * the event counter values.
+ */
+ imc_event_update(event);
+}
+
+/*
+ * Allocate a page of memory for each cpu, and load LDBAR with 0.
+ */
+static int trace_imc_mem_alloc(int cpu_id, int size)
+{
+ u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
+ int phys_id = cpu_to_node(cpu_id), rc = 0;
+ int core_id = (cpu_id / threads_per_core);
+
+ if (!local_mem) {
+ struct page *page;
+
+ page = alloc_pages_node(phys_id,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size));
+ if (!page)
+ return -ENOMEM;
+ local_mem = page_address(page);
+ per_cpu(trace_imc_mem, cpu_id) = local_mem;
+
+ /* Initialise the counters for trace mode */
+ rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
+ get_hard_smp_processor_id(cpu_id));
+ if (rc) {
+ pr_info("IMC:opal init failed for trace imc\n");
+ return rc;
+ }
+ }
+
+ trace_imc_refc[core_id].id = core_id;
+ spin_lock_init(&trace_imc_refc[core_id].lock);
+
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+}
+
+static int ppc_trace_imc_cpu_online(unsigned int cpu)
+{
+ return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+}
+
+static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+{
+ /*
+ * No need to set bit 0 of LDBAR to zero, as
+ * it is set to zero for imc trace-mode
+ *
+ * Reduce the refc if any trace-imc event running
+ * on this cpu.
+ */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == IMC_DOMAIN_TRACE)
+ imc_global_refc.refc--;
+ spin_unlock(&imc_global_refc.lock);
+
+ return 0;
+}
+
+static int trace_imc_cpu_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+ "perf/powerpc/imc_trace:online",
+ ppc_trace_imc_cpu_online,
+ ppc_trace_imc_cpu_offline);
+}
+
+static u64 get_trace_imc_event_base_addr(void)
+{
+ return (u64)per_cpu(trace_imc_mem, smp_processor_id());
+}
+
+/*
+ * Function to parse trace-imc data obtained
+ * and to prepare the perf sample.
+ */
+static int trace_imc_prepare_sample(struct trace_imc_data *mem,
+ struct perf_sample_data *data,
+ u64 *prev_tb,
+ struct perf_event_header *header,
+ struct perf_event *event)
+{
+ /* Sanity checks for a valid record */
+ if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
+ *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
+ else
+ return -EINVAL;
+
+ if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
+ be64_to_cpu(READ_ONCE(mem->tb2)))
+ return -EINVAL;
+
+ /* Prepare perf sample */
+ data->ip = be64_to_cpu(READ_ONCE(mem->ip));
+ data->period = event->hw.last_period;
+
+ header->type = PERF_RECORD_SAMPLE;
+ header->size = sizeof(*header) + event->header_size;
+ header->misc = 0;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) {
+ case 0:/* when MSR HV and PR not set in the trace-record */
+ header->misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+ break;
+ case 1: /* MSR HV is 0 and PR is 1 */
+ header->misc |= PERF_RECORD_MISC_GUEST_USER;
+ break;
+ case 2: /* MSR HV is 1 and PR is 0 */
+ header->misc |= PERF_RECORD_MISC_KERNEL;
+ break;
+ case 3: /* MSR HV is 1 and PR is 1 */
+ header->misc |= PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_info("IMC: Unable to set the flag based on MSR bits\n");
+ break;
+ }
+ } else {
+ if (is_kernel_addr(data->ip))
+ header->misc |= PERF_RECORD_MISC_KERNEL;
+ else
+ header->misc |= PERF_RECORD_MISC_USER;
+ }
+ perf_event_header__init_id(header, data, event);
+
+ return 0;
+}
+
+static void dump_trace_imc_data(struct perf_event *event)
+{
+ struct trace_imc_data *mem;
+ int i, ret;
+ u64 prev_tb = 0;
+
+ mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
+ for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
+ i++, mem++) {
+ struct perf_sample_data data;
+ struct perf_event_header header;
+
+ ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
+ if (ret) /* Exit, if not a valid record */
+ break;
+ else {
+ /* If this is a valid record, create the sample */
+ struct perf_output_handle handle;
+
+ if (perf_output_begin(&handle, &data, event, header.size))
+ return;
+
+ perf_output_sample(&handle, &header, &data, event);
+ perf_output_end(&handle);
+ }
+ }
+}
+
+static int trace_imc_event_add(struct perf_event *event, int flags)
+{
+ int core_id = smp_processor_id() / threads_per_core;
+ struct imc_pmu_ref *ref = NULL;
+ u64 local_mem, ldbar_value;
+
+ /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */
+ local_mem = get_trace_imc_event_base_addr();
+ ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
+
+ /* trace-imc reference count */
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
+ if (!ref) {
+ pr_debug("imc: Failed to get the event reference count\n");
+ return -EINVAL;
+ }
+
+ mtspr(SPRN_LDBAR, ldbar_value);
+ spin_lock(&ref->lock);
+ if (ref->refc == 0) {
+ if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+ spin_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
+ return -EINVAL;
+ }
+ }
+ ++ref->refc;
+ spin_unlock(&ref->lock);
+ return 0;
+}
+
+static void trace_imc_event_read(struct perf_event *event)
+{
+ return;
+}
+
+static void trace_imc_event_stop(struct perf_event *event, int flags)
+{
+ u64 local_mem = get_trace_imc_event_base_addr();
+ dump_trace_imc_data(event);
+ memset((void *)local_mem, 0, sizeof(u64));
+}
+
+static void trace_imc_event_start(struct perf_event *event, int flags)
+{
+ return;
+}
+
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+ int core_id = smp_processor_id() / threads_per_core;
+ struct imc_pmu_ref *ref = NULL;
+
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
+ if (!ref) {
+ pr_debug("imc: Failed to get event reference count\n");
+ return;
+ }
+
+ spin_lock(&ref->lock);
+ ref->refc--;
+ if (ref->refc == 0) {
+ if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+ spin_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
+ return;
+ }
+ } else if (ref->refc < 0) {
+ ref->refc = 0;
+ }
+ spin_unlock(&ref->lock);
+
+ trace_imc_event_stop(event, flags);
+}
+
+static int trace_imc_event_init(struct perf_event *event)
+{
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (!perfmon_capable())
+ return -EACCES;
+
+ /* Return if this is a couting event */
+ if (event->attr.sample_period == 0)
+ return -ENOENT;
+
+ /*
+ * Take the global lock, and make sure
+ * no other thread is running any core/thread imc
+ * events
+ */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) {
+ /*
+ * No core/thread imc events are running in the
+ * system, so set the refc.id to trace-imc.
+ */
+ imc_global_refc.id = IMC_DOMAIN_TRACE;
+ imc_global_refc.refc++;
+ } else {
+ spin_unlock(&imc_global_refc.lock);
+ return -EBUSY;
+ }
+ spin_unlock(&imc_global_refc.lock);
+
+ event->hw.idx = -1;
+
+ /*
+ * There can only be a single PMU for perf_hw_context events which is assigned to
+ * core PMU. Hence use "perf_sw_context" for trace_imc.
+ */
+ event->pmu->task_ctx_nr = perf_sw_context;
+ event->destroy = reset_global_refc;
+ return 0;
}
/* update_pmu_ops : Populate the appropriate operations for "pmu" */
@@ -1069,6 +1486,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.stop = imc_event_stop;
pmu->pmu.read = imc_event_update;
pmu->pmu.attr_groups = pmu->attr_groups;
+ pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
switch (pmu->domain) {
@@ -1088,6 +1506,15 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
break;
+ case IMC_DOMAIN_TRACE:
+ pmu->pmu.event_init = trace_imc_event_init;
+ pmu->pmu.add = trace_imc_event_add;
+ pmu->pmu.del = trace_imc_event_del;
+ pmu->pmu.start = trace_imc_event_start;
+ pmu->pmu.stop = trace_imc_event_stop;
+ pmu->pmu.read = trace_imc_event_read;
+ pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
+ break;
default:
break;
}
@@ -1109,10 +1536,10 @@ static int init_nest_pmu_ref(void)
i = 0;
for_each_node(nid) {
/*
- * Mutex lock to avoid races while tracking the number of
+ * Take the lock to avoid races while tracking the number of
* sessions using the chip's nest pmu units.
*/
- mutex_init(&nest_imc_refc[i].lock);
+ spin_lock_init(&nest_imc_refc[i].lock);
/*
* Loop to init the "id" with the node_id. Variable "i" initialized to
@@ -1158,10 +1585,10 @@ static void cleanup_all_core_imc_memory(void)
static void thread_imc_ldbar_disable(void *dummy)
{
/*
- * By Zeroing LDBAR, we disable thread-imc
- * updates.
+ * By setting 0th bit of LDBAR to zero, we disable thread-imc
+ * updates to memory.
*/
- mtspr(SPRN_LDBAR, 0);
+ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
}
void thread_imc_disable(void)
@@ -1180,6 +1607,18 @@ static void cleanup_all_thread_imc_memory(void)
}
}
+static void cleanup_all_trace_imc_memory(void)
+{
+ int i, order = get_order(trace_imc_mem_size);
+
+ for_each_online_cpu(i) {
+ if (per_cpu(trace_imc_mem, i))
+ free_pages((u64)per_cpu(trace_imc_mem, i), order);
+
+ }
+ kfree(trace_imc_refc);
+}
+
/* Function to free the attr_groups which are dynamically allocated */
static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
{
@@ -1221,6 +1660,11 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
}
+
+ if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
+ cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+ cleanup_all_trace_imc_memory();
+ }
}
/*
@@ -1303,6 +1747,27 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
thread_imc_pmu = pmu_ptr;
break;
+ case IMC_DOMAIN_TRACE:
+ /* Update the pmu name */
+ pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+ if (!pmu_ptr->pmu.name)
+ return -ENOMEM;
+
+ nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+ trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+ GFP_KERNEL);
+ if (!trace_imc_refc)
+ return -ENOMEM;
+
+ trace_imc_mem_size = pmu_ptr->counter_mem_size;
+ for_each_online_cpu(cpu) {
+ res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+ if (res) {
+ cleanup_all_trace_imc_memory();
+ goto err;
+ }
+ }
+ break;
default:
return -EINVAL;
}
@@ -1376,6 +1841,14 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
}
break;
+ case IMC_DOMAIN_TRACE:
+ ret = trace_imc_cpu_init();
+ if (ret) {
+ cleanup_all_trace_imc_memory();
+ goto err_free_mem;
+ }
+
+ break;
default:
return -EINVAL; /* Unknown domain */
}