Diffstat (limited to 'arch/mips/kernel/perf_event_mipsxx.c')
-rw-r--r--   arch/mips/kernel/perf_event_mipsxx.c | 973
1 file changed, 691 insertions, 282 deletions
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 45f1ffcf1a4b..196a070349b0 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Linux performance counter support for MIPS. * @@ -9,10 +10,6 @@ * based on the sparc64 perf event code and the x86 code. Performance * counter access is based on the MIPS Oprofile code. And the callchain * support references the code of MIPS stacktrace.c. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/cpumask.h> @@ -64,17 +61,11 @@ struct mips_perf_event { #define CNTR_EVEN 0x55555555 #define CNTR_ODD 0xaaaaaaaa #define CNTR_ALL 0xffffffff -#ifdef CONFIG_MIPS_MT_SMP enum { T = 0, V = 1, P = 2, } range; -#else - #define T - #define V - #define P -#endif }; static struct mips_perf_event raw_event; @@ -99,61 +90,46 @@ struct mips_pmu { unsigned int num_counters; }; +static int counter_bits; static struct mips_pmu mipspmu; -#define M_CONFIG1_PC (1 << 4) - -#define M_PERFCTL_EXL (1 << 0) -#define M_PERFCTL_KERNEL (1 << 1) -#define M_PERFCTL_SUPERVISOR (1 << 2) -#define M_PERFCTL_USER (1 << 3) -#define M_PERFCTL_INTERRUPT_ENABLE (1 << 4) -#define M_PERFCTL_EVENT(event) (((event) & 0x3ff) << 5) -#define M_PERFCTL_VPEID(vpe) ((vpe) << 16) +#define M_PERFCTL_EVENT(event) (((event) << MIPS_PERFCTRL_EVENT_S) & \ + MIPS_PERFCTRL_EVENT) +#define M_PERFCTL_VPEID(vpe) ((vpe) << MIPS_PERFCTRL_VPEID_S) #ifdef CONFIG_CPU_BMIPS5000 #define M_PERFCTL_MT_EN(filter) 0 #else /* !CONFIG_CPU_BMIPS5000 */ -#define M_PERFCTL_MT_EN(filter) ((filter) << 20) +#define M_PERFCTL_MT_EN(filter) (filter) #endif /* CONFIG_CPU_BMIPS5000 */ -#define M_TC_EN_ALL M_PERFCTL_MT_EN(0) -#define M_TC_EN_VPE M_PERFCTL_MT_EN(1) -#define M_TC_EN_TC M_PERFCTL_MT_EN(2) -#define M_PERFCTL_TCID(tcid) ((tcid) << 22) -#define M_PERFCTL_WIDE (1 << 30) -#define M_PERFCTL_MORE (1 << 31) -#define M_PERFCTL_TC (1 << 30) +#define M_TC_EN_ALL M_PERFCTL_MT_EN(MIPS_PERFCTRL_MT_EN_ALL) +#define M_TC_EN_VPE M_PERFCTL_MT_EN(MIPS_PERFCTRL_MT_EN_VPE) +#define M_TC_EN_TC M_PERFCTL_MT_EN(MIPS_PERFCTRL_MT_EN_TC) -#define M_PERFCTL_COUNT_EVENT_WHENEVER (M_PERFCTL_EXL | \ - M_PERFCTL_KERNEL | \ - M_PERFCTL_USER | \ - M_PERFCTL_SUPERVISOR | \ - M_PERFCTL_INTERRUPT_ENABLE) +#define M_PERFCTL_COUNT_EVENT_WHENEVER (MIPS_PERFCTRL_EXL | \ + MIPS_PERFCTRL_K | \ + MIPS_PERFCTRL_U | \ + MIPS_PERFCTRL_S | \ + MIPS_PERFCTRL_IE) #ifdef CONFIG_MIPS_MT_SMP #define M_PERFCTL_CONFIG_MASK 0x3fff801f #else #define M_PERFCTL_CONFIG_MASK 0x1f #endif -#define M_PERFCTL_EVENT_MASK 0xfe0 +#define CNTR_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) #ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS -static int cpu_has_mipsmt_pertccounters; - static DEFINE_RWLOCK(pmuint_rwlock); #if defined(CONFIG_CPU_BMIPS5000) #define vpe_id() (cpu_has_mipsmt_pertccounters ? \ 0 : (smp_processor_id() & MIPS_CPUID_TO_COUNTER_MASK)) #else -/* - * FIXME: For VSMP, vpe_id() is redefined for Perf-events, because - * cpu_data[cpuid].vpe_id reports 0 for _both_ CPUs. - */ #define vpe_id() (cpu_has_mipsmt_pertccounters ? 
\ - 0 : smp_processor_id()) + 0 : cpu_vpe_id(&current_cpu_data)) #endif /* Copied from op_model_mipsxx.c */ @@ -180,6 +156,31 @@ static void pause_local_counters(void); static irqreturn_t mipsxx_pmu_handle_irq(int, void *); static int mipsxx_pmu_handle_shared_irq(void); +/* 0: Not Loongson-3 + * 1: Loongson-3A1000/3B1000/3B1500 + * 2: Loongson-3A2000/3A3000 + * 3: Loongson-3A4000+ + */ + +#define LOONGSON_PMU_TYPE0 0 +#define LOONGSON_PMU_TYPE1 1 +#define LOONGSON_PMU_TYPE2 2 +#define LOONGSON_PMU_TYPE3 3 + +static inline int get_loongson3_pmu_type(void) +{ + if (boot_cpu_type() != CPU_LOONGSON64) + return LOONGSON_PMU_TYPE0; + if ((boot_cpu_data.processor_id & PRID_COMP_MASK) == PRID_COMP_LEGACY) + return LOONGSON_PMU_TYPE1; + if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64C) + return LOONGSON_PMU_TYPE2; + if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G) + return LOONGSON_PMU_TYPE3; + + return LOONGSON_PMU_TYPE0; +} + static unsigned int mipsxx_pmu_swizzle_perf_idx(unsigned int idx) { if (vpe_id() == 1) @@ -212,17 +213,18 @@ static u64 mipsxx_pmu_read_counter(unsigned int idx) static u64 mipsxx_pmu_read_counter_64(unsigned int idx) { + u64 mask = CNTR_BIT_MASK(counter_bits); idx = mipsxx_pmu_swizzle_perf_idx(idx); switch (idx) { case 0: - return read_c0_perfcntr0_64(); + return read_c0_perfcntr0_64() & mask; case 1: - return read_c0_perfcntr1_64(); + return read_c0_perfcntr1_64() & mask; case 2: - return read_c0_perfcntr2_64(); + return read_c0_perfcntr2_64() & mask; case 3: - return read_c0_perfcntr3_64(); + return read_c0_perfcntr3_64() & mask; default: WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); return 0; @@ -251,6 +253,7 @@ static void mipsxx_pmu_write_counter(unsigned int idx, u64 val) static void mipsxx_pmu_write_counter_64(unsigned int idx, u64 val) { + val &= CNTR_BIT_MASK(counter_bits); idx = mipsxx_pmu_swizzle_perf_idx(idx); switch (idx) { @@ -312,17 +315,21 @@ static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { int i; + unsigned long cntr_mask; /* * We only need to care the counter mask. The range has been * checked definitely. */ - unsigned long cntr_mask = (hwc->event_base >> 8) & 0xffff; + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + cntr_mask = (hwc->event_base >> 10) & 0xffff; + else + cntr_mask = (hwc->event_base >> 8) & 0xffff; for (i = mipspmu.num_counters - 1; i >= 0; i--) { /* * Note that some MIPS perf events can be counted by both - * even and odd counters, wheresas many other are only by + * even and odd counters, whereas many other are only by * even _or_ odd counters. This introduces an issue that * when the former kind of event takes the counter the * latter kind of event wants to use, then the "counter @@ -340,19 +347,46 @@ static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc, static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct perf_event *event = container_of(evt, struct perf_event, hw); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + unsigned int range = evt->event_base >> 24; WARN_ON(idx < 0 || idx >= mipspmu.num_counters); - cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | - (evt->config_base & M_PERFCTL_CONFIG_MASK) | - /* Make sure interrupt enabled.
*/ - M_PERFCTL_INTERRUPT_ENABLE; - if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0x3ff) | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | + /* Make sure interrupt enabled. */ + MIPS_PERFCTRL_IE; + else + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | + /* Make sure interrupt enabled. */ + MIPS_PERFCTRL_IE; + + if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) { /* enable the counter for the calling thread */ cpuc->saved_ctrl[idx] |= - (1 << (12 + vpe_id())) | M_PERFCTL_TC; + (1 << (12 + vpe_id())) | BRCM_PERFCTRL_TC; + } else if (IS_ENABLED(CONFIG_MIPS_MT_SMP) && range > V) { + /* The counter is processor wide. Set it up to count all TCs. */ + pr_debug("Enabling perf counter for all TCs\n"); + cpuc->saved_ctrl[idx] |= M_TC_EN_ALL; + } else { + unsigned int cpu, ctrl; + + /* + * Set up the counter for a particular CPU when event->cpu is + * a valid CPU number. Otherwise set up the counter for the CPU + * scheduling this thread. + */ + cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); + ctrl = M_PERFCTL_VPEID(cpu_vpe_id(&cpu_data[cpu])); + ctrl |= M_TC_EN_VPE; + cpuc->saved_ctrl[idx] |= ctrl; + pr_debug("Enabling perf counter for CPU%d\n", cpu); + } /* * We do not actually let the counter run. Leave it until start(). */ @@ -360,7 +394,7 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) static void mipsxx_pmu_disable_event(int idx) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long flags; WARN_ON(idx < 0 || idx >= mipspmu.num_counters); @@ -401,6 +435,10 @@ static int mipspmu_event_set_period(struct perf_event *event, local64_set(&hwc->prev_count, mipspmu.overflow - left); + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + mipsxx_pmu_write_control(idx, + M_PERFCTL_EVENT(hwc->event_base & 0x3ff)); + mipspmu.write_counter(idx, mipspmu.overflow - left); perf_event_update_userpage(event); @@ -460,7 +498,7 @@ static void mipspmu_stop(struct perf_event *event, int flags) static int mipspmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; @@ -496,7 +534,7 @@ out: static void mipspmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -530,7 +568,7 @@ static void mipspmu_enable(struct pmu *pmu) /* * MIPS performance counters can be per-TC. The control registers can - * not be directly accessed accross CPUs. Hence if we want to do global + * not be directly accessed across CPUs. Hence if we want to do global * control, we need cross CPU calls. on_each_cpu() can help us, but we * can not make sure this function is called with interrupts enabled. So * here we pause local counters and then grab a rwlock and leave the @@ -558,11 +596,13 @@ static int mipspmu_get_irq(void) if (mipspmu.irq >= 0) { /* Request my own irq handler. 
*/ err = request_irq(mipspmu.irq, mipsxx_pmu_handle_irq, - IRQF_PERCPU | IRQF_NOBALANCING, - "mips_perf_pmu", NULL); + IRQF_PERCPU | IRQF_NOBALANCING | + IRQF_NO_THREAD | IRQF_NO_SUSPEND | + IRQF_SHARED, + "mips_perf_pmu", &mipspmu); if (err) { - pr_warning("Unable to request IRQ%d for MIPS " - "performance counters!\n", mipspmu.irq); + pr_warn("Unable to request IRQ%d for MIPS performance counters!\n", + mipspmu.irq); } } else if (cp0_perfcount_irq < 0) { /* @@ -572,8 +612,7 @@ static int mipspmu_get_irq(void) perf_irq = mipsxx_pmu_handle_shared_irq; err = 0; } else { - pr_warning("The platform hasn't properly defined its " - "interrupt controller.\n"); + pr_warn("The platform hasn't properly defined its interrupt controller\n"); err = -ENOENT; } @@ -583,7 +622,7 @@ static int mipspmu_get_irq(void) static void mipspmu_free_irq(void) { if (mipspmu.irq >= 0) - free_irq(mipspmu.irq, NULL); + free_irq(mipspmu.irq, &mipspmu); else if (cp0_perfcount_irq < 0) perf_irq = save_perf_irq; } @@ -628,8 +667,7 @@ static int mipspmu_event_init(struct perf_event *event) return -ENOENT; } - if (event->cpu >= nr_cpumask_bits || - (event->cpu >= 0 && !cpu_online(event->cpu))) + if (event->cpu >= 0 && !cpu_online(event->cpu)) return -ENODEV; if (!atomic_inc_not_zero(&active_events)) { @@ -666,13 +704,20 @@ static unsigned int mipspmu_perf_event_encode(const struct mips_perf_event *pev) * event_id. */ #ifdef CONFIG_MIPS_MT_SMP - return ((unsigned int)pev->range << 24) | - (pev->cntr_mask & 0xffff00) | - (pev->event_id & 0xff); -#else - return (pev->cntr_mask & 0xffff00) | - (pev->event_id & 0xff); -#endif + if (num_possible_cpus() > 1) + return ((unsigned int)pev->range << 24) | + (pev->cntr_mask & 0xffff00) | + (pev->event_id & 0xff); + else +#endif /* CONFIG_MIPS_MT_SMP */ + { + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + return (pev->cntr_mask & 0xfffc00) | + (pev->event_id & 0x3ff); + else + return (pev->cntr_mask & 0xffff00) | + (pev->event_id & 0xff); + } } static const struct mips_perf_event *mipspmu_map_general_event(int idx) @@ -722,7 +767,7 @@ static int validate_group(struct perf_event *event) if (mipsxx_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) return -EINVAL; - list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + for_each_sibling_event(sibling, leader) { if (mipsxx_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) return -EINVAL; } @@ -746,20 +791,19 @@ static void handle_associated_event(struct cpu_hw_events *cpuc, if (!mipspmu_event_set_period(event, hwc, idx)) return; - if (perf_event_overflow(event, data, regs)) - mipsxx_pmu_disable_event(idx); + perf_event_overflow(event, data, regs); } static int __n_counters(void) { - if (!(read_c0_config1() & M_CONFIG1_PC)) + if (!cpu_has_perf) return 0; - if (!(read_c0_perfctrl0() & M_PERFCTL_MORE)) + if (!(read_c0_perfctrl0() & MIPS_PERFCTRL_M)) return 1; - if (!(read_c0_perfctrl1() & M_PERFCTL_MORE)) + if (!(read_c0_perfctrl1() & MIPS_PERFCTRL_M)) return 2; - if (!(read_c0_perfctrl2() & M_PERFCTL_MORE)) + if (!(read_c0_perfctrl2() & MIPS_PERFCTRL_M)) return 3; return 4; @@ -776,6 +820,7 @@ static int n_counters(void) case CPU_R12000: case CPU_R14000: + case CPU_R16000: counters = 4; break; @@ -786,26 +831,108 @@ static int n_counters(void) return counters; } +static void loongson3_reset_counters(void *arg) +{ + int counters = (int)(long)arg; + + switch (counters) { + case 4: + mipsxx_pmu_write_control(3, 0); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 127<<5); + mipspmu.write_counter(3, 0); + 
mipsxx_pmu_write_control(3, 191<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 255<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 319<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 383<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 575<<5); + mipspmu.write_counter(3, 0); + fallthrough; + case 3: + mipsxx_pmu_write_control(2, 0); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 127<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 191<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 255<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 319<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 383<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 575<<5); + mipspmu.write_counter(2, 0); + fallthrough; + case 2: + mipsxx_pmu_write_control(1, 0); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 127<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 191<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 255<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 319<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 383<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 575<<5); + mipspmu.write_counter(1, 0); + fallthrough; + case 1: + mipsxx_pmu_write_control(0, 0); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 127<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 191<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 255<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 319<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 383<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 575<<5); + mipspmu.write_counter(0, 0); + break; + } +} + static void reset_counters(void *arg) { int counters = (int)(long)arg; + + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) { + loongson3_reset_counters(arg); + return; + } + switch (counters) { case 4: mipsxx_pmu_write_control(3, 0); mipspmu.write_counter(3, 0); + fallthrough; case 3: mipsxx_pmu_write_control(2, 0); mipspmu.write_counter(2, 0); + fallthrough; case 2: mipsxx_pmu_write_control(1, 0); mipspmu.write_counter(1, 0); + fallthrough; case 1: mipsxx_pmu_write_control(0, 0); mipspmu.write_counter(0, 0); + break; } } -/* 24K/34K/1004K cores can share the same event map. */ +/* 24K/34K/1004K/interAptiv/loongson1 cores share the same event map. */ static const struct mips_perf_event mipsxxcore_event_map [PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN | CNTR_ODD, P }, @@ -814,8 +941,8 @@ static const struct mips_perf_event mipsxxcore_event_map [PERF_COUNT_HW_BRANCH_MISSES] = { 0x02, CNTR_ODD, T }, }; -/* 74K core has different branch event code. */ -static const struct mips_perf_event mipsxx74Kcore_event_map +/* 74K/proAptiv core has different branch event code. 
*/ +static const struct mips_perf_event mipsxxcore_event_map2 [PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN | CNTR_ODD, P }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01, CNTR_EVEN | CNTR_ODD, T }, @@ -823,6 +950,40 @@ static const struct mips_perf_event mipsxx74Kcore_event_map [PERF_COUNT_HW_BRANCH_MISSES] = { 0x27, CNTR_ODD, T }, }; +static const struct mips_perf_event i6x00_event_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN | CNTR_ODD }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01, CNTR_EVEN | CNTR_ODD }, + /* These only count dcache, not icache */ + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x45, CNTR_EVEN | CNTR_ODD }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x48, CNTR_EVEN | CNTR_ODD }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x15, CNTR_EVEN | CNTR_ODD }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x16, CNTR_EVEN | CNTR_ODD }, +}; + +static const struct mips_perf_event loongson3_event_map1[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, CNTR_ODD }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x01, CNTR_EVEN }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x01, CNTR_ODD }, +}; + +static const struct mips_perf_event loongson3_event_map2[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x80, CNTR_ALL }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x81, CNTR_ALL }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x18, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x94, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x9c, CNTR_ALL }, +}; + +static const struct mips_perf_event loongson3_event_map3[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_ALL }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01, CNTR_ALL }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x1c, CNTR_ALL }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x1d, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x08, CNTR_ALL }, +}; + static const struct mips_perf_event octeon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x01, CNTR_ALL }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x03, CNTR_ALL }, @@ -840,16 +1001,7 @@ static const struct mips_perf_event bmips5000_event_map [PERF_COUNT_HW_BRANCH_MISSES] = { 0x02, CNTR_ODD, T }, }; -static const struct mips_perf_event xlp_event_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = { 0x01, CNTR_ALL }, - [PERF_COUNT_HW_INSTRUCTIONS] = { 0x18, CNTR_ALL }, /* PAPI_TOT_INS */ - [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x04, CNTR_ALL }, /* PAPI_L1_ICA */ - [PERF_COUNT_HW_CACHE_MISSES] = { 0x07, CNTR_ALL }, /* PAPI_L1_ICM */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x1b, CNTR_ALL }, /* PAPI_BR_CN */ - [PERF_COUNT_HW_BRANCH_MISSES] = { 0x1c, CNTR_ALL }, /* PAPI_BR_MSP */ -}; - -/* 24K/34K/1004K cores can share the same cache event map. */ +/* 24K/34K/1004K/interAptiv/loongson1 cores share the same cache event map. */ static const struct mips_perf_event mipsxxcore_cache_map [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -930,8 +1082,8 @@ static const struct mips_perf_event mipsxxcore_cache_map }, }; -/* 74K core has completely different cache event map. */ -static const struct mips_perf_event mipsxx74Kcore_cache_map +/* 74K/proAptiv core has completely different cache event map. 
*/ +static const struct mips_perf_event mipsxxcore_cache_map2 [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -971,13 +1123,18 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map [C(LL)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = { 0x1c, CNTR_ODD, P }, - [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN | CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN, P }, }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = { 0x1c, CNTR_ODD, P }, - [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN | CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN, P }, }, }, +/* + * 74K core does not have specific DTLB events. proAptiv core has + * "speculative" DTLB events which are numbered 0x63 (even/odd) and + * not included here. One can use raw events if really needed. + */ [C(ITLB)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = { 0x04, CNTR_EVEN, T }, @@ -1001,8 +1158,47 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map }, }; -/* BMIPS5000 */ -static const struct mips_perf_event bmips5000_cache_map +static const struct mips_perf_event i6x00_cache_map + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x46, CNTR_EVEN | CNTR_ODD }, + [C(RESULT_MISS)] = { 0x49, CNTR_EVEN | CNTR_ODD }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x47, CNTR_EVEN | CNTR_ODD }, + [C(RESULT_MISS)] = { 0x4a, CNTR_EVEN | CNTR_ODD }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x84, CNTR_EVEN | CNTR_ODD }, + [C(RESULT_MISS)] = { 0x85, CNTR_EVEN | CNTR_ODD }, + }, +}, +[C(DTLB)] = { + /* Can't distinguish read & write */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x40, CNTR_EVEN | CNTR_ODD }, + [C(RESULT_MISS)] = { 0x41, CNTR_EVEN | CNTR_ODD }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x40, CNTR_EVEN | CNTR_ODD }, + [C(RESULT_MISS)] = { 0x41, CNTR_EVEN | CNTR_ODD }, + }, +}, +[C(BPU)] = { + /* Conditional branches / mispredicted */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x15, CNTR_EVEN | CNTR_ODD }, + [C(RESULT_MISS)] = { 0x16, CNTR_EVEN | CNTR_ODD }, + }, +}, +}; + +static const struct mips_perf_event loongson3_cache_map1 [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -1014,180 +1210,262 @@ static const struct mips_perf_event bmips5000_cache_map * best we can do. Writes and reads get combined. */ [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 12, CNTR_EVEN, T }, - [C(RESULT_MISS)] = { 12, CNTR_ODD, T }, + [C(RESULT_MISS)] = { 0x04, CNTR_ODD }, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { 12, CNTR_EVEN, T }, - [C(RESULT_MISS)] = { 12, CNTR_ODD, T }, + [C(RESULT_MISS)] = { 0x04, CNTR_ODD }, }, }, [C(L1I)] = { [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 10, CNTR_EVEN, T }, - [C(RESULT_MISS)] = { 10, CNTR_ODD, T }, + [C(RESULT_MISS)] = { 0x04, CNTR_EVEN }, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { 10, CNTR_EVEN, T }, - [C(RESULT_MISS)] = { 10, CNTR_ODD, T }, + [C(RESULT_MISS)] = { 0x04, CNTR_EVEN }, }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = { 23, CNTR_EVEN, T }, - /* - * Note that MIPS has only "hit" events countable for - * the prefetch operation. 
- */ +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x09, CNTR_ODD }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x09, CNTR_ODD }, }, }, -[C(LL)] = { +[C(ITLB)] = { [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 28, CNTR_EVEN, P }, - [C(RESULT_MISS)] = { 28, CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x0c, CNTR_ODD }, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { 28, CNTR_EVEN, P }, - [C(RESULT_MISS)] = { 28, CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x0c, CNTR_ODD }, }, }, [C(BPU)] = { /* Using the same code for *HW_BRANCH* */ [C(OP_READ)] = { - [C(RESULT_MISS)] = { 0x02, CNTR_ODD, T }, + [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN }, + [C(RESULT_MISS)] = { 0x01, CNTR_ODD }, }, [C(OP_WRITE)] = { - [C(RESULT_MISS)] = { 0x02, CNTR_ODD, T }, + [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN }, + [C(RESULT_MISS)] = { 0x01, CNTR_ODD }, }, }, }; - -static const struct mips_perf_event octeon_cache_map +static const struct mips_perf_event loongson3_cache_map2 [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. + */ [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 0x2b, CNTR_ALL }, - [C(RESULT_MISS)] = { 0x2e, CNTR_ALL }, + [C(RESULT_ACCESS)] = { 0x156, CNTR_ALL }, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { 0x30, CNTR_ALL }, + [C(RESULT_ACCESS)] = { 0x155, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x153, CNTR_ALL }, }, }, [C(L1I)] = { [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 0x18, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x18, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x18, CNTR_ALL }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x1b6, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x1b7, CNTR_ALL }, }, [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = { 0x19, CNTR_ALL }, + [C(RESULT_ACCESS)] = { 0x1bf, CNTR_ALL }, }, }, [C(DTLB)] = { - /* - * Only general DTLB misses are counted use the same event for - * read and write. - */ [C(OP_READ)] = { - [C(RESULT_MISS)] = { 0x35, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x92, CNTR_ALL }, }, [C(OP_WRITE)] = { - [C(RESULT_MISS)] = { 0x35, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x92, CNTR_ALL }, }, }, [C(ITLB)] = { [C(OP_READ)] = { - [C(RESULT_MISS)] = { 0x37, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1a, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x1a, CNTR_ALL }, + }, +}, +[C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x94, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x9c, CNTR_ALL }, }, }, }; -static const struct mips_perf_event xlp_cache_map +static const struct mips_perf_event loongson3_cache_map3 [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. 
+ */ [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 0x31, CNTR_ALL }, /* PAPI_L1_DCR */ - [C(RESULT_MISS)] = { 0x30, CNTR_ALL }, /* PAPI_L1_LDM */ + [C(RESULT_ACCESS)] = { 0x1e, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1f, CNTR_ALL }, }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { 0x2f, CNTR_ALL }, /* PAPI_L1_DCW */ - [C(RESULT_MISS)] = { 0x2e, CNTR_ALL }, /* PAPI_L1_STM */ + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 0xaa, CNTR_ALL }, + [C(RESULT_MISS)] = { 0xa9, CNTR_ALL }, }, }, [C(L1I)] = { [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 0x04, CNTR_ALL }, /* PAPI_L1_ICA */ - [C(RESULT_MISS)] = { 0x07, CNTR_ALL }, /* PAPI_L1_ICM */ + [C(RESULT_ACCESS)] = { 0x1c, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1d, CNTR_ALL }, }, }, [C(LL)] = { [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 0x35, CNTR_ALL }, /* PAPI_L2_DCR */ - [C(RESULT_MISS)] = { 0x37, CNTR_ALL }, /* PAPI_L2_LDM */ - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { 0x34, CNTR_ALL }, /* PAPI_L2_DCA */ - [C(RESULT_MISS)] = { 0x36, CNTR_ALL }, /* PAPI_L2_DCM */ + [C(RESULT_ACCESS)] = { 0x2e, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x2f, CNTR_ALL }, }, }, [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x14, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1b, CNTR_ALL }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x1a, CNTR_ALL }, + }, +}, +[C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x08, CNTR_ALL }, + }, +}, +}; + +/* BMIPS5000 */ +static const struct mips_perf_event bmips5000_cache_map + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { /* - * Only general DTLB misses are counted use the same event for - * read and write. + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. */ [C(OP_READ)] = { - [C(RESULT_MISS)] = { 0x2d, CNTR_ALL }, /* PAPI_TLB_DM */ + [C(RESULT_ACCESS)] = { 12, CNTR_EVEN, T }, + [C(RESULT_MISS)] = { 12, CNTR_ODD, T }, }, [C(OP_WRITE)] = { - [C(RESULT_MISS)] = { 0x2d, CNTR_ALL }, /* PAPI_TLB_DM */ + [C(RESULT_ACCESS)] = { 12, CNTR_EVEN, T }, + [C(RESULT_MISS)] = { 12, CNTR_ODD, T }, }, }, -[C(ITLB)] = { +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 10, CNTR_EVEN, T }, + [C(RESULT_MISS)] = { 10, CNTR_ODD, T }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 10, CNTR_EVEN, T }, + [C(RESULT_MISS)] = { 10, CNTR_ODD, T }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 23, CNTR_EVEN, T }, + /* + * Note that MIPS has only "hit" events countable for + * the prefetch operation. 
+ */ + }, +}, +[C(LL)] = { [C(OP_READ)] = { - [C(RESULT_MISS)] = { 0x08, CNTR_ALL }, /* PAPI_TLB_IM */ + [C(RESULT_ACCESS)] = { 28, CNTR_EVEN, P }, + [C(RESULT_MISS)] = { 28, CNTR_ODD, P }, }, [C(OP_WRITE)] = { - [C(RESULT_MISS)] = { 0x08, CNTR_ALL }, /* PAPI_TLB_IM */ + [C(RESULT_ACCESS)] = { 28, CNTR_EVEN, P }, + [C(RESULT_MISS)] = { 28, CNTR_ODD, P }, }, }, [C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ [C(OP_READ)] = { - [C(RESULT_MISS)] = { 0x25, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x02, CNTR_ODD, T }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x02, CNTR_ODD, T }, }, }, }; -#ifdef CONFIG_MIPS_MT_SMP -static void check_and_calc_range(struct perf_event *event, - const struct mips_perf_event *pev) -{ - struct hw_perf_event *hwc = &event->hw; - - if (event->cpu >= 0) { - if (pev->range > V) { - /* - * The user selected an event that is processor - * wide, while expecting it to be VPE wide. - */ - hwc->config_base |= M_TC_EN_ALL; - } else { - /* - * FIXME: cpu_data[event->cpu].vpe_id reports 0 - * for both CPUs. - */ - hwc->config_base |= M_PERFCTL_VPEID(event->cpu); - hwc->config_base |= M_TC_EN_VPE; - } - } else - hwc->config_base |= M_TC_EN_ALL; -} -#else -static void check_and_calc_range(struct perf_event *event, - const struct mips_perf_event *pev) -{ -} -#endif +static const struct mips_perf_event octeon_cache_map + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x2b, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x2e, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x30, CNTR_ALL }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x18, CNTR_ALL }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 0x19, CNTR_ALL }, + }, +}, +[C(DTLB)] = { + /* + * Only general DTLB misses are counted use the same event for + * read and write. + */ + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x35, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x35, CNTR_ALL }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x37, CNTR_ALL }, + }, +}, +}; static int __hw_perf_event_init(struct perf_event *event) { @@ -1222,25 +1500,21 @@ static int __hw_perf_event_init(struct perf_event *event) * We allow max flexibility on how each individual counter shared * by the single CPU operates (the mode exclusion and the range). */ - hwc->config_base = M_PERFCTL_INTERRUPT_ENABLE; - - /* Calculate range bits and validate it. 
*/ - if (num_possible_cpus() > 1) - check_and_calc_range(event, pev); + hwc->config_base = MIPS_PERFCTRL_IE; hwc->event_base = mipspmu_perf_event_encode(pev); if (PERF_TYPE_RAW == event->attr.type) mutex_unlock(&raw_event_mutex); if (!attr->exclude_user) - hwc->config_base |= M_PERFCTL_USER; + hwc->config_base |= MIPS_PERFCTRL_U; if (!attr->exclude_kernel) { - hwc->config_base |= M_PERFCTL_KERNEL; + hwc->config_base |= MIPS_PERFCTRL_K; /* MIPS kernel mode: KSU == 00b || EXL == 1 || ERL == 1 */ - hwc->config_base |= M_PERFCTL_EXL; + hwc->config_base |= MIPS_PERFCTRL_EXL; } if (!attr->exclude_hv) - hwc->config_base |= M_PERFCTL_SUPERVISOR; + hwc->config_base |= MIPS_PERFCTRL_S; hwc->config_base &= M_PERFCTL_CONFIG_MASK; /* @@ -1270,7 +1544,7 @@ static int __hw_perf_event_init(struct perf_event *event) static void pause_local_counters(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int ctr = mipspmu.num_counters; unsigned long flags; @@ -1286,7 +1560,7 @@ static void pause_local_counters(void) static void resume_local_counters(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int ctr = mipspmu.num_counters; do { @@ -1297,11 +1571,11 @@ static void resume_local_counters(void) static int mipsxx_pmu_handle_shared_irq(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_sample_data data; unsigned int counters = mipspmu.num_counters; u64 counter; - int handled = IRQ_NONE; + int n, handled = IRQ_NONE; struct pt_regs *regs; if (cpu_has_perf_cntr_intr_bit && !(read_c0_cause() & CAUSEF_PCI)) @@ -1322,22 +1596,23 @@ static int mipsxx_pmu_handle_shared_irq(void) perf_sample_data_init(&data, 0, 0); - switch (counters) { -#define HANDLE_COUNTER(n) \ - case n + 1: \ - if (test_bit(n, cpuc->used_mask)) { \ - counter = mipspmu.read_counter(n); \ - if (counter & mipspmu.overflow) { \ - handle_associated_event(cpuc, n, &data, regs); \ - handled = IRQ_HANDLED; \ - } \ - } - HANDLE_COUNTER(3) - HANDLE_COUNTER(2) - HANDLE_COUNTER(1) - HANDLE_COUNTER(0) + for (n = counters - 1; n >= 0; n--) { + if (!test_bit(n, cpuc->used_mask)) + continue; + + counter = mipspmu.read_counter(n); + if (!(counter & mipspmu.overflow)) + continue; + + handle_associated_event(cpuc, n, &data, regs); + handled = IRQ_HANDLED; } +#ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS + read_unlock(&pmuint_rwlock); +#endif + resume_local_counters(); + /* * Do all the work for the pending perf events. 
We can do this * in here because the performance counter interrupt is a regular @@ -1346,10 +1621,6 @@ static int mipsxx_pmu_handle_shared_irq(void) if (handled == IRQ_HANDLED) irq_work_run(); -#ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS - read_unlock(&pmuint_rwlock); -#endif - resume_local_counters(); return handled; } @@ -1378,6 +1649,13 @@ static irqreturn_t mipsxx_pmu_handle_irq(int irq, void *dev) #define IS_BOTH_COUNTERS_74K_EVENT(b) \ ((b) == 0 || (b) == 1) +/* proAptiv */ +#define IS_BOTH_COUNTERS_PROAPTIV_EVENT(b) \ + ((b) == 0 || (b) == 1) +/* P5600 */ +#define IS_BOTH_COUNTERS_P5600_EVENT(b) \ + ((b) == 0 || (b) == 1) + /* 1004K */ #define IS_BOTH_COUNTERS_1004K_EVENT(b) \ ((b) == 0 || (b) == 1 || (b) == 11) @@ -1391,26 +1669,44 @@ static irqreturn_t mipsxx_pmu_handle_irq(int irq, void *dev) #define IS_RANGE_V_1004K_EVENT(r) ((r) == 47) #endif +/* interAptiv */ +#define IS_BOTH_COUNTERS_INTERAPTIV_EVENT(b) \ + ((b) == 0 || (b) == 1 || (b) == 11) +#ifdef CONFIG_MIPS_MT_SMP +/* The P/V/T info is not provided for "(b) == 38" in SUM, assume P. */ +#define IS_RANGE_P_INTERAPTIV_EVENT(r, b) \ + ((b) == 0 || (r) == 18 || (b) == 21 || (b) == 22 || \ + (b) == 25 || (b) == 36 || (b) == 38 || (b) == 39 || \ + (r) == 44 || (r) == 174 || (r) == 176 || ((b) >= 50 && \ + (b) <= 59) || (r) == 188 || (b) == 61 || (b) == 62 || \ + ((b) >= 64 && (b) <= 67)) +#define IS_RANGE_V_INTERAPTIV_EVENT(r) ((r) == 47 || (r) == 175) +#endif + /* BMIPS5000 */ #define IS_BOTH_COUNTERS_BMIPS5000_EVENT(b) \ ((b) == 0 || (b) == 1) /* - * User can use 0-255 raw events, where 0-127 for the events of even - * counters, and 128-255 for odd counters. Note that bit 7 is used to - * indicate the parity. So, for example, when user wants to take the - * Event Num of 15 for odd counters (by referring to the user manual), - * then 128 needs to be added to 15 as the input for the event config, - * i.e., 143 (0x8F) to be used. + * For most cores the user can use 0-255 raw events, where 0-127 for the events + * of even counters, and 128-255 for odd counters. Note that bit 7 is used to + * indicate the even/odd bank selector. So, for example, when user wants to take + * the Event Num of 15 for odd counters (by referring to the user manual), then + * 128 needs to be added to 15 as the input for the event config, i.e., 143 (0x8F) + * to be used. + * + * Some newer cores have even more events, in which case the user can use raw + * events 0-511, where 0-255 are for the events of even counters, and 256-511 + * are for odd counters, so bit 8 is used to indicate the even/odd bank selector. */ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) { + /* currently most cores have 7-bit event numbers */ + int pmu_type; unsigned int raw_id = config & 0xff; unsigned int base_id = raw_id & 0x7f; - raw_event.event_id = base_id; - switch (current_cpu_type()) { case CPU_24K: if (IS_BOTH_COUNTERS_24K_EVENT(base_id)) @@ -1442,6 +1738,7 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) #endif break; case CPU_74K: + case CPU_1074K: if (IS_BOTH_COUNTERS_74K_EVENT(base_id)) raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; else @@ -1451,6 +1748,36 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) raw_event.range = P; #endif break; + case CPU_PROAPTIV: + if (IS_BOTH_COUNTERS_PROAPTIV_EVENT(base_id)) + raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; + else + raw_event.cntr_mask = + raw_id > 127 ? 
CNTR_ODD : CNTR_EVEN; +#ifdef CONFIG_MIPS_MT_SMP + raw_event.range = P; +#endif + break; + case CPU_P5600: + case CPU_P6600: + /* 8-bit event numbers */ + raw_id = config & 0x1ff; + base_id = raw_id & 0xff; + if (IS_BOTH_COUNTERS_P5600_EVENT(base_id)) + raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; + else + raw_event.cntr_mask = + raw_id > 255 ? CNTR_ODD : CNTR_EVEN; +#ifdef CONFIG_MIPS_MT_SMP + raw_event.range = P; +#endif + break; + case CPU_I6400: + case CPU_I6500: + /* 8-bit event numbers */ + base_id = config & 0xff; + raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; + break; case CPU_1004K: if (IS_BOTH_COUNTERS_1004K_EVENT(base_id)) raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; @@ -1466,32 +1793,81 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) raw_event.range = T; #endif break; + case CPU_INTERAPTIV: + if (IS_BOTH_COUNTERS_INTERAPTIV_EVENT(base_id)) + raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; + else + raw_event.cntr_mask = + raw_id > 127 ? CNTR_ODD : CNTR_EVEN; +#ifdef CONFIG_MIPS_MT_SMP + if (IS_RANGE_P_INTERAPTIV_EVENT(raw_id, base_id)) + raw_event.range = P; + else if (unlikely(IS_RANGE_V_INTERAPTIV_EVENT(raw_id))) + raw_event.range = V; + else + raw_event.range = T; +#endif + break; case CPU_BMIPS5000: if (IS_BOTH_COUNTERS_BMIPS5000_EVENT(base_id)) raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; else raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN; + break; + case CPU_LOONGSON64: + pmu_type = get_loongson3_pmu_type(); + + switch (pmu_type) { + case LOONGSON_PMU_TYPE1: + raw_event.cntr_mask = + raw_id > 127 ? CNTR_ODD : CNTR_EVEN; + break; + case LOONGSON_PMU_TYPE2: + base_id = config & 0x3ff; + raw_event.cntr_mask = CNTR_ALL; + + if ((base_id >= 1 && base_id < 28) || + (base_id >= 64 && base_id < 90) || + (base_id >= 128 && base_id < 164) || + (base_id >= 192 && base_id < 200) || + (base_id >= 256 && base_id < 275) || + (base_id >= 320 && base_id < 361) || + (base_id >= 384 && base_id < 574)) + break; + + return ERR_PTR(-EOPNOTSUPP); + case LOONGSON_PMU_TYPE3: + base_id = raw_id; + raw_event.cntr_mask = CNTR_ALL; + break; + } + break; } + raw_event.event_id = base_id; + return &raw_event; } static const struct mips_perf_event *octeon_pmu_map_raw_event(u64 config) { - unsigned int raw_id = config & 0xff; - unsigned int base_id = raw_id & 0x7f; + unsigned int base_id = config & 0x7f; + unsigned int event_max; raw_event.cntr_mask = CNTR_ALL; raw_event.event_id = base_id; - if (current_cpu_type() == CPU_CAVIUM_OCTEON2) { - if (base_id > 0x42) - return ERR_PTR(-EOPNOTSUPP); - } else { - if (base_id > 0x3a) - return ERR_PTR(-EOPNOTSUPP); + if (current_cpu_type() == CPU_CAVIUM_OCTEON3) + event_max = 0x5f; + else if (current_cpu_type() == CPU_CAVIUM_OCTEON2) + event_max = 0x42; + else + event_max = 0x3a; + + if (base_id > event_max) { + return ERR_PTR(-EOPNOTSUPP); } switch (base_id) { @@ -1501,7 +1877,7 @@ static const struct mips_perf_event *octeon_pmu_map_raw_event(u64 config) case 0x1f: case 0x2f: case 0x34: - case 0x3b ... 0x3f: + case 0x3e ... 
0x3f: return ERR_PTR(-EOPNOTSUPP); default: break; @@ -1510,25 +1886,10 @@ static const struct mips_perf_event *octeon_pmu_map_raw_event(u64 config) return &raw_event; } -static const struct mips_perf_event *xlp_pmu_map_raw_event(u64 config) -{ - unsigned int raw_id = config & 0xff; - - /* Only 1-63 are defined */ - if ((raw_id < 0x01) || (raw_id > 0x3f)) - return ERR_PTR(-EOPNOTSUPP); - - raw_event.cntr_mask = CNTR_ALL; - raw_event.event_id = raw_id; - - return &raw_event; -} - static int __init init_hw_perf_events(void) { - int counters, irq; - int counter_bits; + int counters, irq, pmu_type; pr_info("Performance counters: "); @@ -1539,27 +1900,16 @@ init_hw_perf_events(void) } #ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS - cpu_has_mipsmt_pertccounters = read_c0_config7() & (1<<19); if (!cpu_has_mipsmt_pertccounters) counters = counters_total_to_per_cpu(counters); #endif -#ifdef MSC01E_INT_BASE - if (cpu_has_veic) { - /* - * Using platform specific interrupt controller defines. - */ - irq = MSC01E_INT_BASE + MSC01E_INT_PERFCTR; - } else { -#endif - if ((cp0_perfcount_irq >= 0) && - (cp0_compare_irq != cp0_perfcount_irq)) - irq = MIPS_CPU_IRQ_BASE + cp0_perfcount_irq; - else - irq = -1; -#ifdef MSC01E_INT_BASE - } -#endif + if (get_c0_perfcount_int) + irq = get_c0_perfcount_int(); + else if (cp0_perfcount_irq >= 0) + irq = MIPS_CPU_IRQ_BASE + cp0_perfcount_irq; + else + irq = -1; mipspmu.map_raw_event = mipsxx_pmu_map_raw_event; @@ -1576,22 +1926,80 @@ init_hw_perf_events(void) break; case CPU_74K: mipspmu.name = "mips/74K"; - mipspmu.general_event_map = &mipsxx74Kcore_event_map; - mipspmu.cache_event_map = &mipsxx74Kcore_cache_map; + mipspmu.general_event_map = &mipsxxcore_event_map2; + mipspmu.cache_event_map = &mipsxxcore_cache_map2; + break; + case CPU_PROAPTIV: + mipspmu.name = "mips/proAptiv"; + mipspmu.general_event_map = &mipsxxcore_event_map2; + mipspmu.cache_event_map = &mipsxxcore_cache_map2; + break; + case CPU_P5600: + mipspmu.name = "mips/P5600"; + mipspmu.general_event_map = &mipsxxcore_event_map2; + mipspmu.cache_event_map = &mipsxxcore_cache_map2; + break; + case CPU_P6600: + mipspmu.name = "mips/P6600"; + mipspmu.general_event_map = &mipsxxcore_event_map2; + mipspmu.cache_event_map = &mipsxxcore_cache_map2; + break; + case CPU_I6400: + mipspmu.name = "mips/I6400"; + mipspmu.general_event_map = &i6x00_event_map; + mipspmu.cache_event_map = &i6x00_cache_map; + break; + case CPU_I6500: + mipspmu.name = "mips/I6500"; + mipspmu.general_event_map = &i6x00_event_map; + mipspmu.cache_event_map = &i6x00_cache_map; break; case CPU_1004K: mipspmu.name = "mips/1004K"; mipspmu.general_event_map = &mipsxxcore_event_map; mipspmu.cache_event_map = &mipsxxcore_cache_map; break; - case CPU_LOONGSON1: + case CPU_1074K: + mipspmu.name = "mips/1074K"; + mipspmu.general_event_map = &mipsxxcore_event_map; + mipspmu.cache_event_map = &mipsxxcore_cache_map; + break; + case CPU_INTERAPTIV: + mipspmu.name = "mips/interAptiv"; + mipspmu.general_event_map = &mipsxxcore_event_map; + mipspmu.cache_event_map = &mipsxxcore_cache_map; + break; + case CPU_LOONGSON32: mipspmu.name = "mips/loongson1"; mipspmu.general_event_map = &mipsxxcore_event_map; mipspmu.cache_event_map = &mipsxxcore_cache_map; break; + case CPU_LOONGSON64: + mipspmu.name = "mips/loongson3"; + pmu_type = get_loongson3_pmu_type(); + + switch (pmu_type) { + case LOONGSON_PMU_TYPE1: + counters = 2; + mipspmu.general_event_map = &loongson3_event_map1; + mipspmu.cache_event_map = &loongson3_cache_map1; + break; + case LOONGSON_PMU_TYPE2: + 
counters = 4; + mipspmu.general_event_map = &loongson3_event_map2; + mipspmu.cache_event_map = &loongson3_cache_map2; + break; + case LOONGSON_PMU_TYPE3: + counters = 4; + mipspmu.general_event_map = &loongson3_event_map3; + mipspmu.cache_event_map = &loongson3_cache_map3; + break; + } + break; case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: mipspmu.name = "octeon"; mipspmu.general_event_map = &octeon_event_map; mipspmu.cache_event_map = &octeon_cache_map; @@ -1602,12 +2010,6 @@ init_hw_perf_events(void) mipspmu.general_event_map = &bmips5000_event_map; mipspmu.cache_event_map = &bmips5000_cache_map; break; - case CPU_XLP: - mipspmu.name = "xlp"; - mipspmu.general_event_map = &xlp_event_map; - mipspmu.cache_event_map = &xlp_cache_map; - mipspmu.map_raw_event = xlp_pmu_map_raw_event; - break; default: pr_cont("Either hardware does not support performance " "counters, or not yet implemented.\n"); @@ -1617,20 +2019,27 @@ init_hw_perf_events(void) mipspmu.num_counters = counters; mipspmu.irq = irq; - if (read_c0_perfctrl0() & M_PERFCTL_WIDE) { - mipspmu.max_period = (1ULL << 63) - 1; - mipspmu.valid_count = (1ULL << 63) - 1; - mipspmu.overflow = 1ULL << 63; + if (read_c0_perfctrl0() & MIPS_PERFCTRL_W) { + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) { + counter_bits = 48; + mipspmu.max_period = (1ULL << 47) - 1; + mipspmu.valid_count = (1ULL << 47) - 1; + mipspmu.overflow = 1ULL << 47; + } else { + counter_bits = 64; + mipspmu.max_period = (1ULL << 63) - 1; + mipspmu.valid_count = (1ULL << 63) - 1; + mipspmu.overflow = 1ULL << 63; + } mipspmu.read_counter = mipsxx_pmu_read_counter_64; mipspmu.write_counter = mipsxx_pmu_write_counter_64; - counter_bits = 64; } else { + counter_bits = 32; mipspmu.max_period = (1ULL << 31) - 1; mipspmu.valid_count = (1ULL << 31) - 1; mipspmu.overflow = 1ULL << 31; mipspmu.read_counter = mipsxx_pmu_read_counter; mipspmu.write_counter = mipsxx_pmu_write_counter; - counter_bits = 32; } on_each_cpu(reset_counters, (void *)(long)counters, 1); |
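A note on the counter-width masking and raw-event encoding introduced above (not part of the patch itself): the new CNTR_BIT_MASK() helper truncates 64-bit counter reads/writes to the real hardware width (48 bits on Loongson-3A2000/3A3000, 32 or 64 bits elsewhere), and the raw-event comment explains that bit 7 (bit 8 on newer cores) selects the odd counter bank, so event 15 on an odd counter is encoded as 0x8f. The following stand-alone user-space C sketch only mirrors that arithmetic for illustration; it is not kernel code and the printed values are examples.

#include <stdio.h>
#include <stdint.h>

/* Same definition as the patch: all ones for n == 64, else the low n bits. */
#define CNTR_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
	/* Loongson-3A2000/3A3000 counters are 48 bits wide, so wider reads get masked. */
	uint64_t raw = 0xffff123456789abcULL;
	printf("48-bit masked value: 0x%012llx\n",
	       (unsigned long long)(raw & CNTR_BIT_MASK(48)));

	/* Event 15 on an odd counter: set bit 7, giving the raw config 0x8f (143). */
	unsigned int config  = 0x8f;
	unsigned int base_id = config & 0x7f;		/* event number: 15 */
	unsigned int odd     = (config & 0xff) > 127;	/* even/odd bank selector */
	printf("raw 0x%02x -> event %u on %s counters\n",
	       config, base_id, odd ? "odd" : "even");
	return 0;
}

With the perf tool, such an encoding would typically be requested as a raw event, e.g. "perf stat -e r8f <command>", assuming the usual rNNN hexadecimal raw-event syntax.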
