From 6b1814cde5c79c6aa4d02c9aedc14a709c2c0737 Mon Sep 17 00:00:00 2001 From: Maxime Coquelin stm32 Date: Fri, 10 Apr 2015 09:46:46 +0100 Subject: ARM: 8340/1: ARMv7-M: Enlarge vector table up to 256 entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From Cortex-M reference manuals, the nvic supports up to 240 interrupts. So the number of entries in vectors table is up to 256. This patch adds a new config flag to specify the number of external interrupts. Some ifdeferies are added in order to respect the natural alignment without wasting too much space on smaller systems. Acked-by: Uwe Kleine-König Acked-by: Stefan Agner Tested-by: Chanwoo Choi Signed-off-by: Maxime Coquelin Signed-off-by: Russell King --- arch/arm/kernel/entry-v7m.S | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index 8944f4991c3c..b6c8bb9315e7 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -117,9 +117,14 @@ ENTRY(__switch_to) ENDPROC(__switch_to) .data - .align 8 +#if CONFIG_CPU_V7M_NUM_IRQ <= 112 + .align 9 +#else + .align 10 +#endif + /* - * Vector table (64 words => 256 bytes natural alignment) + * Vector table (Natural alignment need to be ensured) */ ENTRY(vector_table) .long 0 @ 0 - Reset stack pointer @@ -138,6 +143,6 @@ ENTRY(vector_table) .long __invalid_entry @ 13 - Reserved .long __pendsv_entry @ 14 - PendSV .long __invalid_entry @ 15 - SysTick - .rept 64 - 16 - .long __irq_entry @ 16..64 - External Interrupts + .rept CONFIG_CPU_V7M_NUM_IRQ + .long __irq_entry @ External Interrupts .endr -- cgit From 7d485f647c1f4a6976264c90447fb0dbf07b111d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 24 Nov 2014 16:54:35 +0100 Subject: ARM: 8220/1: allow modules outside of bl range Loading modules far away from the kernel in memory is problematic because the 'bl' instruction only has limited 
reach, and modules are not built with PLTs. Instead of using the -mlong-calls option (which affects all compiler emitted bl instructions, but not the ones in assembler), this patch allocates some additional space at module load time, and populates it with PLT like veneers when encountering relocations that are out of range. This should work with all relocations against symbols exported by the kernel, including those resulting from GCC generated implicit function calls for ftrace etc. The module memory size increases by about 5% on average, regardless of whether any PLT entries were actually needed. However, due to the page based rounding that occurs when allocating module memory, the average memory footprint increase is negligible. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/module-plts.c | 181 ++++++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/module.c | 32 +++++++- arch/arm/kernel/module.lds | 4 + 4 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 arch/arm/kernel/module-plts.c create mode 100644 arch/arm/kernel/module.lds (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 752725dcbf42..32c0990d1968 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o obj-$(CONFIG_MODULES) += armksyms.o module.o +obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c new file mode 100644 index 000000000000..71a65c49871d --- /dev/null +++ b/arch/arm/kernel/module-plts.c @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2014 Linaro Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include + +#define PLT_ENT_STRIDE L1_CACHE_BYTES +#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) +#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) + +#ifdef CONFIG_THUMB2_KERNEL +#define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \ + (PLT_ENT_STRIDE - 4)) +#else +#define PLT_ENT_LDR __opcode_to_mem_arm(0xe59ff000 | \ + (PLT_ENT_STRIDE - 8)) +#endif + +struct plt_entries { + u32 ldr[PLT_ENT_COUNT]; + u32 lit[PLT_ENT_COUNT]; +}; + +static bool in_init(const struct module *mod, u32 addr) +{ + return addr - (u32)mod->module_init < mod->init_size; +} + +u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) +{ + struct plt_entries *plt, *plt_end; + int c, *count; + + if (in_init(mod, loc)) { + plt = (void *)mod->arch.init_plt->sh_addr; + plt_end = (void *)plt + mod->arch.init_plt->sh_size; + count = &mod->arch.init_plt_count; + } else { + plt = (void *)mod->arch.core_plt->sh_addr; + plt_end = (void *)plt + mod->arch.core_plt->sh_size; + count = &mod->arch.core_plt_count; + } + + /* Look for an existing entry pointing to 'val' */ + for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { + int i; + + if (!c) { + /* Populate a new set of entries */ + *plt = (struct plt_entries){ + { [0 ... 
PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, + { val, } + }; + ++*count; + return (u32)plt->ldr; + } + for (i = 0; i < PLT_ENT_COUNT; i++) { + if (!plt->lit[i]) { + plt->lit[i] = val; + ++*count; + } + if (plt->lit[i] == val) + return (u32)&plt->ldr[i]; + } + } + BUG(); +} + +static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num, + u32 mask) +{ + u32 *loc1, *loc2; + int i; + + for (i = 0; i < num; i++) { + if (rel[i].r_info != rel[num].r_info) + continue; + + /* + * Identical relocation types against identical symbols can + * still result in different PLT entries if the addend in the + * place is different. So resolve the target of the relocation + * to compare the values. + */ + loc1 = (u32 *)(base + rel[i].r_offset); + loc2 = (u32 *)(base + rel[num].r_offset); + if (((*loc1 ^ *loc2) & mask) == 0) + return 1; + } + return 0; +} + +/* Count how many PLT entries we may need */ +static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) +{ + unsigned int ret = 0; + int i; + + /* + * Sure, this is order(n^2), but it's usually short, and not + * time critical + */ + for (i = 0; i < num; i++) + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_ARM_CALL: + case R_ARM_PC24: + case R_ARM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_arm(0x00ffffff))) + ret++; + break; + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + if (!duplicate_rel(base, rel, i, + __opcode_to_mem_thumb32(0x07ff2fff))) + ret++; + } + return ret; +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned long core_plts = 0, init_plts = 0; + Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; + + /* + * To store the PLTs, we expand the .text section for core module code + * and the .init.text section for initialization code. 
+ */ + for (s = sechdrs; s < sechdrs_end; ++s) + if (strcmp(".core.plt", secstrings + s->sh_name) == 0) + mod->arch.core_plt = s; + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) + mod->arch.init_plt = s; + + if (!mod->arch.core_plt || !mod->arch.init_plt) { + pr_err("%s: sections missing\n", mod->name); + return -ENOEXEC; + } + + for (s = sechdrs + 1; s < sechdrs_end; ++s) { + const Elf32_Rel *rels = (void *)ehdr + s->sh_offset; + int numrels = s->sh_size / sizeof(Elf32_Rel); + Elf32_Shdr *dstsec = sechdrs + s->sh_info; + + if (s->sh_type != SHT_REL) + continue; + + if (strstr(secstrings + s->sh_name, ".init")) + init_plts += count_plts(dstsec->sh_addr, rels, numrels); + else + core_plts += count_plts(dstsec->sh_addr, rels, numrels); + } + + mod->arch.core_plt->sh_type = SHT_NOBITS; + mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.core_plt_count = 0; + + mod->arch.init_plt->sh_type = SHT_NOBITS; + mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.init_plt_count = 0; + pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__, + mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size); + return 0; +} diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index af791f4a6205..efdddcb97dd1 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -40,7 +40,12 @@ #ifdef CONFIG_MMU void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) + return p; + 
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } @@ -110,6 +115,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x04000000; offset += sym->st_value - loc; + + /* + * Route through a PLT entry if 'offset' exceeds the + * supported range. Note that 'offset + loc + 8' + * contains the absolute jump target, i.e., + * @sym + addend, corrected for the +8 PC bias. + */ + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xfe000000 || + offset >= (s32)0x02000000)) + offset = get_module_plt(module, loc, + offset + loc + 8) + - loc - 8; + if (offset <= (s32)0xfe000000 || offset >= (s32)0x02000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", @@ -203,6 +222,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x02000000; offset += sym->st_value - loc; + /* + * Route through a PLT entry if 'offset' exceeds the + * supported range. + */ + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && + (offset <= (s32)0xff000000 || + offset >= (s32)0x01000000)) + offset = get_module_plt(module, loc, + offset + loc + 4) + - loc - 4; + if (offset <= (s32)0xff000000 || offset >= (s32)0x01000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds new file mode 100644 index 000000000000..3682fa107918 --- /dev/null +++ b/arch/arm/kernel/module.lds @@ -0,0 +1,4 @@ +SECTIONS { + .core.plt : { BYTE(0) } + .init.plt : { BYTE(0) } +} -- cgit From 3f599875e5202986b350618a617527ab441bf206 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Wed, 6 May 2015 15:23:56 +0100 Subject: ARM: 8355/1: arch: Show the serial number from devicetree in cpuinfo This grabs the serial number shown in cpuinfo from the serial-number device-tree property in priority. 
When booting with ATAGs (and without device-tree), the provided number is still shown instead. Signed-off-by: Paul Kocialkowski Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 6c777e908a24..ee3e329ecf58 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -93,6 +93,9 @@ unsigned int __atags_pointer __initdata; unsigned int system_rev; EXPORT_SYMBOL(system_rev); +const char *system_serial; +EXPORT_SYMBOL(system_serial); + unsigned int system_serial_low; EXPORT_SYMBOL(system_serial_low); @@ -839,8 +842,25 @@ arch_initcall(customize_machine); static int __init init_machine_late(void) { + struct device_node *root; + int ret; + if (machine_desc->init_late) machine_desc->init_late(); + + root = of_find_node_by_path("/"); + if (root) { + ret = of_property_read_string(root, "serial-number", + &system_serial); + if (ret) + system_serial = NULL; + } + + if (!system_serial) + system_serial = kasprintf(GFP_KERNEL, "%08x%08x", + system_serial_high, + system_serial_low); + return 0; } late_initcall(init_machine_late); @@ -1109,8 +1129,7 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "Hardware\t: %s\n", machine_name); seq_printf(m, "Revision\t: %04x\n", system_rev); - seq_printf(m, "Serial\t\t: %08x%08x\n", - system_serial_high, system_serial_low); + seq_printf(m, "Serial\t\t: %s\n", system_serial); return 0; } -- cgit From 14327c662822e5e874cb971a7162067519300ca8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 21 Apr 2015 14:17:25 +0100 Subject: ARM: replace BSYM() with badr assembly macro BSYM() was invented to allow us to work around a problem with the assembler, where local symbols resolved by the assembler for the 'adr' instruction did not take account of their ISA. 
Since we don't want BSYM() used elsewhere, replace BSYM() with a new macro 'badr', which is like the 'adr' pseudo-op, but with the BSYM() mechanics integrated into it. This ensures that the BSYM()-ification is only used in conjunction with 'adr'. Acked-by: Dave Martin Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 12 ++++++------ arch/arm/kernel/entry-common.S | 6 +++--- arch/arm/kernel/entry-ftrace.S | 2 +- arch/arm/kernel/head-nommu.S | 6 +++--- arch/arm/kernel/head.S | 8 ++++---- arch/arm/kernel/sleep.S | 2 +- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 570306c49406..f8f7398c74c2 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -40,7 +40,7 @@ #ifdef CONFIG_MULTI_IRQ_HANDLER ldr r1, =handle_arch_irq mov r0, sp - adr lr, BSYM(9997f) + badr lr, 9997f ldr pc, [r1] #else arch_irq_handler_default @@ -273,7 +273,7 @@ __und_svc: str r4, [sp, #S_PC] orr r0, r9, r0, lsl #16 #endif - adr r9, BSYM(__und_svc_finish) + badr r9, __und_svc_finish mov r2, r4 bl call_fpe @@ -469,7 +469,7 @@ __und_usr: @ instruction, or the more conventional lr if we are to treat @ this as a real undefined instruction @ - adr r9, BSYM(ret_from_exception) + badr r9, ret_from_exception @ IRQs must be enabled before attempting to read the instruction from @ user space since that could cause a page/translation fault if the @@ -486,7 +486,7 @@ __und_usr: @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction @ lr = 32-bit undefined instruction function - adr lr, BSYM(__und_usr_fault_32) + badr lr, __und_usr_fault_32 b call_fpe __und_usr_thumb: @@ -522,7 +522,7 @@ ARM_BE8(rev16 r0, r0) @ little endian instruction add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 - adr lr, BSYM(__und_usr_fault_32) 
+ badr lr, __und_usr_fault_32 @ r0 = the two 16-bit Thumb instructions which caused the exception @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) @ r4 = PC value for the first 16-bit Thumb instruction @@ -716,7 +716,7 @@ __und_usr_fault_32: __und_usr_fault_16: mov r1, #2 1: mov r0, sp - adr lr, BSYM(ret_from_exception) + badr lr, ret_from_exception b __und_fault ENDPROC(__und_usr_fault_32) ENDPROC(__und_usr_fault_16) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f8ccc21fa032..6ab159384667 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -88,7 +88,7 @@ ENTRY(ret_from_fork) bl schedule_tail cmp r5, #0 movne r0, r4 - adrne lr, BSYM(1f) + badrne lr, 1f retne r5 1: get_thread_info tsk b ret_slow_syscall @@ -196,7 +196,7 @@ local_restart: bne __sys_trace cmp scno, #NR_syscalls @ check upper syscall limit - adr lr, BSYM(ret_fast_syscall) @ return address + badr lr, ret_fast_syscall @ return address ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF @@ -231,7 +231,7 @@ __sys_trace: add r0, sp, #S_OFF bl syscall_trace_enter - adr lr, BSYM(__sys_trace_return) @ return address + badr lr, __sys_trace_return @ return address mov scno, r0 @ syscall number (possibly new) add r1, sp, #S_R0 + S_OFF @ pointer to regs cmp scno, #NR_syscalls @ check upper syscall limit diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index fe57c73e70a4..c73c4030ca5d 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -87,7 +87,7 @@ 1: mcount_get_lr r1 @ lr of instrumented func mcount_adjust_addr r0, lr @ instrumented function - adr lr, BSYM(2f) + badr lr, 2f mov pc, r2 2: mcount_exit .endm diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index aebfbf79a1a3..b6f3cb6333e4 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -46,7 +46,7 @@ ENTRY(stext) .arm ENTRY(stext) - THUMB( adr 
r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -79,7 +79,7 @@ ENTRY(stext) #endif ldr r13, =__mmap_switched @ address to jump to after @ initialising sctlr - adr lr, BSYM(1f) @ return (PIC) address + badr lr, 1f @ return (PIC) address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 @@ -115,7 +115,7 @@ ENTRY(secondary_startup) bl __setup_mpu @ Initialize the MPU #endif - adr lr, BSYM(__after_proc_init) @ return address + badr lr, __after_proc_init @ return address mov r13, r12 @ __secondary_switched address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 3637973a9708..ab3c478aaced 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -80,7 +80,7 @@ ENTRY(stext) ARM_BE8(setend be ) @ ensure we are in BE8 mode - THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) @@ -136,7 +136,7 @@ ENTRY(stext) */ ldr r13, =__mmap_switched @ address to jump to after @ mmu has been enabled - adr lr, BSYM(1f) @ return (PIC) address + badr lr, 1f @ return (PIC) address mov r8, r4 @ set TTBR1 to swapper_pg_dir ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 @@ -348,7 +348,7 @@ __turn_mmu_on_loc: .text ENTRY(secondary_startup_arm) .arm - THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. 
THUMB(1: ) @@ -384,7 +384,7 @@ ENTRY(secondary_startup) ldr r4, [r7, lr] @ get secondary_data.pgdir add r7, r7, #4 ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir - adr lr, BSYM(__enable_mmu) @ return address + badr lr, __enable_mmu @ return address mov r13, r12 @ __secondary_switched address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 @ initialise processor diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 7d37bfc50830..76bb3128e135 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -81,7 +81,7 @@ ENTRY(__cpu_suspend) mov r1, r4 @ size of save block add r0, sp, #8 @ pointer to save block bl __cpu_suspend_save - adr lr, BSYM(cpu_suspend_abort) + badr lr, cpu_suspend_abort ldmfd sp!, {r0, pc} @ call suspend fn ENDPROC(__cpu_suspend) .ltorg -- cgit From 64d0d3943e14653fcfd5f9b3bd585bc77fa053df Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 May 2015 17:12:24 +0100 Subject: arm: perf: make of_pmu_irq_cfg take arm_pmu To support multiple PMUs we'll need to pass the arm_pmu instance around. Update of_pmu_irq_cfg to take an arm_pmu, and acquire the platform device from this. 
Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 91c7ba182dcd..2a9003ef6db3 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -301,9 +301,10 @@ static int probe_current_pmu(struct arm_pmu *pmu) return ret; } -static int of_pmu_irq_cfg(struct platform_device *pdev) +static int of_pmu_irq_cfg(struct arm_pmu *pmu) { int i; + struct platform_device *pdev = pmu->plat_device; int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) @@ -336,7 +337,7 @@ static int of_pmu_irq_cfg(struct platform_device *pdev) } if (i == pdev->num_resources) - cpu_pmu->irq_affinity = irqs; + pmu->irq_affinity = irqs; else kfree(irqs); @@ -368,7 +369,7 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { init_fn = of_id->data; - ret = of_pmu_irq_cfg(pdev); + ret = of_pmu_irq_cfg(pmu); if (!ret) ret = init_fn(pmu); } else { -- cgit From cc88116da0d18b8292f5437dbc0c4683c8a34ac1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 May 2015 17:12:25 +0100 Subject: arm: perf: treat PMUs as CPU affine In multi-cluster systems, the PMUs can be different across clusters, and so our logical PMU may not be able to schedule events on all CPUs. This patch adds a cpumask to encode which CPUs a PMU driver supports controlling events for, and limits the driver to scheduling events on those CPUs, and enabling and disabling the physical PMUs on those CPUs. The cpumask is built based on the interrupt-affinity property, and in the absence of such a property a homogenous system is assumed. 
Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 25 +++++++++++++++++++++++++ arch/arm/kernel/perf_event_cpu.c | 15 ++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 4a86a0133ac3..9b536be74f7b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -11,6 +11,7 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include #include #include #include @@ -229,6 +230,10 @@ armpmu_add(struct perf_event *event, int flags) int idx; int err = 0; + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return -ENOENT; + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ @@ -454,6 +459,17 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + /* + * Reject CPU-affine events for CPUs that are of a different class to + * that which this PMU handles. Process-following events (where + * event->cpu == -1) can be migrated between CPUs, and thus we have to + * reject them later (in armpmu_add) if they're scheduled on a + * different class of CPU. 
+ */ + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) + return -ENOENT; + /* does not support taken branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; @@ -489,6 +505,10 @@ static void armpmu_enable(struct pmu *pmu) struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + if (enabled) armpmu->start(armpmu); } @@ -496,6 +516,11 @@ static void armpmu_enable(struct pmu *pmu) static void armpmu_disable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); + + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + armpmu->stop(armpmu); } diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 2a9003ef6db3..9602d31aae03 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -179,11 +179,15 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, void *hcpu) { + int cpu = (unsigned long)hcpu; struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) return NOTIFY_DONE; + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + if (pmu->reset) pmu->reset(pmu); else @@ -219,7 +223,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. 
*/ if (cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); /* If no interrupts available, set the corresponding capability flag */ if (!platform_get_irq(cpu_pmu->plat_device, 0)) @@ -334,12 +339,15 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) } irqs[i] = cpu; + cpumask_set_cpu(cpu, &pmu->supported_cpus); } - if (i == pdev->num_resources) + if (i == pdev->num_resources) { pmu->irq_affinity = irqs; - else + } else { kfree(irqs); + cpumask_setall(&pmu->supported_cpus); + } return 0; } @@ -374,6 +382,7 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) ret = init_fn(pmu); } else { ret = probe_current_pmu(pmu); + cpumask_setall(&pmu->supported_cpus); } if (ret) { -- cgit From c904e32a69b7c77905876fc834f474f13f62c138 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 May 2015 17:12:26 +0100 Subject: arm: perf: filter unschedulable events Different CPU microarchitectures implement different PMU events, and thus events which can be scheduled on one microarchitecture cannot be scheduled on another, and vice-versa. Some architected events behave differently across microarchitectures, and thus cannot be meaningfully summed. Due to this, we reject the scheduling of an event on a CPU of a different microarchitecture to that the event targets. When the core perf code is scheduling events and encounters an event which cannot be scheduled, it stops attempting to schedule events. As the perf core periodically rotates the list of events, for some proportion of the time events which are unschedulable will block events which are schedulable, resulting in low utilisation of the hardware counters. This patch implements a pmu::filter_match callback such that we can detect and skip such events while scheduling early, before they can block the schedulable events. This prevents the low HW counter utilisation issue. 
Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 9b536be74f7b..df028072aabf 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -524,6 +524,18 @@ static void armpmu_disable(struct pmu *pmu) armpmu->stop(armpmu); } +/* + * In heterogeneous systems, events are specific to a particular + * microarchitecture, and aren't suitable for another. Thus, only match CPUs of + * the same microarchitecture. + */ +static int armpmu_filter_match(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + unsigned int cpu = smp_processor_id(); + return cpumask_test_cpu(cpu, &armpmu->supported_cpus); +} + #ifdef CONFIG_PM static int armpmu_runtime_resume(struct device *dev) { @@ -564,6 +576,7 @@ static void armpmu_init(struct arm_pmu *armpmu) .start = armpmu_start, .stop = armpmu_stop, .read = armpmu_read, + .filter_match = armpmu_filter_match, }; } -- cgit From 0e3038d18adcecf375c39ef5b39eb3c613293280 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 May 2015 17:12:27 +0100 Subject: arm: perf: probe number of counters on affine CPUs In heterogeneous systems, the number of counters may differ across clusters. To find the number of counters for a cluster, we must probe the PMU from a CPU in that cluster. 
Reviewed-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v7.c | 48 ++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index f4207a4dcb01..ccec472c1cdd 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1056,15 +1056,22 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->max_period = (1LLU << 32) - 1; }; -static u32 armv7_read_num_pmnc_events(void) +static void armv7_read_num_pmnc_events(void *info) { - u32 nb_cnt; + int *nb_cnt = info; /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + *nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; + /* Add the CPU cycles counter */ + *nb_cnt += 1; +} + +static int armv7_probe_num_events(struct arm_pmu *arm_pmu) +{ + return smp_call_function_any(&arm_pmu->supported_cpus, + armv7_read_num_pmnc_events, + &arm_pmu->num_events, 1); } static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) @@ -1072,8 +1079,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) @@ -1081,8 +1087,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) @@ -1090,8 +1095,7 @@ static int armv7_a5_pmu_init(struct arm_pmu 
*cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) @@ -1099,9 +1103,8 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) @@ -1109,9 +1112,8 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) @@ -1119,16 +1121,15 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { - armv7_a12_pmu_init(cpu_pmu); + int ret = armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; - return 0; + return ret; } /* @@ -1508,14 +1509,13 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->map_event = krait_map_event_no_branch; else cpu_pmu->map_event = krait_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; cpu_pmu->reset = krait_pmu_reset; cpu_pmu->enable = krait_pmu_enable_event; cpu_pmu->disable = 
krait_pmu_disable_event; cpu_pmu->get_event_idx = krait_pmu_get_event_idx; cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; - return 0; + return armv7_probe_num_events(cpu_pmu); } /* @@ -1833,13 +1833,12 @@ static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_scorpion"; cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->reset = scorpion_pmu_reset; cpu_pmu->enable = scorpion_pmu_enable_event; cpu_pmu->disable = scorpion_pmu_disable_event; cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return 0; + return armv7_probe_num_events(cpu_pmu); } static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) @@ -1847,13 +1846,12 @@ static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_scorpion_mp"; cpu_pmu->map_event = scorpion_map_event; - cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->reset = scorpion_pmu_reset; cpu_pmu->enable = scorpion_pmu_enable_event; cpu_pmu->disable = scorpion_pmu_disable_event; cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; - return 0; + return armv7_probe_num_events(cpu_pmu); } #else static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) -- cgit From 7a2a24cb433e0932828d77e98d5c86606cb09c2a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 May 2015 17:12:28 +0100 Subject: arm: perf: remove singleton PMU restriction Now that we can describe PMUs in heterogeneous systems, the only item in the way of perf support for big.LITTLE is the singleton cpu_pmu variable used for OProfile compatibility. 
Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 9602d31aae03..50f245bf4e05 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -33,7 +33,7 @@ #include /* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; +static struct arm_pmu *__oprofile_cpu_pmu; /* * Despite the names, these two functions are CPU-specific and are used @@ -41,10 +41,10 @@ static struct arm_pmu *cpu_pmu; */ const char *perf_pmu_name(void) { - if (!cpu_pmu) + if (!__oprofile_cpu_pmu) return NULL; - return cpu_pmu->name; + return __oprofile_cpu_pmu->name; } EXPORT_SYMBOL_GPL(perf_pmu_name); @@ -52,8 +52,8 @@ int perf_num_counters(void) { int max_events = 0; - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; + if (__oprofile_cpu_pmu != NULL) + max_events = __oprofile_cpu_pmu->num_events; return max_events; } @@ -360,19 +360,16 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) struct arm_pmu *pmu; int ret = -ENODEV; - if (cpu_pmu) { - pr_info("attempt to register multiple PMU devices!\n"); - return -ENOSPC; - } - pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); if (!pmu) { pr_info("failed to allocate PMU device!\n"); return -ENOMEM; } - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + pmu->plat_device = pdev; if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { init_fn = of_id->data; @@ -390,18 +387,18 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) goto out_free; } - ret = cpu_pmu_init(cpu_pmu); + ret = cpu_pmu_init(pmu); if (ret) goto out_free; - ret = armpmu_register(cpu_pmu, -1); + ret = armpmu_register(pmu, -1); if (ret) goto out_destroy; 
return 0; out_destroy: - cpu_pmu_destroy(cpu_pmu); + cpu_pmu_destroy(pmu); out_free: pr_info("failed to register PMU devices!\n"); kfree(pmu); -- cgit From ed61f9851d0686d56d7a9648b4807d82ad0adce6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 May 2015 17:23:34 +0100 Subject: arm: perf: kill off unused pm callbacks Currently the arm perf code has platdata callbacks for runtime PM and irq handling, but no platform implements the hooks for the former. Kill these off. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 38 +------------------------------------- arch/arm/kernel/perf_event_cpu.c | 1 - 2 files changed, 1 insertion(+), 38 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index df028072aabf..0072e8bb78f4 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -349,20 +348,12 @@ static void armpmu_release_hardware(struct arm_pmu *armpmu) { armpmu->free_irq(armpmu); - pm_runtime_put_sync(&armpmu->plat_device->dev); } static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int err; - struct platform_device *pmu_device = armpmu->plat_device; - - if (!pmu_device) - return -ENODEV; - - pm_runtime_get_sync(&pmu_device->dev); - err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); + int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); if (err) { armpmu_release_hardware(armpmu); return err; @@ -536,32 +527,6 @@ static int armpmu_filter_match(struct perf_event *event) return cpumask_test_cpu(cpu, &armpmu->supported_cpus); } -#ifdef CONFIG_PM -static int armpmu_runtime_resume(struct device *dev) -{ - struct arm_pmu_platdata *plat = dev_get_platdata(dev); - - if (plat && plat->runtime_resume) - return plat->runtime_resume(dev); - - return 0; -} - -static int armpmu_runtime_suspend(struct device *dev) -{ - struct arm_pmu_platdata *plat = 
dev_get_platdata(dev); - - if (plat && plat->runtime_suspend) - return plat->runtime_suspend(dev); - - return 0; -} -#endif - -const struct dev_pm_ops armpmu_dev_pm_ops = { - SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL) -}; - static void armpmu_init(struct arm_pmu *armpmu) { atomic_set(&armpmu->active_events, 0); @@ -583,7 +548,6 @@ static void armpmu_init(struct arm_pmu *armpmu) int armpmu_register(struct arm_pmu *armpmu, int type) { armpmu_init(armpmu); - pm_runtime_enable(&armpmu->plat_device->dev); pr_info("enabled with %s PMU driver, %d counters available\n", armpmu->name, armpmu->num_events); return perf_pmu_register(&armpmu->pmu, armpmu->name, type); diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 50f245bf4e05..14a5a0a5ec1d 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -408,7 +408,6 @@ out_free: static struct platform_driver cpu_pmu_driver = { .driver = { .name = "arm-pmu", - .pm = &armpmu_dev_pm_ops, .of_match_table = cpu_pmu_of_device_ids, }, .probe = cpu_pmu_device_probe, -- cgit From cfdad2991f7addb1bc0ce3361a5ee980a0482a87 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 May 2015 17:23:35 +0100 Subject: arm: perf: share arm_pmu_device_probe Enable the probe function to be shared with other drivers, which will inject the appropriate of_device_id and pmu_probe_info tables. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 14a5a0a5ec1d..797b56999b0e 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -286,16 +286,16 @@ static const struct pmu_probe_info pmu_probe_table[] = { /* * CPU PMU identification and probing. 
*/ -static int probe_current_pmu(struct arm_pmu *pmu) +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) { int cpu = get_cpu(); unsigned int cpuid = read_cpuid_id(); int ret = -ENODEV; - const struct pmu_probe_info *info; pr_info("probing PMU on CPU %d\n", cpu); - for (info = pmu_probe_table; info->init != NULL; info++) { + for (; info->init != NULL; info++) { if ((cpuid & info->mask) != info->cpuid) continue; ret = info->init(pmu); @@ -352,7 +352,9 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) return 0; } -static int cpu_pmu_device_probe(struct platform_device *pdev) +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) { const struct of_device_id *of_id; const int (*init_fn)(struct arm_pmu *); @@ -371,14 +373,14 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) pmu->plat_device = pdev; - if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { init_fn = of_id->data; ret = of_pmu_irq_cfg(pmu); if (!ret) ret = init_fn(pmu); } else { - ret = probe_current_pmu(pmu); + ret = probe_current_pmu(pmu, probe_table); cpumask_setall(&pmu->supported_cpus); } @@ -405,6 +407,12 @@ out_free: return ret; } +static int cpu_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, cpu_pmu_of_device_ids, + pmu_probe_table); +} + static struct platform_driver cpu_pmu_driver = { .driver = { .name = "arm-pmu", -- cgit From a12c72cc3e6938191cabeefff44b959a823d3d76 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 May 2015 17:23:36 +0100 Subject: arm: perf: factor out xscale pmu driver Now that the core arm perf code maintains no global state and all microarchitecture-specific PMU data can be fed in through the shared probe function, it's possible to use it as a library and get rid of the C file includes we have 
currently. This patch factors out the xscale-specific portions out into the xscale driver. For the moment this is always built if perf event support is enabled, but the preprocessor guards will leave behind an empty file. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/perf_event_cpu.c | 4 ---- arch/arm/kernel/perf_event_xscale.c | 32 +++++++++++++++++++++++++++----- 3 files changed, 28 insertions(+), 10 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 752725dcbf42..8b4aad7e9f50 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -70,7 +70,7 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o perf_event_xscale.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 797b56999b0e..1ebb179acf48 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -60,7 +60,6 @@ int perf_num_counters(void) EXPORT_SYMBOL_GPL(perf_num_counters); /* Include the PMU-specific implementations. 
*/ -#include "perf_event_xscale.c" #include "perf_event_v6.c" #include "perf_event_v7.c" @@ -267,7 +266,6 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { {.name = "arm-pmu"}, {.name = "armv6-pmu"}, {.name = "armv7-pmu"}, - {.name = "xscale-pmu"}, {}, }; @@ -278,8 +276,6 @@ static const struct pmu_probe_info pmu_probe_table[] = { ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), - XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), { /* sentinel value */ } }; diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 8af9f1f82c68..304d056d5b25 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -13,6 +13,14 @@ */ #ifdef CONFIG_CPU_XSCALE + +#include +#include +#include + +#include +#include + enum xscale_perf_types { XSCALE_PERFCTR_ICACHE_MISS = 0x00, XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, @@ -740,14 +748,28 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu) + +static const struct pmu_probe_info xscale_pmu_probe_table[] = { + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), + XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), + { /* sentinel value */ } +}; + +static int xscale_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, NULL, xscale_pmu_probe_table); } -static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver xscale_pmu_driver = { + .driver = { + .name = "xscale-pmu", + }, + .probe = xscale_pmu_device_probe, +}; + +static int __init register_xscale_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&xscale_pmu_driver); } +device_initcall(register_xscale_pmu_driver); 
#endif /* CONFIG_CPU_XSCALE */ -- cgit From 1fe115b303f301916e1430667c5b03451f56c733 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 May 2015 17:23:37 +0100 Subject: arm: perf: factor out armv6 pmu driver Now that the core arm perf code maintains no global state and all microarchitecture-specific PMU data can be fed in through the shared probe function, it's possible to use it as a library and get rid of the C file includes we have currently. This patch factors out the ARMv6-specific portions out into the ARMv6 driver. For the moment this is always built if perf event support is enabled, but the preprocessor guards will leave behind an empty file. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/Makefile | 3 ++- arch/arm/kernel/perf_event_cpu.c | 9 -------- arch/arm/kernel/perf_event_v6.c | 49 +++++++++++++++++++++++++++++----------- 3 files changed, 38 insertions(+), 23 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 8b4aad7e9f50..42a43da07a7e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -70,7 +70,8 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o perf_event_xscale.o +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o \ + perf_event_xscale.o perf_event_v6.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 1ebb179acf48..d24d585f7b9c 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -60,7 +60,6 @@ int perf_num_counters(void) EXPORT_SYMBOL_GPL(perf_num_counters); /* Include the PMU-specific implementations. 
*/ -#include "perf_event_v6.c" #include "perf_event_v7.c" static void cpu_pmu_enable_percpu_irq(void *data) @@ -253,9 +252,6 @@ static const struct of_device_id cpu_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, - {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, - {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, - {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, @@ -264,16 +260,11 @@ static const struct of_device_id cpu_pmu_of_device_ids[] = { static struct platform_device_id cpu_pmu_plat_device_ids[] = { {.name = "arm-pmu"}, - {.name = "armv6-pmu"}, {.name = "armv7-pmu"}, {}, }; static const struct pmu_probe_info pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), { /* sentinel value */ } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index f2ffd5c542ed..09f83e414a72 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -31,6 +31,14 @@ */ #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) + +#include +#include +#include + +#include +#include + enum armv6_perf_types { ARMV6_PERFCTR_ICACHE_MISS = 0x0, ARMV6_PERFCTR_IBUF_STALL = 0x1, @@ -543,24 +551,39 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) return 0; } -#else -static int armv6_1136_pmu_init(struct 
arm_pmu *cpu_pmu) -{ - return -ENODEV; -} -static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static struct of_device_id armv6_pmu_of_device_ids[] = { + {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, + {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, + {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, + { /* sentinel value */ } +}; -static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu) +static const struct pmu_probe_info armv6_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), + { /* sentinel value */ } +}; + +static int armv6_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, + armv6_pmu_probe_table); } -static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) +static struct platform_driver armv6_pmu_driver = { + .driver = { + .name = "armv6-pmu", + .of_match_table = armv6_pmu_of_device_ids, + }, + .probe = armv6_pmu_device_probe, +}; + +static int __init register_armv6_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&armv6_pmu_driver); } +device_initcall(register_armv6_pmu_driver); #endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ -- cgit From 29ba0f37f1578db268ac805c117365923b9a7663 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 May 2015 17:23:38 +0100 Subject: arm: perf: factor out armv7 pmu driver Now that the core arm perf code maintains no global state and all microarchitecture-specific PMU data can be fed in through the shared probe function, it's possible to use it as a library and get rid of the C file includes we have currently. This patch factors out the ARMv7-specific portions out into the ARMv7 driver. 
For the moment this is always built if perf event support is enabled, but the preprocessor guards will leave behind an empty file. Now that perf_event_cpu.c contains no microarchitecture-specific data, the associated probing code is removed, completing its relegation to a library file. The vestigial "arm-pmu" platform device ID is removed in this patch, as it has been unused since platform files were updated to specify a more specific PMU variant. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/Makefile | 3 +- arch/arm/kernel/perf_event_cpu.c | 53 --------------------------- arch/arm/kernel/perf_event_v7.c | 77 +++++++++++++++++++--------------------- 3 files changed, 39 insertions(+), 94 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 42a43da07a7e..26de3782bd74 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -71,7 +71,8 @@ obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o \ - perf_event_xscale.o perf_event_v6.o + perf_event_xscale.o perf_event_v6.o \ + perf_event_v7.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index d24d585f7b9c..261b639ad2d5 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -59,9 +59,6 @@ int perf_num_counters(void) } EXPORT_SYMBOL_GPL(perf_num_counters); -/* Include the PMU-specific implementations. */ -#include "perf_event_v7.c" - static void cpu_pmu_enable_percpu_irq(void *data) { int irq = *(int *)data; @@ -241,35 +238,6 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) free_percpu(cpu_pmu->hw_events); } -/* - * PMU platform driver and devicetree bindings.
- */ -static const struct of_device_id cpu_pmu_of_device_ids[] = { - {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, - {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, - {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, - {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, - {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, - {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, - {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, - {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, - {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, - {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, - {}, -}; - -static struct platform_device_id cpu_pmu_plat_device_ids[] = { - {.name = "arm-pmu"}, - {.name = "armv7-pmu"}, - {}, -}; - -static const struct pmu_probe_info pmu_probe_table[] = { - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), - ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), - { /* sentinel value */ } -}; - /* * CPU PMU identification and probing. 
*/ @@ -393,24 +361,3 @@ out_free: kfree(pmu); return ret; } - -static int cpu_pmu_device_probe(struct platform_device *pdev) -{ - return arm_pmu_device_probe(pdev, cpu_pmu_of_device_ids, - pmu_probe_table); -} - -static struct platform_driver cpu_pmu_driver = { - .driver = { - .name = "arm-pmu", - .of_match_table = cpu_pmu_of_device_ids, - }, - .probe = cpu_pmu_device_probe, - .id_table = cpu_pmu_plat_device_ids, -}; - -static int __init register_pmu_driver(void) -{ - return platform_driver_register(&cpu_pmu_driver); -} -device_initcall(register_pmu_driver); diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index ccec472c1cdd..f9b37f876e20 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -19,9 +19,15 @@ #ifdef CONFIG_CPU_V7 #include +#include +#include +#include #include #include "../vfp/vfpinstr.h" +#include +#include + /* * Common ARMv7 event types * @@ -1853,54 +1859,45 @@ static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; return armv7_probe_num_events(cpu_pmu); } -#else -static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} - -static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} -static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static const struct of_device_id armv7_pmu_of_device_ids[] = { + {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, + {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, + {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, + {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, + {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, + {.compatible = "arm,cortex-a7-pmu", .data 
= armv7_a7_pmu_init}, + {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, + {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, + {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, + {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, + {}, +}; -static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static const struct pmu_probe_info armv7_pmu_probe_table[] = { + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), + ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), + { /* sentinel value */ } +}; -static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} -static inline int krait_pmu_init(struct arm_pmu *cpu_pmu) +static int armv7_pmu_device_probe(struct platform_device *pdev) { - return -ENODEV; + return arm_pmu_device_probe(pdev, armv7_pmu_of_device_ids, + armv7_pmu_probe_table); } -static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu) -{ - return -ENODEV; -} +static struct platform_driver armv7_pmu_driver = { + .driver = { + .name = "armv7-pmu", + .of_match_table = armv7_pmu_of_device_ids, + }, + .probe = armv7_pmu_device_probe, +}; -static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +static int __init register_armv7_pmu_driver(void) { - return -ENODEV; + return platform_driver_register(&armv7_pmu_driver); } +device_initcall(register_armv7_pmu_driver); #endif /* CONFIG_CPU_V7 */ -- cgit From 74cf0bc75f1671b8da3b2e6ef7b2dc75cab0016a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 May 2015 17:23:39 +0100 Subject: arm: perf: unify perf_event{,_cpu}.c Now that the arm_pmu framework is only used for CPU PMUs, there's no reason to keep the pseudo-generic and CPU-specific framework portions separate. This patch folds the two into perf_event.c. 
Signed-off-by: Mark Rutland [will: fixed up irq cfg to match upstream] Signed-off-by: Will Deacon --- arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/perf_event.c | 340 ++++++++++++++++++++++++++++++++++++ arch/arm/kernel/perf_event_cpu.c | 363 --------------------------------------- 3 files changed, 341 insertions(+), 364 deletions(-) delete mode 100644 arch/arm/kernel/perf_event_cpu.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 26de3782bd74..d274a1f11f7e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -70,7 +70,7 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o \ +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o \ perf_event_xscale.o perf_event_v6.o \ perf_event_v7.o CFLAGS_pj4-cp0.o := -marm diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 0072e8bb78f4..357f57ea83f4 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -11,12 +11,18 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include #include +#include #include +#include #include +#include +#include #include #include +#include #include #include @@ -553,3 +559,337 @@ int armpmu_register(struct arm_pmu *armpmu, int type) return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *__oprofile_cpu_pmu; + +/* + * Despite the names, these two functions are CPU-specific and are used + * by the OProfile/perf code. 
+ */ +const char *perf_pmu_name(void) +{ + if (!__oprofile_cpu_pmu) + return NULL; + + return __oprofile_cpu_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + int max_events = 0; + + if (__oprofile_cpu_pmu != NULL) + max_events = __oprofile_cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +static void cpu_pmu_enable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void cpu_pmu_disable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + disable_percpu_irq(irq); +} + +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) +{ + int i, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &hw_events->percpu_pmu); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + } + } +} + +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) +{ + int i, err, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; + } + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + err = request_percpu_irq(irq, handler, "arm-pmu", + 
&hw_events->percpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + continue; + } + + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); + } + } + + return 0; +} + +/* + * PMU hardware loses all context when a CPU goes offline. + * When a CPU is hotplugged back in, since some hardware registers are + * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading + * junk values out of them. 
+ */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int err; + int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); + raw_spin_lock_init(&events->pmu_lock); + events->percpu_pmu = cpu_pmu; + } + + cpu_pmu->hw_events = cpu_hw_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + return 0; + +out_hw_events: + free_percpu(cpu_hw_events); + return err; +} + +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} + +/* + * CPU PMU identification and probing. 
+ */ +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) +{ + int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + for (; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); + break; + } + + put_cpu(); + return ret; +} + +static int of_pmu_irq_cfg(struct arm_pmu *pmu) +{ + int i, irq, *irqs; + struct platform_device *pdev = pmu->plat_device; + + /* Don't bother with PPIs; they're already affine */ + irq = platform_get_irq(pdev, 0); + if (irq >= 0 && irq_is_percpu(irq)) + return 0; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + for (i = 0; i < pdev->num_resources; ++i) { + struct device_node *dn; + int cpu; + + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", + i); + if (!dn) { + pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", + of_node_full_name(pdev->dev.of_node), i); + break; + } + + for_each_possible_cpu(cpu) + if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + break; + + of_node_put(dn); + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + break; + } + + irqs[i] = cpu; + cpumask_set_cpu(cpu, &pmu->supported_cpus); + } + + if (i == pdev->num_resources) { + pmu->irq_affinity = irqs; + } else { + kfree(irqs); + cpumask_setall(&pmu->supported_cpus); + } + + return 0; +} + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!\n"); + return -ENOMEM; + } + + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + 
pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { + init_fn = of_id->data; + + ret = of_pmu_irq_cfg(pmu); + if (!ret) + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu, probe_table); + cpumask_setall(&pmu->supported_cpus); + } + + if (ret) { + pr_info("failed to probe PMU!\n"); + goto out_free; + } + + ret = cpu_pmu_init(pmu); + if (ret) + goto out_free; + + ret = armpmu_register(pmu, -1); + if (ret) + goto out_destroy; + + return 0; + +out_destroy: + cpu_pmu_destroy(pmu); +out_free: + pr_info("failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c deleted file mode 100644 index 261b639ad2d5..000000000000 --- a/arch/arm/kernel/perf_event_cpu.c +++ /dev/null @@ -1,363 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ -#define pr_fmt(fmt) "CPU PMU: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *__oprofile_cpu_pmu; - -/* - * Despite the names, these two functions are CPU-specific and are used - * by the OProfile/perf code. 
- */ -const char *perf_pmu_name(void) -{ - if (!__oprofile_cpu_pmu) - return NULL; - - return __oprofile_cpu_pmu->name; -} -EXPORT_SYMBOL_GPL(perf_pmu_name); - -int perf_num_counters(void) -{ - int max_events = 0; - - if (__oprofile_cpu_pmu != NULL) - max_events = __oprofile_cpu_pmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -static void cpu_pmu_enable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - enable_percpu_irq(irq, IRQ_TYPE_NONE); -} - -static void cpu_pmu_disable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - disable_percpu_irq(irq); -} - -static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) -{ - int i, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &hw_events->percpu_pmu); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - } - } -} - -static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) -{ - int i, err, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { - pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); - return 0; - } - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - err = request_percpu_irq(irq, handler, "arm-pmu", - 
&hw_events->percpu_pmu); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - continue; - } - - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - - cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); - } - } - - return 0; -} - -/* - * PMU hardware loses all context when a CPU goes offline. - * When a CPU is hotplugged back in, since some hardware registers are - * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading - * junk values out of them. 
- */ -static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, - void *hcpu) -{ - int cpu = (unsigned long)hcpu; - struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); - - if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) - return NOTIFY_DONE; - - if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) - return NOTIFY_DONE; - - if (pmu->reset) - pmu->reset(pmu); - else - return NOTIFY_DONE; - - return NOTIFY_OK; -} - -static int cpu_pmu_init(struct arm_pmu *cpu_pmu) -{ - int err; - int cpu; - struct pmu_hw_events __percpu *cpu_hw_events; - - cpu_hw_events = alloc_percpu(struct pmu_hw_events); - if (!cpu_hw_events) - return -ENOMEM; - - cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; - err = register_cpu_notifier(&cpu_pmu->hotplug_nb); - if (err) - goto out_hw_events; - - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); - raw_spin_lock_init(&events->pmu_lock); - events->percpu_pmu = cpu_pmu; - } - - cpu_pmu->hw_events = cpu_hw_events; - cpu_pmu->request_irq = cpu_pmu_request_irq; - cpu_pmu->free_irq = cpu_pmu_free_irq; - - /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu->reset) - on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, - cpu_pmu, 1); - - /* If no interrupts available, set the corresponding capability flag */ - if (!platform_get_irq(cpu_pmu->plat_device, 0)) - cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - - return 0; - -out_hw_events: - free_percpu(cpu_hw_events); - return err; -} - -static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) -{ - unregister_cpu_notifier(&cpu_pmu->hotplug_nb); - free_percpu(cpu_pmu->hw_events); -} - -/* - * CPU PMU identification and probing. 
- */ -static int probe_current_pmu(struct arm_pmu *pmu, - const struct pmu_probe_info *info) -{ - int cpu = get_cpu(); - unsigned int cpuid = read_cpuid_id(); - int ret = -ENODEV; - - pr_info("probing PMU on CPU %d\n", cpu); - - for (; info->init != NULL; info++) { - if ((cpuid & info->mask) != info->cpuid) - continue; - ret = info->init(pmu); - break; - } - - put_cpu(); - return ret; -} - -static int of_pmu_irq_cfg(struct arm_pmu *pmu) -{ - int i; - struct platform_device *pdev = pmu->plat_device; - int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - - if (!irqs) - return -ENOMEM; - - for (i = 0; i < pdev->num_resources; ++i) { - struct device_node *dn; - int cpu; - - dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", - i); - if (!dn) { - pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", - of_node_full_name(dn), i); - break; - } - - for_each_possible_cpu(cpu) - if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) - break; - - of_node_put(dn); - if (cpu >= nr_cpu_ids) { - pr_warn("Failed to find logical CPU for %s\n", - dn->name); - break; - } - - irqs[i] = cpu; - cpumask_set_cpu(cpu, &pmu->supported_cpus); - } - - if (i == pdev->num_resources) { - pmu->irq_affinity = irqs; - } else { - kfree(irqs); - cpumask_setall(&pmu->supported_cpus); - } - - return 0; -} - -int arm_pmu_device_probe(struct platform_device *pdev, - const struct of_device_id *of_table, - const struct pmu_probe_info *probe_table) -{ - const struct of_device_id *of_id; - const int (*init_fn)(struct arm_pmu *); - struct device_node *node = pdev->dev.of_node; - struct arm_pmu *pmu; - int ret = -ENODEV; - - pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); - if (!pmu) { - pr_info("failed to allocate PMU device!\n"); - return -ENOMEM; - } - - if (!__oprofile_cpu_pmu) - __oprofile_cpu_pmu = pmu; - - pmu->plat_device = pdev; - - if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { - init_fn = of_id->data; - - ret = of_pmu_irq_cfg(pmu); - if (!ret) - 
ret = init_fn(pmu); - } else { - ret = probe_current_pmu(pmu, probe_table); - cpumask_setall(&pmu->supported_cpus); - } - - if (ret) { - pr_info("failed to probe PMU!\n"); - goto out_free; - } - - ret = cpu_pmu_init(pmu); - if (ret) - goto out_free; - - ret = armpmu_register(pmu, -1); - if (ret) - goto out_destroy; - - return 0; - -out_destroy: - cpu_pmu_destroy(pmu); -out_free: - pr_info("failed to register PMU devices!\n"); - kfree(pmu); - return ret; -} -- cgit From 1221ed10f2a56ecdd8ff75f436f52aca5ba0f1d3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 4 Apr 2015 17:25:20 +0100 Subject: ARM: cleanup early_paging_init() calling Eliminate the needless nommu version of this function, and get rid of the proc_info_list structure argument - we no longer need this in order to fix up the page table entries. Acked-by: Santosh Shilimkar Tested-by: Murali Karicheri Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 6c777e908a24..979c1c5fe96a 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -75,8 +75,7 @@ __setup("fpe=", fpe_setup); extern void init_default_cache_policy(unsigned long); extern void paging_init(const struct machine_desc *desc); -extern void early_paging_init(const struct machine_desc *, - struct proc_info_list *); +extern void early_paging_init(const struct machine_desc *); extern void sanity_check_meminfo(void); extern enum reboot_mode reboot_mode; extern void setup_dma_zone(const struct machine_desc *desc); @@ -936,7 +935,9 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); - early_paging_init(mdesc, lookup_processor_type(read_cpuid_id())); +#ifdef CONFIG_MMU + early_paging_init(mdesc); +#endif setup_dma_zone(mdesc); sanity_check_meminfo(); arm_memblock_init(mdesc); -- cgit From b2c3e38a54714e917c9e8675ff5812dca1c0f39d Mon Sep 17 00:00:00 2001 From: 
Russell King Date: Sat, 4 Apr 2015 20:09:46 +0100 Subject: ARM: redo TTBR setup code for LPAE Re-engineer the LPAE TTBR setup code. Rather than passing some shifted address in order to fit in a CPU register, pass either a full physical address (in the case of r4, r5 for TTBR0) or a PFN (for TTBR1). This removes the ARCH_PGD_SHIFT hack, and the last dangerous user of cpu_set_ttbr() in the secondary CPU startup code path (which was there to re-set TTBR1 to the appropriate high physical address space on Keystone2.) Tested-by: Murali Karicheri Signed-off-by: Russell King --- arch/arm/kernel/head-nommu.S | 2 +- arch/arm/kernel/head.S | 42 +++++++++++++++++++++++++++++++----------- arch/arm/kernel/smp.c | 10 ++++++---- 3 files changed, 38 insertions(+), 16 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index aebfbf79a1a3..84da14b7cd04 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -123,7 +123,7 @@ ENTRY(secondary_startup) ENDPROC(secondary_startup) ENTRY(__secondary_switched) - ldr sp, [r7, #8] @ set up the stack pointer + ldr sp, [r7, #12] @ set up the stack pointer mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 3637973a9708..7304b4c44b52 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -131,13 +131,30 @@ ENTRY(stext) * The following calls CPU specific code in a position independent * manner. See arch/arm/mm/proc-*.S for details. r10 = base of * xxx_proc_info structure selected by __lookup_processor_type - * above. On return, the CPU will be ready for the MMU to be - * turned on, and r0 will hold the CPU control register value. + * above. 
+ * + * The processor init function will be called with: + * r1 - machine type + * r2 - boot data (atags/dt) pointer + * r4 - translation table base (low word) + * r5 - translation table base (high word, if LPAE) + * r8 - translation table base 1 (pfn if LPAE) + * r9 - cpuid + * r13 - virtual address for __enable_mmu -> __turn_mmu_on + * + * On return, the CPU will be ready for the MMU to be turned on, + * r0 will hold the CPU control register value, r1, r2, r4, and + * r9 will be preserved. r5 will also be preserved if LPAE. */ ldr r13, =__mmap_switched @ address to jump to after @ mmu has been enabled adr lr, BSYM(1f) @ return (PIC) address +#ifdef CONFIG_ARM_LPAE + mov r5, #0 @ high TTBR0 + mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn +#else mov r8, r4 @ set TTBR1 to swapper_pg_dir +#endif ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 @@ -158,7 +175,7 @@ ENDPROC(stext) * * Returns: * r0, r3, r5-r7 corrupted - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = physical page table address */ __create_page_tables: pgtbl r4, r8 @ page table address @@ -333,7 +350,6 @@ __create_page_tables: #endif #ifdef CONFIG_ARM_LPAE sub r4, r4, #0x1000 @ point to the PGD table - mov r4, r4, lsr #ARCH_PGD_SHIFT #endif ret lr ENDPROC(__create_page_tables) @@ -381,9 +397,9 @@ ENTRY(secondary_startup) adr r4, __secondary_data ldmia r4, {r5, r7, r12} @ address to jump to after sub lr, r4, r5 @ mmu has been enabled - ldr r4, [r7, lr] @ get secondary_data.pgdir - add r7, r7, #4 - ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir + add r3, r7, lr + ldrd r4, [r3, #0] @ get secondary_data.pgdir + ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir adr lr, BSYM(__enable_mmu) @ return address mov r13, r12 @ __secondary_switched address ldr r12, [r10, #PROCINFO_INITFUNC] @@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm) * r6 = &secondary_data */ ENTRY(__secondary_switched) - ldr sp, [r7, #4] @ get secondary_data.stack + ldr sp, [r7, #12] @ get 
secondary_data.stack mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) @@ -416,12 +432,14 @@ __secondary_data: /* * Setup common bits before finally enabling the MMU. Essentially * this is just loading the page table pointer and domain access - * registers. + * registers. All these registers need to be preserved by the + * processor setup function (or set in the case of r0) * * r0 = cp#15 control register * r1 = machine ID * r2 = atags or dtb pointer - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) + * r4 = TTBR pointer (low word) + * r5 = TTBR pointer (high word if LPAE) * r9 = processor ID * r13 = *virtual* address to jump to upon completion */ @@ -440,7 +458,9 @@ __enable_mmu: #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I #endif -#ifndef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r4, r5, c2 @ load TTBR0 +#else mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cca5b8758185..90dfbedfbfb8 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops) static unsigned long get_arch_pgd(pgd_t *pgd) { - phys_addr_t pgdir = virt_to_idmap(pgd); - BUG_ON(pgdir & ARCH_PGD_MASK); - return pgdir >> ARCH_PGD_SHIFT; +#ifdef CONFIG_ARM_LPAE + return __phys_to_pfn(virt_to_phys(pgd)); +#else + return virt_to_phys(pgd); +#endif } int __cpu_up(unsigned int cpu, struct task_struct *idle) @@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) #endif #ifdef CONFIG_MMU - secondary_data.pgdir = get_arch_pgd(idmap_pgd); + secondary_data.pgdir = virt_to_phys(idmap_pgd); secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); #endif sync_cache_w(&secondary_data); -- cgit From c07b5fd0268fc2bbaa584548d437f763d20bde77 Mon Sep 17 00:00:00 2001 From: Yingjoe Chen Date: Mon, 18 May 2015 09:04:31 
+0100 Subject: ARM: 8359/1: correct secondary_startup_arm mode secondary_startup_arm is used as ARM mode secondary start up function when the kernel is compiled in THUMB mode, however the label itself is still in .thumb mode. readelf shows: 160979: c020a581 120 FUNC GLOBAL DEFAULT 2 secondary_startup_arm Make sure the label is in ARM mode as well. Signed-off-by: Yingjoe Chen Tested-by: Matthias Brugger Signed-off-by: Russell King --- arch/arm/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 3637973a9708..58ee8a24308c 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -346,8 +346,8 @@ __turn_mmu_on_loc: #if defined(CONFIG_SMP) .text -ENTRY(secondary_startup_arm) .arm +ENTRY(secondary_startup_arm) THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. -- cgit From 73c430bf9ac6cd3a41ccc3c9904e66cc0a5f9420 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 May 2015 16:03:13 +0100 Subject: ARM: 8364/1: fix BE32 module loading The new veneer support for loadable modules on ARM uses the __opcode_to_mem_thumb32() function to count R_ARM_THM_CALL and R_ARM_THM_JUMP24 relocations. However, this function is not defined for big-endian kernels on ARMv5 or before, causing a compile-time error: arch/arm/kernel/module-plts.c: In function 'count_plts': arch/arm/kernel/module-plts.c:124:9: error: implicit declaration of function '__opcode_to_mem_thumb32' [-Werror=implicit-function-declaration] __opcode_to_mem_thumb32(0x07ff2fff))) ^ As we know that this part of the function is only needed for Thumb2 kernels, and that those can never happen with BE32, we can avoid the error by enclosing the code in an #ifdef. 
Fixes: 7d485f647c1 ("ARM: 8220/1: allow modules outside of bl range") Signed-off-by: Arnd Bergmann Acked-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/module-plts.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 71a65c49871d..097e2e201b9f 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -118,11 +118,13 @@ static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) __opcode_to_mem_arm(0x00ffffff))) ret++; break; +#ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: if (!duplicate_rel(base, rel, i, __opcode_to_mem_thumb32(0x07ff2fff))) ret++; +#endif } return ret; } -- cgit From 31cd08c3a1db4b3164567a2a424b5e5dba6ce7a3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 19 May 2015 13:39:05 +0100 Subject: ARM: remove __bad_xchg definition We want link errors if xchg() is called for a variable size we do not support. Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3dce1a342030..d358226236f2 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -749,14 +749,6 @@ late_initcall(arm_mrc_hook_init); #endif -void __bad_xchg(volatile void *ptr, int size) -{ - pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n", - __builtin_return_address(0), ptr, size); - BUG(); -} -EXPORT_SYMBOL(__bad_xchg); - /* * A data abort trap was taken, but we did not handle the instruction. * Try to abort the user program, or panic if it was the kernel. 
-- cgit From 0bbe6b5a73c00f8c8e7eb38fb86993f03cd64b70 Mon Sep 17 00:00:00 2001 From: Michael van der Westhuizen Date: Thu, 4 Jun 2015 15:14:51 +0100 Subject: ARM: 8388/1: tcm: Don't crash when TCM banks are protected by TrustZone Fixes the TCM initialisation code to handle TCM banks that are present but inaccessible due to TrustZone configuration. This is the default case when enabling the non-secure world. It may also be the case that the user decided to use TCM for TrustZone. This change has exposed a bug in handling of TCM where no TCM bank was usable (the 0 size TCM case). This change addresses the resulting hang. This code only handles the ARMv6 TCMTR register format, and will not work correctly on boards that use the ARMv7 (or any other) format. This is handled by performing an early exit from the initialisation function when the TCMTR reports any format other than v6. Signed-off-by: Michael van der Westhuizen Reviewed-by: Linus Walleij Reviewed-by: Dave Martin Signed-off-by: Russell King --- arch/arm/kernel/tcm.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 3 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 7a3be1d4d0b1..b10e1360762e 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -17,6 +17,9 @@ #include #include #include +#include + +#define TCMTR_FORMAT_MASK 0xe0000000U static struct gen_pool *tcm_pool; static bool dtcm_present; @@ -175,6 +178,77 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, return 0; } +/* + * When we are running in the non-secure world and the secure world + * has not explicitly given us access to the TCM we will get an + * undefined error when reading the TCM region register in the + * setup_tcm_bank function (above). 
+ * + * There are two variants of this register read that we need to trap, + * the read for the data TCM and the read for the instruction TCM: + * c0370628: ee196f11 mrc 15, 0, r6, cr9, cr1, {0} + * c0370674: ee196f31 mrc 15, 0, r6, cr9, cr1, {1} + * + * Our undef hook mask explicitly matches all fields of the encoded + * instruction other than the destination register. The mask also + * only allows operand 2 to have the values 0 or 1. + * + * The undefined hook is defined as __init and __initdata, and therefore + * must be removed before tcm_init returns. + * + * In this particular case (MRC with ARM condition code ALways) the + * Thumb-2 and ARM instruction encoding are identical, so this hook + * will work on a Thumb-2 kernel. + * + * See A8.8.107, DDI0406C_C ARM Architecture Reference Manual, Encoding + * T1/A1 for the bit-by-bit details. + * + * mrc p15, 0, XX, c9, c1, 0 + * mrc p15, 0, XX, c9, c1, 1 + * | | | | | | | +---- opc2 0|1 = 000|001 + * | | | | | | +------- CRm 0 = 0001 + * | | | | | +----------- CRn 0 = 1001 + * | | | | +--------------- Rt ? = ???? + * | | | +------------------- opc1 0 = 000 + * | | +----------------------- coproc 15 = 1111 + * | +-------------------------- condition ALways = 1110 + * +----------------------------- instruction MRC = 1110 + * + * Encoding this as per A8.8.107 of DDI0406C, Encoding T1/A1, yields: + * 1111 1111 1111 1111 0000 1111 1101 1111 Required Mask + * 1110 1110 0001 1001 ???? 1111 0001 0001 mrc p15, 0, XX, c9, c1, 0 + * 1110 1110 0001 1001 ???? 
1111 0011 0001 mrc p15, 0, XX, c9, c1, 1 + * [ ] [ ] [ ]| [ ] [ ] [ ] [ ]| +--- CRm + * | | | | | | | | +----- SBO + * | | | | | | | +------- opc2 + * | | | | | | +----------- coproc + * | | | | | +---------------- Rt + * | | | | +--------------------- CRn + * | | | +------------------------- SBO + * | | +--------------------------- opc1 + * | +------------------------------- instruction + * +------------------------------------ condition + */ +#define TCM_REGION_READ_MASK 0xffff0fdf +#define TCM_REGION_READ_INSTR 0xee190f11 +#define DEST_REG_SHIFT 12 +#define DEST_REG_MASK 0xf + +static int __init tcm_handler(struct pt_regs *regs, unsigned int instr) +{ + regs->uregs[(instr >> DEST_REG_SHIFT) & DEST_REG_MASK] = 0; + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook tcm_hook __initdata = { + .instr_mask = TCM_REGION_READ_MASK, + .instr_val = TCM_REGION_READ_INSTR, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = tcm_handler +}; + /* * This initializes the TCM memory */ @@ -204,9 +278,18 @@ void __init tcm_init(void) } tcm_status = read_cpuid_tcmstatus(); + + /* + * This code only supports v6-compatible TCMTR implementations. + */ + if (tcm_status & TCMTR_FORMAT_MASK) + return; + dtcm_banks = (tcm_status >> 16) & 0x03; itcm_banks = (tcm_status & 0x03); + register_undef_hook(&tcm_hook); + /* Values greater than 2 for D/ITCM banks are "reserved" */ if (dtcm_banks > 2) dtcm_banks = 0; @@ -218,7 +301,7 @@ void __init tcm_init(void) for (i = 0; i < dtcm_banks; i++) { ret = setup_tcm_bank(0, i, dtcm_banks, &dtcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into DTCM */ if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) { @@ -227,6 +310,12 @@ void __init tcm_init(void) dtcm_code_sz, (dtcm_end - DTCM_OFFSET)); goto no_dtcm; } + /* + * This means that the DTCM sizes were 0 or the DTCM banks + * were inaccessible due to TrustZone configuration. 
+ */ + if (!(dtcm_end - DTCM_OFFSET)) + goto no_dtcm; dtcm_res.end = dtcm_end - 1; request_resource(&iomem_resource, &dtcm_res); dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET; @@ -250,15 +339,21 @@ no_dtcm: for (i = 0; i < itcm_banks; i++) { ret = setup_tcm_bank(1, i, itcm_banks, &itcm_end); if (ret) - return; + goto unregister; } /* This means you compiled more code than fits into ITCM */ if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) { pr_info("CPU ITCM: %u bytes of code compiled to " "ITCM but only %lu bytes of ITCM present\n", itcm_code_sz, (itcm_end - ITCM_OFFSET)); - return; + goto unregister; } + /* + * This means that the ITCM sizes were 0 or the ITCM banks + * were inaccessible due to TrustZone configuration. + */ + if (!(itcm_end - ITCM_OFFSET)) + goto unregister; itcm_res.end = itcm_end - 1; request_resource(&iomem_resource, &itcm_res); itcm_iomap[0].length = itcm_end - ITCM_OFFSET; @@ -275,6 +370,9 @@ no_dtcm: pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no " "ITCM banks present in CPU\n", itcm_code_sz); } + +unregister: + unregister_undef_hook(&tcm_hook); } /* -- cgit From 970d96f9a81b0dd83ddd8bce0e5e1ba31881c5f5 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 2 Jun 2015 20:43:24 +0100 Subject: ARM: 8383/1: nommu: avoid deprecated source register on mov In Thumb2 mode, the stack register r13 is deprecated if the destination register is the program counter (r15). Similar to head.S, head-nommu.S uses r13 to store the return address used after configuring the CPU's CP15 register. However, since we do not enable a MMU, there will be no address switch and it is possible to use branch with link instruction to call __after_proc_init. Avoid using r13 completely by using bl to call __after_proc_init and get rid of __secondary_switched. Beside removing unnecessary complexity, this also fixes a compiler warning when compiling a !MMU kernel: Warning: Use of r13 as a source register is deprecated when r15 is the destination register. 
Tested-by: Maxime Coquelin Signed-off-by: Stefan Agner Signed-off-by: Russell King --- arch/arm/kernel/head-nommu.S | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index c9660167ef1a..9b8c5a113434 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -77,13 +77,13 @@ ENTRY(stext) orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit bl __setup_mpu #endif - ldr r13, =__mmap_switched @ address to jump to after - @ initialising sctlr + badr lr, 1f @ return (PIC) address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 - 1: b __after_proc_init +1: bl __after_proc_init + b __mmap_switched ENDPROC(stext) #ifdef CONFIG_SMP @@ -106,8 +106,7 @@ ENTRY(secondary_startup) movs r10, r5 @ invalid processor? beq __error_p @ yes, error 'p' - adr r4, __secondary_data - ldmia r4, {r7, r12} + ldr r7, __secondary_data #ifdef CONFIG_ARM_MPU /* Use MPU region info supplied by __cpu_up */ @@ -115,23 +114,19 @@ ENTRY(secondary_startup) bl __setup_mpu @ Initialize the MPU #endif - badr lr, __after_proc_init @ return address - mov r13, r12 @ __secondary_switched address + badr lr, 1f @ return (PIC) address ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 -ENDPROC(secondary_startup) - -ENTRY(__secondary_switched) +1: bl __after_proc_init ldr sp, [r7, #12] @ set up the stack pointer mov fp, #0 b secondary_start_kernel -ENDPROC(__secondary_switched) +ENDPROC(secondary_startup) .type __secondary_data, %object __secondary_data: .long secondary_data - .long __secondary_switched #endif /* CONFIG_SMP */ /* @@ -164,7 +159,7 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - ret r13 + ret lr ENDPROC(__after_proc_init) .ltorg -- cgit From 9ce93bdda7b71fd154986d36c0c1ccf0e7338e26 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 12 Jun 2015 21:19:35 +0100 
Subject: ARM: fix new BSYM() usage introduced via for-arm-soc branch Commit 32e55a777f83 ("ARM: 8389/1: Add cpu_resume_arm() for firmwares that resume in ARM state") needed to introduce a new usage of BSYM() to fix a problem with a previous patch. This in turn causes a conflict with the "bsym" branch which removes this symbol, replacing it with a 'badr' assembly macro. Fix this up. Signed-off-by: Russell King --- arch/arm/kernel/sleep.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index c5e1e21a294f..0f6c1000582c 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -122,7 +122,7 @@ ENDPROC(cpu_resume_after_mmu) #ifdef CONFIG_MMU .arm ENTRY(cpu_resume_arm) - THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. + THUMB( badr r9, 1f ) @ Kernel is entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) -- cgit