From f81309067ff2d84788316c513a415f6bb8c9171f Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 1 Jun 2015 23:44:46 +0100 Subject: ARM: move heavy barrier support out of line The existing memory barrier macro causes a significant amount of code to be inserted inline at every call site. For example, in gpio_set_irq_type(), we have this for mb(): c0344c08: f57ff04e dsb st c0344c0c: e59f8190 ldr r8, [pc, #400] ; c0344da4 c0344c10: e3590004 cmp r9, #4 c0344c14: e5983014 ldr r3, [r8, #20] c0344c18: 0a000054 beq c0344d70 c0344c1c: e3530000 cmp r3, #0 c0344c20: 0a000004 beq c0344c38 c0344c24: e50b2030 str r2, [fp, #-48] ; 0xffffffd0 c0344c28: e50bc034 str ip, [fp, #-52] ; 0xffffffcc c0344c2c: e12fff33 blx r3 c0344c30: e51bc034 ldr ip, [fp, #-52] ; 0xffffffcc c0344c34: e51b2030 ldr r2, [fp, #-48] ; 0xffffffd0 c0344c38: e5963004 ldr r3, [r6, #4] Moving the outer_cache_sync() call out of line reduces the impact of the barrier: c0344968: f57ff04e dsb st c034496c: e35a0004 cmp sl, #4 c0344970: e50b2030 str r2, [fp, #-48] ; 0xffffffd0 c0344974: 0a000044 beq c0344a8c c0344978: ebf363dd bl c001d8f4 c034497c: e5953004 ldr r3, [r5, #4] This should reduce the cache footprint of this code. Overall, this results in a reduction of around 20K in the kernel size: text data bss dec hex filename 10773970 667392 10369656 21811018 14ccf4a ../build/imx6/vmlinux-old 10754219 667392 10369656 21791267 14c8223 ../build/imx6/vmlinux-new Another advantage to this approach is that we can finally resolve the issue of SoCs which have their own memory barrier requirements within multiplatform kernels (such as OMAP.) Here, the bus interconnects need additional handling to ensure that writes become visible in the correct order (eg, between dma_map() operations, writes to DMA coherent memory, and MMIO accesses.) 
Acked-by: Tony Lindgren Acked-by: Richard Woodruff Signed-off-by: Russell King --- arch/arm/mach-ux500/cache-l2x0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm/mach-ux500') diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c index 7557bede7ae6..780bd13cd7e3 100644 --- a/arch/arm/mach-ux500/cache-l2x0.c +++ b/arch/arm/mach-ux500/cache-l2x0.c @@ -8,6 +8,7 @@ #include #include +#include #include #include "db8500-regs.h" -- cgit From fa8ad7889d83bcf0a6cdbf6d3622f3ec019cde14 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 6 Jul 2015 12:23:53 +0100 Subject: arm: perf: factor arm_pmu core out to drivers To enable sharing of the arm_pmu code with arm64, this patch factors it out to drivers/perf/. A new drivers/perf directory is added for performance monitor drivers to live under. MAINTAINERS is updated accordingly. Files added previously without a corresponding MAINTAINERS update (perf_regs.c, perf_callchain.c, and perf_event.h) are also added. Cc: Arnaldo Carvalho de Melo Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Linus Walleij Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Russell King Cc: Will Deacon Signed-off-by: Mark Rutland [will: augmented Kconfig help slightly] Signed-off-by: Will Deacon --- arch/arm/mach-ux500/cpu-db8500.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/mach-ux500') diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 16913800bbf9..5578dc1ab52b 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include #include "setup.h" -- cgit