From 9394c1c65e61eb6f4c1c99f342b49e451ec337b6 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Mon, 11 Mar 2013 13:52:12 +0100
Subject: ARM: 7669/1: keep __my_cpu_offset consistent with generic one

Commit 14318efb (ARM: 7587/1: implement optimized percpu variable access)
introduced ARM's __my_cpu_offset to optimize percpu variable access,
which works really well on hackbench, but causes __my_cpu_offset to
return a garbage value before it is initialized in cpu_init(), called
by setup_arch(), so accessing a percpu variable before setup_arch() may
hang the kernel. The generic __my_cpu_offset, in contrast, always returns
zero before the percpu area is brought up, and won't hang the kernel.

So the patch tries to clear __my_cpu_offset on boot CPU early
to avoid boot hang.

Currently, at least lockdep accesses a percpu variable before
setup_arch(), so enabling CONFIG_LOCK_STAT or CONFIG_DEBUG_LOCKDEP
can trigger a kernel hang.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/setup.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1522c7ae31b0..dd1c6aacbaf9 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -456,6 +456,13 @@ void __init smp_setup_processor_id(void)
 	for (i = 1; i < nr_cpu_ids; ++i)
 		cpu_logical_map(i) = i == cpu ? 0 : i;
 
+	/*
+	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
+	 * using percpu variable early, for example, lockdep will
+	 * access percpu variable inside lock_release
+	 */
+	set_my_cpu_offset(0);
+
 	printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr);
 }
 
-- 
cgit 


From 4756dcbfd37819a8359d3c69a22be2ee41666d0f Mon Sep 17 00:00:00 2001
From: Cyril Chemparathy <cyril@ti.com>
Date: Sat, 21 Jul 2012 15:55:04 -0400
Subject: ARM: LPAE: accommodate >32-bit addresses for page table base

This patch redefines the early boot time use of the R4 register to steal a few
low order bits (ARCH_PGD_SHIFT bits) on LPAE systems.  This allows for up to
38-bit physical addresses.

Signed-off-by: Cyril Chemparathy <cyril@ti.com>
Signed-off-by: Vitaly Andrianov <vitalya@ti.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Subash Patel <subash.rp@samsung.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/head.S | 10 ++++------
 arch/arm/kernel/smp.c  | 11 +++++++++--
 2 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 8bac553fe213..45e8935cae4e 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -156,7 +156,7 @@ ENDPROC(stext)
  *
  * Returns:
  *  r0, r3, r5-r7 corrupted
- *  r4 = physical page table address
+ *  r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
  */
 __create_page_tables:
 	pgtbl	r4, r8				@ page table address
@@ -331,6 +331,7 @@ __create_page_tables:
 #endif
 #ifdef CONFIG_ARM_LPAE
 	sub	r4, r4, #0x1000		@ point to the PGD table
+	mov	r4, r4, lsr #ARCH_PGD_SHIFT
 #endif
 	mov	pc, lr
 ENDPROC(__create_page_tables)
@@ -408,7 +409,7 @@ __secondary_data:
  *  r0  = cp#15 control register
  *  r1  = machine ID
  *  r2  = atags or dtb pointer
- *  r4  = page table pointer
+ *  r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
  *  r9  = processor ID
  *  r13 = *virtual* address to jump to upon completion
  */
@@ -427,10 +428,7 @@ __enable_mmu:
 #ifdef CONFIG_CPU_ICACHE_DISABLE
 	bic	r0, r0, #CR_I
 #endif
-#ifdef CONFIG_ARM_LPAE
-	mov	r5, #0
-	mcrr	p15, 0, r4, r5, c2		@ load TTBR0
-#else
+#ifndef CONFIG_ARM_LPAE
 	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 550d63cef68e..217b755aadd4 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -78,6 +78,13 @@ void __init smp_set_ops(struct smp_operations *ops)
 		smp_ops = *ops;
 };
 
+static unsigned long get_arch_pgd(pgd_t *pgd)
+{
+	phys_addr_t pgdir = virt_to_phys(pgd);
+	BUG_ON(pgdir & ARCH_PGD_MASK);
+	return pgdir >> ARCH_PGD_SHIFT;
+}
+
 int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	int ret;
@@ -87,8 +94,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * its stack and the page tables.
 	 */
 	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
-	secondary_data.pgdir = virt_to_phys(idmap_pgd);
-	secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
+	secondary_data.pgdir = get_arch_pgd(idmap_pgd);
+	secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
 	__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
 	outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
 
-- 
cgit 


From a469abd0f868c902b75532579bf87553dcf1b360 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 8 Apr 2013 17:13:12 +0100
Subject: ARM: elf: add new hwcap for identifying atomic ldrd/strd instructions

CPUs implementing LPAE have atomic ldrd/strd instructions, meaning that
userspace software can avoid having to use the exclusive variants of
these instructions if it wishes.

This patch advertises the atomicity of these instructions via the
hwcaps, so userspace can detect this CPU feature.

Reported-by: Vladimir Danushevsky <vladimir.danushevsky@oracle.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/setup.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1522c7ae31b0..bdcd4dd13230 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -355,7 +355,7 @@ void __init early_print(const char *str, ...)
 
 static void __init cpuid_init_hwcaps(void)
 {
-	unsigned int divide_instrs;
+	unsigned int divide_instrs, vmsa;
 
 	if (cpu_architecture() < CPU_ARCH_ARMv7)
 		return;
@@ -368,6 +368,11 @@ static void __init cpuid_init_hwcaps(void)
 	case 1:
 		elf_hwcap |= HWCAP_IDIVT;
 	}
+
+	/* LPAE implies atomic ldrd/strd instructions */
+	vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0;
+	if (vmsa >= 5)
+		elf_hwcap |= HWCAP_LPAE;
 }
 
 static void __init feat_v6_fixup(void)
@@ -872,6 +877,7 @@ static const char *hwcap_str[] = {
 	"vfpv4",
 	"idiva",
 	"idivt",
+	"lpae",
 	NULL
 };
 
-- 
cgit 


From fdeb94b5dc5bf9db7b3e36f3f38089a554f6a108 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 3 Jun 2013 23:09:14 +0100
Subject: ARM: 7745/1: psci: fix building without HOTPLUG_CPU

The cpu_die field in smp_operations is only valid with CONFIG_HOTPLUG_CPU,
so we must enclose it in #ifdef, but at least that lets us remove
two other lines.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/psci_smp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
index 23a11424c568..219f1d73572a 100644
--- a/arch/arm/kernel/psci_smp.c
+++ b/arch/arm/kernel/psci_smp.c
@@ -68,8 +68,6 @@ void __ref psci_cpu_die(unsigned int cpu)
        /* We should never return */
        panic("psci: cpu %d failed to shutdown\n", cpu);
 }
-#else
-#define psci_cpu_die NULL
 #endif
 
 bool __init psci_smp_available(void)
@@ -80,5 +78,7 @@ bool __init psci_smp_available(void)
 
 struct smp_operations __initdata psci_smp_ops = {
 	.smp_boot_secondary	= psci_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
 	.cpu_die		= psci_cpu_die,
+#endif
 };
-- 
cgit 


From 0af0b189abf73d232af782df2f999235cd2fed7f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <Marc.Zyngier@arm.com>
Date: Wed, 30 Jan 2013 18:17:49 +0000
Subject: ARM: hyp: initialize CNTVOFF to zero

In order to be able to use the virtual counter in a safe way,
make sure it is initialized to zero before dropping to SVC.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Dave Martin <dave.martin@linaro.org>
---
 arch/arm/kernel/hyp-stub.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 1315c4ccfa56..dbe21107945a 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -153,6 +153,8 @@ THUMB(	orr	r7, #(1 << 30)	)	@ HSCTLR.TE
 	mrc	p15, 4, r7, c14, c1, 0	@ CNTHCTL
 	orr	r7, r7, #3		@ PL1PCEN | PL1PCTEN
 	mcr	p15, 4, r7, c14, c1, 0	@ CNTHCTL
+	mov	r7, #0
+	mcrr	p15, 4, r7, r7, c14	@ CNTVOFF
 1:
 #endif
 
-- 
cgit 


From 3f71be237ce37e0131973ebfa33b326bc51d743e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <Marc.Zyngier@arm.com>
Date: Tue, 12 Mar 2013 14:56:12 +0000
Subject: ARM: arch_timer: stop virtual timer when booted in HYP mode

When booting the kernel, a bootloader could have left the virtual
timer ticking away, potentially generating interrupts. This could
be troublesome if the user of the virtual timer is not careful
when enabling the interrupt.

In order to avoid any surprise, stop the virtual timer from
interrupting us when booted in HYP mode, as we'll use the physical
timer in this case.

Reported-by: Giridhar Maruthy <giridhar.m@samsung.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Dave Martin <dave.martin@linaro.org>
---
 arch/arm/kernel/hyp-stub.S | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index dbe21107945a..4910232c4833 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -155,6 +155,11 @@ THUMB(	orr	r7, #(1 << 30)	)	@ HSCTLR.TE
 	mcr	p15, 4, r7, c14, c1, 0	@ CNTHCTL
 	mov	r7, #0
 	mcrr	p15, 4, r7, r7, c14	@ CNTVOFF
+
+	@ Disable virtual timer in case it was counting
+	mrc	p15, 0, r7, c14, c3, 1	@ CNTV_CTL
+	bic	r7, #1			@ Clear ENABLE
+	mcr	p15, 0, r7, c14, c3, 1	@ CNTV_CTL
 1:
 #endif
 
-- 
cgit 


From 01fafcab20fbbd2930691c7fdcf177eaa190d499 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 28 Feb 2012 11:50:32 +0000
Subject: ARM: nommu: add entry point for secondary CPUs to head-nommu.S
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds a secondary_startup entry point to head-nommu.S so that
we can boot secondary CPUs on an SMP nommu configuration.

Signed-off-by: Will Deacon <will.deacon@arm.com>
CC: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
CC: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/kernel/head-nommu.S | 50 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 4 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 8812ce88f7a1..06ba9c8e62be 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -63,12 +63,56 @@ ENTRY(stext)
 	movs	r10, r5				@ invalid processor (r5=0)?
 	beq	__error_p				@ yes, error 'p'
 
-	adr	lr, BSYM(__after_proc_init)	@ return (PIC) address
+	ldr	r13, =__mmap_switched		@ address to jump to after
+						@ initialising sctlr
+	adr	lr, BSYM(1f)			@ return (PIC) address
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
  THUMB(	mov	pc, r12				)
+ 1:	b	__after_proc_init
 ENDPROC(stext)
 
+#ifdef CONFIG_SMP
+	__CPUINIT
+ENTRY(secondary_startup)
+	/*
+	 * Common entry point for secondary CPUs.
+	 *
+	 * Ensure that we're in SVC mode, and IRQs are disabled.  Lookup
+	 * the processor type - there is no need to check the machine type
+	 * as it has already been validated by the primary processor.
+	 */
+	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9
+#ifndef CONFIG_CPU_CP15
+	ldr	r9, =CONFIG_PROCESSOR_ID
+#else
+	mrc	p15, 0, r9, c0, c0		@ get processor id
+#endif
+	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
+	movs	r10, r5				@ invalid processor?
+	beq	__error_p			@ yes, error 'p'
+
+	adr	r4, __secondary_data
+	ldmia	r4, {r7, r12}
+	adr	lr, BSYM(__after_proc_init)	@ return address
+	mov	r13, r12			@ __secondary_switched address
+ ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
+ THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
+ THUMB(	mov	pc, r12				)
+ENDPROC(secondary_startup)
+
+ENTRY(__secondary_switched)
+	ldr	sp, [r7, #8]			@ set up the stack pointer
+	mov	fp, #0
+	b	secondary_start_kernel
+ENDPROC(__secondary_switched)
+
+	.type	__secondary_data, %object
+__secondary_data:
+	.long	secondary_data
+	.long	__secondary_switched
+#endif /* CONFIG_SMP */
+
 /*
  * Set the Control Register and Read the process ID.
  */
@@ -99,9 +143,7 @@ __after_proc_init:
 #endif
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
 #endif /* CONFIG_CPU_CP15 */
-
-	b	__mmap_switched			@ clear the BSS and jump
-						@ to start_kernel
+	mov	pc, r13
 ENDPROC(__after_proc_init)
 	.ltorg
 
-- 
cgit 


From c4a1f032ed35d744e3d74b8aebe8d85f29aecd88 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 28 Feb 2012 13:02:59 +0000
Subject: ARM: nommu: do not initialise page tables in secondary_data structure

nommu systems do not require any page tables, so don't try to initialise
them when bringing up secondary cores.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/smp.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 550d63cef68e..44d1c00dc45f 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -87,8 +87,10 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * its stack and the page tables.
 	 */
 	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+#ifdef CONFIG_MMU
 	secondary_data.pgdir = virt_to_phys(idmap_pgd);
 	secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
+#endif
 	__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
 	outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
 
-- 
cgit 


From aa1aadc3305c4917c39f0291613a5ec81dd4c73b Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 23 Feb 2012 13:51:38 +0000
Subject: ARM: suspend: fix CPU suspend code for !CONFIG_MMU configurations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ARM CPU suspend code can be selected even for a !CONFIG_MMU
configuration. The resulting kernel will not compile and, even if it did,
would access undefined co-processor registers when executing.

This patch fixes the v6 and v7 CPU suspend code for the nommu case.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Jonathan Austin <jonathan.austin@arm.com>
CC: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> (commit_signer:1/3=33%)
CC: Santosh Shilimkar <santosh.shilimkar@ti.com> (commit_signer:1/3=33%)
CC: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 arch/arm/kernel/suspend.c | 64 ++++++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 28 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index c59c97ea8268..38a50676213b 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -10,6 +10,42 @@
 extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
 extern void cpu_resume_mmu(void);
 
+#ifdef CONFIG_MMU
+/*
+ * Hide the first two arguments to __cpu_suspend - these are an implementation
+ * detail which platform code shouldn't have to know about.
+ */
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	struct mm_struct *mm = current->active_mm;
+	int ret;
+
+	if (!idmap_pgd)
+		return -EINVAL;
+
+	/*
+	 * Provide a temporary page table with an identity mapping for
+	 * the MMU-enable code, required for resuming.  On successful
+	 * resume (indicated by a zero return code), we need to switch
+	 * back to the correct page tables.
+	 */
+	ret = __cpu_suspend(arg, fn);
+	if (ret == 0) {
+		cpu_switch_mm(mm->pgd, mm);
+		local_flush_bp_all();
+		local_flush_tlb_all();
+	}
+
+	return ret;
+}
+#else
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	return __cpu_suspend(arg, fn);
+}
+#define	idmap_pgd	NULL
+#endif
+
 /*
  * This is called by __cpu_suspend() to save the state, and do whatever
  * flushing is required to ensure that when the CPU goes to sleep we have
@@ -46,31 +82,3 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
 	outer_clean_range(virt_to_phys(save_ptr),
 			  virt_to_phys(save_ptr) + sizeof(*save_ptr));
 }
-
-/*
- * Hide the first two arguments to __cpu_suspend - these are an implementation
- * detail which platform code shouldn't have to know about.
- */
-int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
-{
-	struct mm_struct *mm = current->active_mm;
-	int ret;
-
-	if (!idmap_pgd)
-		return -EINVAL;
-
-	/*
-	 * Provide a temporary page table with an identity mapping for
-	 * the MMU-enable code, required for resuming.  On successful
-	 * resume (indicated by a zero return code), we need to switch
-	 * back to the correct page tables.
-	 */
-	ret = __cpu_suspend(arg, fn);
-	if (ret == 0) {
-		cpu_switch_mm(mm->pgd, mm);
-		local_flush_bp_all();
-		local_flush_tlb_all();
-	}
-
-	return ret;
-}
-- 
cgit 


From 8006b4d1a7c70f27a87cb753b5ed90483f0cfe26 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Wed, 24 Apr 2013 11:51:38 +0100
Subject: ARM: nommu: Don't build smp_tlb.c for !CONFIG_MMU

Without an MMU we don't need to do any TLB maintenance. Until the addition
of 93dc68876b60 (ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181
(TLBI/DSB operations)) building the tlb maintenance ops in smp_tlb.c worked,
though none of the contents were used.

Since that commit, however, SMP NOMMU has not been able to build. This patch
restores that ability by making the building of smp_tlb.c dependent on MMU.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
CC: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index f4285b5ffb05..fccfbdb03df1 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -38,7 +38,10 @@ obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
 obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
-obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
+obj-$(CONFIG_SMP)		+= smp.o
+ifdef CONFIG_MMU
+obj-$(CONFIG_SMP)		+= smp_tlb.o
+endif
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_ARM_ARCH_TIMER)	+= arch_timer.o
-- 
cgit 


From 67c9845beab16a0c97b9c07f72a4b36b7175bb86 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Fri, 22 Feb 2013 17:48:56 +0000
Subject: ARM: mpu: add early bring-up code for the ARMv7 PMSA-compliant MPU

This patch adds initial support for using the MPU, which is necessary for
SMP operation on PMSAv7 processors because it is the only way to ensure
memory is shared. This is an initial patch and full SMP support is added
later in this series.

The setup of the MPU is performed in a way analogous to that for the MMU:
very early initialisation before the C environment is brought up, followed
by a sanity check and more complete initialisation in C.

This patch provides the simplest possible memory region configuration:
MPU_PROBE_REGION: Reserved for probing MPU details, not enabled
MPU_BG_REGION: A 'background' region that specifies all memory strongly ordered
MPU_RAM_REGION: A single shared, cacheable, normal region for the valid RAM.

In this early initialisation code we simply map the whole of the address
space with the BG_REGION and (at least) the kernel with the RAM_REGION. The
MPU has region alignment constraints that require us to round past the end
of the kernel.

As region 2 has a higher priority than region 1, it overrides the strongly-
ordered behaviour for RAM only.

Subsequent patches will add more complete initialisation from the C-world
and support for bringing up secondary CPUs.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
CC: Hyok S. Choi <hyok.choi@samsung.com>
---
 arch/arm/kernel/head-nommu.S | 87 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 06ba9c8e62be..659912c49571 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -17,9 +17,11 @@
 #include <asm/assembler.h>
 #include <asm/ptrace.h>
 #include <asm/asm-offsets.h>
+#include <asm/memory.h>
 #include <asm/cp15.h>
 #include <asm/thread_info.h>
 #include <asm/v7m.h>
+#include <asm/mpu.h>
 
 /*
  * Kernel startup entry point.
@@ -63,6 +65,17 @@ ENTRY(stext)
 	movs	r10, r5				@ invalid processor (r5=0)?
 	beq	__error_p				@ yes, error 'p'
 
+#ifdef CONFIG_ARM_MPU
+	/* Calculate the size of a region covering just the kernel */
+	ldr	r5, =PHYS_OFFSET		@ Region start: PHYS_OFFSET
+	ldr     r6, =(_end)			@ Cover whole kernel
+	sub	r6, r6, r5			@ Minimum size of region to map
+	clz	r6, r6				@ Region size must be 2^N...
+	rsb	r6, r6, #31			@ ...so round up region size
+	lsl	r6, r6, #MPU_RSR_SZ		@ Put size in right field
+	orr	r6, r6, #(1 << MPU_RSR_EN)	@ Set region enabled bit
+	bl	__setup_mpu
+#endif
 	ldr	r13, =__mmap_switched		@ address to jump to after
 						@ initialising sctlr
 	adr	lr, BSYM(1f)			@ return (PIC) address
@@ -147,4 +160,78 @@ __after_proc_init:
 ENDPROC(__after_proc_init)
 	.ltorg
 
+#ifdef CONFIG_ARM_MPU
+
+
+/* Set which MPU region should be programmed */
+.macro set_region_nr tmp, rgnr
+	mov	\tmp, \rgnr			@ Use static region numbers
+	mcr	p15, 0, \tmp, c6, c2, 0		@ Write RGNR
+.endm
+
+/* Setup a single MPU region, either D or I side (D-side for unified) */
+.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE
+	mcr	p15, 0, \bar, c6, c1, (0 + \side)	@ I/DRBAR
+	mcr	p15, 0, \acr, c6, c1, (4 + \side)	@ I/DRACR
+	mcr	p15, 0, \sr, c6, c1, (2 + \side)		@ I/DRSR
+.endm
+
+/*
+ * Setup the MPU and initial MPU Regions. We create the following regions:
+ * Region 0: Use this for probing the MPU details, so leave disabled.
+ * Region 1: Background region - covers the whole of RAM as strongly ordered
+ * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6
+ *
+ * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION
+*/
+
+ENTRY(__setup_mpu)
+
+	/* Probe for v7 PMSA compliance */
+	mrc	p15, 0, r0, c0, c1, 4		@ Read ID_MMFR0
+	and	r0, r0, #(MMFR0_PMSA)		@ PMSA field
+	teq	r0, #(MMFR0_PMSAv7)		@ PMSA v7
+	bne	__error_p			@ Fail: ARM_MPU on NOT v7 PMSA
+
+	/* Determine whether the D/I-side memory map is unified. We set the
+	 * flags here and continue to use them for the rest of this function */
+	mrc	p15, 0, r0, c0, c0, 4		@ MPUIR
+	ands	r5, r0, #MPUIR_DREGION_SZMASK	@ 0 size d region => No MPU
+	beq	__error_p			@ Fail: ARM_MPU and no MPU
+	tst	r0, #MPUIR_nU			@ MPUIR_nU = 0 for unified
+
+	/* Setup second region first to free up r6 */
+	set_region_nr r0, #MPU_RAM_REGION
+	isb
+	/* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */
+	ldr	r0, =PHYS_OFFSET		@ RAM starts at PHYS_OFFSET
+	ldr	r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL)
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE	@ PHYS_OFFSET, shared, enabled
+	beq	1f				@ Memory-map not unified
+	setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled
+1:	isb
+
+	/* First/background region */
+	set_region_nr r0, #MPU_BG_REGION
+	isb
+	/* Execute Never,  strongly ordered, inaccessible to PL0, rw PL1  */
+	mov	r0, #0				@ BG region starts at 0x0
+	ldr	r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA)
+	mov	r6, #MPU_RSR_ALL_MEM		@ 4GB region, enabled
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE	@ 0x0, BG region, enabled
+	beq	2f				@ Memory-map not unified
+	setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled
+2:	isb
+
+	/* Enable the MPU */
+	mrc	p15, 0, r0, c1, c0, 0		@ Read SCTLR
+	bic     r0, r0, #CR_BR			@ Disable the 'default mem-map'
+	orr	r0, r0, #CR_M			@ Set SCTRL.M (MPU on)
+	mcr	p15, 0, r0, c1, c0, 0		@ Enable MPU
+	isb
+	mov pc,lr
+ENDPROC(__setup_mpu)
+#endif
 #include "head-common.S"
-- 
cgit 


From eb08375ea66e63c5e11dea69b43c5633d531ce81 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Fri, 22 Feb 2013 18:51:30 +0000
Subject: ARM: mpu: add MPU initialisation for secondary cores

The MPU initialisation on the primary core is performed in two stages, one
minimal stage to ensure the CPU can boot and a second one after
sanity_check_meminfo. As the memory configuration is known by the time we
boot secondary cores only a single step is necessary, provided the values
for DRSR are passed to secondaries.

This patch implements this arrangement. The configuration generated for the
MPU regions is made available to the secondary core, which can then use the
asm MPU initialisation code to program a complete region configuration.

This is necessary for SMP configurations without an MMU, as the MPU
initialisation is the only way to ensure that memory is specified as
'shared'.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
CC: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/kernel/head-nommu.S | 7 +++++++
 arch/arm/kernel/smp.c        | 8 ++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 659912c49571..13741d004de5 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -107,6 +107,13 @@ ENTRY(secondary_startup)
 
 	adr	r4, __secondary_data
 	ldmia	r4, {r7, r12}
+
+#ifdef CONFIG_ARM_MPU
+	/* Use MPU region info supplied by __cpu_up */
+	ldr	r6, [r7]			@ get secondary_data.mpu_szr
+	bl      __setup_mpu			@ Initialize the MPU
+#endif
+
 	adr	lr, BSYM(__after_proc_init)	@ return address
 	mov	r13, r12			@ __secondary_switched address
  ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 44d1c00dc45f..e17d9346baee 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -45,6 +45,7 @@
 #include <asm/smp_plat.h>
 #include <asm/virt.h>
 #include <asm/mach/arch.h>
+#include <asm/mpu.h>
 
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
@@ -87,6 +88,10 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * its stack and the page tables.
 	 */
 	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+#ifdef CONFIG_ARM_MPU
+	secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr;
+#endif
+
 #ifdef CONFIG_MMU
 	secondary_data.pgdir = virt_to_phys(idmap_pgd);
 	secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
@@ -114,9 +119,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
 	}
 
-	secondary_data.stack = NULL;
-	secondary_data.pgdir = 0;
 
+	memset(&secondary_data, 0, sizeof(secondary_data));
 	return ret;
 }
 
-- 
cgit 


From 1aa2b3b7a6c4f3dbd3671171113a20e6a6190e3b Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 5 Jun 2013 11:25:13 +0100
Subject: ARM: 7748/1: oabi: handle faults when loading swi instruction from
 userspace

Running an OABI_COMPAT kernel on an SMP platform can lead to fun and
games with page aging.

If one CPU issues a swi instruction immediately before another CPU
decides to mkold the page containing the swi instruction, then we will
fault attempting to load the instruction during the vector_swi handler
in order to retrieve its immediate field. Since this fault is not
currently dealt with by our exception tables, this results in a panic:

  Unable to handle kernel paging request at virtual address 4020841c
  pgd = c490c000
  [4020841c] *pgd=84451831, *pte=bf05859d, *ppte=00000000
  Internal error: Oops: 17 [#1] PREEMPT SMP ARM
  Modules linked in: hid_sony(O)
  CPU: 1    Tainted: G        W  O  (3.4.0-perf-gf496dca-01162-gcbcc62b #1)
  PC is at vector_swi+0x28/0x88
  LR is at 0x40208420

This patch wraps all of the swi instruction loads with the USER macro
and provides a shared exception table entry which simply rewinds the
saved user PC and returns from the system call (without setting tbl, so
there's no worries with tracing or syscall restarting). Returning to
userspace will re-enter the page fault handler, from where we will
probably send SIGSEGV to the current task.

Reported-by: Wang, Yalin <yalin.wang@sonymobile.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-common.S | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index bc5bc0a97131..4bc816a74a2e 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -362,6 +362,16 @@ ENTRY(vector_swi)
 	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
 	zero_fp
 
+#ifdef CONFIG_ALIGNMENT_TRAP
+	ldr	ip, __cr_alignment
+	ldr	ip, [ip]
+	mcr	p15, 0, ip, c1, c0		@ update control register
+#endif
+
+	enable_irq
+	ct_user_exit
+	get_thread_info tsk
+
 	/*
 	 * Get the system call number.
 	 */
@@ -375,9 +385,9 @@ ENTRY(vector_swi)
 #ifdef CONFIG_ARM_THUMB
 	tst	r8, #PSR_T_BIT
 	movne	r10, #0				@ no thumb OABI emulation
-	ldreq	r10, [lr, #-4]			@ get SWI instruction
+ USER(	ldreq	r10, [lr, #-4]		)	@ get SWI instruction
 #else
-	ldr	r10, [lr, #-4]			@ get SWI instruction
+ USER(	ldr	r10, [lr, #-4]		)	@ get SWI instruction
 #endif
 #ifdef CONFIG_CPU_ENDIAN_BE8
 	rev	r10, r10			@ little endian instruction
@@ -392,22 +402,13 @@ ENTRY(vector_swi)
 	/* Legacy ABI only, possibly thumb mode. */
 	tst	r8, #PSR_T_BIT			@ this is SPSR from save_user_regs
 	addne	scno, r7, #__NR_SYSCALL_BASE	@ put OS number in
-	ldreq	scno, [lr, #-4]
+ USER(	ldreq	scno, [lr, #-4]		)
 
 #else
 	/* Legacy ABI only. */
-	ldr	scno, [lr, #-4]			@ get SWI instruction
+ USER(	ldr	scno, [lr, #-4]		)	@ get SWI instruction
 #endif
 
-#ifdef CONFIG_ALIGNMENT_TRAP
-	ldr	ip, __cr_alignment
-	ldr	ip, [ip]
-	mcr	p15, 0, ip, c1, c0		@ update control register
-#endif
-	enable_irq
-	ct_user_exit
-
-	get_thread_info tsk
 	adr	tbl, sys_call_table		@ load syscall table pointer
 
 #if defined(CONFIG_OABI_COMPAT)
@@ -442,6 +443,21 @@ local_restart:
 	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
 	bcs	arm_syscall	
 	b	sys_ni_syscall			@ not private func
+
+#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
+	/*
+	 * We failed to handle a fault trying to access the page
+	 * containing the swi instruction, but we're not really in a
+	 * position to return -EFAULT. Instead, return back to the
+	 * instruction and re-enter the user fault handling path trying
+	 * to page it in. This will likely result in sending SEGV to the
+	 * current task.
+	 */
+9001:
+	sub	lr, lr, #4
+	str	lr, [sp, #S_PC]
+	b	ret_fast_syscall
+#endif
 ENDPROC(vector_swi)
 
 	/*
-- 
cgit 


From 9dfc28b6308096e48b54c28259825a1200f60742 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 18 Apr 2013 18:37:24 +0100
Subject: ARM: mpu: protect the vectors page with an MPU region

Without an MMU it is possible for userspace programs to start executing code
in places that they have no business executing. The MPU allows some level of
protection against this.

This patch protects the vectors page from access by userspace processes.
Userspace tasks that dereference a null pointer are already protected by an
svc at 0x0 that kills them. However when tasks use an offset from a null
pointer (eg a function in a null struct) they miss this carefully placed svc
and enter the exception vectors in user mode, ending up in the kernel.

This patch causes programs that do this to receive a SEGV instead of happily
entering the kernel in user-mode, and hence avoid a 'Bad Mode' panic.

As part of this change it is necessary to make sigreturn happen via the
stack when there is not an sa_restorer function. This change is invisible to
userspace, and irrelevant to code compiled using a uClibc toolchain, which
always uses an sa_restorer function.

Because we don't get to remap the vectors in !MMU, kuser_helpers are not
in a defined location, and hence aren't usable. This means we don't need to
worry about keeping them accessible from PL0.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
CC: Nicolas Pitre <nico@linaro.org>
CC: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/kernel/head-nommu.S | 16 ++++++++++++++++
 arch/arm/kernel/signal.c     |  9 +++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 13741d004de5..75f14cc3e073 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -22,6 +22,7 @@
 #include <asm/thread_info.h>
 #include <asm/v7m.h>
 #include <asm/mpu.h>
+#include <asm/page.h>
 
 /*
  * Kernel startup entry point.
@@ -188,6 +189,7 @@ ENDPROC(__after_proc_init)
  * Region 0: Use this for probing the MPU details, so leave disabled.
  * Region 1: Background region - covers the whole of RAM as strongly ordered
  * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6
+ * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page
  *
  * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION
 */
@@ -232,6 +234,20 @@ ENTRY(__setup_mpu)
 	setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled
 2:	isb
 
+	/* Vectors region */
+	set_region_nr r0, #MPU_VECTORS_REGION
+	isb
+	/* Shared, inaccessible to PL0, rw PL1 */
+	mov	r0, #CONFIG_VECTORS_BASE	@ Cover from VECTORS_BASE
+	ldr	r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL)
+	/* Writing N to bits 5:1 (RSR_SZ) --> region size 2^(N+1) */
+	mov	r6, #(((PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN)
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE	@ VECTORS_BASE, PL0 NA, enabled
+	beq	3f				@ Memory-map not unified
+	setup_region r0, r5, r6, MPU_INSTR_SIDE	@ VECTORS_BASE, PL0 NA, enabled
+3:	isb
+
 	/* Enable the MPU */
 	mrc	p15, 0, r0, c1, c0, 0		@ Read SCTLR
 	bic     r0, r0, #CR_BR			@ Disable the 'default mem-map'
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 296786bdbb73..1c16c35c271a 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -392,14 +392,19 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig,
 		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
 			idx += 3;
 
+		/*
+		 * Put the sigreturn code on the stack no matter which return
+		 * mechanism we use in order to remain ABI compliant
+		 */
 		if (__put_user(sigreturn_codes[idx],   rc) ||
 		    __put_user(sigreturn_codes[idx+1], rc+1))
 			return 1;
 
-		if (cpsr & MODE32_BIT) {
+		if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) {
 			/*
 			 * 32-bit code can use the new high-page
-			 * signal return code support.
+			 * signal return code support except when the MPU has
+			 * protected the vectors page from PL0
 			 */
 			retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb;
 		} else {
-- 
cgit 


From 8cf72172d739639f2699131821a3ebc291287cf2 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Thu, 16 May 2013 10:32:09 +0100
Subject: ARM: kernel: build MPIDR hash function data structure

On ARM SMP systems, cores are identified by their MPIDR register.
The MPIDR guidelines in the ARM ARM do not provide strict enforcement of
MPIDR layout, only recommendations that, if followed, split the MPIDR
on ARM 32 bit platforms in three affinity levels. In multi-cluster
systems like big.LITTLE, if the affinity guidelines are followed, the
MPIDR can not be considered an index anymore. This means that the
association between logical CPU in the kernel and the HW CPU identifier
becomes somewhat more complicated requiring methods like hashing to
associate a given MPIDR to a CPU logical index, in order for the look-up
to be carried out in an efficient and scalable way.

This patch provides a function in the kernel that, starting from the
cpu_logical_map, implements collision-free hashing of MPIDR values by checking
all significant bits of MPIDR affinity level bitfields. The hashing
can then be carried out through bit shifting and ORing; the resulting
hash algorithm is a collision-free though not minimal hash that can be
executed with few assembly instructions. The mpidr is filtered through a
mpidr mask that is built by checking all bits that toggle in the set of
MPIDRs corresponding to possible CPUs. Bits that do not toggle do not carry
information so they do not contribute to the resulting hash.

Pseudo code:

/* check all bits that toggle, so they are required */
for (i = 1, mpidr_mask = 0; i < num_possible_cpus(); i++)
	mpidr_mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));

/*
 * Build shifts to be applied to aff0, aff1, aff2 values to hash the mpidr
 * fls() returns the last bit set in a word, 0 if none
 * ffs() returns the first bit set in a word, 0 if none
 */
fs0 = mpidr_mask[7:0] ? ffs(mpidr_mask[7:0]) - 1 : 0;
fs1 = mpidr_mask[15:8] ? ffs(mpidr_mask[15:8]) - 1 : 0;
fs2 = mpidr_mask[23:16] ? ffs(mpidr_mask[23:16]) - 1 : 0;
ls0 = fls(mpidr_mask[7:0]);
ls1 = fls(mpidr_mask[15:8]);
ls2 = fls(mpidr_mask[23:16]);
bits0 = ls0 - fs0;
bits1 = ls1 - fs1;
bits2 = ls2 - fs2;
aff0_shift = fs0;
aff1_shift = 8 + fs1 - bits0;
aff2_shift = 16 + fs2 - (bits0 + bits1);
u32 hash(u32 mpidr) {
	u32 l0, l1, l2;
	u32 mpidr_masked = mpidr & mpidr_mask;
	l0 = mpidr_masked & 0xff;
	l1 = mpidr_masked & 0xff00;
	l2 = mpidr_masked & 0xff0000;
	return (l0 >> aff0_shift | l1 >> aff1_shift | l2 >> aff2_shift);
}

The hashing algorithm relies on the inherent properties set in the ARM ARM
recommendations for the MPIDR. Exotic configurations, where for instance the
MPIDR values at a given affinity level have large holes, can end up requiring
big hash tables since the compression of values that can be achieved through
shifting is somewhat crippled when holes are present. The kernel warns if
the number of buckets of the resulting hash table exceeds the number of
possible CPUs by a factor of 4, which is a symptom of a very sparse HW
MPIDR configuration.

The hash algorithm is quite simple and can easily be implemented in assembly
code, to be used in code paths where the kernel virtual address space is
not set-up (ie cpu_resume) and instruction and data fetches are strongly
ordered so code must be compact and must carry out few data accesses.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Colin Cross <ccross@android.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Amit Kucheria <amit.kucheria@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Stephen Warren <swarren@wwwdotorg.org>
---
 arch/arm/kernel/setup.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index ca34224f891f..9048513cbe0d 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -478,6 +478,72 @@ void __init smp_setup_processor_id(void)
 	printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr);
 }
 
+struct mpidr_hash mpidr_hash;
+#ifdef CONFIG_SMP
+/**
+ * smp_build_mpidr_hash - Pre-compute shifts required at each affinity
+ *			  level in order to build a linear index from an
+ *			  MPIDR value. Resulting algorithm is a collision
+ *			  free hash carried out through shifting and ORing
+ */
+static void __init smp_build_mpidr_hash(void)
+{
+	u32 i, affinity;
+	u32 fs[3], bits[3], ls, mask = 0;
+	/*
+	 * Pre-scan the list of MPIDRS and filter out bits that do
+	 * not contribute to affinity levels, ie they never toggle.
+	 */
+	for_each_possible_cpu(i)
+		mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));
+	pr_debug("mask of set bits 0x%x\n", mask);
+	/*
+	 * Find and stash the last and first bit set at all affinity levels to
+	 * check how many bits are required to represent them.
+	 */
+	for (i = 0; i < 3; i++) {
+		affinity = MPIDR_AFFINITY_LEVEL(mask, i);
+		/*
+		 * Find the MSB bit and LSB bits position
+		 * to determine how many bits are required
+		 * to express the affinity level.
+		 */
+		ls = fls(affinity);
+		fs[i] = affinity ? ffs(affinity) - 1 : 0;
+		bits[i] = ls - fs[i];
+	}
+	/*
+	 * An index can be created from the MPIDR by isolating the
+	 * significant bits at each affinity level and by shifting
+	 * them in order to compress the 24 bits values space to a
+	 * compressed set of values. This is equivalent to hashing
+	 * the MPIDR through shifting and ORing. It is a collision free
+	 * hash though not minimal since some levels might contain a number
+	 * of CPUs that is not an exact power of 2 and their bit
+	 * representation might contain holes, eg MPIDR[7:0] = {0x2, 0x80}.
+	 */
+	mpidr_hash.shift_aff[0] = fs[0];
+	mpidr_hash.shift_aff[1] = MPIDR_LEVEL_BITS + fs[1] - bits[0];
+	mpidr_hash.shift_aff[2] = 2*MPIDR_LEVEL_BITS + fs[2] -
+						(bits[1] + bits[0]);
+	mpidr_hash.mask = mask;
+	mpidr_hash.bits = bits[2] + bits[1] + bits[0];
+	pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] mask[0x%x] bits[%u]\n",
+				mpidr_hash.shift_aff[0],
+				mpidr_hash.shift_aff[1],
+				mpidr_hash.shift_aff[2],
+				mpidr_hash.mask,
+				mpidr_hash.bits);
+	/*
+	 * 4x is an arbitrary value used to warn on a hash table much bigger
+	 * than expected on most systems.
+	 */
+	if (mpidr_hash_size() > 4 * num_possible_cpus())
+		pr_warn("Large number of MPIDR hash buckets detected\n");
+	sync_cache_w(&mpidr_hash);
+}
+#endif
+
 static void __init setup_processor(void)
 {
 	struct proc_info_list *list;
@@ -825,6 +891,7 @@ void __init setup_arch(char **cmdline_p)
 				smp_set_ops(mdesc->smp);
 		}
 		smp_init_cpus();
+		smp_build_mpidr_hash();
 	}
 #endif
 
-- 
cgit 


From 7604537bbb5720376e8c9e6bc74a8e6305e3094d Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Thu, 16 May 2013 10:34:30 +0100
Subject: ARM: kernel: implement stack pointer save array through MPIDR hashing

Current implementation of cpu_{suspend}/cpu_{resume} relies on the MPIDR
to index the array of pointers where the context is saved and restored.
The current approach works as long as the MPIDR can be considered a
linear index, so that the pointers array can simply be dereferenced by
using the MPIDR[7:0] value.
On ARM multi-cluster systems, where the MPIDR may not be a linear index,
to properly dereference the stack pointer array, a mapping function should
be applied to it so that it can be used for array look-ups.

This patch adds code in the cpu_{suspend}/cpu_{resume} implementation
that relies on a shifting and ORing hashing method to map a MPIDR value to a
set of buckets precomputed at boot to have a collision free mapping from
MPIDR to context pointers.

The hashing algorithm must be simple, fast, and implementable with few
instructions since in the cpu_resume path the mapping is carried out with
the MMU off and the I-cache off, hence code and data are fetched from DRAM
with no caching available. Simplicity is counterbalanced by a small
increase in memory (allocated dynamically) for stack pointer buckets, which
should anyway be fairly limited on most systems.

Memory for context pointers is allocated in an early_initcall with
size precomputed and stashed previously in kernel data structures.
Memory for context pointers is allocated through kmalloc; this
guarantees contiguous physical addresses for the allocated memory which
is fundamental to the correct functioning of the resume mechanism that
relies on the context pointer array to be a chunk of contiguous physical
memory. Virtual to physical address conversion for the context pointer
array base is carried out at boot to avoid fiddling with virt_to_phys
conversions in the cpu_resume path which is quite fragile and should be
optimized to execute as few instructions as possible.
Virtual and physical context pointer base array addresses are stashed in a
struct that is accessible from assembly using values generated through the
asm-offsets.c mechanism.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Colin Cross <ccross@android.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Amit Kucheria <amit.kucheria@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Stephen Warren <swarren@wwwdotorg.org>
---
 arch/arm/kernel/asm-offsets.c |  6 +++
 arch/arm/kernel/sleep.S       | 97 +++++++++++++++++++++++++++++++++++--------
 arch/arm/kernel/suspend.c     | 20 +++++++++
 3 files changed, 105 insertions(+), 18 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index ee68cce6b48e..ded041711beb 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -23,6 +23,7 @@
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/procinfo.h>
+#include <asm/suspend.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <linux/kbuild.h>
 
@@ -144,6 +145,11 @@ int main(void)
 #endif
 #ifdef MULTI_CACHE
   DEFINE(CACHE_FLUSH_KERN_ALL,	offsetof(struct cpu_cache_fns, flush_kern_all));
+#endif
+#ifdef CONFIG_ARM_CPU_SUSPEND
+  DEFINE(SLEEP_SAVE_SP_SZ,	sizeof(struct sleep_save_sp));
+  DEFINE(SLEEP_SAVE_SP_PHYS,	offsetof(struct sleep_save_sp, save_ptr_stash_phys));
+  DEFINE(SLEEP_SAVE_SP_VIRT,	offsetof(struct sleep_save_sp, save_ptr_stash));
 #endif
   BLANK();
   DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 987dcf33415c..db1536b8b30b 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -6,6 +6,49 @@
 #include <asm/glue-proc.h>
 	.text
 
+/*
+ * Implementation of MPIDR hash algorithm through shifting
+ * and OR'ing.
+ *
+ * @dst: register containing hash result
+ * @rs0: register containing affinity level 0 bit shift
+ * @rs1: register containing affinity level 1 bit shift
+ * @rs2: register containing affinity level 2 bit shift
+ * @mpidr: register containing MPIDR value
+ * @mask: register containing MPIDR mask
+ *
+ * Pseudo C-code:
+ *
+ *u32 dst;
+ *
+ *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) {
+ *	u32 aff0, aff1, aff2;
+ *	u32 mpidr_masked = mpidr & mask;
+ *	aff0 = mpidr_masked & 0xff;
+ *	aff1 = mpidr_masked & 0xff00;
+ *	aff2 = mpidr_masked & 0xff0000;
+ *	dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2);
+ *}
+ * Input registers: rs0, rs1, rs2, mpidr, mask
+ * Output register: dst
+ * Note: input and output registers must be disjoint register sets
+         (eg: a macro instance with mpidr = r1 and dst = r1 is invalid)
+ */
+	.macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask
+	and	\mpidr, \mpidr, \mask			@ mask out MPIDR bits
+	and	\dst, \mpidr, #0xff			@ dst = aff0
+ ARM(	mov	\dst, \dst, lsr \rs0		)	@ dst=aff0>>rs0
+ THUMB(	lsr	\dst, \dst, \rs0		)
+	and	\mask, \mpidr, #0xff00			@ mask = aff1
+ ARM(	orr	\dst, \dst, \mask, lsr \rs1	)	@ dst|=(aff1>>rs1)
+ THUMB(	lsr	\mask, \mask, \rs1		)
+ THUMB(	orr	\dst, \dst, \mask		)
+	and	\mask, \mpidr, #0xff0000		@ mask = aff2
+ ARM(	orr	\dst, \dst, \mask, lsr \rs2	)	@ dst|=(aff2>>rs2)
+ THUMB(	lsr	\mask, \mask, \rs2		)
+ THUMB(	orr	\dst, \dst, \mask		)
+	.endm
+
 /*
  * Save CPU state for a suspend.  This saves the CPU general purpose
  * registers, and allocates space on the kernel stack to save the CPU
@@ -29,12 +72,18 @@ ENTRY(__cpu_suspend)
 	mov	r1, r4			@ size of save block
 	mov	r2, r5			@ virtual SP
 	ldr	r3, =sleep_save_sp
-#ifdef CONFIG_SMP
-	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
-	ALT_UP(mov lr, #0)
-	and	lr, lr, #15
+	ldr	r3, [r3, #SLEEP_SAVE_SP_VIRT]
+	ALT_SMP(mrc p15, 0, r9, c0, c0, 5)
+        ALT_UP_B(1f)
+	ldr	r8, =mpidr_hash
+	/*
+	 * This ldmia relies on the memory layout of the mpidr_hash
+	 * struct mpidr_hash.
+	 */
+	ldmia	r8, {r4-r7}	@ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts
+	compute_mpidr_hash	lr, r5, r6, r7, r9, r4
 	add	r3, r3, lr, lsl #2
-#endif
+1:
 	bl	__cpu_suspend_save
 	adr	lr, BSYM(cpu_suspend_abort)
 	ldmfd	sp!, {r0, pc}		@ call suspend fn
@@ -81,15 +130,23 @@ ENDPROC(cpu_resume_after_mmu)
 	.data
 	.align
 ENTRY(cpu_resume)
-#ifdef CONFIG_SMP
-	adr	r0, sleep_save_sp
-	ALT_SMP(mrc p15, 0, r1, c0, c0, 5)
-	ALT_UP(mov r1, #0)
-	and	r1, r1, #15
-	ldr	r0, [r0, r1, lsl #2]	@ stack phys addr
-#else
-	ldr	r0, sleep_save_sp	@ stack phys addr
-#endif
+	mov	r1, #0
+	ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
+	ALT_UP_B(1f)
+	adr	r2, mpidr_hash_ptr
+	ldr	r3, [r2]
+	add	r2, r2, r3		@ r2 = struct mpidr_hash phys address
+	/*
+	 * This ldmia relies on the memory layout of the mpidr_hash
+	 * struct mpidr_hash.
+	 */
+	ldmia	r2, { r3-r6 }	@ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts
+	compute_mpidr_hash	r1, r4, r5, r6, r0, r3
+1:
+	adr	r0, _sleep_save_sp
+	ldr	r0, [r0, #SLEEP_SAVE_SP_PHYS]
+	ldr	r0, [r0, r1, lsl #2]
+
 	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
 	@ load phys pgd, stack, resume fn
   ARM(	ldmia	r0!, {r1, sp, pc}	)
@@ -98,7 +155,11 @@ THUMB(	mov	sp, r2			)
 THUMB(	bx	r3			)
 ENDPROC(cpu_resume)
 
-sleep_save_sp:
-	.rept	CONFIG_NR_CPUS
-	.long	0				@ preserve stack phys ptr here
-	.endr
+	.align 2
+mpidr_hash_ptr:
+	.long	mpidr_hash - .			@ mpidr_hash struct offset
+
+	.type	sleep_save_sp, #object
+ENTRY(sleep_save_sp)
+_sleep_save_sp:
+	.space	SLEEP_SAVE_SP_SZ		@ struct sleep_save_sp
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index 38a50676213b..41cf3cbf756d 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -1,9 +1,12 @@
 #include <linux/init.h>
+#include <linux/slab.h>
 
+#include <asm/cacheflush.h>
 #include <asm/idmap.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/memory.h>
+#include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 
@@ -82,3 +85,20 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
 	outer_clean_range(virt_to_phys(save_ptr),
 			  virt_to_phys(save_ptr) + sizeof(*save_ptr));
 }
+
+extern struct sleep_save_sp sleep_save_sp;
+
+static int cpu_suspend_alloc_sp(void)
+{
+	void *ctx_ptr;
+	/* ctx_ptr is an array of physical addresses */
+	ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(u32), GFP_KERNEL);
+
+	if (WARN_ON(!ctx_ptr))
+		return -ENOMEM;
+	sleep_save_sp.save_ptr_stash = ctx_ptr;
+	sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
+	sync_cache_w(&sleep_save_sp);
+	return 0;
+}
+early_initcall(cpu_suspend_alloc_sp);
-- 
cgit 


From a4780adeefd042482f624f5e0d577bf9cdcbb760 Mon Sep 17 00:00:00 2001
From: André Hentschel <nerv@dawncrow.de>
Date: Tue, 18 Jun 2013 23:23:26 +0100
Subject: ARM: 7735/2: Preserve the user r/w register TPIDRURW on context
 switch and fork
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 6a1c53124aa1 the user writeable TLS register was zeroed to
prevent it from being used as a covert channel between two tasks.

There are more and more applications coming to Windows RT.
Wine could support them, but mostly they expect to have
the thread environment block (TEB) in TPIDRURW.

This patch preserves that register per thread instead of clearing it.
Unlike the TPIDRURO, which is already switched, the TPIDRURW
can be updated from userspace so needs careful treatment in the case that we
modify TPIDRURW and call fork(). To avoid this we must always read
TPIDRURW in copy_thread.

Signed-off-by: André Hentschel <nerv@dawncrow.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-armv.S | 5 +++--
 arch/arm/kernel/process.c    | 4 +++-
 arch/arm/kernel/ptrace.c     | 2 +-
 arch/arm/kernel/traps.c      | 4 ++--
 4 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 582b405befc5..a39cfc2a1f90 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -685,15 +685,16 @@ ENTRY(__switch_to)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
-	ldr	r3, [r2, #TI_TP_VALUE]
  ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
  THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
  THUMB(	str	sp, [ip], #4		   )
  THUMB(	str	lr, [ip], #4		   )
+	ldr	r4, [r2, #TI_TP_VALUE]
+	ldr	r5, [r2, #TI_TP_VALUE + 4]
 #ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-	set_tls	r3, r4, r5
+	switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	ldr	r7, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index f21970316836..087064148ebf 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -343,7 +344,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = childregs->ARM_r3;
+		thread->tp_value[0] = childregs->ARM_r3;
+	thread->tp_value[1] = get_tpuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeffd9f6d..2bc1514d6dbe 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
 		case PTRACE_GET_THREAD_AREA:
-			ret = put_user(task_thread_info(child)->tp_value,
+			ret = put_user(task_thread_info(child)->tp_value[0],
 				       datap);
 			break;
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 18b32e8e4497..517bfd4da1c9 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -581,7 +581,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		return regs->ARM_r0;
 
 	case NR(set_tls):
-		thread->tp_value = regs->ARM_r0;
+		thread->tp_value[0] = regs->ARM_r0;
 		if (tls_emu)
 			return 0;
 		if (has_tls_reg) {
@@ -699,7 +699,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
 	int reg = (instr >> 12) & 15;
 	if (reg == 15)
 		return 1;
-	regs->uregs[reg] = current_thread_info()->tp_value;
+	regs->uregs[reg] = current_thread_info()->tp_value[0];
 	regs->ARM_pc += 4;
 	return 0;
 }
-- 
cgit 


From c5f927a6f62196226915f12194c9d0df4e2210d7 Mon Sep 17 00:00:00 2001
From: Jed Davis <jld@mozilla.com>
Date: Thu, 20 Jun 2013 10:16:29 +0100
Subject: ARM: 7765/1: perf: Record the user-mode PC in the call chain.

With this change, we no longer lose the innermost entry in the user-mode
part of the call chain.  See also the x86 port, which includes the ip.

It's possible to partially work around this problem by post-processing
the data to use the PERF_SAMPLE_IP value, but this works only if the CPU
wasn't in the kernel when the sample was taken.

Cc: <stable@vger.kernel.org>
Signed-off-by: Jed Davis <jld@mozilla.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/perf_event.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 8c3094d0f7b7..d9f5cd4e533f 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -569,6 +569,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		return;
 	}
 
+	perf_callchain_store(entry, regs->ARM_pc);
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
 	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
-- 
cgit 


From 0d0752bca1f9a91fb646647aa4abbb21156f316c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <Marc.Zyngier@arm.com>
Date: Fri, 21 Jun 2013 12:07:27 +0100
Subject: ARM: 7769/1: Cortex-A15: fix erratum 798181 implementation

Looking into the active_asids array is not enough, as we also need
to look into the reserved_asids array (they both represent processes
that are currently running).

Also, not holding the ASID allocator lock is racy, as another CPU
could schedule that process and trigger a rollover, making the erratum
workaround miss an IPI.

Exposing this outside of context.c is a little ugly on the side, so
let's define a new entry point that the erratum workaround can call
to obtain the cpumask.

Cc: <stable@vger.kernel.org> # 3.9
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/smp_tlb.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 9a52a07aa40e..a98b62dca2fa 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void)
 
 static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
 {
-	int cpu, this_cpu;
+	int this_cpu;
 	cpumask_t mask = { CPU_BITS_NONE };
 
 	if (!erratum_a15_798181())
@@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
 
 	dummy_flush_tlb_a15_erratum();
 	this_cpu = get_cpu();
-	for_each_online_cpu(cpu) {
-		if (cpu == this_cpu)
-			continue;
-		/*
-		 * We only need to send an IPI if the other CPUs are running
-		 * the same ASID as the one being invalidated. There is no
-		 * need for locking around the active_asids check since the
-		 * switch_mm() function has at least one dmb() (as required by
-		 * this workaround) in case a context switch happens on
-		 * another CPU after the condition below.
-		 */
-		if (atomic64_read(&mm->context.id) ==
-		    atomic64_read(&per_cpu(active_asids, cpu)))
-			cpumask_set_cpu(cpu, &mask);
-	}
+	a15_erratum_get_cpumask(this_cpu, mm, &mask);
 	smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
 	put_cpu();
 }
-- 
cgit