Diffstat (limited to 'arch/powerpc/kernel/paca.c')
-rw-r--r--	arch/powerpc/kernel/paca.c	354
1 file changed, 231 insertions, 123 deletions
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index f8f24685f10a..7502066c3c53 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -1,112 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * c 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */

 #include <linux/smp.h>
 #include <linux/export.h>
 #include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <linux/numa.h>
+#include <linux/pgtable.h>

 #include <asm/lppaca.h>
 #include <asm/paca.h>
 #include <asm/sections.h>
-#include <asm/pgtable.h>
 #include <asm/kexec.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
+
+#include "setup.h"
+
+#ifndef CONFIG_SMP
+#define boot_cpuid 0
+#endif
+
+static void *__init alloc_paca_data(unsigned long size, unsigned long align,
+				unsigned long limit, int cpu)
+{
+	void *ptr;
+	int nid;
+
+	/*
+	 * boot_cpuid paca is allocated very early before cpu_to_node is up.
+	 * Set bottom-up mode, because the boot CPU should be on node-0,
+	 * which will put its paca in the right place.
+	 */
+	if (cpu == boot_cpuid) {
+		nid = NUMA_NO_NODE;
+		memblock_set_bottom_up(true);
+	} else {
+		nid = early_cpu_to_node(cpu);
+	}
+
+	ptr = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+				     limit, nid);
+	if (!ptr)
+		panic("cannot allocate paca data");
+
+	if (cpu == boot_cpuid)
+		memblock_set_bottom_up(false);
+
+	return ptr;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+
+#define LPPACA_SIZE 0x400

-/* This symbol is provided by the linker - let it fill in the paca
- * field correctly */
-extern unsigned long __toc_start;
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit,
+					int cpu)
+{
+	size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
+	static unsigned long shared_lppaca_size;
+	static void *shared_lppaca;
+	void *ptr;
+
+	if (!shared_lppaca) {
+		memblock_set_bottom_up(true);
+
+		/*
+		 * See Documentation/arch/powerpc/ultravisor.rst for more details.
+		 *
+		 * UV/HV data sharing is in PAGE_SIZE granularity. In order to
+		 * minimize the number of pages shared, align the allocation to
+		 * PAGE_SIZE.
+		 */
+		shared_lppaca =
+			memblock_alloc_try_nid(shared_lppaca_total_size,
+					       PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
+					       limit, NUMA_NO_NODE);
+		if (!shared_lppaca)
+			panic("cannot allocate shared data");
+
+		memblock_set_bottom_up(false);
+		uv_share_page(PHYS_PFN(__pa(shared_lppaca)),
+			      shared_lppaca_total_size >> PAGE_SHIFT);
+	}
+
+	ptr = shared_lppaca + shared_lppaca_size;
+	shared_lppaca_size += size;
+
+	/*
+	 * This is very early in boot, so no harm done if the kernel crashes at
+	 * this point.
+	 */
+	BUG_ON(shared_lppaca_size > shared_lppaca_total_size);

-#ifdef CONFIG_PPC_BOOK3S
+	return ptr;
+}

 /*
- * The structure which the hypervisor knows about - this structure
- * should not cross a page boundary.  The vpa_init/register_vpa call
- * is now known to fail if the lppaca structure crosses a page
- * boundary.  The lppaca is also used on POWER5 pSeries boxes.
- * The lppaca is 640 bytes long, and cannot readily
- * change since the hypervisor knows its layout, so a 1kB alignment
- * will suffice to ensure that it doesn't cross a page boundary.
+ * See asm/lppaca.h for more detail.
+ *
+ * lppaca structures must must be 1kB in size, L1 cache line aligned,
+ * and not cross 4kB boundary. A 1kB size and 1kB alignment will satisfy
+ * these requirements.
  */
-struct lppaca lppaca[] = {
-	[0 ... (NR_LPPACAS-1)] = {
-		.desc = 0xd397d781,	/* "LpPa" */
-		.size = sizeof(struct lppaca),
+static inline void init_lppaca(struct lppaca *lppaca)
+{
+	BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+
+	*lppaca = (struct lppaca) {
+		.desc = cpu_to_be32(0xd397d781),	/* "LpPa" */
+		.size = cpu_to_be16(LPPACA_SIZE),
 		.fpregs_in_use = 1,
-		.slb_count = 64,
+		.slb_count = cpu_to_be16(64),
 		.vmxregs_in_use = 0,
-		.page_ins = 0,
-	},
+		.page_ins = 0, };
 };

-static struct lppaca *extra_lppacas;
-static long __initdata lppaca_size;
-
-static void allocate_lppacas(int nr_cpus, unsigned long limit)
+static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
 {
-	if (nr_cpus <= NR_LPPACAS)
-		return;
+	struct lppaca *lp;

-	lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
-				 (nr_cpus - NR_LPPACAS));
-	extra_lppacas = __va(memblock_alloc_base(lppaca_size,
-						 PAGE_SIZE, limit));
-}
+	BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);

-static struct lppaca *new_lppaca(int cpu)
-{
-	struct lppaca *lp;
+	if (early_cpu_has_feature(CPU_FTR_HVMODE))
+		return NULL;

-	if (cpu < NR_LPPACAS)
-		return &lppaca[cpu];
+	if (is_secure_guest())
+		lp = alloc_shared_lppaca(LPPACA_SIZE, limit, cpu);
+	else
+		lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);

-	lp = extra_lppacas + (cpu - NR_LPPACAS);
-	*lp = lppaca[0];
+	init_lppaca(lp);

 	return lp;
 }
+#endif /* CONFIG_PPC_PSERIES */

-static void free_lppacas(void)
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/*
+ * 3 persistent SLBs are allocated here.  The buffer will be zero
+ * initially, hence will all be invaild until we actually write them.
+ *
+ * If you make the number of persistent SLB entries dynamic, please also
+ * update PR KVM to flush and restore them accordingly.
+ */
+static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
 {
-	long new_size = 0, nr;
-
-	if (!lppaca_size)
-		return;
-	nr = num_possible_cpus() - NR_LPPACAS;
-	if (nr > 0)
-		new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
-	if (new_size >= lppaca_size)
-		return;
-
-	memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
-	lppaca_size = new_size;
-}
-
-#else
+	struct slb_shadow *s;

-static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
-static inline void free_lppacas(void) { }
+	if (cpu != boot_cpuid) {
+		/*
+		 * Boot CPU comes here before early_radix_enabled
+		 * is parsed (e.g., for disable_radix). So allocate
+		 * always and this will be fixed up in free_unused_pacas.
+		 */
+		if (early_radix_enabled())
+			return NULL;
+	}

-#endif /* CONFIG_PPC_BOOK3S */
+	s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);

-#ifdef CONFIG_PPC_STD_MMU_64
+	s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
+	s->buffer_length = cpu_to_be32(sizeof(*s));

-/*
- * 3 persistent SLBs are registered here.  The buffer will be zero
- * initially, hence will all be invaild until we actually write them.
- */
-struct slb_shadow slb_shadow[] __cacheline_aligned = {
-	[0 ... (NR_CPUS-1)] = {
-		.persistent = SLB_NUM_BOLTED,
-		.buffer_length = sizeof(struct slb_shadow),
-	},
-};
-
-#endif /* CONFIG_PPC_STD_MMU_64 */
+	return s;
+}
+#endif /* CONFIG_PPC_64S_HASH_MMU */

 /* The Paca is an array with one entry per processor.  Each contains an
  * lppaca, which contains the information shared between the
@@ -117,33 +178,37 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = {
  * processors.  The processor VPD array needs one entry per physical
  * processor (not thread).
  */
-struct paca_struct *paca;
-EXPORT_SYMBOL(paca);
+struct paca_struct **paca_ptrs __read_mostly;
+EXPORT_SYMBOL(paca_ptrs);

 void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 {
-	/* The TOC register (GPR2) points 32kB into the TOC, so that 64kB
-	 * of the TOC can be addressed using a single machine instruction.
-	 */
-	unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL;
-
-#ifdef CONFIG_PPC_BOOK3S
-	new_paca->lppaca_ptr = new_lppaca(cpu);
-#else
+#ifdef CONFIG_PPC_PSERIES
+	new_paca->lppaca_ptr = NULL;
+#endif
+#ifdef CONFIG_PPC_BOOK3E_64
 	new_paca->kernel_pgd = swapper_pg_dir;
 #endif
 	new_paca->lock_token = 0x8000;
 	new_paca->paca_index = cpu;
-	new_paca->kernel_toc = kernel_toc;
+#ifndef CONFIG_PPC_KERNEL_PCREL
+	new_paca->kernel_toc = kernel_toc_addr();
+#endif
 	new_paca->kernelbase = (unsigned long) _stext;
-	new_paca->kernel_msr = MSR_KERNEL;
+	/* Only set MSR:IR/DR when MMU is initialized */
+	new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
 	new_paca->hw_cpu_id = 0xffff;
 	new_paca->kexec_state = KEXEC_STATE_NONE;
 	new_paca->__current = &init_task;
 	new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_STD_MMU_64
-	new_paca->slb_shadow_ptr = &slb_shadow[cpu];
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	new_paca->slb_shadow_ptr = NULL;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+	/* For now -- if we have threads this will be adjusted later */
+	new_paca->tcd_ptr = &new_paca->tcd;
+#endif
 }

 /* Put the paca pointer into r13 and SPRG_PACA */
@@ -152,64 +217,107 @@ void setup_paca(struct paca_struct *new_paca)
 	/* Setup r13 */
 	local_paca = new_paca;

-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	/* On Book3E, initialize the TLB miss exception frames */
 	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
 #else
-	/* In HV mode, we setup both HPACA and PACA to avoid problems
+	/*
+	 * In HV mode, we setup both HPACA and PACA to avoid problems
 	 * if we do a GET_PACA() before the feature fixups have been
-	 * applied
+	 * applied.
+	 *
+	 * Normally you should test against CPU_FTR_HVMODE, but CPU features
+	 * are not yet set up when we first reach here.
 	 */
-	if (cpu_has_feature(CPU_FTR_HVMODE))
+	if (mfmsr() & MSR_HV)
 		mtspr(SPRN_SPRG_HPACA, local_paca);
 #endif
 	mtspr(SPRN_SPRG_PACA, local_paca);
 }

-static int __initdata paca_size;
+static int __initdata paca_nr_cpu_ids;
+static int __initdata paca_ptrs_size;
+static int __initdata paca_struct_size;

-void __init allocate_pacas(void)
+void __init allocate_paca_ptrs(void)
 {
-	int cpu, limit;
+	paca_nr_cpu_ids = nr_cpu_ids;

-	/*
-	 * We can't take SLB misses on the paca, and we want to access them
-	 * in real mode, so allocate them within the RMA and also within
-	 * the first segment.
-	 */
-	limit = min(0x10000000ULL, ppc64_rma_size);
+	paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+	paca_ptrs = memblock_alloc_raw(paca_ptrs_size, SMP_CACHE_BYTES);
+	if (!paca_ptrs)
+		panic("Failed to allocate %d bytes for paca pointers\n",
+		      paca_ptrs_size);

-	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+	memset(paca_ptrs, 0x88, paca_ptrs_size);
+}
+
+void __init allocate_paca(int cpu)
+{
+	u64 limit;
+	struct paca_struct *paca;

-	paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
-	memset(paca, 0, paca_size);
+	BUG_ON(cpu >= paca_nr_cpu_ids);

-	printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
-		paca_size, nr_cpu_ids, paca);
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * We access pacas in real mode, and cannot take SLB faults
+	 * on them when in virtual mode, so allocate them accordingly.
+	 */
+	limit = min(ppc64_bolted_size(), ppc64_rma_size);
+#else
+	limit = ppc64_rma_size;
+#endif

-	allocate_lppacas(nr_cpu_ids, limit);
+	paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
+				limit, cpu);
+	paca_ptrs[cpu] = paca;

-	/* Can't use for_each_*_cpu, as they aren't functional yet */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
-		initialise_paca(&paca[cpu], cpu);
+	initialise_paca(paca, cpu);
+#ifdef CONFIG_PPC_PSERIES
+	paca->lppaca_ptr = new_lppaca(cpu, limit);
+#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
+#endif
+	paca_struct_size += sizeof(struct paca_struct);
 }

 void __init free_unused_pacas(void)
 {
-	int new_size;
-
-	new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
-
-	if (new_size >= paca_size)
-		return;
-
-	memblock_free(__pa(paca) + new_size, paca_size - new_size);
+	int new_ptrs_size;
+
+	new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+	if (new_ptrs_size < paca_ptrs_size)
+		memblock_phys_free(__pa(paca_ptrs) + new_ptrs_size,
+				   paca_ptrs_size - new_ptrs_size);
+
+	paca_nr_cpu_ids = nr_cpu_ids;
+	paca_ptrs_size = new_ptrs_size;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	if (early_radix_enabled()) {
+		/* Ugly fixup, see new_slb_shadow() */
+		memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+				   sizeof(struct slb_shadow));
+		paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
+	}
+#endif

-	printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
-		paca_size - new_size);
+	printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
+			paca_ptrs_size + paca_struct_size, nr_cpu_ids);
+}

-	paca_size = new_size;
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void copy_mm_to_paca(struct mm_struct *mm)
+{
+	mm_context_t *context = &mm->context;

-	free_lppacas();
+	VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
+	memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
+	       LOW_SLICE_ARRAY_SZ);
+	memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
+	       TASK_SLICE_ARRAY_SZ(context));
 }
+#endif /* CONFIG_PPC_64S_HASH_MMU */
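For readers following the refactor, the core change in this diff is that the single statically-sized paca array (and the static lppaca/slb_shadow arrays) is replaced by paca_ptrs, an array of per-CPU pointers whose entries are allocated individually and NUMA-placed via alloc_paca_data(). The following is a hypothetical userspace sketch of that allocation pattern, not kernel code: struct cpu_data, NR_CPUS_DEMO and allocate_one() are illustrative stand-ins, and plain calloc() stands in for the memblock allocator.

/*
 * Illustrative sketch only: mirrors the paca_ptrs shape (a pointer array
 * with each per-CPU entry allocated on its own) in plain C.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS_DEMO 4

struct cpu_data {
	int index;                      /* stands in for paca_index */
	unsigned long long data_offset; /* poison value, as in initialise_paca() */
};

static struct cpu_data **cpu_ptrs;     /* stands in for paca_ptrs */

static struct cpu_data *allocate_one(int cpu)
{
	/* The kernel uses alloc_paca_data() with a NUMA node hint here. */
	struct cpu_data *p = calloc(1, sizeof(*p));

	if (!p)
		exit(EXIT_FAILURE);
	p->index = cpu;
	p->data_offset = 0xfeeeeeeeeeeeeeeeULL;
	return p;
}

int main(void)
{
	/* allocate_paca_ptrs(): size the pointer array for all possible CPUs. */
	cpu_ptrs = calloc(NR_CPUS_DEMO, sizeof(*cpu_ptrs));
	if (!cpu_ptrs)
		return EXIT_FAILURE;

	/* allocate_paca(cpu): allocate and initialise one entry per CPU. */
	for (int cpu = 0; cpu < NR_CPUS_DEMO; cpu++)
		cpu_ptrs[cpu] = allocate_one(cpu);

	for (int cpu = 0; cpu < NR_CPUS_DEMO; cpu++)
		printf("cpu %d data at %p\n", cpu, (void *)cpu_ptrs[cpu]);

	return 0;
}

The kernel version additionally bounds each allocation below a limit so the paca stays addressable in real mode (and within the bolted SLB region on Book3S), and picks the node with early_cpu_to_node(); the sketch only shows the pointer-array structure itself.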
