summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel/setup_64.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel/setup_64.c')
-rw-r--r--arch/powerpc/kernel/setup_64.c390
1 files changed, 177 insertions, 213 deletions
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6104917a282d..2f19d5e94485 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -30,13 +30,15 @@
#include <linux/lockdep.h>
#include <linux/memory.h>
#include <linux/nmi.h>
+#include <linux/pgtable.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
-#include <asm/debugfs.h>
+#include <asm/asm-prototypes.h>
+#include <asm/kvm_guest.h>
#include <asm/io.h>
#include <asm/kdump.h>
-#include <asm/prom.h>
#include <asm/processor.h>
-#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
#include <asm/machdep.h>
@@ -59,13 +61,14 @@
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/code-patching.h>
-#include <asm/livepatch.h>
+#include <asm/ftrace.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
#include <asm/kup.h>
#include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
#include "setup.h"
@@ -84,7 +87,7 @@ struct ppc64_caches ppc64_caches = {
};
EXPORT_SYMBOL_GPL(ppc64_caches);
-#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
+#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
void __init setup_tlb_core_data(void)
{
int cpu;
@@ -111,7 +114,6 @@ void __init setup_tlb_core_data(void)
* Should we panic instead?
*/
WARN_ONCE(smt_enabled_at_boot >= 2 &&
- !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
book3e_htw_mode != PPC_HTW_E6500,
"%s: unsupported MMU configuration\n", __func__);
}
@@ -175,14 +177,26 @@ early_param("smt-enabled", early_smt_enabled);
#endif /* CONFIG_SMP */
/** Fix up paca fields required for the boot cpu */
-static void __init fixup_boot_paca(void)
+static void __init fixup_boot_paca(struct paca_struct *boot_paca)
{
/* The boot cpu is started */
- get_paca()->cpu_start = 1;
+ boot_paca->cpu_start = 1;
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * Give the early boot machine check stack somewhere to use, use
+ * half of the init stack. This is a bit hacky but there should not be
+ * deep stack usage in early init so shouldn't overflow it or overwrite
+ * things.
+ */
+ boot_paca->mc_emergency_sp = (void *)&init_thread_union +
+ (THREAD_SIZE/2);
+#endif
/* Allow percpu accesses to work until we setup percpu data */
- get_paca()->data_offset = 0;
- /* Mark interrupts disabled in PACA */
- irq_soft_mask_set(IRQS_DISABLED);
+ boot_paca->data_offset = 0;
+ /* Mark interrupts soft and hard disabled in PACA */
+ boot_paca->irq_soft_mask = IRQS_DISABLED;
+ boot_paca->irq_happened = PACA_IRQ_HARD_DIS;
+ WARN_ON(mfmsr() & MSR_EE);
}
static void __init configure_exceptions(void)
@@ -195,8 +209,39 @@ static void __init configure_exceptions(void)
/* Under a PAPR hypervisor, we need hypercalls */
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ /*
+ * - PR KVM does not support AIL mode interrupts in the host
+ * while a PR guest is running.
+ *
+ * - SCV system call interrupt vectors are only implemented for
+ * AIL mode interrupts.
+ *
+ * - On pseries, AIL mode can only be enabled and disabled
+ * system-wide so when a PR VM is created on a pseries host,
+ * all CPUs of the host are set to AIL=0 mode.
+ *
+ * - Therefore host CPUs must not execute scv while a PR VM
+ * exists.
+ *
+ * - SCV support can not be disabled dynamically because the
+ * feature is advertised to host userspace. Disabling the
+ * facility and emulating it would be possible but is not
+ * implemented.
+ *
+ * - So SCV support is blanket disabled if PR KVM could possibly
+ * run. That is, PR support compiled in, booting on pseries
+ * with hash MMU.
+ */
+ if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) {
+ init_task.thread.fscr &= ~FSCR_SCV;
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+ }
+
/* Enable AIL if possible */
- pseries_enable_reloc_on_exc();
+ if (!pseries_enable_reloc_on_exc()) {
+ init_task.thread.fscr &= ~FSCR_SCV;
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+ }
/*
* Tell the hypervisor that we want our exceptions to
@@ -227,10 +272,23 @@ static void cpu_ready_for_interrupts(void)
* If we are not in hypervisor mode the job is done once for
* the whole partition in configure_exceptions().
*/
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
unsigned long lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+ unsigned long new_lpcr = lpcr;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ /* P10 DD1 does not have HAIL */
+ if (pvr_version_is(PVR_POWER10) &&
+ (mfspr(SPRN_PVR) & 0xf00) == 0x100)
+ new_lpcr |= LPCR_AIL_3;
+ else
+ new_lpcr |= LPCR_HAIL;
+ } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ new_lpcr |= LPCR_AIL_3;
+ }
+
+ if (new_lpcr != lpcr)
+ mtspr(SPRN_LPCR, new_lpcr);
}
/*
@@ -254,7 +312,7 @@ static void cpu_ready_for_interrupts(void)
unsigned long spr_default_dscr = 0;
-void __init record_spr_defaults(void)
+static void __init record_spr_defaults(void)
{
if (early_cpu_has_feature(CPU_FTR_DSCR))
spr_default_dscr = mfspr(SPRN_DSCR);
@@ -285,18 +343,40 @@ void __init early_setup(unsigned long dt_ptr)
/* -------- printk is _NOT_ safe to use here ! ------- */
+ /*
+ * Assume we're on cpu 0 for now.
+ *
+ * We need to load a PACA very early for a few reasons.
+ *
+ * The stack protector canary is stored in the paca, so as soon as we
+ * call any stack protected code we need r13 pointing somewhere valid.
+ *
+ * If we are using kcov it will call in_task() in its instrumentation,
+ * which relies on the current task from the PACA.
+ *
+ * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
+ * printk(), which can trigger both stack protector and kcov.
+ *
+ * percpu variables and spin locks also use the paca.
+ *
+ * So set up a temporary paca. It will be replaced below once we know
+ * what CPU we are on.
+ */
+ initialise_paca(&boot_paca, 0);
+ fixup_boot_paca(&boot_paca);
+ WARN_ON(local_paca);
+ setup_paca(&boot_paca); /* install the paca into registers */
+
+ /* -------- printk is now safe to use ------- */
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (mfmsr() & MSR_HV))
+ enable_machine_check();
+
/* Try new device tree based feature discovery ... */
if (!dt_cpu_ftrs_init(__va(dt_ptr)))
/* Otherwise use the old style CPU table */
identify_cpu(0, mfspr(SPRN_PVR));
- /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
- initialise_paca(&boot_paca, 0);
- setup_paca(&boot_paca);
- fixup_boot_paca();
-
- /* -------- printk is now safe to use ------- */
-
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
@@ -305,17 +385,21 @@ void __init early_setup(unsigned long dt_ptr)
/*
* Do early initialization using the flattened device
* tree, such as retrieving the physical memory map or
- * calculating/retrieving the hash table size.
+ * calculating/retrieving the hash table size, discover
+ * boot_cpuid and boot_cpu_hwid.
*/
early_init_devtree(__va(dt_ptr));
- /* Now we know the logical id of our boot cpu, setup the paca. */
- if (boot_cpuid != 0) {
- /* Poison paca_ptrs[0] again if it's not the boot cpu */
- memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
- }
- setup_paca(paca_ptrs[boot_cpuid]);
- fixup_boot_paca();
+ allocate_paca_ptrs();
+ allocate_paca(boot_cpuid);
+ set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid);
+ fixup_boot_paca(paca_ptrs[boot_cpuid]);
+ setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */
+ // smp_processor_id() now reports boot_cpuid
+
+#ifdef CONFIG_SMP
+ task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current)
+#endif
/*
* Configure exception handlers. This include setting up trampolines
@@ -333,11 +417,11 @@ void __init early_setup(unsigned long dt_ptr)
apply_feature_fixups();
setup_feature_keys();
- early_ioremap_setup();
-
/* Initialize the hash table or TLB handling */
early_init_mmu();
+ early_ioremap_setup();
+
/*
* After firmware and early platform setup code has set things up,
* we note the SPR values for configurable control/performance
@@ -396,7 +480,7 @@ void early_setup_secondary(void)
#endif /* CONFIG_SMP */
-void panic_smp_self_stop(void)
+void __noreturn panic_smp_self_stop(void)
{
hard_irq_disable();
spin_begin();
@@ -463,7 +547,7 @@ void smp_release_cpus(void)
* routines and/or provided to userland
*/
-static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
+static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
u32 bsize, u32 sets)
{
info->size = size;
@@ -516,6 +600,8 @@ static bool __init parse_cache_info(struct device_node *np,
lsizep = of_get_property(np, propnames[3], NULL);
if (bsizep == NULL)
bsizep = lsizep;
+ if (lsizep == NULL)
+ lsizep = bsizep;
if (lsizep != NULL)
lsize = be32_to_cpu(*lsizep);
if (bsizep != NULL)
@@ -608,7 +694,7 @@ void __init initialize_cache_info(void)
*/
__init u64 ppc64_bolted_size(void)
{
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/* Freescale BookE bolts the entire linear mapping */
/* XXX: BookE ppc64_rma_limit setup seems to disagree? */
if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
@@ -633,7 +719,7 @@ static void *__init alloc_stack(unsigned long limit, int cpu)
BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
- ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE,
+ ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
MEMBLOCK_LOW_LIMIT, limit,
early_cpu_to_node(cpu));
if (!ptr)
@@ -658,7 +744,7 @@ void __init irqstack_early_init(void)
}
}
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
void __init exc_lvl_early_init(void)
{
unsigned int i;
@@ -691,7 +777,7 @@ void __init exc_lvl_early_init(void)
*/
void __init emergency_stack_init(void)
{
- u64 limit;
+ u64 limit, mce_limit;
unsigned int i;
/*
@@ -708,7 +794,16 @@ void __init emergency_stack_init(void)
* initialized in kernel/irq.c. These are initialized here in order
* to have emergency stacks available as early as possible.
*/
- limit = min(ppc64_bolted_size(), ppc64_rma_size);
+ limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+ /*
+ * Machine check on pseries calls rtas, but can't use the static
+ * rtas_args due to a machine check hitting while the lock is held.
+ * rtas args have to be under 4GB, so the machine check stack is
+ * limited to 4GB so args can be put on stack.
+ */
+ if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G)
+ mce_limit = SZ_4G;
for_each_possible_cpu(i) {
paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
@@ -718,27 +813,12 @@ void __init emergency_stack_init(void)
paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
- paca_ptrs[i]->mc_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
+ paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE;
#endif
}
}
#ifdef CONFIG_SMP
-#define PCPU_DYN_SIZE ()
-
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
-{
- return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
- MEMBLOCK_ALLOC_ACCESSIBLE,
- early_cpu_to_node(cpu));
-
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(__pa(ptr), size);
-}
-
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (early_cpu_to_node(from) == early_cpu_to_node(to))
@@ -747,6 +827,11 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
+static __init int pcpu_cpu_to_node(int cpu)
+{
+ return early_cpu_to_node(cpu);
+}
+
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
@@ -756,20 +841,38 @@ void __init setup_per_cpu_areas(void)
size_t atom_size;
unsigned long delta;
unsigned int cpu;
- int rc;
+ int rc = -EINVAL;
/*
- * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
- * to group units. For larger mappings, use 1M atom which
- * should be large enough to contain a number of units.
+ * BookE and BookS radix are historical values and should be revisited.
*/
- if (mmu_linear_psize == MMU_PAGE_4K)
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
+ atom_size = SZ_1M;
+ } else if (radix_enabled()) {
atom_size = PAGE_SIZE;
- else
- atom_size = 1 << 20;
+ } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) {
+ /*
+ * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
+ * to group units. For larger mappings, use 1M atom which
+ * should be large enough to contain a number of units.
+ */
+ if (mmu_linear_psize == MMU_PAGE_4K)
+ atom_size = PAGE_SIZE;
+ else
+ atom_size = SZ_1M;
+ }
+
+ if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+ rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+ pcpu_cpu_to_node);
+ if (rc)
+ pr_warn("PERCPU: %s allocator failed (%d), "
+ "falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
+ }
- rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ if (rc < 0)
+ rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
@@ -781,7 +884,7 @@ void __init setup_per_cpu_areas(void)
}
#endif
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
unsigned long memory_block_size_bytes(void)
{
if (ppc_md.memory_block_size)
@@ -808,161 +911,22 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)
* disable it by default. Book3S has a soft-nmi hardlockup detector based
* on the decrementer interrupt, so it does not suffer from this problem.
*
- * It is likely to get false positives in VM guests, so disable it there
- * by default too.
+ * It is likely to get false positives in KVM guests, so disable it there
+ * by default too. PowerVM will not stop or arbitrarily oversubscribe
+ * CPUs, but give a minimum regular allotment even with SPLPAR, so enable
+ * the detector for non-KVM guests, assume PowerVM.
*/
static int __init disable_hardlockup_detector(void)
{
#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
hardlockup_detector_disable();
#else
- if (firmware_has_feature(FW_FEATURE_LPAR))
- hardlockup_detector_disable();
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (is_kvm_guest())
+ hardlockup_detector_disable();
+ }
#endif
return 0;
}
early_initcall(disable_hardlockup_detector);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-static enum l1d_flush_type enabled_flush_types;
-static void *l1d_flush_fallback_area;
-static bool no_rfi_flush;
-bool rfi_flush;
-
-static int __init handle_no_rfi_flush(char *p)
-{
- pr_info("rfi-flush: disabled on command line.");
- no_rfi_flush = true;
- return 0;
-}
-early_param("no_rfi_flush", handle_no_rfi_flush);
-
-/*
- * The RFI flush is not KPTI, but because users will see doco that says to use
- * nopti we hijack that option here to also disable the RFI flush.
- */
-static int __init handle_no_pti(char *p)
-{
- pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
- handle_no_rfi_flush(NULL);
- return 0;
-}
-early_param("nopti", handle_no_pti);
-
-static void do_nothing(void *unused)
-{
- /*
- * We don't need to do the flush explicitly, just enter+exit kernel is
- * sufficient, the RFI exit handlers will do the right thing.
- */
-}
-
-void rfi_flush_enable(bool enable)
-{
- if (enable) {
- do_rfi_flush_fixups(enabled_flush_types);
- on_each_cpu(do_nothing, NULL, 1);
- } else
- do_rfi_flush_fixups(L1D_FLUSH_NONE);
-
- rfi_flush = enable;
-}
-
-static void __ref init_fallback_flush(void)
-{
- u64 l1d_size, limit;
- int cpu;
-
- /* Only allocate the fallback flush area once (at boot time). */
- if (l1d_flush_fallback_area)
- return;
-
- l1d_size = ppc64_caches.l1d.size;
-
- /*
- * If there is no d-cache-size property in the device tree, l1d_size
- * could be zero. That leads to the loop in the asm wrapping around to
- * 2^64-1, and then walking off the end of the fallback area and
- * eventually causing a page fault which is fatal. Just default to
- * something vaguely sane.
- */
- if (!l1d_size)
- l1d_size = (64 * 1024);
-
- limit = min(ppc64_bolted_size(), ppc64_rma_size);
-
- /*
- * Align to L1d size, and size it at 2x L1d size, to catch possible
- * hardware prefetch runoff. We don't have a recipe for load patterns to
- * reliably avoid the prefetcher.
- */
- l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
- l1d_size, MEMBLOCK_LOW_LIMIT,
- limit, NUMA_NO_NODE);
- if (!l1d_flush_fallback_area)
- panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
- __func__, l1d_size * 2, l1d_size, &limit);
-
-
- for_each_possible_cpu(cpu) {
- struct paca_struct *paca = paca_ptrs[cpu];
- paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
- paca->l1d_flush_size = l1d_size;
- }
-}
-
-void setup_rfi_flush(enum l1d_flush_type types, bool enable)
-{
- if (types & L1D_FLUSH_FALLBACK) {
- pr_info("rfi-flush: fallback displacement flush available\n");
- init_fallback_flush();
- }
-
- if (types & L1D_FLUSH_ORI)
- pr_info("rfi-flush: ori type flush available\n");
-
- if (types & L1D_FLUSH_MTTRIG)
- pr_info("rfi-flush: mttrig type flush available\n");
-
- enabled_flush_types = types;
-
- if (!no_rfi_flush && !cpu_mitigations_off())
- rfi_flush_enable(enable);
-}
-
-#ifdef CONFIG_DEBUG_FS
-static int rfi_flush_set(void *data, u64 val)
-{
- bool enable;
-
- if (val == 1)
- enable = true;
- else if (val == 0)
- enable = false;
- else
- return -EINVAL;
-
- /* Only do anything if we're changing state */
- if (enable != rfi_flush)
- rfi_flush_enable(enable);
-
- return 0;
-}
-
-static int rfi_flush_get(void *data, u64 *val)
-{
- *val = rfi_flush ? 1 : 0;
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
-
-static __init int rfi_flush_debugfs_init(void)
-{
- debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
- return 0;
-}
-device_initcall(rfi_flush_debugfs_init);
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */