diff options
Diffstat (limited to 'arch/powerpc/kernel/setup_64.c')
| -rw-r--r-- | arch/powerpc/kernel/setup_64.c | 491 |
1 files changed, 210 insertions, 281 deletions
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 236c1151a3a7..8fd7cbf3bd04 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * * Common boot and setup code. * * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/export.h> @@ -34,13 +30,15 @@ #include <linux/lockdep.h> #include <linux/memory.h> #include <linux/nmi.h> +#include <linux/pgtable.h> +#include <linux/of.h> +#include <linux/of_fdt.h> -#include <asm/debugfs.h> +#include <asm/asm-prototypes.h> +#include <asm/kvm_guest.h> #include <asm/io.h> #include <asm/kdump.h> -#include <asm/prom.h> #include <asm/processor.h> -#include <asm/pgtable.h> #include <asm/smp.h> #include <asm/elf.h> #include <asm/machdep.h> @@ -62,21 +60,18 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/code-patching.h> -#include <asm/livepatch.h> +#include <asm/text-patching.h> +#include <asm/ftrace.h> #include <asm/opal.h> #include <asm/cputhreads.h> #include <asm/hw_irq.h> #include <asm/feature-fixups.h> +#include <asm/kup.h> +#include <asm/early_ioremap.h> +#include <asm/pgalloc.h> #include "setup.h" -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - int spinning_secondaries; u64 ppc64_pft_size; @@ -92,7 +87,7 @@ struct ppc64_caches ppc64_caches = { }; EXPORT_SYMBOL_GPL(ppc64_caches); -#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP) void __init setup_tlb_core_data(void) { int cpu; @@ -119,7 +114,6 @@ void __init setup_tlb_core_data(void) * Should we panic instead? */ WARN_ONCE(smt_enabled_at_boot >= 2 && - !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && book3e_htw_mode != PPC_HTW_E6500, "%s: unsupported MMU configuration\n", __func__); } @@ -147,10 +141,7 @@ void __init check_smt_enabled(void) smt_enabled_at_boot = 0; else { int smt; - int rc; - - rc = kstrtoint(smt_enabled_cmdline, 10, &smt); - if (!rc) + if (!kstrtoint(smt_enabled_cmdline, 10, &smt)) smt_enabled_at_boot = min(threads_per_core, smt); } @@ -183,14 +174,26 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /** Fix up paca fields required for the boot cpu */ -static void __init fixup_boot_paca(void) +static void __init fixup_boot_paca(struct paca_struct *boot_paca) { /* The boot cpu is started */ - get_paca()->cpu_start = 1; + boot_paca->cpu_start = 1; +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * Give the early boot machine check stack somewhere to use, use + * half of the init stack. This is a bit hacky but there should not be + * deep stack usage in early init so shouldn't overflow it or overwrite + * things. + */ + boot_paca->mc_emergency_sp = (void *)&init_thread_union + + (THREAD_SIZE/2); +#endif /* Allow percpu accesses to work until we setup percpu data */ - get_paca()->data_offset = 0; - /* Mark interrupts disabled in PACA */ - irq_soft_mask_set(IRQS_DISABLED); + boot_paca->data_offset = 0; + /* Mark interrupts soft and hard disabled in PACA */ + boot_paca->irq_soft_mask = IRQS_DISABLED; + boot_paca->irq_happened = PACA_IRQ_HARD_DIS; + WARN_ON(mfmsr() & MSR_EE); } static void __init configure_exceptions(void) @@ -203,8 +206,39 @@ static void __init configure_exceptions(void) /* Under a PAPR hypervisor, we need hypercalls */ if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + /* + * - PR KVM does not support AIL mode interrupts in the host + * while a PR guest is running. + * + * - SCV system call interrupt vectors are only implemented for + * AIL mode interrupts. + * + * - On pseries, AIL mode can only be enabled and disabled + * system-wide so when a PR VM is created on a pseries host, + * all CPUs of the host are set to AIL=0 mode. + * + * - Therefore host CPUs must not execute scv while a PR VM + * exists. + * + * - SCV support can not be disabled dynamically because the + * feature is advertised to host userspace. Disabling the + * facility and emulating it would be possible but is not + * implemented. + * + * - So SCV support is blanket disabled if PR KVM could possibly + * run. That is, PR support compiled in, booting on pseries + * with hash MMU. + */ + if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) { + init_task.thread.fscr &= ~FSCR_SCV; + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV; + } + /* Enable AIL if possible */ - pseries_enable_reloc_on_exc(); + if (!pseries_enable_reloc_on_exc()) { + init_task.thread.fscr &= ~FSCR_SCV; + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV; + } /* * Tell the hypervisor that we want our exceptions to @@ -235,10 +269,23 @@ static void cpu_ready_for_interrupts(void) * If we are not in hypervisor mode the job is done once for * the whole partition in configure_exceptions(). */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + unsigned long new_lpcr = lpcr; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* P10 DD1 does not have HAIL */ + if (pvr_version_is(PVR_POWER10) && + (mfspr(SPRN_PVR) & 0xf00) == 0x100) + new_lpcr |= LPCR_AIL_3; + else + new_lpcr |= LPCR_HAIL; + } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + new_lpcr |= LPCR_AIL_3; + } + + if (new_lpcr != lpcr) + mtspr(SPRN_LPCR, new_lpcr); } /* @@ -262,7 +309,7 @@ static void cpu_ready_for_interrupts(void) unsigned long spr_default_dscr = 0; -void __init record_spr_defaults(void) +static void __init record_spr_defaults(void) { if (early_cpu_has_feature(CPU_FTR_DSCR)) spr_default_dscr = mfspr(SPRN_DSCR); @@ -293,37 +340,63 @@ void __init early_setup(unsigned long dt_ptr) /* -------- printk is _NOT_ safe to use here ! ------- */ + /* + * Assume we're on cpu 0 for now. + * + * We need to load a PACA very early for a few reasons. + * + * The stack protector canary is stored in the paca, so as soon as we + * call any stack protected code we need r13 pointing somewhere valid. + * + * If we are using kcov it will call in_task() in its instrumentation, + * which relies on the current task from the PACA. + * + * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as + * printk(), which can trigger both stack protector and kcov. + * + * percpu variables and spin locks also use the paca. + * + * So set up a temporary paca. It will be replaced below once we know + * what CPU we are on. + */ + initialise_paca(&boot_paca, 0); + fixup_boot_paca(&boot_paca); + WARN_ON(local_paca); + setup_paca(&boot_paca); /* install the paca into registers */ + + /* -------- printk is now safe to use ------- */ + + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (mfmsr() & MSR_HV)) + enable_machine_check(); + /* Try new device tree based feature discovery ... */ if (!dt_cpu_ftrs_init(__va(dt_ptr))) /* Otherwise use the old style CPU table */ identify_cpu(0, mfspr(SPRN_PVR)); - /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ - initialise_paca(&boot_paca, 0); - setup_paca(&boot_paca); - fixup_boot_paca(); - - /* -------- printk is now safe to use ------- */ - /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr); + udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr); /* * Do early initialization using the flattened device * tree, such as retrieving the physical memory map or - * calculating/retrieving the hash table size. + * calculating/retrieving the hash table size, discover + * boot_cpuid and boot_cpu_hwid. */ early_init_devtree(__va(dt_ptr)); - /* Now we know the logical id of our boot cpu, setup the paca. */ - if (boot_cpuid != 0) { - /* Poison paca_ptrs[0] again if it's not the boot cpu */ - memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0])); - } - setup_paca(paca_ptrs[boot_cpuid]); - fixup_boot_paca(); + allocate_paca_ptrs(); + allocate_paca(boot_cpuid); + set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); + fixup_boot_paca(paca_ptrs[boot_cpuid]); + setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */ + // smp_processor_id() now reports boot_cpuid + +#ifdef CONFIG_SMP + task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current) +#endif /* * Configure exception handlers. This include setting up trampolines @@ -331,6 +404,12 @@ void __init early_setup(unsigned long dt_ptr) */ configure_exceptions(); + /* + * Configure Kernel Userspace Protection. This needs to happen before + * feature fixups for platforms that implement this using features. + */ + setup_kup(); + /* Apply all the dynamic patching */ apply_feature_fixups(); setup_feature_keys(); @@ -338,6 +417,8 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + early_ioremap_setup(); + /* * After firmware and early platform setup code has set things up, * we note the SPR values for configurable control/performance @@ -359,11 +440,11 @@ void __init early_setup(unsigned long dt_ptr) */ this_cpu_enable_ftrace(); - DBG(" <- early_setup()\n"); + udbg_printf(" <- %s()\n", __func__); #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX /* - * This needs to be done *last* (after the above DBG() even) + * This needs to be done *last* (after the above udbg_printf() even) * * Right after we return from this function, we turn on the MMU * which means the real-mode access trick that btext does will @@ -383,6 +464,9 @@ void early_setup_secondary(void) /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); + /* Perform any KUP setup that is per-cpu */ + setup_kup(); + /* * At this point, we can let interrupts switch to virtual mode * (the MMU has been setup), so adjust the MSR in the PACA to @@ -393,7 +477,7 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ -void panic_smp_self_stop(void) +void __noreturn panic_smp_self_stop(void) { hard_irq_disable(); spin_begin(); @@ -430,8 +514,6 @@ void smp_release_cpus(void) if (!use_spinloop()) return; - DBG(" -> smp_release_cpus()\n"); - /* All secondary cpus are spinning on a common spinloop, release them * all now so they can start to spin on their individual paca * spinloops. For non SMP kernels, the secondary cpus never get out @@ -450,9 +532,7 @@ void smp_release_cpus(void) break; udelay(1); } - DBG("spinning_secondaries = %d\n", spinning_secondaries); - - DBG(" <- smp_release_cpus()\n"); + pr_debug("spinning_secondaries = %d\n", spinning_secondaries); } #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */ @@ -464,7 +544,7 @@ void smp_release_cpus(void) * routines and/or provided to userland */ -static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, +static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, u32 bsize, u32 sets) { info->size = size; @@ -517,6 +597,8 @@ static bool __init parse_cache_info(struct device_node *np, lsizep = of_get_property(np, propnames[3], NULL); if (bsizep == NULL) bsizep = lsizep; + if (lsizep == NULL) + lsizep = bsizep; if (lsizep != NULL) lsize = be32_to_cpu(*lsizep); if (bsizep != NULL) @@ -545,8 +627,6 @@ void __init initialize_cache_info(void) struct device_node *cpu = NULL, *l2, *l3 = NULL; u32 pvr; - DBG(" -> initialize_cache_info()\n"); - /* * All shipping POWER8 machines have a firmware bug that * puts incorrect information in the device-tree. This will @@ -570,10 +650,10 @@ void __init initialize_cache_info(void) */ if (cpu) { if (!parse_cache_info(cpu, false, &ppc64_caches.l1d)) - DBG("Argh, can't find dcache properties !\n"); + pr_warn("Argh, can't find dcache properties !\n"); if (!parse_cache_info(cpu, true, &ppc64_caches.l1i)) - DBG("Argh, can't find icache properties !\n"); + pr_warn("Argh, can't find icache properties !\n"); /* * Try to find the L2 and L3 if any. Assume they are @@ -598,8 +678,6 @@ void __init initialize_cache_info(void) cur_cpu_spec->dcache_bsize = dcache_bsize; cur_cpu_spec->icache_bsize = icache_bsize; - - DBG(" <- initialize_cache_info()\n"); } /* @@ -613,13 +691,9 @@ void __init initialize_cache_info(void) */ __init u64 ppc64_bolted_size(void) { -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* Freescale BookE bolts the entire linear mapping */ - /* XXX: BookE ppc64_rma_limit setup seems to disagree? */ - if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) - return linear_map_top; - /* Other BookE, we assume the first GB is bolted */ - return 1ul << 30; + return linear_map_top; #else /* BookS radix, does not take faults on linear mapping */ if (early_radix_enabled()) @@ -634,19 +708,17 @@ __init u64 ppc64_bolted_size(void) static void *__init alloc_stack(unsigned long limit, int cpu) { - unsigned long pa; + void *ptr; BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16); - pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit, - early_cpu_to_node(cpu), MEMBLOCK_NONE); - if (!pa) { - pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); - if (!pa) - panic("cannot allocate stacks"); - } + ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN, + MEMBLOCK_LOW_LIMIT, limit, + early_cpu_to_node(cpu)); + if (!ptr) + panic("cannot allocate stacks"); - return __va(pa); + return ptr; } void __init irqstack_early_init(void) @@ -665,7 +737,7 @@ void __init irqstack_early_init(void) } } -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 void __init exc_lvl_early_init(void) { unsigned int i; @@ -692,31 +764,13 @@ void __init exc_lvl_early_init(void) #endif /* - * Emergency stacks are used for a range of things, from asynchronous - * NMIs (system reset, machine check) to synchronous, process context. - * We set preempt_count to zero, even though that isn't necessarily correct. To - * get the right value we'd need to copy it from the previous thread_info, but - * doing that might fault causing more problems. - * TODO: what to do with accounting? - */ -static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu) -{ - ti->task = NULL; - ti->cpu = cpu; - ti->preempt_count = 0; - ti->local_flags = 0; - ti->flags = 0; - klp_init_thread_info(ti); -} - -/* * Stack space used when we detect a bad kernel stack pointer, and * early in SMP boots before relocation is enabled. Exclusive emergency * stack for machine checks. */ void __init emergency_stack_init(void) { - u64 limit; + u64 limit, mce_limit; unsigned int i; /* @@ -733,48 +787,31 @@ void __init emergency_stack_init(void) * initialized in kernel/irq.c. These are initialized here in order * to have emergency stacks available as early as possible. */ - limit = min(ppc64_bolted_size(), ppc64_rma_size); + limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size); - for_each_possible_cpu(i) { - struct thread_info *ti; + /* + * Machine check on pseries calls rtas, but can't use the static + * rtas_args due to a machine check hitting while the lock is held. + * rtas args have to be under 4GB, so the machine check stack is + * limited to 4GB so args can be put on stack. + */ + if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G) + mce_limit = SZ_4G; - ti = alloc_stack(limit, i); - memset(ti, 0, THREAD_SIZE); - emerg_stack_init_thread_info(ti, i); - paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE; + for_each_possible_cpu(i) { + paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for NMI exception handling. */ - ti = alloc_stack(limit, i); - memset(ti, 0, THREAD_SIZE); - emerg_stack_init_thread_info(ti, i); - paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE; + paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; /* emergency stack for machine check exception handling. */ - ti = alloc_stack(limit, i); - memset(ti, 0, THREAD_SIZE); - emerg_stack_init_thread_info(ti, i); - paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE; + paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE; #endif } } #ifdef CONFIG_SMP -#define PCPU_DYN_SIZE () - -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) -{ - return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_ACCESSIBLE, - early_cpu_to_node(cpu)); - -} - -static void __init pcpu_fc_free(void *ptr, size_t size) -{ - memblock_free(__pa(ptr), size); -} - static int pcpu_cpu_distance(unsigned int from, unsigned int to) { if (early_cpu_to_node(from) == early_cpu_to_node(to)) @@ -783,8 +820,14 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } +static __init int pcpu_cpu_to_node(int cpu) +{ + return early_cpu_to_node(cpu); +} + unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged); void __init setup_per_cpu_areas(void) { @@ -792,23 +835,42 @@ void __init setup_per_cpu_areas(void) size_t atom_size; unsigned long delta; unsigned int cpu; - int rc; + int rc = -EINVAL; /* - * Linear mapping is one of 4K, 1M and 16M. For 4K, no need - * to group units. For larger mappings, use 1M atom which - * should be large enough to contain a number of units. + * BookE and BookS radix are historical values and should be revisited. */ - if (mmu_linear_psize == MMU_PAGE_4K) + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { + atom_size = SZ_1M; + } else if (radix_enabled()) { atom_size = PAGE_SIZE; - else - atom_size = 1 << 20; + } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) { + /* + * Linear mapping is one of 4K, 1M and 16M. For 4K, no need + * to group units. For larger mappings, use 1M atom which + * should be large enough to contain a number of units. + */ + if (mmu_linear_psize == MMU_PAGE_4K) + atom_size = PAGE_SIZE; + else + atom_size = SZ_1M; + } - rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, - pcpu_fc_alloc, pcpu_fc_free); + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, + pcpu_cpu_to_node); + if (rc) + pr_warn("PERCPU: %s allocator failed (%d), " + "falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); + } + + if (rc < 0) + rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); + static_key_enable(&__percpu_first_chunk_is_paged.key); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; @@ -817,7 +879,7 @@ void __init setup_per_cpu_areas(void) } #endif -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG unsigned long memory_block_size_bytes(void) { if (ppc_md.memory_block_size) @@ -827,7 +889,7 @@ unsigned long memory_block_size_bytes(void) } #endif -#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO) +#ifdef CONFIG_PPC_INDIRECT_PIO struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); #endif @@ -844,156 +906,23 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) * disable it by default. Book3S has a soft-nmi hardlockup detector based * on the decrementer interrupt, so it does not suffer from this problem. * - * It is likely to get false positives in VM guests, so disable it there - * by default too. + * It is likely to get false positives in KVM guests, so disable it there + * by default too. PowerVM will not stop or arbitrarily oversubscribe + * CPUs, but give a minimum regular allotment even with SPLPAR, so enable + * the detector for non-KVM guests, assume PowerVM. */ static int __init disable_hardlockup_detector(void) { #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF hardlockup_detector_disable(); #else - if (firmware_has_feature(FW_FEATURE_LPAR)) - hardlockup_detector_disable(); + if (firmware_has_feature(FW_FEATURE_LPAR)) { + check_kvm_guest(); + if (is_kvm_guest()) + hardlockup_detector_disable(); + } #endif return 0; } early_initcall(disable_hardlockup_detector); - -#ifdef CONFIG_PPC_BOOK3S_64 -static enum l1d_flush_type enabled_flush_types; -static void *l1d_flush_fallback_area; -static bool no_rfi_flush; -bool rfi_flush; - -static int __init handle_no_rfi_flush(char *p) -{ - pr_info("rfi-flush: disabled on command line."); - no_rfi_flush = true; - return 0; -} -early_param("no_rfi_flush", handle_no_rfi_flush); - -/* - * The RFI flush is not KPTI, but because users will see doco that says to use - * nopti we hijack that option here to also disable the RFI flush. - */ -static int __init handle_no_pti(char *p) -{ - pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); - handle_no_rfi_flush(NULL); - return 0; -} -early_param("nopti", handle_no_pti); - -static void do_nothing(void *unused) -{ - /* - * We don't need to do the flush explicitly, just enter+exit kernel is - * sufficient, the RFI exit handlers will do the right thing. - */ -} - -void rfi_flush_enable(bool enable) -{ - if (enable) { - do_rfi_flush_fixups(enabled_flush_types); - on_each_cpu(do_nothing, NULL, 1); - } else - do_rfi_flush_fixups(L1D_FLUSH_NONE); - - rfi_flush = enable; -} - -static void __ref init_fallback_flush(void) -{ - u64 l1d_size, limit; - int cpu; - - /* Only allocate the fallback flush area once (at boot time). */ - if (l1d_flush_fallback_area) - return; - - l1d_size = ppc64_caches.l1d.size; - - /* - * If there is no d-cache-size property in the device tree, l1d_size - * could be zero. That leads to the loop in the asm wrapping around to - * 2^64-1, and then walking off the end of the fallback area and - * eventually causing a page fault which is fatal. Just default to - * something vaguely sane. - */ - if (!l1d_size) - l1d_size = (64 * 1024); - - limit = min(ppc64_bolted_size(), ppc64_rma_size); - - /* - * Align to L1d size, and size it at 2x L1d size, to catch possible - * hardware prefetch runoff. We don't have a recipe for load patterns to - * reliably avoid the prefetcher. - */ - l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); - memset(l1d_flush_fallback_area, 0, l1d_size * 2); - - for_each_possible_cpu(cpu) { - struct paca_struct *paca = paca_ptrs[cpu]; - paca->rfi_flush_fallback_area = l1d_flush_fallback_area; - paca->l1d_flush_size = l1d_size; - } -} - -void setup_rfi_flush(enum l1d_flush_type types, bool enable) -{ - if (types & L1D_FLUSH_FALLBACK) { - pr_info("rfi-flush: fallback displacement flush available\n"); - init_fallback_flush(); - } - - if (types & L1D_FLUSH_ORI) - pr_info("rfi-flush: ori type flush available\n"); - - if (types & L1D_FLUSH_MTTRIG) - pr_info("rfi-flush: mttrig type flush available\n"); - - enabled_flush_types = types; - - if (!no_rfi_flush) - rfi_flush_enable(enable); -} - -#ifdef CONFIG_DEBUG_FS -static int rfi_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != rfi_flush) - rfi_flush_enable(enable); - - return 0; -} - -static int rfi_flush_get(void *data, u64 *val) -{ - *val = rfi_flush ? 1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); - -static __init int rfi_flush_debugfs_init(void) -{ - debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); - return 0; -} -device_initcall(rfi_flush_debugfs_init); -#endif -#endif /* CONFIG_PPC_BOOK3S_64 */ |
