diff options
Diffstat (limited to 'arch/powerpc/kernel/prom.c')
| -rw-r--r-- | arch/powerpc/kernel/prom.c | 948 |
1 files changed, 533 insertions, 415 deletions
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index eb23ac92abb9..9ed9dde7d231 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Procedures for creating, accessing and interpreting the device tree. * @@ -6,16 +7,10 @@ * * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. * {engebret|bergner}@us.ibm.com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #undef DEBUG -#include <stdarg.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/init.h> @@ -23,18 +18,20 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/pci.h> -#include <linux/stringify.h> #include <linux/delay.h> #include <linux/initrd.h> #include <linux/bitops.h> #include <linux/export.h> #include <linux/kexec.h> -#include <linux/debugfs.h> #include <linux/irq.h> #include <linux/memblock.h> #include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> +#include <linux/cpu.h> +#include <linux/pgtable.h> +#include <linux/seq_buf.h> -#include <asm/prom.h> #include <asm/rtas.h> #include <asm/page.h> #include <asm/processor.h> @@ -44,17 +41,22 @@ #include <asm/smp.h> #include <asm/mmu.h> #include <asm/paca.h> -#include <asm/pgtable.h> -#include <asm/pci.h> +#include <asm/powernv.h> #include <asm/iommu.h> #include <asm/btext.h> #include <asm/sections.h> -#include <asm/machdep.h> +#include <asm/setup.h> #include <asm/pci-bridge.h> #include <asm/kexec.h> #include <asm/opal.h> #include <asm/fadump.h> -#include <asm/debug.h> +#include <asm/epapr_hcalls.h> +#include <asm/firmware.h> +#include <asm/dt_cpu_ftrs.h> +#include <asm/drmem.h> +#include <asm/ultravisor.h> +#include <asm/prom.h> +#include <asm/plpks.h> #include <mm/mmu_decl.h> @@ -64,11 +66,14 @@ #define DBG(fmt...) #endif +int *chip_id_lookup_table; + #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; u64 ppc64_rma_size; +unsigned int boot_cpu_node_count __ro_after_init; #endif static phys_addr_t first_memblock_size; static int __initdata boot_cpu_count; @@ -95,8 +100,8 @@ static inline int overlaps_initrd(unsigned long start, unsigned long size) if (!initrd_start) return 0; - return (start + size) > _ALIGN_DOWN(initrd_start, PAGE_SIZE) && - start <= _ALIGN_UP(initrd_end, PAGE_SIZE); + return (start + size) > ALIGN_DOWN(initrd_start, PAGE_SIZE) && + start <= ALIGN(initrd_end, PAGE_SIZE); #else return 0; #endif @@ -117,22 +122,25 @@ static void __init move_device_tree(void) DBG("-> move_device_tree\n"); start = __pa(initial_boot_params); - size = be32_to_cpu(initial_boot_params->totalsize); + size = fdt_totalsize(initial_boot_params); if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) || - overlaps_crashkernel(start, size) || - overlaps_initrd(start, size)) { - p = __va(memblock_alloc(size, PAGE_SIZE)); + !memblock_is_memory(start + size - 1) || + overlaps_crashkernel(start, size) || overlaps_initrd(start, size)) { + p = memblock_alloc_raw(size, PAGE_SIZE); + if (!p) + panic("Failed to allocate %lu bytes to move device tree\n", + size); memcpy(p, initial_boot_params, size); - initial_boot_params = (struct boot_param_header *)p; - DBG("Moved device tree to 0x%p\n", p); + initial_boot_params = p; + DBG("Moved device tree to 0x%px\n", p); } DBG("<- move_device_tree\n"); } /* - * ibm,pa-features is a per-cpu property that contains a string of + * ibm,pa/pi-features is a per-cpu property that contains a string of * attribute descriptors, each of which has a 2 byte header plus up * to 254 bytes worth of processor attribute bits. First header * byte specifies the number of bytes following the header. @@ -143,28 +151,57 @@ static void __init move_device_tree(void) * pa-features property is missing, or a 1/0 to indicate if the feature * is supported/not supported. Note that the bit numbers are * big-endian to match the definition in PAPR. + * Note: the 'clear' flag clears the feature if the bit is set in the + * ibm,pa/pi-features property, it does not set the feature if the + * bit is clear. */ -static struct ibm_pa_feature { +struct ibm_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ - unsigned char pabyte; /* byte number in ibm,pa-features */ + unsigned int cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */ + unsigned char pabyte; /* byte number in ibm,pa/pi-features */ unsigned char pabit; /* bit number (big-endian) */ - unsigned char invert; /* if 1, pa bit set => clear feature */ -} ibm_pa_features[] __initdata = { - {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, - {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {0, MMU_FTR_SLB, 0, 0, 2, 0}, - {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, - {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, + unsigned char clear; /* if 1, pa bit set => clear feature */ }; -static void __init scan_features(unsigned long node, unsigned char *ftrs, +static struct ibm_feature ibm_pa_features[] __initdata = { + { .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU }, + { .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU }, + { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, + { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, + { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, +#ifdef CONFIG_PPC_RADIX_MMU + { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE }, +#endif + { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, + .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, + /* + * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n), + * we don't want to turn on TM here, so we use the *_COMP versions + * which are 0 if the kernel doesn't support TM. + */ + { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP, + .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, + + { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 }, + { .pabyte = 68, .pabit = 5, .cpu_features = CPU_FTR_DEXCR_NPHIE }, +}; + +/* + * ibm,pi-features property provides the support of processor specific + * options not described in ibm,pa-features. Right now use byte 0, bit 3 + * which indicates the occurrence of DSI interrupt when the paste operation + * on the suspended NX window. + */ +static struct ibm_feature ibm_pi_features[] __initdata = { + { .pabyte = 0, .pabit = 3, .mmu_features = MMU_FTR_NX_DSI }, + { .pabyte = 0, .pabit = 4, .cpu_features = CPU_FTR_DBELL, .clear = 1 }, +}; + +static void __init scan_features(unsigned long node, const unsigned char *ftrs, unsigned long tablelen, - struct ibm_pa_feature *fp, + struct ibm_feature *fp, unsigned long ft_size) { unsigned long i, len, bit; @@ -187,48 +224,47 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs, if (fp->pabyte >= ftrs[0]) continue; bit = (ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1; - if (bit ^ fp->invert) { + if (bit && !fp->clear) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features |= fp->mmu_features; - } else { + } else if (bit == fp->clear) { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features &= ~fp->mmu_features; } } } -static void __init check_cpu_pa_features(unsigned long node) +static void __init check_cpu_features(unsigned long node, char *name, + struct ibm_feature *fp, + unsigned long size) { - unsigned char *pa_ftrs; - unsigned long tablelen; + const unsigned char *pa_ftrs; + int tablelen; - pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen); + pa_ftrs = of_get_flat_dt_prop(node, name, &tablelen); if (pa_ftrs == NULL) return; - scan_features(node, pa_ftrs, tablelen, - ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); + scan_features(node, pa_ftrs, tablelen, fp, size); } -#ifdef CONFIG_PPC_STD_MMU_64 -static void __init check_cpu_slb_size(unsigned long node) +#ifdef CONFIG_PPC_64S_HASH_MMU +static void __init init_mmu_slb_size(unsigned long node) { - u32 *slb_size_ptr; + const __be32 *slb_size_ptr; - slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL); - if (slb_size_ptr != NULL) { - mmu_slb_size = *slb_size_ptr; - return; - } - slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL); - if (slb_size_ptr != NULL) { - mmu_slb_size = *slb_size_ptr; - } + slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL) ? : + of_get_flat_dt_prop(node, "ibm,slb-size", NULL); + + if (slb_size_ptr) + mmu_slb_size = be32_to_cpup(slb_size_ptr); } #else -#define check_cpu_slb_size(node) do { } while(0) +#define init_mmu_slb_size(node) do { } while(0) #endif static struct feature_property { @@ -253,10 +289,10 @@ static struct feature_property { }; #if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU) -static inline void identical_pvr_fixup(unsigned long node) +static __init void identical_pvr_fixup(unsigned long node) { unsigned int pvr; - char *model = of_get_flat_dt_prop(node, "model", NULL); + const char *model = of_get_flat_dt_prop(node, "model", NULL); /* * Since 440GR(x)/440EP(x) processors have the same pvr, @@ -277,13 +313,13 @@ static inline void identical_pvr_fixup(unsigned long node) static void __init check_cpu_feature_properties(unsigned long node) { - unsigned long i; + int i; struct feature_property *fp = feature_properties; - const u32 *prop; + const __be32 *prop; - for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) { + for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) { prop = of_get_flat_dt_prop(node, fp->name, NULL); - if (prop && *prop >= fp->min_value) { + if (prop && be32_to_cpup(prop) >= fp->min_value) { cur_cpu_spec->cpu_features |= fp->cpu_feature; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftr; } @@ -294,11 +330,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - const u32 *prop; - const u32 *intserv; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *cpu_version = NULL; + const __be32 *prop; + const __be32 *intserv; int i, nthreads; - unsigned long len; + int len; int found = -1; int found_thread = 0; @@ -306,38 +343,25 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_node_count++; + /* Get physical cpuid */ intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); - if (intserv) { - nthreads = len / sizeof(int); - } else { - intserv = of_get_flat_dt_prop(node, "reg", NULL); - nthreads = 1; - } + if (!intserv) + intserv = of_get_flat_dt_prop(node, "reg", &len); + + nthreads = len / sizeof(int); /* * Now see if any of these threads match our boot cpu. * NOTE: This must match the parsing done in smp_setup_cpu_maps. */ for (i = 0; i < nthreads; i++) { - /* - * version 2 of the kexec param format adds the phys cpuid of - * booted proc. - */ - if (initial_boot_params->version >= 2) { - if (intserv[i] == initial_boot_params->boot_cpuid_phys) { - found = boot_cpu_count; - found_thread = i; - } - } else { - /* - * Check if it's the boot-cpu, set it's hw index now, - * unfortunately this format did not support booting - * off secondary threads. - */ - if (of_get_flat_dt_prop(node, - "linux,boot-cpu", NULL) != NULL) - found = boot_cpu_count; + if (be32_to_cpu(intserv[i]) == + fdt_boot_cpuid_phys(initial_boot_params)) { + found = boot_cpu_count; + found_thread = i; } #ifdef CONFIG_SMP /* logical cpu id is always 0 on UP kernels */ @@ -345,54 +369,95 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #endif } - if (found >= 0) { - DBG("boot cpu: logical %d physical %d\n", found, - intserv[found_thread]); - boot_cpuid = found; - set_hard_smp_processor_id(found, intserv[found_thread]); + /* Not the boot CPU */ + if (found < 0) + return 0; - /* - * PAPR defines "logical" PVR values for cpus that - * meet various levels of the architecture: - * 0x0f000001 Architecture version 2.04 - * 0x0f000002 Architecture version 2.05 - * If the cpu-version property in the cpu node contains - * such a value, we call identify_cpu again with the - * logical PVR value in order to use the cpu feature - * bits appropriate for the architecture level. - * - * A POWER6 partition in "POWER6 architected" mode - * uses the 0x0f000002 PVR value; in POWER5+ mode - * it uses 0x0f000001. - */ + boot_cpuid = found; + + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_hwid = be32_to_cpu(intserv[found_thread]); + + if (nr_cpu_ids % nthreads != 0) { + set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads)); + pr_warn("nr_cpu_ids was not a multiple of threads_per_core, adjusted to %d\n", + nr_cpu_ids); + } + + if (boot_cpuid >= nr_cpu_ids) { + // Remember boot core for smp_setup_cpu_maps() + boot_core_hwid = be32_to_cpu(intserv[0]); + + pr_warn("Boot CPU %d (core hwid %d) >= nr_cpu_ids, adjusted boot CPU to %d\n", + boot_cpuid, boot_core_hwid, found_thread); + + // Adjust boot CPU to appear on logical core 0 + boot_cpuid = found_thread; + } + + DBG("boot cpu: logical %d physical %d\n", boot_cpuid, + be32_to_cpu(intserv[found_thread])); + + /* + * PAPR defines "logical" PVR values for cpus that + * meet various levels of the architecture: + * 0x0f000001 Architecture version 2.04 + * 0x0f000002 Architecture version 2.05 + * If the cpu-version property in the cpu node contains + * such a value, we call identify_cpu again with the + * logical PVR value in order to use the cpu feature + * bits appropriate for the architecture level. + * + * A POWER6 partition in "POWER6 architected" mode + * uses the 0x0f000002 PVR value; in POWER5+ mode + * it uses 0x0f000001. + * + * If we're using device tree CPU feature discovery then we don't + * support the cpu-version property, and it's the responsibility of the + * firmware/hypervisor to provide the correct feature set for the + * architecture level via the ibm,powerpc-cpu-features binding. + */ + if (!dt_cpu_ftrs_in_use()) { prop = of_get_flat_dt_prop(node, "cpu-version", NULL); - if (prop && (*prop & 0xff000000) == 0x0f000000) - identify_cpu(0, *prop); + if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) { + identify_cpu(0, be32_to_cpup(prop)); + cpu_version = prop; + } - identical_pvr_fixup(node); + check_cpu_feature_properties(node); + check_cpu_features(node, "ibm,pa-features", ibm_pa_features, + ARRAY_SIZE(ibm_pa_features)); + check_cpu_features(node, "ibm,pi-features", ibm_pi_features, + ARRAY_SIZE(ibm_pi_features)); } - check_cpu_feature_properties(node); - check_cpu_pa_features(node); - check_cpu_slb_size(node); + identical_pvr_fixup(node); -#ifdef CONFIG_PPC_PSERIES - if (nthreads > 1) - cur_cpu_spec->cpu_features |= CPU_FTR_SMT; - else + // We can now add the CPU name & PVR to the hardware description + seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); + if (cpu_version) + seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(cpu_version)); + + init_mmu_slb_size(node); + +#ifdef CONFIG_PPC64 + if (nthreads == 1) cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; + else if (!dt_cpu_ftrs_in_use()) + cur_cpu_spec->cpu_features |= CPU_FTR_SMT; #endif return 0; } -int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, - int depth, void *data) +static int __init early_init_dt_scan_chosen_ppc(unsigned long node, + const char *uname, + int depth, void *data) { - unsigned long *lprop; + const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ /* Use common scan routine to determine if this is the chosen node */ - if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) + if (early_init_dt_scan_chosen(data) < 0) return 0; #ifdef CONFIG_PPC64 @@ -417,7 +482,7 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, tce_alloc_end = *lprop; #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_CRASH_RESERVE lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); if (lprop) crashk_res.start = *lprop; @@ -431,96 +496,125 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, return 1; } +/* + * Compare the range against max mem limit and update + * size if it cross the limit. + */ + +#ifdef CONFIG_SPARSEMEM +static bool __init validate_mem_limit(u64 base, u64 *size) +{ + u64 max_mem = 1UL << (MAX_PHYSMEM_BITS); + + if (base >= max_mem) + return false; + if ((base + *size) > max_mem) + *size = max_mem - base; + return true; +} +#else +static bool __init validate_mem_limit(u64 base, u64 *size) +{ + return true; +} +#endif + #ifdef CONFIG_PPC_PSERIES /* - * Interpret the ibm,dynamic-memory property in the - * /ibm,dynamic-reconfiguration-memory node. + * Interpret the ibm dynamic reconfiguration memory LMBs. * This contains a list of memory blocks along with NUMA affinity * information. */ -static int __init early_init_dt_scan_drconf_memory(unsigned long node) +static int __init early_init_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm, + void *data) { - __be32 *dm, *ls, *usm; - unsigned long l, n, flags; - u64 base, size, memblock_size; - unsigned int is_kexec_kdump = 0, rngs; - - ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); - if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) - return 0; - memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls); + u64 base, size; + int is_kexec_kdump = 0, rngs; - dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l); - if (dm == NULL || l < sizeof(__be32)) - return 0; + base = lmb->base_addr; + size = drmem_lmb_size(); + rngs = 1; - n = *dm++; /* number of entries */ - if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32)) + /* + * Skip this block if the reserved bit is set in flags + * or if the block is not assigned to this partition. + */ + if ((lmb->flags & DRCONF_MEM_RESERVED) || + !(lmb->flags & DRCONF_MEM_ASSIGNED)) return 0; - /* check if this is a kexec/kdump kernel. */ - usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", - &l); - if (usm != NULL) + if (*usm) is_kexec_kdump = 1; - for (; n != 0; --n) { - base = dt_mem_next_cell(dt_root_addr_cells, &dm); - flags = dm[3]; - /* skip DRC index, pad, assoc. list index, flags */ - dm += 4; - /* skip this block if the reserved bit is set in flags (0x80) - or if the block is not assigned to this partition (0x8) */ - if ((flags & 0x80) || !(flags & 0x8)) - continue; - size = memblock_size; - rngs = 1; + if (is_kexec_kdump) { + /* + * For each memblock in ibm,dynamic-memory, a + * corresponding entry in linux,drconf-usable-memory + * property contains a counter 'p' followed by 'p' + * (base, size) duple. Now read the counter from + * linux,drconf-usable-memory property + */ + rngs = dt_mem_next_cell(dt_root_size_cells, usm); + if (!rngs) /* there are no (base, size) duple */ + return 0; + } + + do { if (is_kexec_kdump) { - /* - * For each memblock in ibm,dynamic-memory, a corresponding - * entry in linux,drconf-usable-memory property contains - * a counter 'p' followed by 'p' (base, size) duple. - * Now read the counter from - * linux,drconf-usable-memory property - */ - rngs = dt_mem_next_cell(dt_root_size_cells, &usm); - if (!rngs) /* there are no (base, size) duple */ + base = dt_mem_next_cell(dt_root_addr_cells, usm); + size = dt_mem_next_cell(dt_root_size_cells, usm); + } + + if (iommu_is_off) { + if (base >= 0x80000000ul) continue; + if ((base + size) > 0x80000000ul) + size = 0x80000000ul - base; } - do { - if (is_kexec_kdump) { - base = dt_mem_next_cell(dt_root_addr_cells, - &usm); - size = dt_mem_next_cell(dt_root_size_cells, - &usm); - } - if (iommu_is_off) { - if (base >= 0x80000000ul) - continue; - if ((base + size) > 0x80000000ul) - size = 0x80000000ul - base; - } - memblock_add(base, size); - } while (--rngs); - } - memblock_dump_all(); + + if (!validate_mem_limit(base, &size)) + continue; + + DBG("Adding: %llx -> %llx\n", base, size); + memblock_add(base, size); + + if (lmb->flags & DRCONF_MEM_HOTREMOVABLE) + memblock_mark_hotplug(base, size); + } while (--rngs); + return 0; } -#else -#define early_init_dt_scan_drconf_memory(node) 0 #endif /* CONFIG_PPC_PSERIES */ -static int __init early_init_dt_scan_memory_ppc(unsigned long node, - const char *uname, - int depth, void *data) +static int __init early_init_dt_scan_memory_ppc(void) { - if (depth == 1 && - strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) - return early_init_dt_scan_drconf_memory(node); - - return early_init_dt_scan_memory(node, uname, depth, data); +#ifdef CONFIG_PPC_PSERIES + const void *fdt = initial_boot_params; + int node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory"); + + if (node > 0) + walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb); + +#endif + + return early_init_dt_scan_memory(); } +/* + * For a relocatable kernel, we need to get the memstart_addr first, + * then use it to calculate the virtual kernel start address. This has + * to happen at a very early stage (before machine_init). In this case, + * we just want to get the memstart_address and would not like to mess the + * memblock at this stage. So introduce a variable to skip the memblock_add() + * for this reason. + */ +#ifdef CONFIG_RELOCATABLE +static int add_mem_to_memblock = 1; +#else +#define add_mem_to_memblock 1 +#endif + void __init early_init_dt_add_memory_arch(u64 base, u64 size) { #ifdef CONFIG_PPC64 @@ -541,29 +635,21 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } /* Add the chunk to the MEMBLOCK list */ - memblock_add(base, size); -} - -void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) -{ - return __va(memblock_alloc(size, align)); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) -{ - initrd_start = (unsigned long)__va(start); - initrd_end = (unsigned long)__va(end); - initrd_below_start_ok = 1; + if (add_mem_to_memblock) { + if (validate_mem_limit(base, &size)) + memblock_add(base, size); + } } -#endif static void __init early_reserve_mem_dt(void) { - unsigned long i, len, dt_root; + unsigned long i, dt_root; + int len; const __be32 *prop; + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + dt_root = of_get_flat_dt_root(); prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len); @@ -590,18 +676,10 @@ static void __init early_reserve_mem_dt(void) static void __init early_reserve_mem(void) { - u64 base, size; - u64 *reserve_map; - unsigned long self_base; - unsigned long self_size; + __be64 *reserve_map; - reserve_map = (u64 *)(((unsigned long)initial_boot_params) + - initial_boot_params->off_mem_rsvmap); - - /* before we do anything, lets reserve the dt blob */ - self_base = __pa((unsigned long)initial_boot_params); - self_size = initial_boot_params->totalsize; - memblock_reserve(self_base, self_size); + reserve_map = (__be64 *)(((unsigned long)initial_boot_params) + + fdt_off_mem_rsvmap(initial_boot_params)); /* Look for the new "reserved-regions" property in the DT */ early_reserve_mem_dt(); @@ -609,58 +687,114 @@ static void __init early_reserve_mem(void) #ifdef CONFIG_BLK_DEV_INITRD /* Then reserve the initrd, if any */ if (initrd_start && (initrd_end > initrd_start)) { - memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), - _ALIGN_UP(initrd_end, PAGE_SIZE) - - _ALIGN_DOWN(initrd_start, PAGE_SIZE)); + memblock_reserve(ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), + ALIGN(initrd_end, PAGE_SIZE) - + ALIGN_DOWN(initrd_start, PAGE_SIZE)); } #endif /* CONFIG_BLK_DEV_INITRD */ -#ifdef CONFIG_PPC32 + if (!IS_ENABLED(CONFIG_PPC32)) + return; + /* * Handle the case where we might be booting from an old kexec * image that setup the mem_rsvmap as pairs of 32-bit values */ - if (*reserve_map > 0xffffffffull) { + if (be64_to_cpup(reserve_map) > 0xffffffffull) { u32 base_32, size_32; - u32 *reserve_map_32 = (u32 *)reserve_map; + __be32 *reserve_map_32 = (__be32 *)reserve_map; DBG("Found old 32-bit reserve map\n"); while (1) { - base_32 = *(reserve_map_32++); - size_32 = *(reserve_map_32++); + base_32 = be32_to_cpup(reserve_map_32++); + size_32 = be32_to_cpup(reserve_map_32++); if (size_32 == 0) break; - /* skip if the reservation is for the blob */ - if (base_32 == self_base && size_32 == self_size) - continue; DBG("reserving: %x -> %x\n", base_32, size_32); memblock_reserve(base_32, size_32); } return; } -#endif - DBG("Processing reserve map\n"); +} - /* Handle the reserve map in the fdt blob if it exists */ - while (1) { - base = *(reserve_map++); - size = *(reserve_map++); - if (size == 0) - break; - DBG("reserving: %llx -> %llx\n", base, size); - memblock_reserve(base, size); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static bool tm_disabled __initdata; + +static int __init parse_ppc_tm(char *str) +{ + bool res; + + if (kstrtobool(str, &res)) + return -EINVAL; + + tm_disabled = !res; + + return 0; +} +early_param("ppc_tm", parse_ppc_tm); + +static void __init tm_init(void) +{ + if (tm_disabled) { + pr_info("Disabling hardware transactional memory (HTM)\n"); + cur_cpu_spec->cpu_user_features2 &= + ~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM); + cur_cpu_spec->cpu_features &= ~CPU_FTR_TM; + return; } + + pnv_tm_init(); +} +#else +static void tm_init(void) { } +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + +static int __init +early_init_dt_scan_model(unsigned long node, const char *uname, + int depth, void *data) +{ + const char *prop; + + if (depth != 0) + return 0; + + prop = of_get_flat_dt_prop(node, "model", NULL); + if (prop) + seq_buf_printf(&ppc_hw_desc, "%s ", prop); + + /* break now */ + return 1; } +#ifdef CONFIG_PPC64 +static void __init save_fscr_to_task(void) +{ + /* + * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we + * have configured via the device tree features or via __init_FSCR(). + * That value will then be propagated to pid 1 (init) and all future + * processes. + */ + if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + init_task.thread.fscr = mfspr(SPRN_FSCR); +} +#else +static inline void save_fscr_to_task(void) {} +#endif + + void __init early_init_devtree(void *params) { - phys_addr_t limit; + phys_addr_t int_vector_size; - DBG(" -> early_init_devtree(%p)\n", params); + DBG(" -> early_init_devtree(%px)\n", params); - /* Setup flat device-tree pointer */ - initial_boot_params = params; + /* Too early to BUG_ON(), do it by hand */ + if (!early_init_dt_verify(params, __pa(params))) + panic("BUG: Failed verifying flat device tree, bad version?"); + + of_scan_flat_dt(early_init_dt_scan_model, NULL); #ifdef CONFIG_PPC_RTAS /* Some machines might need RTAS info for debugging, grab it now. */ @@ -670,32 +804,36 @@ void __init early_init_devtree(void *params) #ifdef CONFIG_PPC_POWERNV /* Some machines might need OPAL info for debugging, grab it now. */ of_scan_flat_dt(early_init_dt_scan_opal, NULL); + + /* Scan tree for ultravisor feature */ + of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL); #endif -#ifdef CONFIG_FA_DUMP +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) /* scan tree to see if dump is active during last boot */ of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); #endif - /* Pre-initialize the cmd_line with the content of boot_commmand_line, - * which will be empty except when the content of the variable has - * been overriden by a bootloading mechanism. This happens typically - * with HAL takeover - */ - strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); - /* Retrieve various informations from the /chosen node of the * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); + of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); + + /* Append additional parameters passed for fadump capture kernel */ + fadump_append_bootargs(); /* Scan memory nodes and rebuild MEMBLOCKs */ - of_scan_flat_dt(early_init_dt_scan_root, NULL); - of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + early_init_dt_scan_root(); + early_init_dt_scan_memory_ppc(); - /* Save command line for /proc/cmdline and then parse parameters */ - strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); + /* + * As generic code authors expect to be able to use static keys + * in early_param() handlers, we initialize the static keys just + * before parsing early params (it's fine to call jump_label_init() + * more than once). + */ + jump_label_init(); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ @@ -703,45 +841,63 @@ void __init early_init_devtree(void *params) first_memblock_size = min_t(u64, first_memblock_size, memory_limit); setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ - memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); + memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); +#ifdef CONFIG_PPC64 + /* If relocatable, reserve at least 32k for interrupt vectors etc. */ + int_vector_size = __end_interrupts - _stext; + int_vector_size = max_t(phys_addr_t, SZ_32K, int_vector_size); +#else /* If relocatable, reserve first 32k for interrupt vectors etc. */ + int_vector_size = SZ_32K; +#endif if (PHYSICAL_START > MEMORY_START) - memblock_reserve(MEMORY_START, 0x8000); + memblock_reserve(MEMORY_START, int_vector_size); reserve_kdump_trampoline(); -#ifdef CONFIG_FA_DUMP +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) /* * If we fail to reserve memory for firmware-assisted dump then * fallback to kexec based kdump. */ if (fadump_reserve_mem() == 0) #endif - reserve_crashkernel(); + arch_reserve_crashkernel(); early_reserve_mem(); - /* - * Ensure that total memory size is page-aligned, because otherwise - * mark_bootmem() gets upset. - */ - limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); - memblock_enforce_memory_limit(limit); + if (memory_limit > memblock_phys_mem_size()) + memory_limit = 0; + + /* Align down to 16 MB which is large page size with hash page translation */ + memory_limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M); + memblock_enforce_memory_limit(memory_limit); + +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) + if (!early_radix_enabled()) + memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS)); +#endif memblock_allow_resize(); memblock_dump_all(); - DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); + DBG("Phys. mem: %llx\n", (unsigned long long)memblock_phys_mem_size()); /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ move_device_tree(); - allocate_pacas(); - DBG("Scanning CPUs ...\n"); + dt_cpu_ftrs_scan(); + /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ of_scan_flat_dt(early_init_dt_scan_cpus, NULL); + if (boot_cpuid < 0) { + printk("Failed to identify boot CPU !\n"); + BUG(); + } + + save_fscr_to_task(); #if defined(CONFIG_SMP) && defined(CONFIG_PPC64) /* We'll later wait for secondaries to check in; there are @@ -750,9 +906,63 @@ void __init early_init_devtree(void *params) spinning_secondaries = boot_cpu_count - 1; #endif + mmu_early_init_devtree(); + + /* Setup param area for passing additional parameters to fadump capture kernel. */ + fadump_setup_param_area(); + +#ifdef CONFIG_PPC_POWERNV + /* Scan and build the list of machine check recoverable ranges */ + of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); +#endif + epapr_paravirt_early_init(); + + /* Now try to figure out if we are running on LPAR and so on */ + pseries_probe_fw_features(); + + /* + * Initialize pkey features and default AMR/IAMR values + */ + pkey_early_init_devtree(); + +#ifdef CONFIG_PPC_PS3 + /* Identify PS3 firmware */ + if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) + powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; +#endif + + /* If kexec left a PLPKS password in the DT, get it and clear it */ + plpks_early_init_devtree(); + + tm_init(); + DBG(" <- early_init_devtree()\n"); } +#ifdef CONFIG_RELOCATABLE +/* + * This function run before early_init_devtree, so we have to init + * initial_boot_params. + */ +void __init early_get_first_memblock_info(void *params, phys_addr_t *size) +{ + /* Setup flat device-tree pointer */ + initial_boot_params = params; + + /* + * Scan the memory nodes and set add_mem_to_memblock to 0 to avoid + * mess the memblock. + */ + add_mem_to_memblock = 0; + early_init_dt_scan_root(); + early_init_dt_scan_memory_ppc(); + add_mem_to_memblock = 1; + + if (size) + *size = first_memblock_size; +} +#endif + /******* * * New implementation of the OF "find" APIs, return a refcounted @@ -765,166 +975,74 @@ void __init early_init_devtree(void *params) *******/ /** - * of_find_next_cache_node - Find a node's subsidiary cache - * @np: node of type "cpu" or "cache" + * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device + * @np: device node of the device * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. Caller should hold a reference - * to np. + * This looks for a property "ibm,chip-id" in the node or any + * of its parents and returns its content, or -1 if it cannot + * be found. */ -struct device_node *of_find_next_cache_node(struct device_node *np) +int of_get_ibm_chip_id(struct device_node *np) { - struct device_node *child; - const phandle *handle; - - handle = of_get_property(np, "l2-cache", NULL); - if (!handle) - handle = of_get_property(np, "next-level-cache", NULL); - - if (handle) - return of_find_node_by_phandle(*handle); - - /* OF on pmac has nodes instead of properties named "l2-cache" - * beneath CPU nodes. - */ - if (!strcmp(np->type, "cpu")) - for_each_child_of_node(np, child) - if (!strcmp(child->type, "cache")) - return child; - - return NULL; -} - -#ifdef CONFIG_PPC_PSERIES -/* - * Fix up the uninitialized fields in a new device node: - * name, type and pci-specific fields - */ - -static int of_finish_dynamic_node(struct device_node *node) -{ - struct device_node *parent = of_get_parent(node); - int err = 0; - const phandle *ibm_phandle; - - node->name = of_get_property(node, "name", NULL); - node->type = of_get_property(node, "device_type", NULL); - - if (!node->name) - node->name = "<NULL>"; - if (!node->type) - node->type = "<NULL>"; - - if (!parent) { - err = -ENODEV; - goto out; - } - - /* We don't support that function on PowerMac, at least - * not yet - */ - if (machine_is(powermac)) - return -ENODEV; - - /* fix up new node's phandle field */ - if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL))) - node->phandle = *ibm_phandle; + of_node_get(np); + while (np) { + u32 chip_id; -out: - of_node_put(parent); - return err; -} + /* + * Skiboot may produce memory nodes that contain more than one + * cell in chip-id, we only read the first one here. + */ + if (!of_property_read_u32(np, "ibm,chip-id", &chip_id)) { + of_node_put(np); + return chip_id; + } -static int prom_reconfig_notifier(struct notifier_block *nb, - unsigned long action, void *node) -{ - int err; - - switch (action) { - case OF_RECONFIG_ATTACH_NODE: - err = of_finish_dynamic_node(node); - if (err < 0) - printk(KERN_ERR "finish_node returned %d\n", err); - break; - default: - err = 0; - break; + np = of_get_next_parent(np); } - return notifier_from_errno(err); + return -1; } +EXPORT_SYMBOL(of_get_ibm_chip_id); -static struct notifier_block prom_reconfig_nb = { - .notifier_call = prom_reconfig_notifier, - .priority = 10, /* This one needs to run first */ -}; - -static int __init prom_reconfig_setup(void) -{ - return of_reconfig_notifier_register(&prom_reconfig_nb); -} -__initcall(prom_reconfig_setup); -#endif - -/* Find the device node for a given logical cpu number, also returns the cpu - * local thread number (index in ibm,interrupt-server#s) if relevant and - * asked for (non NULL) +/** + * cpu_to_chip_id - Return the cpus chip-id + * @cpu: The logical cpu number. + * + * Return the value of the ibm,chip-id property corresponding to the given + * logical cpu number. If the chip-id can not be found, returns -1. */ -struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) +int cpu_to_chip_id(int cpu) { - int hardid; struct device_node *np; + int ret = -1, idx; - hardid = get_hard_smp_processor_id(cpu); + idx = cpu / threads_per_core; + if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1) + return chip_id_lookup_table[idx]; - for_each_node_by_type(np, "cpu") { - const u32 *intserv; - unsigned int plen, t; + np = of_get_cpu_node(cpu, NULL); + if (np) { + ret = of_get_ibm_chip_id(np); + of_node_put(np); - /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist - * fallback to "reg" property and assume no threads - */ - intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", - &plen); - if (intserv == NULL) { - const u32 *reg = of_get_property(np, "reg", NULL); - if (reg == NULL) - continue; - if (*reg == hardid) { - if (thread) - *thread = 0; - return np; - } - } else { - plen /= sizeof(u32); - for (t = 0; t < plen; t++) { - if (hardid == intserv[t]) { - if (thread) - *thread = t; - return np; - } - } - } + if (chip_id_lookup_table) + chip_id_lookup_table[idx] = ret; } - return NULL; -} -EXPORT_SYMBOL(of_get_cpu_node); -#if defined(CONFIG_DEBUG_FS) && defined(DEBUG) -static struct debugfs_blob_wrapper flat_dt_blob; + return ret; +} +EXPORT_SYMBOL(cpu_to_chip_id); -static int __init export_flat_device_tree(void) +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { - struct dentry *d; - - flat_dt_blob.data = initial_boot_params; - flat_dt_blob.size = initial_boot_params->totalsize; - - d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR, - powerpc_debugfs_root, &flat_dt_blob); - if (!d) - return 1; +#ifdef CONFIG_SMP + /* + * Early firmware scanning must use this rather than + * get_hard_smp_processor_id because we don't have pacas allocated + * until memory topology is discovered. + */ + if (cpu_to_phys_id != NULL) + return (int)phys_id == cpu_to_phys_id[cpu]; +#endif - return 0; + return (int)phys_id == get_hard_smp_processor_id(cpu); } -__initcall(export_flat_device_tree); -#endif |
