39 files changed, 902 insertions(+), 373 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fa4e3737149c..8889ce7094e0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -281,6 +281,9 @@ config ZONE_DMA32 config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y + config SMP def_bool y @@ -952,11 +955,11 @@ config HOTPLUG_CPU # Common NUMA Features config NUMA - bool "Numa Memory Allocation and Scheduler Support" + bool "NUMA Memory Allocation and Scheduler Support" select ACPI_NUMA if ACPI select OF_NUMA help - Enable NUMA (Non Uniform Memory Access) support. + Enable NUMA (Non-Uniform Memory Access) support. The kernel will try to allocate memory used by a CPU on the local memory of the CPU and add some more diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index c5487806273f..0bff325117b4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -257,12 +257,6 @@ alternative_endif .endm /* - * mmid - get context id from mm pointer (mm->context.id) - */ - .macro mmid, rd, rn - ldr \rd, [\rn, #MM_CONTEXT_ID] - .endm -/* * read_ctr - read CTR_EL0. If the system has mismatched register fields, * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index 8d2a7de39744..b6f7bc6da5fb 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -5,7 +5,12 @@ #ifndef __ASM_CHECKSUM_H #define __ASM_CHECKSUM_H -#include <linux/types.h> +#include <linux/in6.h> + +#define _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum sum); static inline __sum16 csum_fold(__wsum csum) { diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 86aabf1e0199..d28e8f37d3b4 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -55,12 +55,12 @@ struct cpu_operations { #endif }; -extern const struct cpu_operations *cpu_ops[NR_CPUS]; -int __init cpu_read_ops(int cpu); +int __init init_cpu_ops(int cpu); +extern const struct cpu_operations *get_cpu_ops(int cpu); -static inline void __init cpu_read_bootcpu_ops(void) +static inline void __init init_bootcpu_ops(void) { - cpu_read_ops(0); + init_cpu_ops(0); } #endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 485e069d8768..e75f7df746ba 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -390,14 +390,16 @@ unsigned long cpu_get_elf_hwcap2(void); #define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) -/* System capability check for constant caps */ -static __always_inline bool __cpus_have_const_cap(int num) +static __always_inline bool system_capabilities_finalized(void) { - if (num >= ARM64_NCAPS) - return false; - return static_branch_unlikely(&cpu_hwcap_keys[num]); + return static_branch_likely(&arm64_const_caps_ready); } +/* + * Test for a capability with a runtime check. + * + * Before the capability is detected, this returns false. + */ static inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) @@ -405,14 +407,53 @@ static inline bool cpus_have_cap(unsigned int num) return test_bit(num, cpu_hwcaps); } +/* + * Test for a capability without a runtime check. 
+ * + * Before capabilities are finalized, this returns false. + * After capabilities are finalized, this is patched to avoid a runtime check. + * + * @num must be a compile-time constant. + */ +static __always_inline bool __cpus_have_const_cap(int num) +{ + if (num >= ARM64_NCAPS) + return false; + return static_branch_unlikely(&cpu_hwcap_keys[num]); +} + +/* + * Test for a capability, possibly with a runtime check. + * + * Before capabilities are finalized, this behaves as cpus_have_cap(). + * After capabilities are finalized, this is patched to avoid a runtime check. + * + * @num must be a compile-time constant. + */ static __always_inline bool cpus_have_const_cap(int num) { - if (static_branch_likely(&arm64_const_caps_ready)) + if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else return cpus_have_cap(num); } +/* + * Test for a capability without a runtime check. + * + * Before capabilities are finalized, this will BUG(). + * After capabilities are finalized, this is patched to avoid a runtime check. + * + * @num must be a compile-time constant. + */ +static __always_inline bool cpus_have_final_cap(int num) +{ + if (system_capabilities_finalized()) + return __cpus_have_const_cap(num); + else + BUG(); +} + static inline void cpus_set_cap(unsigned int num) { if (num >= ARM64_NCAPS) { @@ -447,6 +488,29 @@ cpuid_feature_extract_unsigned_field(u64 features, int field) return cpuid_feature_extract_unsigned_field_width(features, field, 4); } +/* + * Fields that identify the version of the Performance Monitors Extension do + * not follow the standard ID scheme. See ARM DDI 0487E.a page D13-2825, + * "Alternative ID scheme used for the Performance Monitors Extension version". + */ +static inline u64 __attribute_const__ +cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap) +{ + u64 val = cpuid_feature_extract_unsigned_field(features, field); + u64 mask = GENMASK_ULL(field + 3, field); + + /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */ + if (val == 0xf) + val = 0; + + if (val > cap) { + features &= ~mask; + features |= (cap << field) & mask; + } + + return features; +} + static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp) { return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); @@ -613,11 +677,6 @@ static inline bool system_has_prio_mask_debugging(void) system_uses_irq_prio_masking(); } -static inline bool system_capabilities_finalized(void) -{ - return static_branch_likely(&arm64_const_caps_ready); -} - #define ARM64_BP_HARDEN_UNKNOWN -1 #define ARM64_BP_HARDEN_WA_NEEDED 0 #define ARM64_BP_HARDEN_NOT_REQUIRED 1 diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index cb29253ae86b..6a395a7e6707 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -60,7 +60,7 @@ #define ESR_ELx_EC_BKPT32 (0x38) /* Unallocated EC: 0x39 */ #define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ -/* Unallocted EC: 0x3B */ +/* Unallocated EC: 0x3B */ #define ESR_ELx_EC_BRK64 (0x3C) /* Unallocated EC: 0x3D - 0x3F */ #define ESR_ELx_EC_MAX (0x3F) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 4d94676e5a8b..2be67b232499 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -54,6 +54,7 @@ #define MODULES_VADDR (BPF_JIT_REGION_END) #define MODULES_VSIZE (SZ_128M) #define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M) +#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) #define PCI_IO_END (VMEMMAP_START - SZ_2M) #define PCI_IO_START 
(PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index e4d862420bb4..21a4bcfdb378 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -23,9 +23,9 @@ typedef struct { } mm_context_t; /* - * This macro is only used by the TLBI code, which cannot race with an - * ASID change and therefore doesn't need to reload the counter using - * atomic64_read. + * This macro is only used by the TLBI and low-level switch_mm() code, + * neither of which can race with an ASID change. We therefore don't + * need to reload the counter using atomic64_read(). */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3827ff4040a3..ab46187c6300 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -46,6 +46,8 @@ static inline void cpu_set_reserved_ttbr0(void) isb(); } +void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); + static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) { BUG_ON(pgd == swapper_pg_dir); diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index d39ddb258a04..75d6cd23a679 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -21,6 +21,10 @@ extern void __cpu_copy_user_page(void *to, const void *from, extern void copy_page(void *to, const void *from); extern void clear_page(void *to); +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + #define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr) #define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr) diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2bdbc79bbd01..e7765b62c712 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -176,9 +176,10 @@ #define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ +#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ #define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ #define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */ +#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ /* * PMOVSR: counters overflow flag status reg diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index a2ce65a0c1fa..0d5d1f0525eb 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -13,11 +13,9 @@ #include <asm/page.h> -struct mm_struct; struct cpu_suspend_ctx; extern void cpu_do_idle(void); -extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr); extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 085d248f824e..ebc622432831 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -740,6 +740,16 @@ #define ID_AA64DFR0_TRACEVER_SHIFT 4 #define ID_AA64DFR0_DEBUGVER_SHIFT 0 +#define ID_AA64DFR0_PMUVER_8_0 0x1 +#define ID_AA64DFR0_PMUVER_8_1 0x4 +#define 
ID_AA64DFR0_PMUVER_8_4 0x5 +#define ID_AA64DFR0_PMUVER_8_5 0x6 +#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf + +#define ID_DFR0_PERFMON_SHIFT 24 + +#define ID_DFR0_PERFMON_8_1 0x4 + #define ID_ISAR5_RDM_SHIFT 24 #define ID_ISAR5_CRC32_SHIFT 16 #define ID_ISAR5_SHA2_SHIFT 12 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index fc6488660f64..4e5b8ee31442 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -21,7 +21,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ syscall.o -extra-$(CONFIG_EFI) := efi-entry.o +targets += efi-entry.o OBJCOPYFLAGS := --prefix-symbols=__efistub_ $(obj)/%.stub.o: $(obj)/%.o FORCE diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 7832b3216370..4cc581af2d96 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -630,7 +630,7 @@ static int __init armv8_deprecated_init(void) register_insn_emulation(&cp15_barrier_ops); if (IS_ENABLED(CONFIG_SETEND_EMULATION)) { - if(system_supports_mixed_endian_el0()) + if (system_supports_mixed_endian_el0()) register_insn_emulation(&setend_ops); else pr_info("setend instruction emulation is not supported on this system\n"); diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index 7e07072757af..e133011f64b5 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -15,10 +15,12 @@ #include <asm/smp_plat.h> extern const struct cpu_operations smp_spin_table_ops; +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL extern const struct cpu_operations acpi_parking_protocol_ops; +#endif extern const struct cpu_operations cpu_psci_ops; -const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; +static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, @@ -94,7 +96,7 @@ static const char *__init cpu_read_enable_method(int cpu) /* * Read a cpu's enable method and record it in cpu_ops. 
*/ -int __init cpu_read_ops(int cpu) +int __init init_cpu_ops(int cpu) { const char *enable_method = cpu_read_enable_method(cpu); @@ -109,3 +111,8 @@ int __init cpu_read_ops(int cpu) return 0; } + +const struct cpu_operations *get_cpu_ops(int cpu) +{ + return cpu_ops[cpu]; +} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index bf98a2386534..38ebad880f5c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -552,7 +552,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) BUG_ON(!reg); - for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { u64 ftr_mask = arm64_ftr_mask(ftrp); s64 ftr_new = arm64_ftr_value(ftrp, new); diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index e4d6af2fdec7..b512b5503f6e 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -18,11 +18,11 @@ int arm_cpuidle_init(unsigned int cpu) { + const struct cpu_operations *ops = get_cpu_ops(cpu); int ret = -EOPNOTSUPP; - if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend && - cpu_ops[cpu]->cpu_init_idle) - ret = cpu_ops[cpu]->cpu_init_idle(cpu); + if (ops && ops->cpu_suspend && ops->cpu_init_idle) + ret = ops->cpu_init_idle(cpu); return ret; } @@ -37,8 +37,9 @@ int arm_cpuidle_init(unsigned int cpu) int arm_cpuidle_suspend(int index) { int cpu = smp_processor_id(); + const struct cpu_operations *ops = get_cpu_ops(cpu); - return cpu_ops[cpu]->cpu_suspend(index); + return ops->cpu_suspend(index); } #ifdef CONFIG_ACPI diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index fde59981445c..c839b5bf1904 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -175,7 +175,7 @@ NOKPROBE_SYMBOL(el0_pc); static void notrace el0_sp(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); - local_daif_restore(DAIF_PROCCTX_NOIRQ); + local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } NOKPROBE_SYMBOL(el0_sp); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 989b1944cb71..f79023c9b374 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -404,7 +404,6 @@ __create_page_tables: ret x28 ENDPROC(__create_page_tables) - .ltorg /* * The following fragment of code is executed with the MMU enabled. diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 38bcd4d4e43b..6532105b3e32 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -110,8 +110,6 @@ ENTRY(swsusp_arch_suspend_exit) cbz x24, 3f /* Do we need to re-initialise EL2? 
*/ hvc #0 3: ret - - .ltorg ENDPROC(swsusp_arch_suspend_exit) /* diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index dd3ae8081b38..b40c3b0def92 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -121,7 +121,7 @@ static int setup_dtb(struct kimage *image, /* add kaslr-seed */ ret = fdt_delprop(dtb, off, FDT_PROP_KASLR_SEED); - if (ret == -FDT_ERR_NOTFOUND) + if (ret == -FDT_ERR_NOTFOUND) ret = 0; else if (ret) goto out; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index e40b65645c86..4d7879484cec 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -285,6 +285,17 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { #define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) + +/* + * We unconditionally enable ARMv8.5-PMU long event counter support + * (64-bit events) where supported. Indicate if this arm_pmu has long + * event counter support. + */ +static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) +{ + return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5); +} + /* * We must chain two programmable counters for 64 bit events, * except when we have allocated the 64bit cycle counter (for CPU @@ -294,9 +305,11 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { static inline bool armv8pmu_event_is_chained(struct perf_event *event) { int idx = event->hw.idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); return !WARN_ON(idx < 0) && armv8pmu_event_is_64bit(event) && + !armv8pmu_has_long_event(cpu_pmu) && (idx != ARMV8_IDX_CYCLE_COUNTER); } @@ -345,7 +358,7 @@ static inline void armv8pmu_select_counter(int idx) isb(); } -static inline u32 armv8pmu_read_evcntr(int idx) +static inline u64 armv8pmu_read_evcntr(int idx) { armv8pmu_select_counter(idx); return read_sysreg(pmxevcntr_el0); @@ -362,6 +375,44 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) return val; } +/* + * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP + * is set the event counters also become 64-bit counters. Unless the + * user has requested a long counter (attr.config1) then we want to + * interrupt upon 32-bit overflow - we achieve this by applying a bias. 
+ */ +static bool armv8pmu_event_needs_bias(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_64bit(event)) + return false; + + if (armv8pmu_has_long_event(cpu_pmu) || + idx == ARMV8_IDX_CYCLE_COUNTER) + return true; + + return false; +} + +static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value |= GENMASK(63, 32); + + return value; +} + +static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value &= ~GENMASK(63, 32); + + return value; +} + static u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); @@ -377,10 +428,10 @@ static u64 armv8pmu_read_counter(struct perf_event *event) else value = armv8pmu_read_hw_counter(event); - return value; + return armv8pmu_unbias_long_counter(event, value); } -static inline void armv8pmu_write_evcntr(int idx, u32 value) +static inline void armv8pmu_write_evcntr(int idx, u64 value) { armv8pmu_select_counter(idx); write_sysreg(value, pmxevcntr_el0); @@ -405,20 +456,14 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + value = armv8pmu_bias_long_counter(event, value); + if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) { - /* - * The cycles counter is really a 64-bit counter. - * When treating it as a 32-bit counter, we only count - * the lower 32 bits, and set the upper 32-bits so that - * we get an interrupt upon 32-bit overflow. 
- */ - if (!armv8pmu_event_is_64bit(event)) - value |= 0xffffffff00000000ULL; + else if (idx == ARMV8_IDX_CYCLE_COUNTER) write_sysreg(value, pmccntr_el0); - } else + else armv8pmu_write_hw_counter(event, value); } @@ -450,86 +495,74 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) } } -static inline int armv8pmu_enable_counter(int idx) +static u32 armv8pmu_event_cnten_mask(struct perf_event *event) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(BIT(counter), pmcntenset_el0); - return idx; + int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + u32 mask = BIT(counter); + + if (armv8pmu_event_is_chained(event)) + mask |= BIT(counter - 1); + return mask; +} + +static inline void armv8pmu_enable_counter(u32 mask) +{ + write_sysreg(mask, pmcntenset_el0); } static inline void armv8pmu_enable_event_counter(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - int idx = event->hw.idx; - u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx)); + u32 mask = armv8pmu_event_cnten_mask(event); - if (armv8pmu_event_is_chained(event)) - counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1)); - - kvm_set_pmu_events(counter_bits, attr); + kvm_set_pmu_events(mask, attr); /* We rely on the hypervisor switch code to enable guest counters */ - if (!kvm_pmu_counter_deferred(attr)) { - armv8pmu_enable_counter(idx); - if (armv8pmu_event_is_chained(event)) - armv8pmu_enable_counter(idx - 1); - } + if (!kvm_pmu_counter_deferred(attr)) + armv8pmu_enable_counter(mask); } -static inline int armv8pmu_disable_counter(int idx) +static inline void armv8pmu_disable_counter(u32 mask) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(BIT(counter), pmcntenclr_el0); - return idx; + write_sysreg(mask, pmcntenclr_el0); } static inline void armv8pmu_disable_event_counter(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; struct perf_event_attr *attr = &event->attr; - int idx = hwc->idx; - u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx)); - - if (armv8pmu_event_is_chained(event)) - counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1)); + u32 mask = armv8pmu_event_cnten_mask(event); - kvm_clr_pmu_events(counter_bits); + kvm_clr_pmu_events(mask); /* We rely on the hypervisor switch code to disable guest counters */ - if (!kvm_pmu_counter_deferred(attr)) { - if (armv8pmu_event_is_chained(event)) - armv8pmu_disable_counter(idx - 1); - armv8pmu_disable_counter(idx); - } + if (!kvm_pmu_counter_deferred(attr)) + armv8pmu_disable_counter(mask); } -static inline int armv8pmu_enable_intens(int idx) +static inline void armv8pmu_enable_intens(u32 mask) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(BIT(counter), pmintenset_el1); - return idx; + write_sysreg(mask, pmintenset_el1); } -static inline int armv8pmu_enable_event_irq(struct perf_event *event) +static inline void armv8pmu_enable_event_irq(struct perf_event *event) { - return armv8pmu_enable_intens(event->hw.idx); + u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + armv8pmu_enable_intens(BIT(counter)); } -static inline int armv8pmu_disable_intens(int idx) +static inline void armv8pmu_disable_intens(u32 mask) { - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(BIT(counter), pmintenclr_el1); + write_sysreg(mask, pmintenclr_el1); isb(); /* Clear the overflow flag in case an interrupt is pending. 
*/ - write_sysreg(BIT(counter), pmovsclr_el0); + write_sysreg(mask, pmovsclr_el0); isb(); - - return idx; } -static inline int armv8pmu_disable_event_irq(struct perf_event *event) +static inline void armv8pmu_disable_event_irq(struct perf_event *event) { - return armv8pmu_disable_intens(event->hw.idx); + u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx); + armv8pmu_disable_intens(BIT(counter)); } static inline u32 armv8pmu_getreset_flags(void) @@ -743,7 +776,8 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Otherwise use events counters */ - if (armv8pmu_event_is_64bit(event)) + if (armv8pmu_event_is_64bit(event) && + !armv8pmu_has_long_event(cpu_pmu)) return armv8pmu_get_chain_idx(cpuc, cpu_pmu); else return armv8pmu_get_single_idx(cpuc, cpu_pmu); @@ -815,13 +849,11 @@ static int armv8pmu_filter_match(struct perf_event *event) static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 idx, nb_cnt = cpu_pmu->num_events; + u32 pmcr; /* The counter and interrupt enable registers are unknown at reset. */ - for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { - armv8pmu_disable_counter(idx); - armv8pmu_disable_intens(idx); - } + armv8pmu_disable_counter(U32_MAX); + armv8pmu_disable_intens(U32_MAX); /* Clear the counters we flip at guest entry/exit */ kvm_clr_pmu_events(U32_MAX); @@ -830,8 +862,13 @@ static void armv8pmu_reset(void *info) * Initialize & Reset PMNC. Request overflow interrupt for * 64 bit cycle counter but cheat in armv8pmu_write_counter(). */ - armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | - ARMV8_PMU_PMCR_LC); + pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; + + /* Enable long event counter support where available */ + if (armv8pmu_has_long_event(cpu_pmu)) + pmcr |= ARMV8_PMU_PMCR_LP; + + armv8pmu_pmcr_write(pmcr); } static int __armv8_pmuv3_map_event(struct perf_event *event, @@ -914,6 +951,7 @@ static void __armv8pmu_probe_pmu(void *info) if (pmuver == 0xf || pmuver == 0) return; + cpu_pmu->pmuver = pmuver; probe->present = true; /* Read the nb of CNTx counters supported from PMNC */ @@ -953,7 +991,10 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) return probe.present ? 0 : -ENODEV; } -static int armv8_pmu_init(struct arm_pmu *cpu_pmu) +static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event), + const struct attribute_group *events, + const struct attribute_group *format) { int ret = armv8pmu_probe_pmu(cpu_pmu); if (ret) @@ -972,144 +1013,127 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->set_event_filter = armv8pmu_set_event_filter; cpu_pmu->filter_match = armv8pmu_filter_match; + cpu_pmu->name = name; + cpu_pmu->map_event = map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ? + events : &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? 
+ format : &armv8_pmuv3_format_attr_group; + return 0; } static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_pmuv3"; - cpu_pmu->map_event = armv8_pmuv3_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; + return armv8_pmu_init(cpu_pmu, "armv8_pmuv3", + armv8_pmuv3_map_event, NULL, NULL); +} - return 0; +static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a34", + armv8_pmuv3_map_event, NULL, NULL); } static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a35"; - cpu_pmu->map_event = armv8_a53_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", + armv8_a53_map_event, NULL, NULL); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a53"; - cpu_pmu->map_event = armv8_a53_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", + armv8_a53_map_event, NULL, NULL); +} - return 0; +static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a55", + armv8_pmuv3_map_event, NULL, NULL); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a57"; - cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", + armv8_a57_map_event, NULL, NULL); +} - return 0; +static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a65", + armv8_pmuv3_map_event, NULL, NULL); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a72"; - cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", + armv8_a57_map_event, NULL, NULL); } static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a73"; - cpu_pmu->map_event = armv8_a73_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", + armv8_a73_map_event, NULL, NULL); +} - return 0; +static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a75", + armv8_pmuv3_map_event, NULL, NULL); } -static int 
armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) +static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a76", + armv8_pmuv3_map_event, NULL, NULL); +} - cpu_pmu->name = "armv8_cavium_thunder"; - cpu_pmu->map_event = armv8_thunder_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; +static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a77", + armv8_pmuv3_map_event, NULL, NULL); +} - return 0; +static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_neoverse_e1", + armv8_pmuv3_map_event, NULL, NULL); } -static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) +static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; + return armv8_pmu_init(cpu_pmu, "armv8_neoverse_n1", + armv8_pmuv3_map_event, NULL, NULL); +} - cpu_pmu->name = "armv8_brcm_vulcan"; - cpu_pmu->map_event = armv8_vulcan_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; +static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", + armv8_thunder_map_event, NULL, NULL); +} - return 0; +static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", + armv8_vulcan_map_event, NULL, NULL); } static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, + {.compatible = "arm,cortex-a34-pmu", .data = armv8_a34_pmu_init}, {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, + {.compatible = "arm,cortex-a55-pmu", .data = armv8_a55_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, + {.compatible = "arm,cortex-a65-pmu", .data = armv8_a65_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init}, + {.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init}, + {.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init}, + {.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init}, + {.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init}, + {.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, {}, diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a34890bf309f..3fd2c11c09fc 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -344,7 +344,7 @@ void __init setup_arch(char **cmdline_p) else psci_acpi_init(); - cpu_read_bootcpu_ops(); + init_bootcpu_ops(); smp_init_cpus(); smp_build_mpidr_hash(); @@ -371,8 +371,10 @@ void __init setup_arch(char **cmdline_p) static inline bool cpu_can_disable(unsigned int cpu) { #ifdef CONFIG_HOTPLUG_CPU - if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_can_disable) - return cpu_ops[cpu]->cpu_can_disable(cpu); + const struct cpu_operations *ops = get_cpu_ops(cpu); + + if (ops && ops->cpu_can_disable) + return ops->cpu_can_disable(cpu); 
#endif return false; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d4ed9a19d8fe..034806725598 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -93,8 +93,10 @@ static inline int op_cpu_kill(unsigned int cpu) */ static int boot_secondary(unsigned int cpu, struct task_struct *idle) { - if (cpu_ops[cpu]->cpu_boot) - return cpu_ops[cpu]->cpu_boot(cpu); + const struct cpu_operations *ops = get_cpu_ops(cpu); + + if (ops->cpu_boot) + return ops->cpu_boot(cpu); return -EOPNOTSUPP; } @@ -115,60 +117,55 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) update_cpu_boot_status(CPU_MMU_OFF); __flush_dcache_area(&secondary_data, sizeof(secondary_data)); - /* - * Now bring the CPU into our world. - */ + /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); - if (ret == 0) { - /* - * CPU was successfully started, wait for it to come online or - * time out. - */ - wait_for_completion_timeout(&cpu_running, - msecs_to_jiffies(5000)); - - if (!cpu_online(cpu)) { - pr_crit("CPU%u: failed to come online\n", cpu); - ret = -EIO; - } - } else { + if (ret) { pr_err("CPU%u: failed to boot: %d\n", cpu, ret); return ret; } + /* + * CPU was successfully started, wait for it to come online or + * time out. + */ + wait_for_completion_timeout(&cpu_running, + msecs_to_jiffies(5000)); + if (cpu_online(cpu)) + return 0; + + pr_crit("CPU%u: failed to come online\n", cpu); secondary_data.task = NULL; secondary_data.stack = NULL; __flush_dcache_area(&secondary_data, sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); - if (ret && status) { + if (status == CPU_MMU_OFF) + status = READ_ONCE(__early_cpu_boot_status); - if (status == CPU_MMU_OFF) - status = READ_ONCE(__early_cpu_boot_status); - - switch (status & CPU_BOOT_STATUS_MASK) { - default: - pr_err("CPU%u: failed in unknown state : 0x%lx\n", - cpu, status); - cpus_stuck_in_kernel++; - break; - case CPU_KILL_ME: - if (!op_cpu_kill(cpu)) { - pr_crit("CPU%u: died during early boot\n", cpu); - break; - } - pr_crit("CPU%u: may not have shut down cleanly\n", cpu); - /* Fall through */ - case CPU_STUCK_IN_KERNEL: - pr_crit("CPU%u: is stuck in kernel\n", cpu); - if (status & CPU_STUCK_REASON_52_BIT_VA) - pr_crit("CPU%u: does not support 52-bit VAs\n", cpu); - if (status & CPU_STUCK_REASON_NO_GRAN) - pr_crit("CPU%u: does not support %luK granule \n", cpu, PAGE_SIZE / SZ_1K); - cpus_stuck_in_kernel++; + switch (status & CPU_BOOT_STATUS_MASK) { + default: + pr_err("CPU%u: failed in unknown state : 0x%lx\n", + cpu, status); + cpus_stuck_in_kernel++; + break; + case CPU_KILL_ME: + if (!op_cpu_kill(cpu)) { + pr_crit("CPU%u: died during early boot\n", cpu); break; - case CPU_PANIC_KERNEL: - panic("CPU%u detected unsupported configuration\n", cpu); } + pr_crit("CPU%u: may not have shut down cleanly\n", cpu); + /* Fall through */ + case CPU_STUCK_IN_KERNEL: + pr_crit("CPU%u: is stuck in kernel\n", cpu); + if (status & CPU_STUCK_REASON_52_BIT_VA) + pr_crit("CPU%u: does not support 52-bit VAs\n", cpu); + if (status & CPU_STUCK_REASON_NO_GRAN) { + pr_crit("CPU%u: does not support %luK granule\n", + cpu, PAGE_SIZE / SZ_1K); + } + cpus_stuck_in_kernel++; + break; + case CPU_PANIC_KERNEL: + panic("CPU%u detected unsupported configuration\n", cpu); } return ret; @@ -196,6 +193,7 @@ asmlinkage notrace void secondary_start_kernel(void) { u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; struct mm_struct *mm = &init_mm; + const struct cpu_operations *ops; unsigned int cpu; cpu = task_cpu(current); @@ -227,8 
+225,9 @@ asmlinkage notrace void secondary_start_kernel(void) */ check_local_cpu_capabilities(); - if (cpu_ops[cpu]->cpu_postboot) - cpu_ops[cpu]->cpu_postboot(); + ops = get_cpu_ops(cpu); + if (ops->cpu_postboot) + ops->cpu_postboot(); /* * Log the CPU info before it is marked online and might get read. @@ -266,19 +265,21 @@ asmlinkage notrace void secondary_start_kernel(void) #ifdef CONFIG_HOTPLUG_CPU static int op_cpu_disable(unsigned int cpu) { + const struct cpu_operations *ops = get_cpu_ops(cpu); + /* * If we don't have a cpu_die method, abort before we reach the point * of no return. CPU0 may not have an cpu_ops, so test for it. */ - if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) + if (!ops || !ops->cpu_die) return -EOPNOTSUPP; /* * We may need to abort a hot unplug for some other mechanism-specific * reason. */ - if (cpu_ops[cpu]->cpu_disable) - return cpu_ops[cpu]->cpu_disable(cpu); + if (ops->cpu_disable) + return ops->cpu_disable(cpu); return 0; } @@ -314,15 +315,17 @@ int __cpu_disable(void) static int op_cpu_kill(unsigned int cpu) { + const struct cpu_operations *ops = get_cpu_ops(cpu); + /* * If we have no means of synchronising with the dying CPU, then assume * that it is really dead. We can only wait for an arbitrary length of * time and hope that it's dead, so let's skip the wait and just hope. */ - if (!cpu_ops[cpu]->cpu_kill) + if (!ops->cpu_kill) return 0; - return cpu_ops[cpu]->cpu_kill(cpu); + return ops->cpu_kill(cpu); } /* @@ -357,6 +360,7 @@ void __cpu_die(unsigned int cpu) void cpu_die(void) { unsigned int cpu = smp_processor_id(); + const struct cpu_operations *ops = get_cpu_ops(cpu); idle_task_exit(); @@ -370,12 +374,22 @@ void cpu_die(void) * mechanism must perform all required cache maintenance to ensure that * no dirty lines are lost in the process of shutting down the CPU. */ - cpu_ops[cpu]->cpu_die(cpu); + ops->cpu_die(cpu); BUG(); } #endif +static void __cpu_try_die(int cpu) +{ +#ifdef CONFIG_HOTPLUG_CPU + const struct cpu_operations *ops = get_cpu_ops(cpu); + + if (ops && ops->cpu_die) + ops->cpu_die(cpu); +#endif +} + /* * Kill the calling secondary CPU, early in bringup before it is turned * online. 
@@ -389,12 +403,11 @@ void cpu_die_early(void) /* Mark this CPU absent */ set_cpu_present(cpu, 0); -#ifdef CONFIG_HOTPLUG_CPU - update_cpu_boot_status(CPU_KILL_ME); - /* Check if we can park ourselves */ - if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die) - cpu_ops[cpu]->cpu_die(cpu); -#endif + if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { + update_cpu_boot_status(CPU_KILL_ME); + __cpu_try_die(cpu); + } + update_cpu_boot_status(CPU_STUCK_IN_KERNEL); cpu_park_loop(); @@ -488,10 +501,13 @@ static bool __init is_mpidr_duplicate(unsigned int cpu, u64 hwid) */ static int __init smp_cpu_setup(int cpu) { - if (cpu_read_ops(cpu)) + const struct cpu_operations *ops; + + if (init_cpu_ops(cpu)) return -ENODEV; - if (cpu_ops[cpu]->cpu_init(cpu)) + ops = get_cpu_ops(cpu); + if (ops->cpu_init(cpu)) return -ENODEV; set_cpu_possible(cpu, true); @@ -714,6 +730,7 @@ void __init smp_init_cpus(void) void __init smp_prepare_cpus(unsigned int max_cpus) { + const struct cpu_operations *ops; int err; unsigned int cpu; unsigned int this_cpu; @@ -744,10 +761,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (cpu == smp_processor_id()) continue; - if (!cpu_ops[cpu]) + ops = get_cpu_ops(cpu); + if (!ops) continue; - err = cpu_ops[cpu]->cpu_prepare(cpu); + err = ops->cpu_prepare(cpu); if (err) continue; @@ -863,10 +881,8 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) local_irq_disable(); sdei_mask_local_cpu(); -#ifdef CONFIG_HOTPLUG_CPU - if (cpu_ops[cpu]->cpu_die) - cpu_ops[cpu]->cpu_die(cpu); -#endif + if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) + __cpu_try_die(cpu); /* just in case */ cpu_park_loop(); @@ -1044,8 +1060,9 @@ static bool have_cpu_die(void) { #ifdef CONFIG_HOTPLUG_CPU int any_cpu = raw_smp_processor_id(); + const struct cpu_operations *ops = get_cpu_ops(any_cpu); - if (cpu_ops[any_cpu] && cpu_ops[any_cpu]->cpu_die) + if (ops && ops->cpu_die) return true; #endif return false; diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 46292a370781..0f53996c4bfe 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -139,7 +139,7 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) write_sysreg(val, cptr_el2); - if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; isb(); @@ -158,12 +158,12 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) { u64 hcr = vcpu->arch.hcr_el2; - if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) hcr |= HCR_TVM; write_sysreg(hcr, hcr_el2); - if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); if (has_vhe()) @@ -193,7 +193,7 @@ static void __hyp_text __deactivate_traps_nvhe(void) { u64 mdcr_el2 = read_sysreg(mdcr_el2); - if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { u64 val; /* @@ -340,7 +340,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) * resolve the IPA using the AT instruction. 
*/ if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_const_cap(ARM64_WORKAROUND_834220) || + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { if (!__translate_far_to_hpfar(far, &hpfar)) return false; @@ -510,7 +510,7 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (*exit_code != ARM_EXCEPTION_TRAP) goto exit; - if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && handle_tx2_tvm(vcpu)) return true; @@ -567,7 +567,7 @@ exit: static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) { - if (!cpus_have_const_cap(ARM64_SSBD)) + if (!cpus_have_final_cap(ARM64_SSBD)) return false; return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 7672a978926c..75b1925763f1 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -71,7 +71,7 @@ static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ct ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); - if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); } @@ -118,7 +118,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - if (!cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); } else if (!ctxt->__hyp_running_vcpu) { @@ -149,7 +149,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) && + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) && ctxt->__hyp_running_vcpu) { /* * Must only be done for host registers, hence the context @@ -194,7 +194,7 @@ __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); write_sysreg_el2(pstate, SYS_SPSR); - if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); } diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 92f560e3e1aa..ceaddbe4279f 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -23,7 +23,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, local_irq_save(cxt->flags); - if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { /* * For CPUs that are affected by ARM errata 1165522 or 1530923, * we cannot trust stage-1 to be in a correct state at that @@ -63,7 +63,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { - if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { u64 val; /* @@ -103,7 +103,7 @@ static void __hyp_text 
__tlb_switch_to_host_vhe(struct kvm *kvm, write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); isb(); - if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { /* Restore the registers to what they were */ write_sysreg_el1(cxt->tcr, SYS_TCR); write_sysreg_el1(cxt->sctlr, SYS_SCTLR); @@ -117,7 +117,7 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, { write_sysreg(0, vttbr_el2); - if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { /* Ensure write of the host VMID */ isb(); /* Restore the host's TCR_EL1 */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 44354c812783..090f46d3add1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1101,6 +1101,16 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, (0xfUL << ID_AA64ISAR1_API_SHIFT) | (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | (0xfUL << ID_AA64ISAR1_GPI_SHIFT)); + } else if (id == SYS_ID_AA64DFR0_EL1) { + /* Limit guests to PMUv3 for ARMv8.1 */ + val = cpuid_feature_cap_perfmon_field(val, + ID_AA64DFR0_PMUVER_SHIFT, + ID_AA64DFR0_PMUVER_8_1); + } else if (id == SYS_ID_DFR0_EL1) { + /* Limit guests to PMUv3 for ARMv8.1 */ + val = cpuid_feature_cap_perfmon_field(val, + ID_DFR0_PERFMON_SHIFT, + ID_DFR0_PERFMON_8_1); } return val; diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c index 1f82c66b32ea..60eccae2abad 100644 --- a/arch/arm64/lib/csum.c +++ b/arch/arm64/lib/csum.c @@ -124,3 +124,30 @@ unsigned int do_csum(const unsigned char *buff, int len) return sum >> 16; } + +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + __uint128_t src, dst; + u64 sum = (__force u64)csum; + + src = *(const __uint128_t *)saddr->s6_addr; + dst = *(const __uint128_t *)daddr->s6_addr; + + sum += (__force u32)htonl(len); +#ifdef __LITTLE_ENDIAN + sum += (u32)proto << 24; +#else + sum += proto; +#endif + src += (src >> 64) | (src << 64); + dst += (dst >> 64) | (dst << 64); + + sum = accumulate(sum, src >> 64); + sum = accumulate(sum, dst >> 64); + + sum += ((sum >> 32) | (sum << 32)); + return csum_fold((__force __wsum)(sum >> 32)); +} +EXPORT_SYMBOL(csum_ipv6_magic); diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 4767540d1b94..4e79566726c8 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -186,7 +186,7 @@ CPU_LE( rev data2, data2 ) * as carry-propagation can corrupt the upper bits if the trailing * bytes in the string contain 0x01. * However, if there is no NUL byte in the dword, we can generate - * the result directly. We ca not just subtract the bytes as the + * the result directly. We cannot just subtract the bytes as the * MSB might be significant. */ CPU_BE( cbnz has_nul, 1f ) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 8ef73e89d514..8524f03d629c 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -6,6 +6,7 @@ * Copyright (C) 2012 ARM Ltd. */ +#include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/sched.h> #include <linux/slab.h> @@ -254,10 +255,37 @@ switch_mm_fastpath: /* Errata workaround post TTBRx_EL1 update. 
*/ asmlinkage void post_ttbr_update_workaround(void) { + if (!IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) + return; + asm(ALTERNATIVE("nop; nop; nop", "ic iallu; dsb nsh; isb", - ARM64_WORKAROUND_CAVIUM_27456, - CONFIG_CAVIUM_ERRATUM_27456)); + ARM64_WORKAROUND_CAVIUM_27456)); +} + +void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) +{ + unsigned long ttbr1 = read_sysreg(ttbr1_el1); + unsigned long asid = ASID(mm); + unsigned long ttbr0 = phys_to_ttbr(pgd_phys); + + /* Skip CNP for the reserved ASID */ + if (system_supports_cnp() && asid) + ttbr0 |= TTBR_CNP_BIT; + + /* SW PAN needs a copy of the ASID in TTBR0 for entry */ + if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN)) + ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid); + + /* Set ASID in TTBR1 since TCR.A1 is set */ + ttbr1 &= ~TTBR_ASID_MASK; + ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); + + write_sysreg(ttbr1, ttbr1_el1); + isb(); + write_sysreg(ttbr0, ttbr0_el1); + isb(); + post_ttbr_update_workaround(); } static int asids_init(void) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 128f70852bf3..9b08f7c7e6f0 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -17,6 +17,7 @@ #include <linux/mman.h> #include <linux/nodemask.h> #include <linux/memblock.h> +#include <linux/memory.h> #include <linux/fs.h> #include <linux/io.h> #include <linux/mm.h> @@ -724,6 +725,312 @@ int kern_addr_valid(unsigned long addr) return pfn_valid(pte_pfn(pte)); } + +#ifdef CONFIG_MEMORY_HOTPLUG +static void free_hotplug_page_range(struct page *page, size_t size) +{ + WARN_ON(PageReserved(page)); + free_pages((unsigned long)page_address(page), get_order(size)); +} + +static void free_hotplug_pgtable_page(struct page *page) +{ + free_hotplug_page_range(page, PAGE_SIZE); +} + +static bool pgtable_range_aligned(unsigned long start, unsigned long end, + unsigned long floor, unsigned long ceiling, + unsigned long mask) +{ + start &= mask; + if (start < floor) + return false; + + if (ceiling) { + ceiling &= mask; + if (!ceiling) + return false; + } + + if (end - 1 > ceiling - 1) + return false; + return true; +} + +static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr, + unsigned long end, bool free_mapped) +{ + pte_t *ptep, pte; + + do { + ptep = pte_offset_kernel(pmdp, addr); + pte = READ_ONCE(*ptep); + if (pte_none(pte)) + continue; + + WARN_ON(!pte_present(pte)); + pte_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + if (free_mapped) + free_hotplug_page_range(pte_page(pte), PAGE_SIZE); + } while (addr += PAGE_SIZE, addr < end); +} + +static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr, + unsigned long end, bool free_mapped) +{ + unsigned long next; + pmd_t *pmdp, pmd; + + do { + next = pmd_addr_end(addr, end); + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + if (pmd_none(pmd)) + continue; + + WARN_ON(!pmd_present(pmd)); + if (pmd_sect(pmd)) { + pmd_clear(pmdp); + + /* + * One TLBI should be sufficient here as the PMD_SIZE + * range is mapped with a single block entry. 
+ */ + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + if (free_mapped) + free_hotplug_page_range(pmd_page(pmd), + PMD_SIZE); + continue; + } + WARN_ON(!pmd_table(pmd)); + unmap_hotplug_pte_range(pmdp, addr, next, free_mapped); + } while (addr = next, addr < end); +} + +static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr, + unsigned long end, bool free_mapped) +{ + unsigned long next; + pud_t *pudp, pud; + + do { + next = pud_addr_end(addr, end); + pudp = pud_offset(p4dp, addr); + pud = READ_ONCE(*pudp); + if (pud_none(pud)) + continue; + + WARN_ON(!pud_present(pud)); + if (pud_sect(pud)) { + pud_clear(pudp); + + /* + * One TLBI should be sufficient here as the PUD_SIZE + * range is mapped with a single block entry. + */ + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + if (free_mapped) + free_hotplug_page_range(pud_page(pud), + PUD_SIZE); + continue; + } + WARN_ON(!pud_table(pud)); + unmap_hotplug_pmd_range(pudp, addr, next, free_mapped); + } while (addr = next, addr < end); +} + +static void unmap_hotplug_p4d_range(pgd_t *pgdp, unsigned long addr, + unsigned long end, bool free_mapped) +{ + unsigned long next; + p4d_t *p4dp, p4d; + + do { + next = p4d_addr_end(addr, end); + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + if (p4d_none(p4d)) + continue; + + WARN_ON(!p4d_present(p4d)); + unmap_hotplug_pud_range(p4dp, addr, next, free_mapped); + } while (addr = next, addr < end); +} + +static void unmap_hotplug_range(unsigned long addr, unsigned long end, + bool free_mapped) +{ + unsigned long next; + pgd_t *pgdp, pgd; + + do { + next = pgd_addr_end(addr, end); + pgdp = pgd_offset_k(addr); + pgd = READ_ONCE(*pgdp); + if (pgd_none(pgd)) + continue; + + WARN_ON(!pgd_present(pgd)); + unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped); + } while (addr = next, addr < end); +} + +static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling) +{ + pte_t *ptep, pte; + unsigned long i, start = addr; + + do { + ptep = pte_offset_kernel(pmdp, addr); + pte = READ_ONCE(*ptep); + + /* + * This is just a sanity check here which verifies that + * pte clearing has been done by earlier unmap loops. + */ + WARN_ON(!pte_none(pte)); + } while (addr += PAGE_SIZE, addr < end); + + if (!pgtable_range_aligned(start, end, floor, ceiling, PMD_MASK)) + return; + + /* + * Check whether we can free the pte page if the rest of the + * entries are empty. Overlap with other regions have been + * handled by the floor/ceiling check. + */ + ptep = pte_offset_kernel(pmdp, 0UL); + for (i = 0; i < PTRS_PER_PTE; i++) { + if (!pte_none(READ_ONCE(ptep[i]))) + return; + } + + pmd_clear(pmdp); + __flush_tlb_kernel_pgtable(start); + free_hotplug_pgtable_page(virt_to_page(ptep)); +} + +static void free_empty_pmd_table(pud_t *pudp, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling) +{ + pmd_t *pmdp, pmd; + unsigned long i, next, start = addr; + + do { + next = pmd_addr_end(addr, end); + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + if (pmd_none(pmd)) + continue; + + WARN_ON(!pmd_present(pmd) || !pmd_table(pmd) || pmd_sect(pmd)); + free_empty_pte_table(pmdp, addr, next, floor, ceiling); + } while (addr = next, addr < end); + + if (CONFIG_PGTABLE_LEVELS <= 2) + return; + + if (!pgtable_range_aligned(start, end, floor, ceiling, PUD_MASK)) + return; + + /* + * Check whether we can free the pmd page if the rest of the + * entries are empty. 
Overlap with other regions have been + * handled by the floor/ceiling check. + */ + pmdp = pmd_offset(pudp, 0UL); + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(READ_ONCE(pmdp[i]))) + return; + } + + pud_clear(pudp); + __flush_tlb_kernel_pgtable(start); + free_hotplug_pgtable_page(virt_to_page(pmdp)); +} + +static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling) +{ + pud_t *pudp, pud; + unsigned long i, next, start = addr; + + do { + next = pud_addr_end(addr, end); + pudp = pud_offset(p4dp, addr); + pud = READ_ONCE(*pudp); + if (pud_none(pud)) + continue; + + WARN_ON(!pud_present(pud) || !pud_table(pud) || pud_sect(pud)); + free_empty_pmd_table(pudp, addr, next, floor, ceiling); + } while (addr = next, addr < end); + + if (CONFIG_PGTABLE_LEVELS <= 3) + return; + + if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK)) + return; + + /* + * Check whether we can free the pud page if the rest of the + * entries are empty. Overlap with other regions have been + * handled by the floor/ceiling check. + */ + pudp = pud_offset(p4dp, 0UL); + for (i = 0; i < PTRS_PER_PUD; i++) { + if (!pud_none(READ_ONCE(pudp[i]))) + return; + } + + p4d_clear(p4dp); + __flush_tlb_kernel_pgtable(start); + free_hotplug_pgtable_page(virt_to_page(pudp)); +} + +static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling) +{ + unsigned long next; + p4d_t *p4dp, p4d; + + do { + next = p4d_addr_end(addr, end); + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + if (p4d_none(p4d)) + continue; + + WARN_ON(!p4d_present(p4d)); + free_empty_pud_table(p4dp, addr, next, floor, ceiling); + } while (addr = next, addr < end); +} + +static void free_empty_tables(unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + unsigned long next; + pgd_t *pgdp, pgd; + + do { + next = pgd_addr_end(addr, end); + pgdp = pgd_offset_k(addr); + pgd = READ_ONCE(*pgdp); + if (pgd_none(pgd)) + continue; + + WARN_ON(!pgd_present(pgd)); + free_empty_p4d_table(pgdp, addr, next, floor, ceiling); + } while (addr = next, addr < end); +} +#endif + #ifdef CONFIG_SPARSEMEM_VMEMMAP #if !ARM64_SWAPPER_USES_SECTION_MAPS int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, @@ -771,6 +1078,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { +#ifdef CONFIG_MEMORY_HOTPLUG + WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); + + unmap_hotplug_range(start, end, true); + free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END); +#endif } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ @@ -1049,10 +1362,21 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) } #ifdef CONFIG_MEMORY_HOTPLUG +static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) +{ + unsigned long end = start + size; + + WARN_ON(pgdir != init_mm.pgd); + WARN_ON((start < PAGE_OFFSET) || (end > PAGE_END)); + + unmap_hotplug_range(start, end, false); + free_empty_tables(start, end, PAGE_OFFSET, PAGE_END); +} + int arch_add_memory(int nid, u64 start, u64 size, struct mhp_restrictions *restrictions) { - int flags = 0; + int ret, flags = 0; if (rodata_full || debug_pagealloc_enabled()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; @@ -1062,22 +1386,59 @@ int arch_add_memory(int nid, u64 start, u64 size, memblock_clear_nomap(start, 
size); - return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, + ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, restrictions); + if (ret) + __remove_pgd_mapping(swapper_pg_dir, + __phys_to_virt(start), size); + return ret; } + void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - /* - * FIXME: Cleanup page tables (also in arch_add_memory() in case - * adding fails). Until then, this function should only be used - * during memory hotplug (adding memory), not for memory - * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be - * unlocked yet. - */ __remove_pages(start_pfn, nr_pages, altmap); + __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); +} + +/* + * This memory hotplug notifier helps prevent boot memory from being + * inadvertently removed as it blocks pfn range offlining process in + * __offline_pages(). Hence this prevents both offlining as well as + * removal process for boot memory which is initially always online. + * In future if and when boot memory could be removed, this notifier + * should be dropped and free_hotplug_page_range() should handle any + * reserved pages allocated during boot. + */ +static int prevent_bootmem_remove_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct mem_section *ms; + struct memory_notify *arg = data; + unsigned long end_pfn = arg->start_pfn + arg->nr_pages; + unsigned long pfn = arg->start_pfn; + + if (action != MEM_GOING_OFFLINE) + return NOTIFY_OK; + + for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + ms = __pfn_to_section(pfn); + if (early_section(ms)) + return NOTIFY_BAD; + } + return NOTIFY_OK; +} + +static struct notifier_block prevent_bootmem_remove_nb = { + .notifier_call = prevent_bootmem_remove_notifier, +}; + +static int __init prevent_bootmem_remove_init(void) +{ + return register_memory_notifier(&prevent_bootmem_remove_nb); } +device_initcall(prevent_bootmem_remove_init); #endif diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 7103027b4e64..1b871f141eb4 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -143,34 +143,6 @@ SYM_FUNC_END(cpu_do_resume) .popsection #endif -/* - * cpu_do_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys. - * - * - pgd_phys - physical address of new TTB - */ -SYM_FUNC_START(cpu_do_switch_mm) - mrs x2, ttbr1_el1 - mmid x1, x1 // get mm->context.id - phys_to_ttbr x3, x0 - -alternative_if ARM64_HAS_CNP - cbz x1, 1f // skip CNP for reserved ASID - orr x3, x3, #TTBR_CNP_BIT -1: -alternative_else_nop_endif -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - bfi x3, x1, #48, #16 // set the ASID field in TTBR0 -#endif - bfi x2, x1, #48, #16 // set the ASID - msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) - isb - msr ttbr0_el1, x3 // now update TTBR0 - isb - b post_ttbr_update_workaround // Back to C code... 
-SYM_FUNC_END(cpu_do_switch_mm) - .pushsection ".idmap.text", "awx" .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index 1f2eae3e988b..d29d722ec3ec 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/debugfs.h> +#include <linux/memory_hotplug.h> #include <linux/seq_file.h> #include <asm/ptdump.h> @@ -7,7 +8,10 @@ static int ptdump_show(struct seq_file *m, void *v) { struct ptdump_info *info = m->private; + + get_online_mems(); ptdump_walk(m, info); + put_online_mems(); return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index a479023fa036..334c8be0c11f 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -267,26 +267,19 @@ static struct sdei_event *sdei_event_create(u32 event_num, event->private_registered = regs; } - if (sdei_event_find(event_num)) { - kfree(event->registered); - kfree(event); - event = ERR_PTR(-EBUSY); - } else { - spin_lock(&sdei_list_lock); - list_add(&event->list, &sdei_list); - spin_unlock(&sdei_list_lock); - } + spin_lock(&sdei_list_lock); + list_add(&event->list, &sdei_list); + spin_unlock(&sdei_list_lock); return event; } -static void sdei_event_destroy(struct sdei_event *event) +static void sdei_event_destroy_llocked(struct sdei_event *event) { lockdep_assert_held(&sdei_events_lock); + lockdep_assert_held(&sdei_list_lock); - spin_lock(&sdei_list_lock); list_del(&event->list); - spin_unlock(&sdei_list_lock); if (event->type == SDEI_EVENT_TYPE_SHARED) kfree(event->registered); @@ -296,6 +289,13 @@ static void sdei_event_destroy(struct sdei_event *event) kfree(event); } +static void sdei_event_destroy(struct sdei_event *event) +{ + spin_lock(&sdei_list_lock); + sdei_event_destroy_llocked(event); + spin_unlock(&sdei_list_lock); +} + static int sdei_api_get_version(u64 *version) { return invoke_sdei_fn(SDEI_1_0_FN_SDEI_VERSION, 0, 0, 0, 0, 0, version); @@ -412,14 +412,19 @@ int sdei_event_enable(u32 event_num) return -ENOENT; } - spin_lock(&sdei_list_lock); - event->reenable = true; - spin_unlock(&sdei_list_lock); + cpus_read_lock(); if (event->type == SDEI_EVENT_TYPE_SHARED) err = sdei_api_event_enable(event->event_num); else err = sdei_do_cross_call(_local_event_enable, event); + + if (!err) { + spin_lock(&sdei_list_lock); + event->reenable = true; + spin_unlock(&sdei_list_lock); + } + cpus_read_unlock(); mutex_unlock(&sdei_events_lock); return err; @@ -491,11 +496,6 @@ static int _sdei_event_unregister(struct sdei_event *event) { lockdep_assert_held(&sdei_events_lock); - spin_lock(&sdei_list_lock); - event->reregister = false; - event->reenable = false; - spin_unlock(&sdei_list_lock); - if (event->type == SDEI_EVENT_TYPE_SHARED) return sdei_api_event_unregister(event->event_num); @@ -518,6 +518,11 @@ int sdei_event_unregister(u32 event_num) break; } + spin_lock(&sdei_list_lock); + event->reregister = false; + event->reenable = false; + spin_unlock(&sdei_list_lock); + err = _sdei_event_unregister(event); if (err) break; @@ -585,26 +590,15 @@ static int _sdei_event_register(struct sdei_event *event) lockdep_assert_held(&sdei_events_lock); - spin_lock(&sdei_list_lock); - event->reregister = true; - spin_unlock(&sdei_list_lock); - if (event->type == SDEI_EVENT_TYPE_SHARED) return sdei_api_event_register(event->event_num, sdei_entry_point, event->registered, SDEI_EVENT_REGISTER_RM_ANY, 0); - err = 
sdei_do_cross_call(_local_event_register, event); - if (err) { - spin_lock(&sdei_list_lock); - event->reregister = false; - event->reenable = false; - spin_unlock(&sdei_list_lock); - + if (err) sdei_do_cross_call(_local_event_unregister, event); - } return err; } @@ -632,12 +626,18 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg) break; } + cpus_read_lock(); err = _sdei_event_register(event); if (err) { sdei_event_destroy(event); pr_warn("Failed to register event %u: %d\n", event_num, err); + } else { + spin_lock(&sdei_list_lock); + event->reregister = true; + spin_unlock(&sdei_list_lock); } + cpus_read_unlock(); } while (0); mutex_unlock(&sdei_events_lock); @@ -645,16 +645,17 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg) } EXPORT_SYMBOL(sdei_event_register); -static int sdei_reregister_event(struct sdei_event *event) +static int sdei_reregister_event_llocked(struct sdei_event *event) { int err; lockdep_assert_held(&sdei_events_lock); + lockdep_assert_held(&sdei_list_lock); err = _sdei_event_register(event); if (err) { pr_err("Failed to re-register event %u\n", event->event_num); - sdei_event_destroy(event); + sdei_event_destroy_llocked(event); return err; } @@ -683,7 +684,7 @@ static int sdei_reregister_shared(void) continue; if (event->reregister) { - err = sdei_reregister_event(event); + err = sdei_reregister_event_llocked(event); if (err) break; } diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index fea354d6fb29..d50edef91f59 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -328,15 +328,15 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev, struct arm_ccn_pmu_event, attr); ssize_t res; - res = snprintf(buf, PAGE_SIZE, "type=0x%x", event->type); + res = scnprintf(buf, PAGE_SIZE, "type=0x%x", event->type); if (event->event) - res += snprintf(buf + res, PAGE_SIZE - res, ",event=0x%x", + res += scnprintf(buf + res, PAGE_SIZE - res, ",event=0x%x", event->event); if (event->def) - res += snprintf(buf + res, PAGE_SIZE - res, ",%s", + res += scnprintf(buf + res, PAGE_SIZE - res, ",%s", event->def); if (event->mask) - res += snprintf(buf + res, PAGE_SIZE - res, ",mask=0x%x", + res += scnprintf(buf + res, PAGE_SIZE - res, ",mask=0x%x", event->mask); /* Arguments required by an event */ @@ -344,25 +344,25 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev, case CCN_TYPE_CYCLES: break; case CCN_TYPE_XP: - res += snprintf(buf + res, PAGE_SIZE - res, + res += scnprintf(buf + res, PAGE_SIZE - res, ",xp=?,vc=?"); if (event->event == CCN_EVENT_WATCHPOINT) - res += snprintf(buf + res, PAGE_SIZE - res, + res += scnprintf(buf + res, PAGE_SIZE - res, ",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?"); else - res += snprintf(buf + res, PAGE_SIZE - res, + res += scnprintf(buf + res, PAGE_SIZE - res, ",bus=?"); break; case CCN_TYPE_MN: - res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id); + res += scnprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id); break; default: - res += snprintf(buf + res, PAGE_SIZE - res, ",node=?"); + res += scnprintf(buf + res, PAGE_SIZE - res, ",node=?"); break; } - res += snprintf(buf + res, PAGE_SIZE - res, "\n"); + res += scnprintf(buf + res, PAGE_SIZE - res, "\n"); return res; } diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 4e4984a55cd1..b72c04852599 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -831,7 +831,7 @@ static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages, * parts and 
give userspace a fighting chance of getting some
 	 * useful data out of it.
 	 */
-	if (!nr_pages || (snapshot && (nr_pages & 1)))
+	if (snapshot && (nr_pages & 1))
 		return NULL;
 
 	if (cpu == -1)
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 71f525a35ac2..5b616dde9a4c 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -80,6 +80,7 @@ struct arm_pmu {
 	struct pmu	pmu;
 	cpumask_t	supported_cpus;
 	char		*name;
+	int		pmuver;
 	irqreturn_t	(*handle_irq)(struct arm_pmu *pmu);
 	void		(*enable)(struct perf_event *event);
 	void		(*disable)(struct perf_event *event);
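
The free_empty_{pte,pmd,pud}_table() helpers in the mmu.c hunks above only release a page table page once pgtable_range_aligned() confirms that the torn-down range, clamped by the floor/ceiling limits, spans everything the table maps. That helper is defined elsewhere in the patch, outside the hunks quoted here; as a rough sketch of the floor/ceiling convention the comments refer to, modelled on the free_pgd_range() logic in mm/memory.c (the exact arm64 body is an assumption, not taken from this diff), such a check can look like:

/*
 * Hypothetical sketch, not quoted from this patch: decide whether the
 * mask-aligned granule (PMD_MASK, PUD_MASK or PGDIR_MASK) containing
 * [start, end) stays within the floor/ceiling bounds, i.e. whether the
 * page table covering it maps no addresses belonging to other regions.
 */
static bool pgtable_range_aligned(unsigned long start, unsigned long end,
				  unsigned long floor, unsigned long ceiling,
				  unsigned long mask)
{
	/* Align down: if that dips below 'floor', the table also maps lower addresses. */
	start &= mask;
	if (start < floor)
		return false;

	/*
	 * A ceiling of zero means "no upper limit"; a non-zero ceiling that
	 * aligns down to zero must not be mistaken for that case.
	 */
	if (ceiling) {
		ceiling &= mask;
		if (!ceiling)
			return false;
	}

	/*
	 * If 'end' runs past the aligned ceiling, the table also maps higher
	 * addresses. With ceiling == 0, the unsigned wrap makes ceiling - 1
	 * equal ULONG_MAX, so the check never trips.
	 */
	if (end - 1 > ceiling - 1)
		return false;

	return true;
}

Only after this returns true for the relevant granule do the free_empty_*_table() helpers scan the remaining entries and, if all are none, clear the parent entry, flush the walk cache with __flush_tlb_kernel_pgtable() and hand the page to free_hotplug_pgtable_page(), as shown in the hunks above.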