Diffstat (limited to 'arch')
-rw-r--r--  arch/arm64/Kconfig                         7
-rw-r--r--  arch/arm64/include/asm/assembler.h         6
-rw-r--r--  arch/arm64/include/asm/checksum.h          7
-rw-r--r--  arch/arm64/include/asm/cpu_ops.h           8
-rw-r--r--  arch/arm64/include/asm/cpufeature.h       81
-rw-r--r--  arch/arm64/include/asm/esr.h               2
-rw-r--r--  arch/arm64/include/asm/memory.h            1
-rw-r--r--  arch/arm64/include/asm/mmu.h               6
-rw-r--r--  arch/arm64/include/asm/mmu_context.h       2
-rw-r--r--  arch/arm64/include/asm/page.h              4
-rw-r--r--  arch/arm64/include/asm/perf_event.h        3
-rw-r--r--  arch/arm64/include/asm/proc-fns.h          2
-rw-r--r--  arch/arm64/include/asm/sysreg.h           10
-rw-r--r--  arch/arm64/kernel/Makefile                 2
-rw-r--r--  arch/arm64/kernel/armv8_deprecated.c       2
-rw-r--r--  arch/arm64/kernel/cpu_ops.c               11
-rw-r--r--  arch/arm64/kernel/cpufeature.c             2
-rw-r--r--  arch/arm64/kernel/cpuidle.c                9
-rw-r--r--  arch/arm64/kernel/entry-common.c           2
-rw-r--r--  arch/arm64/kernel/head.S                   1
-rw-r--r--  arch/arm64/kernel/hibernate-asm.S          2
-rw-r--r--  arch/arm64/kernel/machine_kexec_file.c     2
-rw-r--r--  arch/arm64/kernel/perf_event.c           338
-rw-r--r--  arch/arm64/kernel/setup.c                  8
-rw-r--r--  arch/arm64/kernel/smp.c                  151
-rw-r--r--  arch/arm64/kvm/hyp/switch.c               14
-rw-r--r--  arch/arm64/kvm/hyp/sysreg-sr.c             8
-rw-r--r--  arch/arm64/kvm/hyp/tlb.c                   8
-rw-r--r--  arch/arm64/kvm/sys_regs.c                 10
-rw-r--r--  arch/arm64/lib/csum.c                     27
-rw-r--r--  arch/arm64/lib/strcmp.S                    2
-rw-r--r--  arch/arm64/mm/context.c                   32
-rw-r--r--  arch/arm64/mm/mmu.c                      379
-rw-r--r--  arch/arm64/mm/proc.S                      28
-rw-r--r--  arch/arm64/mm/ptdump_debugfs.c             4
35 files changed, 854 insertions(+), 327 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fa4e3737149c..8889ce7094e0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -281,6 +281,9 @@ config ZONE_DMA32
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+ def_bool y
+
config SMP
def_bool y
@@ -952,11 +955,11 @@ config HOTPLUG_CPU
# Common NUMA Features
config NUMA
- bool "Numa Memory Allocation and Scheduler Support"
+ bool "NUMA Memory Allocation and Scheduler Support"
select ACPI_NUMA if ACPI
select OF_NUMA
help
- Enable NUMA (Non Uniform Memory Access) support.
+ Enable NUMA (Non-Uniform Memory Access) support.
The kernel will try to allocate memory used by a CPU on the
local memory of the CPU and add some more
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index c5487806273f..0bff325117b4 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -257,12 +257,6 @@ alternative_endif
.endm
/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
- .macro mmid, rd, rn
- ldr \rd, [\rn, #MM_CONTEXT_ID]
- .endm
-/*
* read_ctr - read CTR_EL0. If the system has mismatched register fields,
* provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
*/
diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h
index 8d2a7de39744..b6f7bc6da5fb 100644
--- a/arch/arm64/include/asm/checksum.h
+++ b/arch/arm64/include/asm/checksum.h
@@ -5,7 +5,12 @@
#ifndef __ASM_CHECKSUM_H
#define __ASM_CHECKSUM_H
-#include <linux/types.h>
+#include <linux/in6.h>
+
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum sum);
static inline __sum16 csum_fold(__wsum csum)
{
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index 86aabf1e0199..d28e8f37d3b4 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -55,12 +55,12 @@ struct cpu_operations {
#endif
};
-extern const struct cpu_operations *cpu_ops[NR_CPUS];
-int __init cpu_read_ops(int cpu);
+int __init init_cpu_ops(int cpu);
+extern const struct cpu_operations *get_cpu_ops(int cpu);
-static inline void __init cpu_read_bootcpu_ops(void)
+static inline void __init init_bootcpu_ops(void)
{
- cpu_read_ops(0);
+ init_cpu_ops(0);
}
#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 485e069d8768..e75f7df746ba 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -390,14 +390,16 @@ unsigned long cpu_get_elf_hwcap2(void);
#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))
#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))
-/* System capability check for constant caps */
-static __always_inline bool __cpus_have_const_cap(int num)
+static __always_inline bool system_capabilities_finalized(void)
{
- if (num >= ARM64_NCAPS)
- return false;
- return static_branch_unlikely(&cpu_hwcap_keys[num]);
+ return static_branch_likely(&arm64_const_caps_ready);
}
+/*
+ * Test for a capability with a runtime check.
+ *
+ * Before the capability is detected, this returns false.
+ */
static inline bool cpus_have_cap(unsigned int num)
{
if (num >= ARM64_NCAPS)
@@ -405,14 +407,53 @@ static inline bool cpus_have_cap(unsigned int num)
return test_bit(num, cpu_hwcaps);
}
+/*
+ * Test for a capability without a runtime check.
+ *
+ * Before capabilities are finalized, this returns false.
+ * After capabilities are finalized, this is patched to avoid a runtime check.
+ *
+ * @num must be a compile-time constant.
+ */
+static __always_inline bool __cpus_have_const_cap(int num)
+{
+ if (num >= ARM64_NCAPS)
+ return false;
+ return static_branch_unlikely(&cpu_hwcap_keys[num]);
+}
+
+/*
+ * Test for a capability, possibly with a runtime check.
+ *
+ * Before capabilities are finalized, this behaves as cpus_have_cap().
+ * After capabilities are finalized, this is patched to avoid a runtime check.
+ *
+ * @num must be a compile-time constant.
+ */
static __always_inline bool cpus_have_const_cap(int num)
{
- if (static_branch_likely(&arm64_const_caps_ready))
+ if (system_capabilities_finalized())
return __cpus_have_const_cap(num);
else
return cpus_have_cap(num);
}
+/*
+ * Test for a capability without a runtime check.
+ *
+ * Before capabilities are finalized, this will BUG().
+ * After capabilities are finalized, this is patched to avoid a runtime check.
+ *
+ * @num must be a compile-time constant.
+ */
+static __always_inline bool cpus_have_final_cap(int num)
+{
+ if (system_capabilities_finalized())
+ return __cpus_have_const_cap(num);
+ else
+ BUG();
+}
+
static inline void cpus_set_cap(unsigned int num)
{
if (num >= ARM64_NCAPS) {
@@ -447,6 +488,29 @@ cpuid_feature_extract_unsigned_field(u64 features, int field)
return cpuid_feature_extract_unsigned_field_width(features, field, 4);
}
+/*
+ * Fields that identify the version of the Performance Monitors Extension do
+ * not follow the standard ID scheme. See ARM DDI 0487E.a page D13-2825,
+ * "Alternative ID scheme used for the Performance Monitors Extension version".
+ */
+static inline u64 __attribute_const__
+cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap)
+{
+ u64 val = cpuid_feature_extract_unsigned_field(features, field);
+ u64 mask = GENMASK_ULL(field + 3, field);
+
+ /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
+ if (val == 0xf)
+ val = 0;
+
+ if (val > cap) {
+ features &= ~mask;
+ features |= (cap << field) & mask;
+ }
+
+ return features;
+}
+
static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
{
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
@@ -613,11 +677,6 @@ static inline bool system_has_prio_mask_debugging(void)
system_uses_irq_prio_masking();
}
-static inline bool system_capabilities_finalized(void)
-{
- return static_branch_likely(&arm64_const_caps_ready);
-}
-
#define ARM64_BP_HARDEN_UNKNOWN -1
#define ARM64_BP_HARDEN_WA_NEEDED 0
#define ARM64_BP_HARDEN_NOT_REQUIRED 1
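As a usage sketch (illustrative only, not part of this patch; ARM64_HAS_RAS_EXTN is just an example capability and do_something() is a placeholder), the three check variants trade off cost against when they may legally be called:

/* Illustrative sketch, not part of this patch. */
static void example_checks(void)
{
	/* Safe at any time: a plain runtime bitmap test. */
	if (cpus_have_cap(ARM64_HAS_RAS_EXTN))
		do_something();

	/*
	 * Patched to a static branch once capabilities are finalized;
	 * falls back to the bitmap test before that point.
	 */
	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
		do_something();

	/*
	 * Cheapest form, but only legal after finalization (BUG()s
	 * otherwise), suitable for code such as KVM hyp that can only
	 * run once the kernel is fully up.
	 */
	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
		do_something();
}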
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index cb29253ae86b..6a395a7e6707 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -60,7 +60,7 @@
#define ESR_ELx_EC_BKPT32 (0x38)
/* Unallocated EC: 0x39 */
#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
-/* Unallocted EC: 0x3B */
+/* Unallocated EC: 0x3B */
#define ESR_ELx_EC_BRK64 (0x3C)
/* Unallocated EC: 0x3D - 0x3F */
#define ESR_ELx_EC_MAX (0x3F)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 4d94676e5a8b..2be67b232499 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -54,6 +54,7 @@
#define MODULES_VADDR (BPF_JIT_REGION_END)
#define MODULES_VSIZE (SZ_128M)
#define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M)
+#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
#define PCI_IO_END (VMEMMAP_START - SZ_2M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index e4d862420bb4..21a4bcfdb378 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -23,9 +23,9 @@ typedef struct {
} mm_context_t;
/*
- * This macro is only used by the TLBI code, which cannot race with an
- * ASID change and therefore doesn't need to reload the counter using
- * atomic64_read.
+ * This macro is only used by the TLBI and low-level switch_mm() code,
+ * neither of which can race with an ASID change. We therefore don't
+ * need to reload the counter using atomic64_read().
*/
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
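A worked example of the macro (hypothetical value):

/*
 * With mm->context.id.counter == 0x30007 (generation 3, hardware
 * ASID 7), ASID(mm) evaluates to 0x30007 & 0xffff == 0x0007.
 */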
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 3827ff4040a3..ab46187c6300 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -46,6 +46,8 @@ static inline void cpu_set_reserved_ttbr0(void)
isb();
}
+void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+
static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
{
BUG_ON(pgd == swapper_pg_dir);
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index d39ddb258a04..75d6cd23a679 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -21,6 +21,10 @@ extern void __cpu_copy_user_page(void *to, const void *from,
extern void copy_page(void *to, const void *from);
extern void clear_page(void *to);
+#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
+ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
#define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr)
#define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr)
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 2bdbc79bbd01..e7765b62c712 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -176,9 +176,10 @@
#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug */
#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
+#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */
#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
#define ARMV8_PMU_PMCR_N_MASK 0x1f
-#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */
+#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */
/*
* PMOVSR: counters overflow flag status reg
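ARMv8.5-PMU adds the PMCR_EL0.LP bit (bit 7), hence the writable mask widening from 0x7f to 0xff above. A minimal sketch of requesting long (64-bit) event counters with these definitions, assuming the pmcr read/write helpers from perf_event.c (the patch does this for real in armv8pmu_reset() further down):

	u32 pmcr = armv8pmu_pmcr_read() & ARMV8_PMU_PMCR_MASK;

	pmcr |= ARMV8_PMU_PMCR_LP;	/* event counters overflow at bit 63 */
	armv8pmu_pmcr_write(pmcr);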
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index a2ce65a0c1fa..0d5d1f0525eb 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -13,11 +13,9 @@
#include <asm/page.h>
-struct mm_struct;
struct cpu_suspend_ctx;
extern void cpu_do_idle(void);
-extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 085d248f824e..ebc622432831 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -740,6 +740,16 @@
#define ID_AA64DFR0_TRACEVER_SHIFT 4
#define ID_AA64DFR0_DEBUGVER_SHIFT 0
+#define ID_AA64DFR0_PMUVER_8_0 0x1
+#define ID_AA64DFR0_PMUVER_8_1 0x4
+#define ID_AA64DFR0_PMUVER_8_4 0x5
+#define ID_AA64DFR0_PMUVER_8_5 0x6
+#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
+
+#define ID_DFR0_PERFMON_SHIFT 24
+
+#define ID_DFR0_PERFMON_8_1 0x4
+
#define ID_ISAR5_RDM_SHIFT 24
#define ID_ISAR5_CRC32_SHIFT 16
#define ID_ISAR5_SHA2_SHIFT 12
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index fc6488660f64..4e5b8ee31442 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -21,7 +21,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
syscall.o
-extra-$(CONFIG_EFI) := efi-entry.o
+targets += efi-entry.o
OBJCOPYFLAGS := --prefix-symbols=__efistub_
$(obj)/%.stub.o: $(obj)/%.o FORCE
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 7832b3216370..4cc581af2d96 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -630,7 +630,7 @@ static int __init armv8_deprecated_init(void)
register_insn_emulation(&cp15_barrier_ops);
if (IS_ENABLED(CONFIG_SETEND_EMULATION)) {
- if(system_supports_mixed_endian_el0())
+ if (system_supports_mixed_endian_el0())
register_insn_emulation(&setend_ops);
else
pr_info("setend instruction emulation is not supported on this system\n");
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index 7e07072757af..e133011f64b5 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -15,10 +15,12 @@
#include <asm/smp_plat.h>
extern const struct cpu_operations smp_spin_table_ops;
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern const struct cpu_operations acpi_parking_protocol_ops;
+#endif
extern const struct cpu_operations cpu_psci_ops;
-const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
+static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
&smp_spin_table_ops,
@@ -94,7 +96,7 @@ static const char *__init cpu_read_enable_method(int cpu)
/*
* Read a cpu's enable method and record it in cpu_ops.
*/
-int __init cpu_read_ops(int cpu)
+int __init init_cpu_ops(int cpu)
{
const char *enable_method = cpu_read_enable_method(cpu);
@@ -109,3 +111,8 @@ int __init cpu_read_ops(int cpu)
return 0;
}
+
+const struct cpu_operations *get_cpu_ops(int cpu)
+{
+ return cpu_ops[cpu];
+}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index bf98a2386534..38ebad880f5c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -552,7 +552,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
BUG_ON(!reg);
- for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+ for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
u64 ftr_mask = arm64_ftr_mask(ftrp);
s64 ftr_new = arm64_ftr_value(ftrp, new);
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index e4d6af2fdec7..b512b5503f6e 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -18,11 +18,11 @@
int arm_cpuidle_init(unsigned int cpu)
{
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
int ret = -EOPNOTSUPP;
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend &&
- cpu_ops[cpu]->cpu_init_idle)
- ret = cpu_ops[cpu]->cpu_init_idle(cpu);
+ if (ops && ops->cpu_suspend && ops->cpu_init_idle)
+ ret = ops->cpu_init_idle(cpu);
return ret;
}
@@ -37,8 +37,9 @@ int arm_cpuidle_init(unsigned int cpu)
int arm_cpuidle_suspend(int index)
{
int cpu = smp_processor_id();
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
- return cpu_ops[cpu]->cpu_suspend(index);
+ return ops->cpu_suspend(index);
}
#ifdef CONFIG_ACPI
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index fde59981445c..c839b5bf1904 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -175,7 +175,7 @@ NOKPROBE_SYMBOL(el0_pc);
static void notrace el0_sp(struct pt_regs *regs, unsigned long esr)
{
user_exit_irqoff();
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(regs->sp, esr, regs);
}
NOKPROBE_SYMBOL(el0_sp);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 989b1944cb71..f79023c9b374 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -404,7 +404,6 @@ __create_page_tables:
ret x28
ENDPROC(__create_page_tables)
- .ltorg
/*
* The following fragment of code is executed with the MMU enabled.
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 38bcd4d4e43b..6532105b3e32 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -110,8 +110,6 @@ ENTRY(swsusp_arch_suspend_exit)
cbz x24, 3f /* Do we need to re-initialise EL2? */
hvc #0
3: ret
-
- .ltorg
ENDPROC(swsusp_arch_suspend_exit)
/*
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index dd3ae8081b38..b40c3b0def92 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -121,7 +121,7 @@ static int setup_dtb(struct kimage *image,
/* add kaslr-seed */
ret = fdt_delprop(dtb, off, FDT_PROP_KASLR_SEED);
- if (ret == -FDT_ERR_NOTFOUND)
+ if (ret == -FDT_ERR_NOTFOUND)
ret = 0;
else if (ret)
goto out;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index e40b65645c86..4d7879484cec 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -285,6 +285,17 @@ static struct attribute_group armv8_pmuv3_format_attr_group = {
#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+
+/*
+ * We unconditionally enable ARMv8.5-PMU long event counter support
+ * (64-bit events) where supported. Indicate if this arm_pmu has long
+ * event counter support.
+ */
+static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
+{
+ return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5);
+}
+
/*
* We must chain two programmable counters for 64 bit events,
* except when we have allocated the 64bit cycle counter (for CPU
@@ -294,9 +305,11 @@ static struct attribute_group armv8_pmuv3_format_attr_group = {
static inline bool armv8pmu_event_is_chained(struct perf_event *event)
{
int idx = event->hw.idx;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
return !WARN_ON(idx < 0) &&
armv8pmu_event_is_64bit(event) &&
+ !armv8pmu_has_long_event(cpu_pmu) &&
(idx != ARMV8_IDX_CYCLE_COUNTER);
}
@@ -345,7 +358,7 @@ static inline void armv8pmu_select_counter(int idx)
isb();
}
-static inline u32 armv8pmu_read_evcntr(int idx)
+static inline u64 armv8pmu_read_evcntr(int idx)
{
armv8pmu_select_counter(idx);
return read_sysreg(pmxevcntr_el0);
@@ -362,6 +375,44 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
return val;
}
+/*
+ * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP
+ * is set the event counters also become 64-bit counters. Unless the
+ * user has requested a long counter (attr.config1) then we want to
+ * interrupt upon 32-bit overflow - we achieve this by applying a bias.
+ */
+static bool armv8pmu_event_needs_bias(struct perf_event *event)
+{
+ struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (armv8pmu_event_is_64bit(event))
+ return false;
+
+ if (armv8pmu_has_long_event(cpu_pmu) ||
+ idx == ARMV8_IDX_CYCLE_COUNTER)
+ return true;
+
+ return false;
+}
+
+static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value)
+{
+ if (armv8pmu_event_needs_bias(event))
+ value |= GENMASK(63, 32);
+
+ return value;
+}
+
+static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value)
+{
+ if (armv8pmu_event_needs_bias(event))
+ value &= ~GENMASK(63, 32);
+
+ return value;
+}
+
static u64 armv8pmu_read_counter(struct perf_event *event)
{
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
@@ -377,10 +428,10 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
else
value = armv8pmu_read_hw_counter(event);
- return value;
+ return armv8pmu_unbias_long_counter(event, value);
}
-static inline void armv8pmu_write_evcntr(int idx, u32 value)
+static inline void armv8pmu_write_evcntr(int idx, u64 value)
{
armv8pmu_select_counter(idx);
write_sysreg(value, pmxevcntr_el0);
@@ -405,20 +456,14 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ value = armv8pmu_bias_long_counter(event, value);
+
if (!armv8pmu_counter_valid(cpu_pmu, idx))
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
- else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
- /*
- * The cycles counter is really a 64-bit counter.
- * When treating it as a 32-bit counter, we only count
- * the lower 32 bits, and set the upper 32-bits so that
- * we get an interrupt upon 32-bit overflow.
- */
- if (!armv8pmu_event_is_64bit(event))
- value |= 0xffffffff00000000ULL;
+ else if (idx == ARMV8_IDX_CYCLE_COUNTER)
write_sysreg(value, pmccntr_el0);
- } else
+ else
armv8pmu_write_hw_counter(event, value);
}
@@ -450,86 +495,74 @@ static inline void armv8pmu_write_event_type(struct perf_event *event)
}
}
-static inline int armv8pmu_enable_counter(int idx)
+static u32 armv8pmu_event_cnten_mask(struct perf_event *event)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmcntenset_el0);
- return idx;
+ int counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
+ u32 mask = BIT(counter);
+
+ if (armv8pmu_event_is_chained(event))
+ mask |= BIT(counter - 1);
+ return mask;
+}
+
+static inline void armv8pmu_enable_counter(u32 mask)
+{
+ write_sysreg(mask, pmcntenset_el0);
}
static inline void armv8pmu_enable_event_counter(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
- int idx = event->hw.idx;
- u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
+ u32 mask = armv8pmu_event_cnten_mask(event);
- if (armv8pmu_event_is_chained(event))
- counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
-
- kvm_set_pmu_events(counter_bits, attr);
+ kvm_set_pmu_events(mask, attr);
/* We rely on the hypervisor switch code to enable guest counters */
- if (!kvm_pmu_counter_deferred(attr)) {
- armv8pmu_enable_counter(idx);
- if (armv8pmu_event_is_chained(event))
- armv8pmu_enable_counter(idx - 1);
- }
+ if (!kvm_pmu_counter_deferred(attr))
+ armv8pmu_enable_counter(mask);
}
-static inline int armv8pmu_disable_counter(int idx)
+static inline void armv8pmu_disable_counter(u32 mask)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmcntenclr_el0);
- return idx;
+ write_sysreg(mask, pmcntenclr_el0);
}
static inline void armv8pmu_disable_event_counter(struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
struct perf_event_attr *attr = &event->attr;
- int idx = hwc->idx;
- u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
-
- if (armv8pmu_event_is_chained(event))
- counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
+ u32 mask = armv8pmu_event_cnten_mask(event);
- kvm_clr_pmu_events(counter_bits);
+ kvm_clr_pmu_events(mask);
/* We rely on the hypervisor switch code to disable guest counters */
- if (!kvm_pmu_counter_deferred(attr)) {
- if (armv8pmu_event_is_chained(event))
- armv8pmu_disable_counter(idx - 1);
- armv8pmu_disable_counter(idx);
- }
+ if (!kvm_pmu_counter_deferred(attr))
+ armv8pmu_disable_counter(mask);
}
-static inline int armv8pmu_enable_intens(int idx)
+static inline void armv8pmu_enable_intens(u32 mask)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmintenset_el1);
- return idx;
+ write_sysreg(mask, pmintenset_el1);
}
-static inline int armv8pmu_enable_event_irq(struct perf_event *event)
+static inline void armv8pmu_enable_event_irq(struct perf_event *event)
{
- return armv8pmu_enable_intens(event->hw.idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
+ armv8pmu_enable_intens(BIT(counter));
}
-static inline int armv8pmu_disable_intens(int idx)
+static inline void armv8pmu_disable_intens(u32 mask)
{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmintenclr_el1);
+ write_sysreg(mask, pmintenclr_el1);
isb();
/* Clear the overflow flag in case an interrupt is pending. */
- write_sysreg(BIT(counter), pmovsclr_el0);
+ write_sysreg(mask, pmovsclr_el0);
isb();
-
- return idx;
}
-static inline int armv8pmu_disable_event_irq(struct perf_event *event)
+static inline void armv8pmu_disable_event_irq(struct perf_event *event)
{
- return armv8pmu_disable_intens(event->hw.idx);
+ u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
+ armv8pmu_disable_intens(BIT(counter));
}
static inline u32 armv8pmu_getreset_flags(void)
@@ -743,7 +776,8 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
/*
* Otherwise use events counters
*/
- if (armv8pmu_event_is_64bit(event))
+ if (armv8pmu_event_is_64bit(event) &&
+ !armv8pmu_has_long_event(cpu_pmu))
return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
else
return armv8pmu_get_single_idx(cpuc, cpu_pmu);
@@ -815,13 +849,11 @@ static int armv8pmu_filter_match(struct perf_event *event)
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
- u32 idx, nb_cnt = cpu_pmu->num_events;
+ u32 pmcr;
/* The counter and interrupt enable registers are unknown at reset. */
- for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
- armv8pmu_disable_counter(idx);
- armv8pmu_disable_intens(idx);
- }
+ armv8pmu_disable_counter(U32_MAX);
+ armv8pmu_disable_intens(U32_MAX);
/* Clear the counters we flip at guest entry/exit */
kvm_clr_pmu_events(U32_MAX);
@@ -830,8 +862,13 @@ static void armv8pmu_reset(void *info)
* Initialize & Reset PMNC. Request overflow interrupt for
* 64 bit cycle counter but cheat in armv8pmu_write_counter().
*/
- armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C |
- ARMV8_PMU_PMCR_LC);
+ pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC;
+
+ /* Enable long event counter support where available */
+ if (armv8pmu_has_long_event(cpu_pmu))
+ pmcr |= ARMV8_PMU_PMCR_LP;
+
+ armv8pmu_pmcr_write(pmcr);
}
static int __armv8_pmuv3_map_event(struct perf_event *event,
@@ -914,6 +951,7 @@ static void __armv8pmu_probe_pmu(void *info)
if (pmuver == 0xf || pmuver == 0)
return;
+ cpu_pmu->pmuver = pmuver;
probe->present = true;
/* Read the nb of CNTx counters supported from PMNC */
@@ -953,7 +991,10 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
return probe.present ? 0 : -ENODEV;
}
-static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
+static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
+ int (*map_event)(struct perf_event *event),
+ const struct attribute_group *events,
+ const struct attribute_group *format)
{
int ret = armv8pmu_probe_pmu(cpu_pmu);
if (ret)
@@ -972,144 +1013,127 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
cpu_pmu->filter_match = armv8pmu_filter_match;
+ cpu_pmu->name = name;
+ cpu_pmu->map_event = map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ?
+ events : &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ?
+ format : &armv8_pmuv3_format_attr_group;
+
return 0;
}
static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_pmuv3";
- cpu_pmu->map_event = armv8_pmuv3_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
+ return armv8_pmu_init(cpu_pmu, "armv8_pmuv3",
+ armv8_pmuv3_map_event, NULL, NULL);
+}
- return 0;
+static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a34",
+ armv8_pmuv3_map_event, NULL, NULL);
}
static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a35";
- cpu_pmu->map_event = armv8_a53_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35",
+ armv8_a53_map_event, NULL, NULL);
}
static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a53";
- cpu_pmu->map_event = armv8_a53_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53",
+ armv8_a53_map_event, NULL, NULL);
+}
- return 0;
+static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a55",
+ armv8_pmuv3_map_event, NULL, NULL);
}
static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a57";
- cpu_pmu->map_event = armv8_a57_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57",
+ armv8_a57_map_event, NULL, NULL);
+}
- return 0;
+static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a65",
+ armv8_pmuv3_map_event, NULL, NULL);
}
static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a72";
- cpu_pmu->map_event = armv8_a57_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72",
+ armv8_a57_map_event, NULL, NULL);
}
static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a73";
- cpu_pmu->map_event = armv8_a73_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73",
+ armv8_a73_map_event, NULL, NULL);
+}
- return 0;
+static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a75",
+ armv8_pmuv3_map_event, NULL, NULL);
}
-static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
+static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a76",
+ armv8_pmuv3_map_event, NULL, NULL);
+}
- cpu_pmu->name = "armv8_cavium_thunder";
- cpu_pmu->map_event = armv8_thunder_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
+static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a77",
+ armv8_pmuv3_map_event, NULL, NULL);
+}
- return 0;
+static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init(cpu_pmu, "armv8_neoverse_e1",
+ armv8_pmuv3_map_event, NULL, NULL);
}
-static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
+static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu)
{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
+ return armv8_pmu_init(cpu_pmu, "armv8_neoverse_n1",
+ armv8_pmuv3_map_event, NULL, NULL);
+}
- cpu_pmu->name = "armv8_brcm_vulcan";
- cpu_pmu->map_event = armv8_vulcan_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
+static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder",
+ armv8_thunder_map_event, NULL, NULL);
+}
- return 0;
+static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan",
+ armv8_vulcan_map_event, NULL, NULL);
}
static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
+ {.compatible = "arm,cortex-a34-pmu", .data = armv8_a34_pmu_init},
{.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init},
{.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
+ {.compatible = "arm,cortex-a55-pmu", .data = armv8_a55_pmu_init},
{.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
+ {.compatible = "arm,cortex-a65-pmu", .data = armv8_a65_pmu_init},
{.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
{.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init},
+ {.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init},
+ {.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init},
+ {.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init},
+ {.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init},
+ {.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
{.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init},
{},
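A worked example of the counter bias introduced above (hypothetical values):

/*
 * A 32-bit event programmed to overflow after 0x1000 more increments:
 *
 *   requested write:  0xfffff000  (-0x1000 as a 32-bit value)
 *   biased write:     0xfffff000 | GENMASK(63, 32)
 *                   = 0xfffffffffffff000
 *
 * The 64-bit counter then overflows, raising the interrupt, after
 * exactly 0x1000 increments, mimicking a 32-bit counter. Reads apply
 * the inverse mask, so software only ever sees the low 32 bits.
 */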
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index a34890bf309f..3fd2c11c09fc 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -344,7 +344,7 @@ void __init setup_arch(char **cmdline_p)
else
psci_acpi_init();
- cpu_read_bootcpu_ops();
+ init_bootcpu_ops();
smp_init_cpus();
smp_build_mpidr_hash();
@@ -371,8 +371,10 @@ void __init setup_arch(char **cmdline_p)
static inline bool cpu_can_disable(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_can_disable)
- return cpu_ops[cpu]->cpu_can_disable(cpu);
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
+ if (ops && ops->cpu_can_disable)
+ return ops->cpu_can_disable(cpu);
#endif
return false;
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d4ed9a19d8fe..034806725598 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -93,8 +93,10 @@ static inline int op_cpu_kill(unsigned int cpu)
*/
static int boot_secondary(unsigned int cpu, struct task_struct *idle)
{
- if (cpu_ops[cpu]->cpu_boot)
- return cpu_ops[cpu]->cpu_boot(cpu);
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
+ if (ops->cpu_boot)
+ return ops->cpu_boot(cpu);
return -EOPNOTSUPP;
}
@@ -115,60 +117,55 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
- /*
- * Now bring the CPU into our world.
- */
+ /* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
- if (ret == 0) {
- /*
- * CPU was successfully started, wait for it to come online or
- * time out.
- */
- wait_for_completion_timeout(&cpu_running,
- msecs_to_jiffies(5000));
-
- if (!cpu_online(cpu)) {
- pr_crit("CPU%u: failed to come online\n", cpu);
- ret = -EIO;
- }
- } else {
+ if (ret) {
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
return ret;
}
+ /*
+ * CPU was successfully started, wait for it to come online or
+ * time out.
+ */
+ wait_for_completion_timeout(&cpu_running,
+ msecs_to_jiffies(5000));
+ if (cpu_online(cpu))
+ return 0;
+
+ pr_crit("CPU%u: failed to come online\n", cpu);
secondary_data.task = NULL;
secondary_data.stack = NULL;
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
status = READ_ONCE(secondary_data.status);
- if (ret && status) {
+ if (status == CPU_MMU_OFF)
+ status = READ_ONCE(__early_cpu_boot_status);
- if (status == CPU_MMU_OFF)
- status = READ_ONCE(__early_cpu_boot_status);
-
- switch (status & CPU_BOOT_STATUS_MASK) {
- default:
- pr_err("CPU%u: failed in unknown state : 0x%lx\n",
- cpu, status);
- cpus_stuck_in_kernel++;
- break;
- case CPU_KILL_ME:
- if (!op_cpu_kill(cpu)) {
- pr_crit("CPU%u: died during early boot\n", cpu);
- break;
- }
- pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
- /* Fall through */
- case CPU_STUCK_IN_KERNEL:
- pr_crit("CPU%u: is stuck in kernel\n", cpu);
- if (status & CPU_STUCK_REASON_52_BIT_VA)
- pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
- if (status & CPU_STUCK_REASON_NO_GRAN)
- pr_crit("CPU%u: does not support %luK granule \n", cpu, PAGE_SIZE / SZ_1K);
- cpus_stuck_in_kernel++;
+ switch (status & CPU_BOOT_STATUS_MASK) {
+ default:
+ pr_err("CPU%u: failed in unknown state : 0x%lx\n",
+ cpu, status);
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_KILL_ME:
+ if (!op_cpu_kill(cpu)) {
+ pr_crit("CPU%u: died during early boot\n", cpu);
break;
- case CPU_PANIC_KERNEL:
- panic("CPU%u detected unsupported configuration\n", cpu);
}
+ pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
+ /* Fall through */
+ case CPU_STUCK_IN_KERNEL:
+ pr_crit("CPU%u: is stuck in kernel\n", cpu);
+ if (status & CPU_STUCK_REASON_52_BIT_VA)
+ pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
+ if (status & CPU_STUCK_REASON_NO_GRAN) {
+ pr_crit("CPU%u: does not support %luK granule\n",
+ cpu, PAGE_SIZE / SZ_1K);
+ }
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_PANIC_KERNEL:
+ panic("CPU%u detected unsupported configuration\n", cpu);
}
return ret;
@@ -196,6 +193,7 @@ asmlinkage notrace void secondary_start_kernel(void)
{
u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
struct mm_struct *mm = &init_mm;
+ const struct cpu_operations *ops;
unsigned int cpu;
cpu = task_cpu(current);
@@ -227,8 +225,9 @@ asmlinkage notrace void secondary_start_kernel(void)
*/
check_local_cpu_capabilities();
- if (cpu_ops[cpu]->cpu_postboot)
- cpu_ops[cpu]->cpu_postboot();
+ ops = get_cpu_ops(cpu);
+ if (ops->cpu_postboot)
+ ops->cpu_postboot();
/*
* Log the CPU info before it is marked online and might get read.
@@ -266,19 +265,21 @@ asmlinkage notrace void secondary_start_kernel(void)
#ifdef CONFIG_HOTPLUG_CPU
static int op_cpu_disable(unsigned int cpu)
{
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
/*
* If we don't have a cpu_die method, abort before we reach the point
 * of no return. CPU0 may not have a cpu_ops, so test for it.
*/
- if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die)
+ if (!ops || !ops->cpu_die)
return -EOPNOTSUPP;
/*
* We may need to abort a hot unplug for some other mechanism-specific
* reason.
*/
- if (cpu_ops[cpu]->cpu_disable)
- return cpu_ops[cpu]->cpu_disable(cpu);
+ if (ops->cpu_disable)
+ return ops->cpu_disable(cpu);
return 0;
}
@@ -314,15 +315,17 @@ int __cpu_disable(void)
static int op_cpu_kill(unsigned int cpu)
{
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
/*
* If we have no means of synchronising with the dying CPU, then assume
* that it is really dead. We can only wait for an arbitrary length of
* time and hope that it's dead, so let's skip the wait and just hope.
*/
- if (!cpu_ops[cpu]->cpu_kill)
+ if (!ops->cpu_kill)
return 0;
- return cpu_ops[cpu]->cpu_kill(cpu);
+ return ops->cpu_kill(cpu);
}
/*
@@ -357,6 +360,7 @@ void __cpu_die(unsigned int cpu)
void cpu_die(void)
{
unsigned int cpu = smp_processor_id();
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
idle_task_exit();
@@ -370,12 +374,22 @@ void cpu_die(void)
* mechanism must perform all required cache maintenance to ensure that
* no dirty lines are lost in the process of shutting down the CPU.
*/
- cpu_ops[cpu]->cpu_die(cpu);
+ ops->cpu_die(cpu);
BUG();
}
#endif
+static void __cpu_try_die(int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
+ if (ops && ops->cpu_die)
+ ops->cpu_die(cpu);
+#endif
+}
+
/*
* Kill the calling secondary CPU, early in bringup before it is turned
* online.
@@ -389,12 +403,11 @@ void cpu_die_early(void)
/* Mark this CPU absent */
set_cpu_present(cpu, 0);
-#ifdef CONFIG_HOTPLUG_CPU
- update_cpu_boot_status(CPU_KILL_ME);
- /* Check if we can park ourselves */
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
- cpu_ops[cpu]->cpu_die(cpu);
-#endif
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+ update_cpu_boot_status(CPU_KILL_ME);
+ __cpu_try_die(cpu);
+ }
+
update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
cpu_park_loop();
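The __cpu_try_die() helper replaces open-coded #ifdef blocks with the IS_ENABLED() idiom, so the call sites stay compiled and type-checked in every configuration; a sketch of the before/after shape, restating the hunks above:

/* Before: invisible to the compiler when CONFIG_HOTPLUG_CPU=n. */
#ifdef CONFIG_HOTPLUG_CPU
	if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
		cpu_ops[cpu]->cpu_die(cpu);
#endif

/* After: always compiled, dead-code eliminated when the option is off. */
	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
		__cpu_try_die(cpu);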
@@ -488,10 +501,13 @@ static bool __init is_mpidr_duplicate(unsigned int cpu, u64 hwid)
*/
static int __init smp_cpu_setup(int cpu)
{
- if (cpu_read_ops(cpu))
+ const struct cpu_operations *ops;
+
+ if (init_cpu_ops(cpu))
return -ENODEV;
- if (cpu_ops[cpu]->cpu_init(cpu))
+ ops = get_cpu_ops(cpu);
+ if (ops->cpu_init(cpu))
return -ENODEV;
set_cpu_possible(cpu, true);
@@ -714,6 +730,7 @@ void __init smp_init_cpus(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ const struct cpu_operations *ops;
int err;
unsigned int cpu;
unsigned int this_cpu;
@@ -744,10 +761,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (cpu == smp_processor_id())
continue;
- if (!cpu_ops[cpu])
+ ops = get_cpu_ops(cpu);
+ if (!ops)
continue;
- err = cpu_ops[cpu]->cpu_prepare(cpu);
+ err = ops->cpu_prepare(cpu);
if (err)
continue;
@@ -863,10 +881,8 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
local_irq_disable();
sdei_mask_local_cpu();
-#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_ops[cpu]->cpu_die)
- cpu_ops[cpu]->cpu_die(cpu);
-#endif
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ __cpu_try_die(cpu);
/* just in case */
cpu_park_loop();
@@ -1044,8 +1060,9 @@ static bool have_cpu_die(void)
{
#ifdef CONFIG_HOTPLUG_CPU
int any_cpu = raw_smp_processor_id();
+ const struct cpu_operations *ops = get_cpu_ops(any_cpu);
- if (cpu_ops[any_cpu] && cpu_ops[any_cpu]->cpu_die)
+ if (ops && ops->cpu_die)
return true;
#endif
return false;
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 46292a370781..0f53996c4bfe 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -139,7 +139,7 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
write_sysreg(val, cptr_el2);
- if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
isb();
@@ -158,12 +158,12 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
{
u64 hcr = vcpu->arch.hcr_el2;
- if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
hcr |= HCR_TVM;
write_sysreg(hcr, hcr_el2);
- if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
if (has_vhe())
@@ -193,7 +193,7 @@ static void __hyp_text __deactivate_traps_nvhe(void)
{
u64 mdcr_el2 = read_sysreg(mdcr_el2);
- if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
u64 val;
/*
@@ -340,7 +340,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
* resolve the IPA using the AT instruction.
*/
if (!(esr & ESR_ELx_S1PTW) &&
- (cpus_have_const_cap(ARM64_WORKAROUND_834220) ||
+ (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
(esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
@@ -510,7 +510,7 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (*exit_code != ARM_EXCEPTION_TRAP)
goto exit;
- if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
handle_tx2_tvm(vcpu))
return true;
@@ -567,7 +567,7 @@ exit:
static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
{
- if (!cpus_have_const_cap(ARM64_SSBD))
+ if (!cpus_have_final_cap(ARM64_SSBD))
return false;
return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 7672a978926c..75b1925763f1 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -71,7 +71,7 @@ static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ct
ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR);
ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
- if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
}
@@ -118,7 +118,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
- if (!cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
} else if (!ctxt->__hyp_running_vcpu) {
@@ -149,7 +149,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
- if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) &&
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) &&
ctxt->__hyp_running_vcpu) {
/*
* Must only be done for host registers, hence the context
@@ -194,7 +194,7 @@ __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR);
write_sysreg_el2(pstate, SYS_SPSR);
- if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 92f560e3e1aa..ceaddbe4279f 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -23,7 +23,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
local_irq_save(cxt->flags);
- if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
/*
* For CPUs that are affected by ARM errata 1165522 or 1530923,
* we cannot trust stage-1 to be in a correct state at that
@@ -63,7 +63,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
- if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
u64 val;
/*
@@ -103,7 +103,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
isb();
- if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) {
/* Restore the registers to what they were */
write_sysreg_el1(cxt->tcr, SYS_TCR);
write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
@@ -117,7 +117,7 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
{
write_sysreg(0, vttbr_el2);
- if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
/* Ensure write of the host VMID */
isb();
/* Restore the host's TCR_EL1 */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 44354c812783..090f46d3add1 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1101,6 +1101,16 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
(0xfUL << ID_AA64ISAR1_API_SHIFT) |
(0xfUL << ID_AA64ISAR1_GPA_SHIFT) |
(0xfUL << ID_AA64ISAR1_GPI_SHIFT));
+ } else if (id == SYS_ID_AA64DFR0_EL1) {
+ /* Limit guests to PMUv3 for ARMv8.1 */
+ val = cpuid_feature_cap_perfmon_field(val,
+ ID_AA64DFR0_PMUVER_SHIFT,
+ ID_AA64DFR0_PMUVER_8_1);
+ } else if (id == SYS_ID_DFR0_EL1) {
+ /* Limit guests to PMUv3 for ARMv8.1 */
+ val = cpuid_feature_cap_perfmon_field(val,
+ ID_DFR0_PERFMON_SHIFT,
+ ID_DFR0_PERFMON_8_1);
}
return val;
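A worked example of the capping performed by cpuid_feature_cap_perfmon_field() (hypothetical host value):

/*
 * Host reports PMUv3 for ARMv8.4: ID_AA64DFR0_EL1.PMUVer == 0x5 at
 * ID_AA64DFR0_PMUVER_SHIFT (8):
 *
 *   val = cpuid_feature_extract_unsigned_field(features, 8) == 0x5
 *   0x5 > ID_AA64DFR0_PMUVER_8_1 (0x4), so bits [11:8] are rewritten
 *   to 0x4 and the guest sees PMUv3 for ARMv8.1.
 *
 * An IMPLEMENTATION DEFINED PMU (0xf) is squashed to 0x0 first, i.e.
 * reported to the guest as not implemented.
 */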
diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c
index 1f82c66b32ea..60eccae2abad 100644
--- a/arch/arm64/lib/csum.c
+++ b/arch/arm64/lib/csum.c
@@ -124,3 +124,30 @@ unsigned int do_csum(const unsigned char *buff, int len)
return sum >> 16;
}
+
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ __uint128_t src, dst;
+ u64 sum = (__force u64)csum;
+
+ src = *(const __uint128_t *)saddr->s6_addr;
+ dst = *(const __uint128_t *)daddr->s6_addr;
+
+ sum += (__force u32)htonl(len);
+#ifdef __LITTLE_ENDIAN
+ sum += (u32)proto << 24;
+#else
+ sum += proto;
+#endif
+ src += (src >> 64) | (src << 64);
+ dst += (dst >> 64) | (dst << 64);
+
+ sum = accumulate(sum, src >> 64);
+ sum = accumulate(sum, dst >> 64);
+
+ sum += ((sum >> 32) | (sum << 32));
+ return csum_fold((__force __wsum)(sum >> 32));
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
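The 128-bit folding above relies on a rotate-and-add trick; a data-flow sketch (pure arithmetic, no new interfaces):

/*
 * For a 128-bit value x = hi:lo,
 *
 *   x += (x >> 64) | (x << 64);
 *
 * adds hi and lo into both halves at once, with the carry out of the
 * low half landing in the high half. The high 64 bits of the result
 * are therefore the end-around-carry sum of hi and lo, which is what
 * accumulate() consumes. The final
 *
 *   sum += (sum >> 32) | (sum << 32);
 *
 * applies the same trick to fold 64 bits down to 32 before csum_fold().
 */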
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 4767540d1b94..4e79566726c8 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -186,7 +186,7 @@ CPU_LE( rev data2, data2 )
* as carry-propagation can corrupt the upper bits if the trailing
* bytes in the string contain 0x01.
* However, if there is no NUL byte in the dword, we can generate
- * the result directly. We ca not just subtract the bytes as the
+ * the result directly. We cannot just subtract the bytes as the
* MSB might be significant.
*/
CPU_BE( cbnz has_nul, 1f )
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 8ef73e89d514..8524f03d629c 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -6,6 +6,7 @@
* Copyright (C) 2012 ARM Ltd.
*/
+#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -254,10 +255,37 @@ switch_mm_fastpath:
/* Errata workaround post TTBRx_EL1 update. */
asmlinkage void post_ttbr_update_workaround(void)
{
+ if (!IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456))
+ return;
+
asm(ALTERNATIVE("nop; nop; nop",
"ic iallu; dsb nsh; isb",
- ARM64_WORKAROUND_CAVIUM_27456,
- CONFIG_CAVIUM_ERRATUM_27456));
+ ARM64_WORKAROUND_CAVIUM_27456));
+}
+
+void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm)
+{
+ unsigned long ttbr1 = read_sysreg(ttbr1_el1);
+ unsigned long asid = ASID(mm);
+ unsigned long ttbr0 = phys_to_ttbr(pgd_phys);
+
+ /* Skip CNP for the reserved ASID */
+ if (system_supports_cnp() && asid)
+ ttbr0 |= TTBR_CNP_BIT;
+
+ /* SW PAN needs a copy of the ASID in TTBR0 for entry */
+ if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN))
+ ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid);
+
+ /* Set ASID in TTBR1 since TCR.A1 is set */
+ ttbr1 &= ~TTBR_ASID_MASK;
+ ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid);
+
+ write_sysreg(ttbr1, ttbr1_el1);
+ isb();
+ write_sysreg(ttbr0, ttbr0_el1);
+ isb();
+ post_ttbr_update_workaround();
}
static int asids_init(void)
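A worked example of the FIELD_PREP() usage in cpu_do_switch_mm() (hypothetical ASID; TTBR_ASID_MASK covers bits [63:48]):

/*
 * With asid == 0x0007:
 *
 *   FIELD_PREP(TTBR_ASID_MASK, 0x0007) == 0x0007UL << 48
 *
 * so the ASID lands in TTBR1_EL1[63:48] (TCR_EL1.A1 selects TTBR1 as
 * the ASID source) while the table base in the low bits is untouched.
 */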
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 128f70852bf3..9b08f7c7e6f0 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -17,6 +17,7 @@
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
+#include <linux/memory.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
@@ -724,6 +725,312 @@ int kern_addr_valid(unsigned long addr)
return pfn_valid(pte_pfn(pte));
}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static void free_hotplug_page_range(struct page *page, size_t size)
+{
+ WARN_ON(PageReserved(page));
+ free_pages((unsigned long)page_address(page), get_order(size));
+}
+
+static void free_hotplug_pgtable_page(struct page *page)
+{
+ free_hotplug_page_range(page, PAGE_SIZE);
+}
+
+static bool pgtable_range_aligned(unsigned long start, unsigned long end,
+ unsigned long floor, unsigned long ceiling,
+ unsigned long mask)
+{
+ start &= mask;
+ if (start < floor)
+ return false;
+
+ if (ceiling) {
+ ceiling &= mask;
+ if (!ceiling)
+ return false;
+ }
+
+ if (end - 1 > ceiling - 1)
+ return false;
+ return true;
+}
+
+static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, bool free_mapped)
+{
+ pte_t *ptep, pte;
+
+ do {
+ ptep = pte_offset_kernel(pmdp, addr);
+ pte = READ_ONCE(*ptep);
+ if (pte_none(pte))
+ continue;
+
+ WARN_ON(!pte_present(pte));
+ pte_clear(&init_mm, addr, ptep);
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+ if (free_mapped)
+ free_hotplug_page_range(pte_page(pte), PAGE_SIZE);
+ } while (addr += PAGE_SIZE, addr < end);
+}
+
+static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
+ unsigned long end, bool free_mapped)
+{
+ unsigned long next;
+ pmd_t *pmdp, pmd;
+
+ do {
+ next = pmd_addr_end(addr, end);
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_none(pmd))
+ continue;
+
+ WARN_ON(!pmd_present(pmd));
+ if (pmd_sect(pmd)) {
+ pmd_clear(pmdp);
+
+ /*
+ * One TLBI should be sufficient here as the PMD_SIZE
+ * range is mapped with a single block entry.
+ */
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+ if (free_mapped)
+ free_hotplug_page_range(pmd_page(pmd),
+ PMD_SIZE);
+ continue;
+ }
+ WARN_ON(!pmd_table(pmd));
+ unmap_hotplug_pte_range(pmdp, addr, next, free_mapped);
+ } while (addr = next, addr < end);
+}
+
+static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
+ unsigned long end, bool free_mapped)
+{
+ unsigned long next;
+ pud_t *pudp, pud;
+
+ do {
+ next = pud_addr_end(addr, end);
+ pudp = pud_offset(p4dp, addr);
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
+ continue;
+
+ WARN_ON(!pud_present(pud));
+ if (pud_sect(pud)) {
+ pud_clear(pudp);
+
+ /*
+ * One TLBI should be sufficient here as the PUD_SIZE
+ * range is mapped with a single block entry.
+ */
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+ if (free_mapped)
+ free_hotplug_page_range(pud_page(pud),
+ PUD_SIZE);
+ continue;
+ }
+ WARN_ON(!pud_table(pud));
+ unmap_hotplug_pmd_range(pudp, addr, next, free_mapped);
+ } while (addr = next, addr < end);
+}
+
+static void unmap_hotplug_p4d_range(pgd_t *pgdp, unsigned long addr,
+ unsigned long end, bool free_mapped)
+{
+ unsigned long next;
+ p4d_t *p4dp, p4d;
+
+ do {
+ next = p4d_addr_end(addr, end);
+ p4dp = p4d_offset(pgdp, addr);
+ p4d = READ_ONCE(*p4dp);
+ if (p4d_none(p4d))
+ continue;
+
+ WARN_ON(!p4d_present(p4d));
+ unmap_hotplug_pud_range(p4dp, addr, next, free_mapped);
+ } while (addr = next, addr < end);
+}
+
+static void unmap_hotplug_range(unsigned long addr, unsigned long end,
+ bool free_mapped)
+{
+ unsigned long next;
+ pgd_t *pgdp, pgd;
+
+ do {
+ next = pgd_addr_end(addr, end);
+ pgdp = pgd_offset_k(addr);
+ pgd = READ_ONCE(*pgdp);
+ if (pgd_none(pgd))
+ continue;
+
+ WARN_ON(!pgd_present(pgd));
+ unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped);
+ } while (addr = next, addr < end);
+}
+
+static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, unsigned long floor,
+ unsigned long ceiling)
+{
+ pte_t *ptep, pte;
+ unsigned long i, start = addr;
+
+ do {
+ ptep = pte_offset_kernel(pmdp, addr);
+ pte = READ_ONCE(*ptep);
+
+ /*
+ * This is just a sanity check here which verifies that
+ * pte clearing has been done by earlier unmap loops.
+ */
+ WARN_ON(!pte_none(pte));
+ } while (addr += PAGE_SIZE, addr < end);
+
+ if (!pgtable_range_aligned(start, end, floor, ceiling, PMD_MASK))
+ return;
+
+ /*
+ * Check whether we can free the pte page if the rest of the
+ * entries are empty. Overlaps with other regions have been
+ * handled by the floor/ceiling check.
+ */
+ ptep = pte_offset_kernel(pmdp, 0UL);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ if (!pte_none(READ_ONCE(ptep[i])))
+ return;
+ }
+
+ pmd_clear(pmdp);
+ __flush_tlb_kernel_pgtable(start);
+ free_hotplug_pgtable_page(virt_to_page(ptep));
+}
+
+static void free_empty_pmd_table(pud_t *pudp, unsigned long addr,
+ unsigned long end, unsigned long floor,
+ unsigned long ceiling)
+{
+ pmd_t *pmdp, pmd;
+ unsigned long i, next, start = addr;
+
+ do {
+ next = pmd_addr_end(addr, end);
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_none(pmd))
+ continue;
+
+ WARN_ON(!pmd_present(pmd) || !pmd_table(pmd) || pmd_sect(pmd));
+ free_empty_pte_table(pmdp, addr, next, floor, ceiling);
+ } while (addr = next, addr < end);
+
+ if (CONFIG_PGTABLE_LEVELS <= 2)
+ return;
+
+ if (!pgtable_range_aligned(start, end, floor, ceiling, PUD_MASK))
+ return;
+
+ /*
+ * Check whether we can free the pmd page if the rest of the
+ * entries are empty. Overlaps with other regions have been
+ * handled by the floor/ceiling check.
+ */
+ pmdp = pmd_offset(pudp, 0UL);
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (!pmd_none(READ_ONCE(pmdp[i])))
+ return;
+ }
+
+ pud_clear(pudp);
+ __flush_tlb_kernel_pgtable(start);
+ free_hotplug_pgtable_page(virt_to_page(pmdp));
+}
+
+static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr,
+ unsigned long end, unsigned long floor,
+ unsigned long ceiling)
+{
+ pud_t *pudp, pud;
+ unsigned long i, next, start = addr;
+
+ do {
+ next = pud_addr_end(addr, end);
+ pudp = pud_offset(p4dp, addr);
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
+ continue;
+
+ WARN_ON(!pud_present(pud) || !pud_table(pud) || pud_sect(pud));
+ free_empty_pmd_table(pudp, addr, next, floor, ceiling);
+ } while (addr = next, addr < end);
+
+ if (CONFIG_PGTABLE_LEVELS <= 3)
+ return;
+
+ if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
+ return;
+
+ /*
+	 * Check whether the pud page can be freed: the entries
+	 * outside our range must be empty too. Overlap with other
+	 * regions has already been handled by the floor/ceiling check.
+ */
+ pudp = pud_offset(p4dp, 0UL);
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ if (!pud_none(READ_ONCE(pudp[i])))
+ return;
+ }
+
+ p4d_clear(p4dp);
+ __flush_tlb_kernel_pgtable(start);
+ free_hotplug_pgtable_page(virt_to_page(pudp));
+}
+
+static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
+ unsigned long end, unsigned long floor,
+ unsigned long ceiling)
+{
+ unsigned long next;
+ p4d_t *p4dp, p4d;
+
+ do {
+ next = p4d_addr_end(addr, end);
+ p4dp = p4d_offset(pgdp, addr);
+ p4d = READ_ONCE(*p4dp);
+ if (p4d_none(p4d))
+ continue;
+
+ WARN_ON(!p4d_present(p4d));
+ free_empty_pud_table(p4dp, addr, next, floor, ceiling);
+ } while (addr = next, addr < end);
+}
+
+static void free_empty_tables(unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+ unsigned long next;
+ pgd_t *pgdp, pgd;
+
+ do {
+ next = pgd_addr_end(addr, end);
+ pgdp = pgd_offset_k(addr);
+ pgd = READ_ONCE(*pgdp);
+ if (pgd_none(pgd))
+ continue;
+
+ WARN_ON(!pgd_present(pgd));
+ free_empty_p4d_table(pgdp, addr, next, floor, ceiling);
+ } while (addr = next, addr < end);
+}
+#endif
+
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
@@ -771,6 +1078,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{
+#ifdef CONFIG_MEMORY_HOTPLUG
+ WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
+
+ unmap_hotplug_range(start, end, true);
+ free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END);
+#endif
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
@@ -1049,10 +1362,21 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
}
#ifdef CONFIG_MEMORY_HOTPLUG
+static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
+{
+ unsigned long end = start + size;
+
+ WARN_ON(pgdir != init_mm.pgd);
+ WARN_ON((start < PAGE_OFFSET) || (end > PAGE_END));
+
+ unmap_hotplug_range(start, end, false);
+ free_empty_tables(start, end, PAGE_OFFSET, PAGE_END);
+}
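[Editor's annotation] Note the asymmetry in the free_mapped argument between the two callers of unmap_hotplug_range(). In vmemmap_free() it is true: the struct-page backing pages were allocated by the kernel at hot-add time and must be handed back here. In __remove_pgd_mapping() it is false: the linear map's target pages are the hot-removed memory itself, whose lifetime belongs to the hotplug core. Summary, not patch code:

	unmap_hotplug_range(start, end, true);	/* vmemmap: also free backing pages */
	unmap_hotplug_range(start, end, false);	/* linear map: tear down mapping only */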
+
int arch_add_memory(int nid, u64 start, u64 size,
struct mhp_restrictions *restrictions)
{
- int flags = 0;
+ int ret, flags = 0;
if (rodata_full || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
@@ -1062,22 +1386,59 @@ int arch_add_memory(int nid, u64 start, u64 size,
memblock_clear_nomap(start, size);
- return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
+ ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
restrictions);
+ if (ret)
+ __remove_pgd_mapping(swapper_pg_dir,
+ __phys_to_virt(start), size);
+ return ret;
}
+
void arch_remove_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- /*
- * FIXME: Cleanup page tables (also in arch_add_memory() in case
- * adding fails). Until then, this function should only be used
- * during memory hotplug (adding memory), not for memory
- * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be
- * unlocked yet.
- */
__remove_pages(start_pfn, nr_pages, altmap);
+ __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size);
+}
+
+/*
+ * This memory hotplug notifier helps prevent boot memory from being
+ * inadvertently removed: it blocks the pfn range offlining process in
+ * __offline_pages(), which in turn prevents both the offlining and the
+ * removal of boot memory, since boot memory is always online initially.
+ * In the future, if and when boot memory becomes removable, this
+ * notifier should be dropped and free_hotplug_page_range() should
+ * handle any reserved pages allocated during boot.
+ */
+static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct mem_section *ms;
+ struct memory_notify *arg = data;
+ unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
+ unsigned long pfn = arg->start_pfn;
+
+ if (action != MEM_GOING_OFFLINE)
+ return NOTIFY_OK;
+
+ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ ms = __pfn_to_section(pfn);
+ if (early_section(ms))
+ return NOTIFY_BAD;
+ }
+ return NOTIFY_OK;
+}
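[Editor's annotation] early_section() distinguishes boot-time sections from hot-added ones via a flag recorded when sparse_init() creates the section. For reference, the generic definition is along these lines (quoted from the mm headers of this era; verify against include/linux/mmzone.h):

	static inline int early_section(struct mem_section *section)
	{
		return (section->section_mem_map & SECTION_IS_EARLY);
	}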
+
+static struct notifier_block prevent_bootmem_remove_nb = {
+ .notifier_call = prevent_bootmem_remove_notifier,
+};
+
+static int __init prevent_bootmem_remove_init(void)
+{
+ return register_memory_notifier(&prevent_bootmem_remove_nb);
}
+device_initcall(prevent_bootmem_remove_init);
#endif
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 7103027b4e64..1b871f141eb4 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -143,34 +143,6 @@ SYM_FUNC_END(cpu_do_resume)
.popsection
#endif
-/*
- * cpu_do_switch_mm(pgd_phys, tsk)
- *
- * Set the translation table base pointer to be pgd_phys.
- *
- * - pgd_phys - physical address of new TTB
- */
-SYM_FUNC_START(cpu_do_switch_mm)
- mrs x2, ttbr1_el1
- mmid x1, x1 // get mm->context.id
- phys_to_ttbr x3, x0
-
-alternative_if ARM64_HAS_CNP
- cbz x1, 1f // skip CNP for reserved ASID
- orr x3, x3, #TTBR_CNP_BIT
-1:
-alternative_else_nop_endif
-#ifdef CONFIG_ARM64_SW_TTBR0_PAN
- bfi x3, x1, #48, #16 // set the ASID field in TTBR0
-#endif
- bfi x2, x1, #48, #16 // set the ASID
- msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set)
- isb
- msr ttbr0_el1, x3 // now update TTBR0
- isb
- b post_ttbr_update_workaround // Back to C code...
-SYM_FUNC_END(cpu_do_switch_mm)
-
.pushsection ".idmap.text", "awx"
.macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2
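[Editor's annotation] The deleted cpu_do_switch_mm routine is presumably replaced by a C implementation elsewhere in this series, where sysreg accessors and system_supports_cnp() take over from the hand-written alternatives. A sketch of the C equivalent, reconstructed from the assembly above (helper names such as ASID(), TTBR_CNP_BIT, TTBR_ASID_MASK, FIELD_PREP() from linux/bitfield.h, and post_ttbr_update_workaround() are existing arm64/kernel ones; treat the exact body as illustrative, not the series' actual replacement):

	void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm)
	{
		unsigned long ttbr1 = read_sysreg(ttbr1_el1);
		unsigned long asid = ASID(mm);
		unsigned long ttbr0 = phys_to_ttbr(pgd_phys);

		/* Skip CNP for the reserved ASID (zero). */
		if (system_supports_cnp() && asid)
			ttbr0 |= TTBR_CNP_BIT;

		/* SW PAN keeps a copy of the ASID in TTBR0 as well. */
		if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN))
			ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid);

		/* The ASID lives in TTBR1, since TCR.A1 is set. */
		ttbr1 &= ~TTBR_ASID_MASK;
		ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid);

		write_sysreg(ttbr1, ttbr1_el1);
		isb();
		write_sysreg(ttbr0, ttbr0_el1);
		isb();
		post_ttbr_update_workaround();
	}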
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 1f2eae3e988b..d29d722ec3ec 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
+#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <asm/ptdump.h>
@@ -7,7 +8,10 @@
static int ptdump_show(struct seq_file *m, void *v)
{
struct ptdump_info *info = m->private;
+
+ get_online_mems();
ptdump_walk(m, info);
+ put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
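[Editor's annotation] The get_online_mems()/put_online_mems() bracket is what makes the new hot-remove path safe against this debugfs walker: free_empty_tables() can free intermediate table pages, and ptdump must not be walking them when that happens. Any other ad-hoc walker of init_mm's tables would need the same pattern (illustrative; walk_my_tables() is a hypothetical stand-in):

	get_online_mems();	/* holds off offlining and arch_remove_memory() */
	walk_my_tables(init_mm.pgd);
	put_online_mems();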