diff options
Diffstat (limited to 'arch/riscv/kernel')
72 files changed, 2963 insertions, 1213 deletions
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 604d6bf7e476..8d186bfced45 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -20,17 +20,26 @@ endif ifdef CONFIG_RISCV_ALTERNATIVE_EARLY CFLAGS_alternative.o := -mcmodel=medany CFLAGS_cpufeature.o := -mcmodel=medany +CFLAGS_sbi_ecall.o := -mcmodel=medany ifdef CONFIG_FTRACE CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_sbi_ecall.o = $(CC_FLAGS_FTRACE) endif ifdef CONFIG_RELOCATABLE CFLAGS_alternative.o += -fno-pie CFLAGS_cpufeature.o += -fno-pie +CFLAGS_sbi_ecall.o += -fno-pie endif ifdef CONFIG_KASAN KASAN_SANITIZE_alternative.o := n KASAN_SANITIZE_cpufeature.o := n +KASAN_SANITIZE_sbi_ecall.o := n +endif +ifdef CONFIG_FORTIFY_SOURCE +CFLAGS_alternative.o += -D__NO_FORTIFY +CFLAGS_cpufeature.o += -D__NO_FORTIFY +CFLAGS_sbi_ecall.o += -D__NO_FORTIFY endif endif @@ -39,7 +48,6 @@ extra-y += vmlinux.lds obj-y += head.o obj-y += soc.o obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o -obj-y += copy-unaligned.o obj-y += cpu.o obj-y += cpufeature.o obj-y += entry.o @@ -59,12 +67,19 @@ obj-y += riscv_ksyms.o obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o +obj-y += vendor_extensions.o +obj-y += vendor_extensions/ obj-y += probes/ obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o +obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o +obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o +obj-$(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS) += vec-copy-unaligned.o + obj-$(CONFIG_FPU) += fpu.o +obj-$(CONFIG_FPU) += kernel_mode_fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o obj-$(CONFIG_SMP) += smpboot.o @@ -83,7 +98,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o -obj-$(CONFIG_RISCV_SBI) += sbi.o +obj-$(CONFIG_RISCV_SBI) += sbi.o sbi_ecall.o ifeq ($(CONFIG_RISCV_SBI), y) obj-$(CONFIG_SMP) += sbi-ipi.o obj-$(CONFIG_SMP) += cpu_ops_sbi.o @@ -94,7 +109,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -obj-$(CONFIG_CRASH_CORE) += crash_core.o +obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o @@ -107,3 +122,6 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o +obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o + +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o diff --git a/arch/riscv/kernel/Makefile.syscalls b/arch/riscv/kernel/Makefile.syscalls new file mode 100644 index 000000000000..9668fd1faf60 --- /dev/null +++ b/arch/riscv/kernel/Makefile.syscalls @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +syscall_abis_32 += riscv memfd_secret +syscall_abis_64 += riscv rlimit memfd_secret diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index e619edc8b0cc..2fd29695a788 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -17,7 +17,9 @@ #include <linux/efi.h> #include <linux/io.h> #include <linux/memblock.h> +#include <linux/of_fdt.h> #include <linux/pci.h> +#include <linux/serial_core.h> int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -131,7 +133,7 @@ void __init acpi_boot_table_init(void) if (param_acpi_off || (!param_acpi_on && !param_acpi_force && efi.acpi20 == EFI_INVALID_TABLE_ADDR)) - return; + goto done; /* * ACPI is disabled at this point. Enable it in order to parse @@ -151,6 +153,14 @@ void __init acpi_boot_table_init(void) if (!param_acpi_force) disable_acpi(); } + +done: + if (acpi_disabled) { + if (earlycon_acpi_spcr_enable) + early_init_dt_scan_chosen_stdout(); + } else { + acpi_parse_spcr(earlycon_acpi_spcr_enable, true); + } } static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end) @@ -191,11 +201,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) return &cpu_madt_rintc[cpu]; } -u32 get_acpi_id_for_cpu(int cpu) -{ - return acpi_cpu_get_madt_rintc(cpu)->uid; -} - /* * __acpi_map_table() will be called before paging_init(), so early_ioremap() * or early_memremap() should be called here to for ACPI table mapping. @@ -205,7 +210,7 @@ void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size) if (!size) return NULL; - return early_ioremap(phys, size); + return early_memremap(phys, size); } void __init __acpi_unmap_table(void __iomem *map, unsigned long size) @@ -213,7 +218,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) if (!map || !size) return; - early_iounmap(map, size); + early_memunmap(map, size); } void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) @@ -306,29 +311,26 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) #ifdef CONFIG_PCI /* - * These interfaces are defined just to enable building ACPI core. - * TODO: Update it with actual implementation when external interrupt - * controller support is added in RISC-V ACPI. + * raw_pci_read/write - Platform-specific PCI config space access. */ -int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 *val) +int raw_pci_read(unsigned int domain, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *val) { - return PCIBIOS_DEVICE_NOT_FOUND; -} + struct pci_bus *b = pci_find_bus(domain, bus); -int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 val) -{ - return PCIBIOS_DEVICE_NOT_FOUND; + if (!b) + return PCIBIOS_DEVICE_NOT_FOUND; + return b->ops->read(b, devfn, reg, len, val); } -int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) +int raw_pci_write(unsigned int domain, unsigned int bus, + unsigned int devfn, int reg, int len, u32 val) { - return -1; -} + struct pci_bus *b = pci_find_bus(domain, bus); -struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) -{ - return NULL; + if (!b) + return PCIBIOS_DEVICE_NOT_FOUND; + return b->ops->write(b, devfn, reg, len, val); } + #endif /* CONFIG_PCI */ diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c new file mode 100644 index 000000000000..130769e3a99c --- /dev/null +++ b/arch/riscv/kernel/acpi_numa.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACPI 6.6 based NUMA setup for RISCV + * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2016, Linaro Ltd. + * Author: Hanjun Guo <hanjun.guo@linaro.org> + * Copyright (C) 2024 Intel Corporation. + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: NUMA: " fmt + +#include <linux/acpi.h> +#include <linux/bitmap.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/topology.h> + +#include <asm/numa.h> + +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; + +static int __init acpi_numa_get_nid(unsigned int cpu) +{ + return acpi_early_node_map[cpu]; +} + +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + +static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_rintc_affinity *pa; + int cpu, pxm, node; + + if (srat_disabled()) + return -EINVAL; + + pa = (struct acpi_srat_rintc_affinity *)header; + if (!pa) + return -EINVAL; + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return 0; + + pxm = pa->proximity_domain; + node = pxm_to_node(pxm); + + /* + * If we can't map the UID to a logical cpu this + * means that the UID is not part of possible cpus + * so we do not need a NUMA mapping for it, skip + * the SRAT entry and keep parsing. + */ + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); + if (cpu < 0) + return 0; + + acpi_early_node_map[cpu] = node; + pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm, + cpuid_to_hartid_map(cpu), node); + + return 0; +} + +void __init acpi_map_cpus_to_nodes(void) +{ + int i; + + /* + * In ACPI, SMP and CPU NUMA information is provided in separate + * static tables, namely the MADT and the SRAT. + * + * Thus, it is simpler to first create the cpu logical map through + * an MADT walk and then map the logical cpus to their node ids + * as separate steps. + */ + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_RINTC_AFFINITY, acpi_parse_rintc_pxm, 0); + + for (i = 0; i < nr_cpu_ids; i++) + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); +} + +/* Callback for Proximity Domain -> logical node ID mapping */ +void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + + if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) { + pr_err("SRAT: Invalid SRAT header length: %d\n", pa->header.length); + bad_srat(); + return; + } + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return; + + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE) { + pr_err("SRAT: Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + node_set(node, numa_nodes_parsed); +} diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c index 319a1da0358b..7eb3cb1215c6 100644 --- a/arch/riscv/kernel/alternative.c +++ b/arch/riscv/kernel/alternative.c @@ -18,7 +18,7 @@ #include <asm/sbi.h> #include <asm/csr.h> #include <asm/insn.h> -#include <asm/patch.h> +#include <asm/text-patching.h> struct cpu_manufacturer_info_t { unsigned long vendor_id; @@ -43,7 +43,7 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info switch (cpu_mfr_info->vendor_id) { #ifdef CONFIG_ERRATA_ANDES - case ANDESTECH_VENDOR_ID: + case ANDES_VENDOR_ID: cpu_mfr_info->patch_func = andes_errata_patch_func; break; #endif diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index a03129f40c46..e89455a6a0e5 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -4,11 +4,10 @@ * Copyright (C) 2017 SiFive */ -#define GENERATING_ASM_OFFSETS - #include <linux/kbuild.h> #include <linux/mm.h> #include <linux/sched.h> +#include <linux/ftrace.h> #include <linux/suspend.h> #include <asm/kvm_host.h> #include <asm/thread_info.h> @@ -35,6 +34,8 @@ void asm_offsets(void) OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]); OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]); OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); + + OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); @@ -42,6 +43,11 @@ void asm_offsets(void) #ifdef CONFIG_SHADOW_CALL_STACK OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp); #endif +#ifdef CONFIG_64BIT + OFFSET(TASK_TI_A0, task_struct, thread_info.a0); + OFFSET(TASK_TI_A1, task_struct, thread_info.a1); + OFFSET(TASK_TI_A2, task_struct, thread_info.a2); +#endif OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); @@ -488,4 +494,21 @@ void asm_offsets(void) DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); OFFSET(STACKFRAME_FP, stackframe, fp); OFFSET(STACKFRAME_RA, stackframe, ra); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS + DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN)); + DEFINE(FREGS_EPC, offsetof(struct __arch_ftrace_regs, epc)); + DEFINE(FREGS_RA, offsetof(struct __arch_ftrace_regs, ra)); + DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); + DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0)); + DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1)); + DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0)); + DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1)); + DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2)); + DEFINE(FREGS_A3, offsetof(struct __arch_ftrace_regs, a3)); + DEFINE(FREGS_A4, offsetof(struct __arch_ftrace_regs, a4)); + DEFINE(FREGS_A5, offsetof(struct __arch_ftrace_regs, a5)); + DEFINE(FREGS_A6, offsetof(struct __arch_ftrace_regs, a6)); + DEFINE(FREGS_A7, offsetof(struct __arch_ftrace_regs, a7)); +#endif } diff --git a/arch/riscv/kernel/bugs.c b/arch/riscv/kernel/bugs.c new file mode 100644 index 000000000000..3655fe7d678c --- /dev/null +++ b/arch/riscv/kernel/bugs.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/sprintf.h> + +#include <asm/bugs.h> +#include <asm/vendor_extensions/thead.h> + +static enum mitigation_state ghostwrite_state; + +void ghostwrite_set_vulnerable(void) +{ + ghostwrite_state = VULNERABLE; +} + +/* + * Vendor extension alternatives will use the value set at the time of boot + * alternative patching, thus this must be called before boot alternatives are + * patched (and after extension probing) to be effective. + * + * Returns true if mitgated, false otherwise. + */ +bool ghostwrite_enable_mitigation(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR) && + ghostwrite_state == VULNERABLE && !cpu_mitigations_off()) { + disable_xtheadvector(); + ghostwrite_state = MITIGATED; + return true; + } + + return false; +} + +enum mitigation_state ghostwrite_get_state(void) +{ + return ghostwrite_state; +} + +ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) { + switch (ghostwrite_state) { + case UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case MITIGATED: + return sprintf(buf, "Mitigation: xtheadvector disabled\n"); + case VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } + } else { + return sprintf(buf, "Not affected\n"); + } +} diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 09e9b88110d1..26b085dbdd07 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -3,6 +3,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/acpi.h> #include <linux/cpu.h> #include <linux/of.h> #include <asm/cacheinfo.h> @@ -64,27 +65,55 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type) } static void ci_leaf_init(struct cacheinfo *this_leaf, - struct device_node *node, enum cache_type type, unsigned int level) { this_leaf->level = level; this_leaf->type = type; } +int init_cache_level(unsigned int cpu) +{ + return init_of_cache_level(cpu); +} + int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; - struct device_node *np = of_cpu_device_node_get(cpu); - struct device_node *prev = NULL; + struct device_node *np, *prev; int levels = 1, level = 1; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + if (!acpi_disabled) { + int ret, fw_levels, split_levels; + + ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels); + if (ret) + return ret; + + BUG_ON((split_levels > fw_levels) || + (split_levels + fw_levels > this_cpu_ci->num_leaves)); + + for (; level <= this_cpu_ci->num_levels; level++) { + if (level <= split_levels) { + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + } else { + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + } + } + return 0; + } + + np = of_cpu_device_node_get(cpu); + if (!np) + return -ENOENT; + + if (of_property_present(np, "cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + if (of_property_present(np, "i-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (of_property_present(np, "d-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); prev = np; while ((np = of_find_next_cache_node(np))) { @@ -96,12 +125,12 @@ int populate_cache_leaves(unsigned int cpu) break; if (level <= levels) break; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + if (of_property_present(np, "cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + if (of_property_present(np, "i-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (of_property_present(np, "d-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); levels = level; } of_node_put(np); diff --git a/arch/riscv/kernel/cfi.c b/arch/riscv/kernel/cfi.c index 6ec9dbd7292e..64bdd3e1ab8c 100644 --- a/arch/riscv/kernel/cfi.c +++ b/arch/riscv/kernel/cfi.c @@ -75,3 +75,56 @@ enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) return report_cfi_failure(regs, regs->epc, &target, type); } + +#ifdef CONFIG_CFI_CLANG +struct bpf_insn; + +/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */ +extern unsigned int __bpf_prog_runX(const void *ctx, + const struct bpf_insn *insn); + +/* + * Force a reference to the external symbol so the compiler generates + * __kcfi_typid. + */ +__ADDRESSABLE(__bpf_prog_runX); + +/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */ +asm ( +" .pushsection .data..ro_after_init,\"aw\",@progbits \n" +" .type cfi_bpf_hash,@object \n" +" .globl cfi_bpf_hash \n" +" .p2align 2, 0x0 \n" +"cfi_bpf_hash: \n" +" .word __kcfi_typeid___bpf_prog_runX \n" +" .size cfi_bpf_hash, 4 \n" +" .popsection \n" +); + +/* Must match bpf_callback_t */ +extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64); + +__ADDRESSABLE(__bpf_callback_fn); + +/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */ +asm ( +" .pushsection .data..ro_after_init,\"aw\",@progbits \n" +" .type cfi_bpf_subprog_hash,@object \n" +" .globl cfi_bpf_subprog_hash \n" +" .p2align 2, 0x0 \n" +"cfi_bpf_subprog_hash: \n" +" .word __kcfi_typeid___bpf_callback_fn \n" +" .size cfi_bpf_subprog_hash, 4 \n" +" .popsection \n" +); + +u32 cfi_get_func_hash(void *func) +{ + u32 hash; + + if (get_kernel_nofault(hash, func - cfi_get_offset())) + return 0; + + return hash; +} +#endif diff --git a/arch/riscv/kernel/compat_syscall_table.c b/arch/riscv/kernel/compat_syscall_table.c index ad7f2d712f5f..e884c069e88f 100644 --- a/arch/riscv/kernel/compat_syscall_table.c +++ b/arch/riscv/kernel/compat_syscall_table.c @@ -8,9 +8,11 @@ #include <asm-generic/syscalls.h> #include <asm/syscall.h> +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) + #undef __SYSCALL #define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *); -#include <asm/unistd.h> +#include <asm/syscall_table_32.h> #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = __riscv_##call, @@ -19,5 +21,5 @@ asmlinkage long compat_sys_rt_sigreturn(void); void * const compat_sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall, -#include <asm/unistd.h> +#include <asm/syscall_table_32.h> }; diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 62fa393b2eb2..24e37d1ef7ec 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -34,12 +34,6 @@ obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso)) obj-y += compat_vdso.o CPPFLAGS_compat_vdso.lds += -P -C -DCOMPAT_VDSO -U$(ARCH) -# Disable profiling and instrumentation for VDSO code -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n - # Force dependency $(obj)/compat_vdso.o: $(obj)/compat_vdso.so @@ -58,7 +52,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # Generate VDSO offsets using helper script -gen-compat_vdsosym := $(srctree)/$(src)/gen_compat_vdso_offsets.sh +gen-compat_vdsosym := $(src)/gen_compat_vdso_offsets.sh quiet_cmd_compat_vdsosym = VDSOSYM $@ cmd_compat_vdsosym = $(NM) $< | $(gen-compat_vdsosym) | LC_ALL=C sort > $@ @@ -74,5 +68,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@ rm $@.tmp # actual build commands -quiet_cmd_compat_vdsoas = VDSOAS $@ +quiet_cmd_compat_vdsoas = VDSOAS $@ cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< diff --git a/arch/riscv/kernel/copy-unaligned.h b/arch/riscv/kernel/copy-unaligned.h index e3d70d35b708..85d4d11450cb 100644 --- a/arch/riscv/kernel/copy-unaligned.h +++ b/arch/riscv/kernel/copy-unaligned.h @@ -10,4 +10,9 @@ void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size); void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size); +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +void __riscv_copy_vec_words_unaligned(void *dst, const void *src, size_t size); +void __riscv_copy_vec_bytes_unaligned(void *dst, const void *src, size_t size); +#endif + #endif /* __RISCV_KERNEL_COPY_UNALIGNED_H */ diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index 28b58fc5ad19..a1e38ecfc8be 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -58,7 +58,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) if (cpu_ops->cpu_is_stopped) ret = cpu_ops->cpu_is_stopped(cpu); if (ret) - pr_warn("CPU%d may not have stopped: %d\n", cpu, ret); + pr_warn("CPU%u may not have stopped: %d\n", cpu, ret); } /* diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index d11d6320fb0d..f6b13e9f5e6c 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -16,6 +16,7 @@ #include <asm/sbi.h> #include <asm/smp.h> #include <asm/pgtable.h> +#include <asm/vendor_extensions.h> bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { @@ -139,6 +140,34 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) return -1; } +unsigned long __init riscv_get_marchid(void) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->marchid = csr_read(CSR_MARCHID); +#else + ci->marchid = 0; +#endif + return ci->marchid; +} + +unsigned long __init riscv_get_mvendorid(void) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->mvendorid = csr_read(CSR_MVENDORID); +#else + ci->mvendorid = 0; +#endif + return ci->mvendorid; +} + DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); unsigned long riscv_cached_mvendorid(unsigned int cpu_id) @@ -170,12 +199,16 @@ static int riscv_cpuinfo_starting(unsigned int cpu) struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); #if IS_ENABLED(CONFIG_RISCV_SBI) - ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); - ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); + if (!ci->mvendorid) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); + if (!ci->marchid) + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); ci->mimpid = sbi_spec_is_0_1() ? 0 : sbi_get_mimpid(); #elif IS_ENABLED(CONFIG_RISCV_M_MODE) - ci->mvendorid = csr_read(CSR_MVENDORID); - ci->marchid = csr_read(CSR_MARCHID); + if (!ci->mvendorid) + ci->mvendorid = csr_read(CSR_MVENDORID); + if (!ci->marchid) + ci->marchid = csr_read(CSR_MARCHID); ci->mimpid = csr_read(CSR_MIMPID); #else ci->mvendorid = 0; @@ -203,7 +236,33 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) +#define ALL_CPUS -1 + +static void print_vendor_isa(struct seq_file *f, int cpu) +{ + struct riscv_isavendorinfo *vendor_bitmap; + struct riscv_isa_vendor_ext_data_list *ext_list; + const struct riscv_isa_ext_data *ext_data; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + ext_list = riscv_isa_vendor_ext_list[i]; + ext_data = riscv_isa_vendor_ext_list[i]->ext_data; + + if (cpu == ALL_CPUS) + vendor_bitmap = &ext_list->all_harts_isa_bitmap; + else + vendor_bitmap = &ext_list->per_hart_isa_bitmap[cpu]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + if (!__riscv_isa_extension_available(vendor_bitmap->isa, ext_data[j].id)) + continue; + + seq_printf(f, "_%s", ext_data[j].name); + } + } +} + +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap, int cpu) { if (IS_ENABLED(CONFIG_32BIT)) @@ -222,6 +281,8 @@ static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) seq_printf(f, "%s", riscv_isa_ext[i].name); } + print_vendor_isa(f, cpu); + seq_puts(f, "\n"); } @@ -284,7 +345,7 @@ static int c_show(struct seq_file *m, void *v) * line. */ seq_puts(m, "isa\t\t: "); - print_isa(m, NULL); + print_isa(m, NULL, ALL_CPUS); print_mmu(m); if (acpi_disabled) { @@ -306,7 +367,7 @@ static int c_show(struct seq_file *m, void *v) * additional extensions not present across all harts. */ seq_puts(m, "hart isa\t: "); - print_isa(m, hart_isa[cpu_id].isa); + print_isa(m, hart_isa[cpu_id].isa, cpu_id); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index 1cc7df740edd..e6fbaaf54956 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -72,7 +72,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) /* Make sure tidle is updated */ smp_mb(); bdata->task_ptr = tidle; - bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE; + bdata->stack_ptr = task_pt_regs(tidle); /* Make sure boot data is updated */ smp_mb(); hsm_data = __pa(bdata); diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c index 613872b0a21a..24869eb88908 100644 --- a/arch/riscv/kernel/cpu_ops_spinwait.c +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid, /* Make sure tidle is updated */ smp_mb(); - WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], - task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle)); WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); } diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 79a5a35fab96..40ac72e407b6 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -11,30 +11,27 @@ #include <linux/cpu.h> #include <linux/cpuhotplug.h> #include <linux/ctype.h> -#include <linux/jump_label.h> #include <linux/log2.h> #include <linux/memory.h> #include <linux/module.h> #include <linux/of.h> #include <asm/acpi.h> #include <asm/alternative.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwcap.h> +#include <asm/text-patching.h> #include <asm/hwprobe.h> -#include <asm/patch.h> #include <asm/processor.h> #include <asm/sbi.h> #include <asm/vector.h> - -#include "copy-unaligned.h" +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) -#define MISALIGNED_ACCESS_JIFFIES_LG2 1 -#define MISALIGNED_BUFFER_SIZE 0x4000 -#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) -#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) +static bool any_cpu_has_zicboz; unsigned long elf_hwcap __read_mostly; @@ -44,10 +41,7 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. */ struct riscv_isainfo hart_isa[NR_CPUS]; -/* Performance information */ -DEFINE_PER_CPU(long, misaligned_access_speed); - -static cpumask_t fast_misaligned_access; +u32 thead_vlenb_of; /** * riscv_isa_extension_base() - Get base extension word @@ -86,51 +80,75 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned i } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); -static bool riscv_isa_extension_check(int id) +static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) { - switch (id) { - case RISCV_ISA_EXT_ZICBOM: - if (!riscv_cbom_block_size) { - pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n"); - return false; - } else if (!is_power_of_2(riscv_cbom_block_size)) { - pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); - return false; - } - return true; - case RISCV_ISA_EXT_ZICBOZ: - if (!riscv_cboz_block_size) { - pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n"); - return false; - } else if (!is_power_of_2(riscv_cboz_block_size)) { - pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); - return false; - } - return true; - case RISCV_ISA_EXT_INVALID: - return false; + if (!riscv_cbom_block_size) { + pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n"); + return -EINVAL; } + if (!is_power_of_2(riscv_cbom_block_size)) { + pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); + return -EINVAL; + } + return 0; +} + +static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!riscv_cboz_block_size) { + pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n"); + return -EINVAL; + } + if (!is_power_of_2(riscv_cboz_block_size)) { + pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); + return -EINVAL; + } + any_cpu_has_zicboz = true; + return 0; +} - return true; +static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA)) + return 0; + + return -EPROBE_DEFER; } +static int riscv_ext_zcd_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) + return 0; -#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size) { \ - .name = #_name, \ - .property = #_name, \ - .id = _id, \ - .subset_ext_ids = _subset_exts, \ - .subset_ext_size = _subset_exts_size \ + return -EPROBE_DEFER; } -#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0) +static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return -EINVAL; -/* Used to declare pure "lasso" extension (Zk for instance) */ -#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ - _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, ARRAY_SIZE(_bundled_exts)) + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f)) + return 0; -/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ -#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ - _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts)) + return -EPROBE_DEFER; +} + +static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + /* SVADE has already been detected, use SVADE only */ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_SVADE)) + return -EOPNOTSUPP; + + return 0; +} static const unsigned int riscv_zk_bundled_exts[] = { RISCV_ISA_EXT_ZBKB, @@ -202,6 +220,40 @@ static const unsigned int riscv_zvbb_exts[] = { RISCV_ISA_EXT_ZVKB }; +#define RISCV_ISA_EXT_ZVE64F_IMPLY_LIST \ + RISCV_ISA_EXT_ZVE64X, \ + RISCV_ISA_EXT_ZVE32F, \ + RISCV_ISA_EXT_ZVE32X + +#define RISCV_ISA_EXT_ZVE64D_IMPLY_LIST \ + RISCV_ISA_EXT_ZVE64F, \ + RISCV_ISA_EXT_ZVE64F_IMPLY_LIST + +#define RISCV_ISA_EXT_V_IMPLY_LIST \ + RISCV_ISA_EXT_ZVE64D, \ + RISCV_ISA_EXT_ZVE64D_IMPLY_LIST + +static const unsigned int riscv_zve32f_exts[] = { + RISCV_ISA_EXT_ZVE32X +}; + +static const unsigned int riscv_zve64f_exts[] = { + RISCV_ISA_EXT_ZVE64F_IMPLY_LIST +}; + +static const unsigned int riscv_zve64d_exts[] = { + RISCV_ISA_EXT_ZVE64D_IMPLY_LIST +}; + +static const unsigned int riscv_v_exts[] = { + RISCV_ISA_EXT_V_IMPLY_LIST +}; + +static const unsigned int riscv_zve64x_exts[] = { + RISCV_ISA_EXT_ZVE32X, + RISCV_ISA_EXT_ZVE64X +}; + /* * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V * privileged ISA, the existence of the CSRs is implied by any extension which @@ -213,6 +265,21 @@ static const unsigned int riscv_xlinuxenvcfg_exts[] = { }; /* + * Zc* spec states that: + * - C always implies Zca + * - C+F implies Zcf (RV32 only) + * - C+D implies Zcd + * + * These extensions will be enabled and then validated depending on the + * availability of F/D RV32. + */ +static const unsigned int riscv_c_exts[] = { + RISCV_ISA_EXT_ZCA, + RISCV_ISA_EXT_ZCF, + RISCV_ISA_EXT_ZCD, +}; + +/* * The canonical order of ISA extension names in the ISA string is defined in * chapter 27 of the unprivileged specification. * @@ -258,11 +325,14 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f), __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d), __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q), - __RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c), - __RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v), + __RISCV_ISA_EXT_SUPERSET(c, RISCV_ISA_EXT_c, riscv_c_exts), + __RISCV_ISA_EXT_SUPERSET(v, RISCV_ISA_EXT_v, riscv_v_exts), __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), - __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts), - __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, + riscv_ext_zicbom_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, + riscv_ext_zicboz_validate), + __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), @@ -270,10 +340,18 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zihintntl, RISCV_ISA_EXT_ZIHINTNTL), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), + __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP), + __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), + __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH), __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN), + __RISCV_ISA_EXT_DATA(zca, RISCV_ISA_EXT_ZCA), + __RISCV_ISA_EXT_DATA_VALIDATE(zcb, RISCV_ISA_EXT_ZCB, riscv_ext_zca_depends), + __RISCV_ISA_EXT_DATA_VALIDATE(zcd, RISCV_ISA_EXT_ZCD, riscv_ext_zcd_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zcf, RISCV_ISA_EXT_ZCF, riscv_ext_zcf_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zcmop, RISCV_ISA_EXT_ZCMOP, riscv_ext_zca_depends), __RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA), __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), __RISCV_ISA_EXT_DATA(zbc, RISCV_ISA_EXT_ZBC), @@ -294,6 +372,11 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(ztso, RISCV_ISA_EXT_ZTSO), __RISCV_ISA_EXT_SUPERSET(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts), __RISCV_ISA_EXT_DATA(zvbc, RISCV_ISA_EXT_ZVBC), + __RISCV_ISA_EXT_SUPERSET(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts), + __RISCV_ISA_EXT_DATA(zve32x, RISCV_ISA_EXT_ZVE32X), + __RISCV_ISA_EXT_SUPERSET(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts), + __RISCV_ISA_EXT_SUPERSET(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts), + __RISCV_ISA_EXT_SUPERSET(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts), __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH), __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN), __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB), @@ -311,44 +394,108 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts), __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT), __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), + __RISCV_ISA_EXT_DATA(smmpm, RISCV_ISA_EXT_SMMPM), + __RISCV_ISA_EXT_SUPERSET(smnpm, RISCV_ISA_EXT_SMNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(smstateen, RISCV_ISA_EXT_SMSTATEEN), __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_SUPERSET(ssnpm, RISCV_ISA_EXT_SSNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svade, RISCV_ISA_EXT_SVADE), + __RISCV_ISA_EXT_DATA_VALIDATE(svadu, RISCV_ISA_EXT_SVADU, riscv_ext_svadu_validate), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), + __RISCV_ISA_EXT_DATA(svvptc, RISCV_ISA_EXT_SVVPTC), }; const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext); -static void __init match_isa_ext(const struct riscv_isa_ext_data *ext, const char *name, - const char *name_end, struct riscv_isainfo *isainfo) +static void riscv_isa_set_ext(const struct riscv_isa_ext_data *ext, unsigned long *bitmap) { - if ((name_end - name == strlen(ext->name)) && - !strncasecmp(name, ext->name, name_end - name)) { - /* - * If this is a bundle, enable all the ISA extensions that - * comprise the bundle. - */ - if (ext->subset_ext_size) { - for (int i = 0; i < ext->subset_ext_size; i++) { - if (riscv_isa_extension_check(ext->subset_ext_ids[i])) - set_bit(ext->subset_ext_ids[i], isainfo->isa); + if (ext->id != RISCV_ISA_EXT_INVALID) + set_bit(ext->id, bitmap); + + for (int i = 0; i < ext->subset_ext_size; i++) { + if (ext->subset_ext_ids[i] != RISCV_ISA_EXT_INVALID) + set_bit(ext->subset_ext_ids[i], bitmap); + } +} + +static const struct riscv_isa_ext_data *riscv_get_isa_ext_data(unsigned int ext_id) +{ + for (int i = 0; i < riscv_isa_ext_count; i++) { + if (riscv_isa_ext[i].id == ext_id) + return &riscv_isa_ext[i]; + } + + return NULL; +} + +/* + * "Resolve" a source ISA bitmap into one that matches kernel configuration as + * well as correct extension dependencies. Some extensions depends on specific + * kernel configuration to be usable (V needs CONFIG_RISCV_ISA_V for instance) + * and this function will actually validate all the extensions provided in + * source_isa into the resolved_isa based on extensions validate() callbacks. + */ +static void __init riscv_resolve_isa(unsigned long *source_isa, + unsigned long *resolved_isa, unsigned long *this_hwcap, + unsigned long *isa2hwcap) +{ + bool loop; + const struct riscv_isa_ext_data *ext; + DECLARE_BITMAP(prev_resolved_isa, RISCV_ISA_EXT_MAX); + int max_loop_count = riscv_isa_ext_count, ret; + unsigned int bit; + + do { + loop = false; + if (max_loop_count-- < 0) { + pr_err("Failed to reach a stable ISA state\n"); + return; + } + bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX); + for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) { + ext = riscv_get_isa_ext_data(bit); + + if (ext && ext->validate) { + ret = ext->validate(ext, resolved_isa); + if (ret == -EPROBE_DEFER) { + loop = true; + continue; + } else if (ret) { + /* Disable the extension entirely */ + clear_bit(bit, source_isa); + continue; + } } + + set_bit(bit, resolved_isa); + /* No need to keep it in source isa now that it is enabled */ + clear_bit(bit, source_isa); + + /* Single letter extensions get set in hwcap */ + if (bit < RISCV_ISA_EXT_BASE) + *this_hwcap |= isa2hwcap[bit]; } + } while (loop && !bitmap_equal(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX)); +} - /* - * This is valid even for bundle extensions which uses the RISCV_ISA_EXT_INVALID id - * (rejected by riscv_isa_extension_check()). - */ - if (riscv_isa_extension_check(ext->id)) - set_bit(ext->id, isainfo->isa); +static void __init match_isa_ext(const char *name, const char *name_end, unsigned long *bitmap) +{ + for (int i = 0; i < riscv_isa_ext_count; i++) { + const struct riscv_isa_ext_data *ext = &riscv_isa_ext[i]; + + if ((name_end - name == strlen(ext->name)) && + !strncasecmp(name, ext->name, name_end - name)) { + riscv_isa_set_ext(ext, bitmap); + break; + } } } -static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct riscv_isainfo *isainfo, - unsigned long *isa2hwcap, const char *isa) +static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap) { /* * For all possible cpus, we have already validated in @@ -361,9 +508,24 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc while (*isa) { const char *ext = isa++; const char *ext_end = isa; - bool ext_long = false, ext_err = false; + bool ext_err = false; switch (*ext) { + case 'x': + case 'X': + if (acpi_disabled) + pr_warn_once("Vendor extensions are ignored in riscv,isa. Use riscv,isa-extensions instead."); + /* + * To skip an extension, we find its end. + * As multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + */ + for (; *isa && *isa != '_'; ++isa) + ; + ext_err = true; + break; case 's': /* * Workaround for invalid single-letter 's' & 'u' (QEMU). @@ -379,8 +541,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc } fallthrough; case 'S': - case 'x': - case 'X': case 'z': case 'Z': /* @@ -401,7 +561,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc * character itself while eliminating the extensions version number. * A simple re-increment solves this problem. */ - ext_long = true; for (; *isa && *isa != '_'; ++isa) if (unlikely(!isalnum(*isa))) ext_err = true; @@ -481,17 +640,8 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc if (unlikely(ext_err)) continue; - if (!ext_long) { - int nr = tolower(*ext) - 'a'; - if (riscv_isa_extension_check(nr)) { - *this_hwcap |= isa2hwcap[nr]; - set_bit(nr, isainfo->isa); - } - } else { - for (int i = 0; i < riscv_isa_ext_count; i++) - match_isa_ext(&riscv_isa_ext[i], ext, ext_end, isainfo); - } + match_isa_ext(ext, ext_end, bitmap); } } @@ -503,6 +653,8 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) struct acpi_table_header *rhct; acpi_status status; unsigned int cpu; + u64 boot_vendorid; + u64 boot_archid; if (!acpi_disabled) { status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); @@ -510,9 +662,13 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) return; } + boot_vendorid = riscv_get_mvendorid(); + boot_archid = riscv_get_marchid(); + for_each_possible_cpu(cpu) { struct riscv_isainfo *isainfo = &hart_isa[cpu]; unsigned long this_hwcap = 0; + DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 }; if (acpi_disabled) { node = of_cpu_device_node_get(cpu); @@ -535,7 +691,7 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) } } - riscv_parse_isa_string(&this_hwcap, isainfo, isa2hwcap, isa); + riscv_parse_isa_string(isa, source_isa); /* * These ones were as they were part of the base ISA when the @@ -543,10 +699,10 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) * unconditionally where `i` is in riscv,isa on DT systems. */ if (acpi_disabled) { - set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZICNTR, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa); + set_bit(RISCV_ISA_EXT_ZICSR, source_isa); + set_bit(RISCV_ISA_EXT_ZIFENCEI, source_isa); + set_bit(RISCV_ISA_EXT_ZICNTR, source_isa); + set_bit(RISCV_ISA_EXT_ZIHPM, source_isa); } /* @@ -557,12 +713,13 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) * CPU cores with the ratified spec will contain non-zero * marchid. */ - if (acpi_disabled && riscv_cached_mvendorid(cpu) == THEAD_VENDOR_ID && - riscv_cached_marchid(cpu) == 0x0) { + if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0) { this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v]; - clear_bit(RISCV_ISA_EXT_v, isainfo->isa); + clear_bit(RISCV_ISA_EXT_v, source_isa); } + riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); + /* * All "okay" hart should have same isa. Set HWCAP based on * common capabilities of every "okay" hart, in case they don't @@ -583,14 +740,111 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) acpi_put_table((struct acpi_table_header *)rhct); } +static void __init riscv_fill_cpu_vendor_ext(struct device_node *cpu_node, int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + const struct riscv_isa_ext_data ext = ext_list->ext_data[j]; + struct riscv_isavendorinfo *isavendorinfo = &ext_list->per_hart_isa_bitmap[cpu]; + + if (of_property_match_string(cpu_node, "riscv,isa-extensions", + ext.property) < 0) + continue; + + /* + * Assume that subset extensions are all members of the + * same vendor. + */ + if (ext.subset_ext_size) + for (int k = 0; k < ext.subset_ext_size; k++) + set_bit(ext.subset_ext_ids[k], isavendorinfo->isa); + + set_bit(ext.id, isavendorinfo->isa); + } + } +} + +/* + * Populate all_harts_isa_bitmap for each vendor with all of the extensions that + * are shared across CPUs for that vendor. + */ +static void __init riscv_fill_vendor_ext_list(int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + if (!ext_list->is_initialized) { + bitmap_copy(ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + ext_list->is_initialized = true; + } else { + bitmap_and(ext_list->all_harts_isa_bitmap.isa, + ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + } + } +} + +static int has_thead_homogeneous_vlenb(void) +{ + int cpu; + u32 prev_vlenb = 0; + u32 vlenb; + + /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return 0; + + for_each_possible_cpu(cpu) { + struct device_node *cpu_node; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) { + pr_warn("Unable to find cpu node\n"); + return -ENOENT; + } + + if (of_property_read_u32(cpu_node, "thead,vlenb", &vlenb)) { + of_node_put(cpu_node); + + if (prev_vlenb) + return -ENOENT; + continue; + } + + if (prev_vlenb && vlenb != prev_vlenb) { + of_node_put(cpu_node); + return -ENOENT; + } + + prev_vlenb = vlenb; + of_node_put(cpu_node); + } + + thead_vlenb_of = vlenb; + return 0; +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; + bool mitigated; for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; struct device_node *cpu_node; struct riscv_isainfo *isainfo = &hart_isa[cpu]; + DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 }; cpu_node = of_cpu_device_node_get(cpu); if (!cpu_node) { @@ -610,22 +864,12 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) ext->property) < 0) continue; - if (ext->subset_ext_size) { - for (int j = 0; j < ext->subset_ext_size; j++) { - if (riscv_isa_extension_check(ext->subset_ext_ids[i])) - set_bit(ext->subset_ext_ids[j], isainfo->isa); - } - } - - if (riscv_isa_extension_check(ext->id)) { - set_bit(ext->id, isainfo->isa); - - /* Only single letter extensions get set in hwcap */ - if (strnlen(riscv_isa_ext[i].name, 2) == 1) - this_hwcap |= isa2hwcap[riscv_isa_ext[i].id]; - } + riscv_isa_set_ext(ext, source_isa); } + riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); + riscv_fill_cpu_vendor_ext(cpu_node, cpu); + of_node_put(cpu_node); /* @@ -641,6 +885,19 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); else bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); + + riscv_fill_vendor_ext_list(cpu); + } + + /* + * Execute ghostwrite mitigation immediately after detecting extensions + * to disable xtheadvector if necessary. + */ + mitigated = ghostwrite_enable_mitigation(); + + if (!mitigated && has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) { + pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n"); + disable_xtheadvector(); } if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) @@ -695,8 +952,15 @@ void __init riscv_fill_hwcap(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_F; } - if (elf_hwcap & COMPAT_HWCAP_ISA_V) { + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) || + has_xtheadvector_no_alternatives()) { + /* + * This cannot fail when called on the boot hart + */ riscv_v_setup_vsize(); + } + + if (elf_hwcap & COMPAT_HWCAP_ISA_V) { /* * ISA string in device tree might have 'v' flag, but * CONFIG_RISCV_ISA_V is disabled in kernel. @@ -731,251 +995,12 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -static int check_unaligned_access(void *param) -{ - int cpu = smp_processor_id(); - u64 start_cycles, end_cycles; - u64 word_cycles; - u64 byte_cycles; - int ratio; - unsigned long start_jiffies, now; - struct page *page = param; - void *dst; - void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; - - if (check_unaligned_access_emulated(cpu)) - return 0; - - /* Make an unaligned destination buffer. */ - dst = (void *)((unsigned long)page_address(page) | 0x1); - /* Unalign src as well, but differently (off by 1 + 2 = 3). */ - src = dst + (MISALIGNED_BUFFER_SIZE / 2); - src += 2; - word_cycles = -1ULL; - /* Do a warmup. */ - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - preempt_disable(); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - /* - * For a fixed amount of time, repeatedly try the function, and take - * the best time in cycles as the measurement. - */ - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - /* Ensure the CSR read can't reorder WRT to the copy. */ - mb(); - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - /* Ensure the copy ends before the end time is snapped. */ - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < word_cycles) - word_cycles = end_cycles - start_cycles; - } - - byte_cycles = -1ULL; - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - mb(); - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < byte_cycles) - byte_cycles = end_cycles - start_cycles; - } - - preempt_enable(); - - /* Don't divide by zero. */ - if (!word_cycles || !byte_cycles) { - pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", - cpu); - - return 0; - } - - if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; - - ratio = div_u64((byte_cycles * 100), word_cycles); - pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", - cpu, - ratio / 100, - ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); - - per_cpu(misaligned_access_speed, cpu) = speed; - - /* - * Set the value of fast_misaligned_access of a CPU. These operations - * are atomic to avoid race conditions. - */ - if (speed == RISCV_HWPROBE_MISALIGNED_FAST) - cpumask_set_cpu(cpu, &fast_misaligned_access); - else - cpumask_clear_cpu(cpu, &fast_misaligned_access); - - return 0; -} - -static void check_unaligned_access_nonboot_cpu(void *param) +void __init riscv_user_isa_enable(void) { - unsigned int cpu = smp_processor_id(); - struct page **pages = param; - - if (smp_processor_id() != 0) - check_unaligned_access(pages[cpu]); -} - -DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key); - -static void modify_unaligned_access_branches(cpumask_t *mask, int weight) -{ - if (cpumask_weight(mask) == weight) - static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key); - else - static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key); -} - -static void set_unaligned_access_static_branches_except_cpu(int cpu) -{ - /* - * Same as set_unaligned_access_static_branches, except excludes the - * given CPU from the result. When a CPU is hotplugged into an offline - * state, this function is called before the CPU is set to offline in - * the cpumask, and thus the CPU needs to be explicitly excluded. - */ - - cpumask_t fast_except_me; - - cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); - cpumask_clear_cpu(cpu, &fast_except_me); - - modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); -} - -static void set_unaligned_access_static_branches(void) -{ - /* - * This will be called after check_unaligned_access_all_cpus so the - * result of unaligned access speed for all CPUs will be available. - * - * To avoid the number of online cpus changing between reading - * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be - * held before calling this function. - */ - - cpumask_t fast_and_online; - - cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); - - modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); -} - -static int lock_and_set_unaligned_access_static_branch(void) -{ - cpus_read_lock(); - set_unaligned_access_static_branches(); - cpus_read_unlock(); - - return 0; -} - -arch_initcall_sync(lock_and_set_unaligned_access_static_branch); - -static int riscv_online_cpu(unsigned int cpu) -{ - static struct page *buf; - - /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) - goto exit; - - buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!buf) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return -ENOMEM; - } - - check_unaligned_access(buf); - __free_pages(buf, MISALIGNED_BUFFER_ORDER); - -exit: - set_unaligned_access_static_branches(); - - return 0; -} - -static int riscv_offline_cpu(unsigned int cpu) -{ - set_unaligned_access_static_branches_except_cpu(cpu); - - return 0; -} - -/* Measure unaligned access on all CPUs present at boot in parallel. */ -static int check_unaligned_access_all_cpus(void) -{ - unsigned int cpu; - unsigned int cpu_count = num_possible_cpus(); - struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), - GFP_KERNEL); - - if (!bufs) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return 0; - } - - /* - * Allocate separate buffers for each CPU so there's no fighting over - * cache lines. - */ - for_each_cpu(cpu, cpu_online_mask) { - bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!bufs[cpu]) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - goto out; - } - } - - /* Check everybody except 0, who stays behind to tend jiffies. */ - on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); - - /* Check core 0. */ - smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. - */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, riscv_offline_cpu); - -out: - unaligned_emulation_finish(); - for_each_cpu(cpu, cpu_online_mask) { - if (bufs[cpu]) - __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); - } - - kfree(bufs); - return 0; -} - -arch_initcall(check_unaligned_access_all_cpus); - -void riscv_user_isa_enable(void) -{ - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) - csr_set(CSR_ENVCFG, ENVCFG_CBZE); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ)) + current->thread.envcfg |= ENVCFG_CBZE; + else if (any_cpu_has_zicboz) + pr_warn("Zicboz disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE @@ -1018,28 +1043,45 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, { struct alt_entry *alt; void *oldptr, *altptr; - u16 id, value; + u16 id, value, vendor; if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; for (alt = begin; alt < end; alt++) { - if (alt->vendor_id != 0) - continue; - id = PATCH_ID_CPUFEATURE_ID(alt->patch_id); + vendor = PATCH_ID_CPUFEATURE_ID(alt->vendor_id); - if (id >= RISCV_ISA_EXT_MAX) { - WARN(1, "This extension id:%d is not in ISA extension list", id); - continue; - } + /* + * Any alternative with a patch_id that is less than + * RISCV_ISA_EXT_MAX is interpreted as a standard extension. + * + * Any alternative with patch_id that is greater than or equal + * to RISCV_VENDOR_EXT_ALTERNATIVES_BASE is interpreted as a + * vendor extension. + */ + if (id < RISCV_ISA_EXT_MAX) { + /* + * This patch should be treated as errata so skip + * processing here. + */ + if (alt->vendor_id != 0) + continue; - if (!__riscv_isa_extension_available(NULL, id)) - continue; + if (!__riscv_isa_extension_available(NULL, id)) + continue; - value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); - if (!riscv_cpufeature_patch_check(id, value)) + value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); + if (!riscv_cpufeature_patch_check(id, value)) + continue; + } else if (id >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE) { + if (!__riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, + id - RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + continue; + } else { + WARN(1, "This extension id:%d is not in ISA extension list", id); continue; + } oldptr = ALT_OLD_PTR(alt); altptr = ALT_ALT_PTR(alt); diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S index 515b2dfbca75..c5f17c2710b5 100644 --- a/arch/riscv/kernel/efi-header.S +++ b/arch/riscv/kernel/efi-header.S @@ -64,7 +64,7 @@ extra_header_fields: .long efi_header_end - _start // SizeOfHeaders .long 0 // CheckSum .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem - .short 0 // DllCharacteristics + .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics .quad 0 // SizeOfStackReserve .quad 0 // SizeOfStackCommit .quad 0 // SizeOfHeapReserve diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 5bd1ec3341fe..3c37661801f9 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -19,6 +19,7 @@ #include <linux/libfdt.h> #include <linux/types.h> #include <linux/memblock.h> +#include <linux/vmalloc.h> #include <asm/setup.h> int arch_kimage_file_post_load_cleanup(struct kimage *image) @@ -117,6 +118,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, return ret; } +#ifdef CONFIG_CRASH_DUMP static int get_nr_ram_ranges_callback(struct resource *res, void *arg) { unsigned int *nr_ranges = arg; @@ -189,6 +191,7 @@ static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; return cmdline_ptr; } +#endif static void *elf_kexec_load(struct kimage *image, char *kernel_buf, unsigned long kernel_len, char *initrd, @@ -196,12 +199,11 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, unsigned long cmdline_len) { int ret; + void *fdt; unsigned long old_kernel_pbase = ULONG_MAX; unsigned long new_kernel_pbase = 0UL; unsigned long initrd_pbase = 0UL; - unsigned long headers_sz; unsigned long kernel_start; - void *fdt, *headers; struct elfhdr ehdr; struct kexec_buf kbuf; struct kexec_elf_info elf_info; @@ -227,8 +229,11 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, kbuf.buf_min = new_kernel_pbase + kernel_len; kbuf.buf_max = ULONG_MAX; +#ifdef CONFIG_CRASH_DUMP /* Add elfcorehdr */ if (image->type == KEXEC_TYPE_CRASH) { + void *headers; + unsigned long headers_sz; ret = prepare_elf_headers(&headers, &headers_sz); if (ret) { pr_err("Preparing elf core header failed\n"); @@ -264,6 +269,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, } cmdline = modified_cmdline; } +#endif #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY /* Add purgatory to the image */ @@ -445,6 +451,12 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | ENCODE_CJTYPE_IMM(val - addr); break; + case R_RISCV_ADD16: + *(u16 *)loc += val; + break; + case R_RISCV_SUB16: + *(u16 *)loc -= val; + break; case R_RISCV_ADD32: *(u32 *)loc += val; break; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 9d1a305d5508..33a5a9f2a0d4 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -19,6 +19,79 @@ .section .irqentry.text, "ax" +.macro new_vmalloc_check + REG_S a0, TASK_TI_A0(tp) + csrr a0, CSR_CAUSE + /* Exclude IRQs */ + blt a0, zero, .Lnew_vmalloc_restore_context_a0 + + REG_S a1, TASK_TI_A1(tp) + /* Only check new_vmalloc if we are in page/protection fault */ + li a1, EXC_LOAD_PAGE_FAULT + beq a0, a1, .Lnew_vmalloc_kernel_address + li a1, EXC_STORE_PAGE_FAULT + beq a0, a1, .Lnew_vmalloc_kernel_address + li a1, EXC_INST_PAGE_FAULT + bne a0, a1, .Lnew_vmalloc_restore_context_a1 + +.Lnew_vmalloc_kernel_address: + /* Is it a kernel address? */ + csrr a0, CSR_TVAL + bge a0, zero, .Lnew_vmalloc_restore_context_a1 + + /* Check if a new vmalloc mapping appeared that could explain the trap */ + REG_S a2, TASK_TI_A2(tp) + /* + * Computes: + * a0 = &new_vmalloc[BIT_WORD(cpu)] + * a1 = BIT_MASK(cpu) + */ + REG_L a2, TASK_TI_CPU(tp) + /* + * Compute the new_vmalloc element position: + * (cpu / 64) * 8 = (cpu >> 6) << 3 + */ + srli a1, a2, 6 + slli a1, a1, 3 + la a0, new_vmalloc + add a0, a0, a1 + /* + * Compute the bit position in the new_vmalloc element: + * bit_pos = cpu % 64 = cpu - (cpu / 64) * 64 = cpu - (cpu >> 6) << 6 + * = cpu - ((cpu >> 6) << 3) << 3 + */ + slli a1, a1, 3 + sub a1, a2, a1 + /* Compute the "get mask": 1 << bit_pos */ + li a2, 1 + sll a1, a2, a1 + + /* Check the value of new_vmalloc for this cpu */ + REG_L a2, 0(a0) + and a2, a2, a1 + beq a2, zero, .Lnew_vmalloc_restore_context + + /* Atomically reset the current cpu bit in new_vmalloc */ + amoxor.d a0, a1, (a0) + + /* Only emit a sfence.vma if the uarch caches invalid entries */ + ALTERNATIVE("sfence.vma", "nop", 0, RISCV_ISA_EXT_SVVPTC, 1) + + REG_L a0, TASK_TI_A0(tp) + REG_L a1, TASK_TI_A1(tp) + REG_L a2, TASK_TI_A2(tp) + csrw CSR_SCRATCH, x0 + sret + +.Lnew_vmalloc_restore_context: + REG_L a2, TASK_TI_A2(tp) +.Lnew_vmalloc_restore_context_a1: + REG_L a1, TASK_TI_A1(tp) +.Lnew_vmalloc_restore_context_a0: + REG_L a0, TASK_TI_A0(tp) +.endm + + SYM_CODE_START(handle_exception) /* * If coming from userspace, preserve the user thread pointer and load @@ -30,6 +103,20 @@ SYM_CODE_START(handle_exception) .Lrestore_kernel_tpsp: csrr tp, CSR_SCRATCH + +#ifdef CONFIG_64BIT + /* + * The RISC-V kernel does not eagerly emit a sfence.vma after each + * new vmalloc mapping, which may result in exceptions: + * - if the uarch caches invalid entries, the new mapping would not be + * observed by the page table walker and an invalidation is needed. + * - if the uarch does not cache invalid entries, a reordered access + * could "miss" the new mapping and traps: in that case, we only need + * to retry the access, no sfence.vma is required. + */ + new_vmalloc_check +#endif + REG_S sp, TASK_TI_KERNEL_SP(tp) #ifdef CONFIG_VMAP_STACK @@ -88,7 +175,6 @@ SYM_CODE_START(handle_exception) call riscv_v_context_nesting_start #endif move a0, sp /* pt_regs */ - la ra, ret_from_exception /* * MSB of cause differentiates between @@ -97,7 +183,8 @@ SYM_CODE_START(handle_exception) bge s4, zero, 1f /* Handle interrupts */ - tail do_irq + call do_irq + j ret_from_exception 1: /* Handle other exceptions */ slli t0, s4, RISCV_LGPTR @@ -105,12 +192,16 @@ SYM_CODE_START(handle_exception) la t2, excp_vect_table_end add t0, t1, t0 /* Check if exception code lies within bounds */ - bgeu t0, t2, 1f - REG_L t0, 0(t0) - jr t0 -1: - tail do_trap_unknown + bgeu t0, t2, 3f + REG_L t1, 0(t0) +2: jalr t1 + j ret_from_exception +3: + + la t1, do_trap_unknown + j 2b SYM_CODE_END(handle_exception) +ASM_NOKPROBE(handle_exception) /* * The ret_from_exception must be called with interrupt disabled. Here is the @@ -129,6 +220,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #endif bnez s0, 1f +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + call stackleak_erase_on_task_stack +#endif + /* Save unwound kernel stack pointer in thread_info */ addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) @@ -183,7 +278,9 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #else sret #endif +SYM_INNER_LABEL(ret_from_exception_end, SYM_L_GLOBAL) SYM_CODE_END(ret_from_exception) +ASM_NOKPROBE(ret_from_exception) #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) @@ -219,6 +316,7 @@ SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) move a0, sp tail handle_bad_stack SYM_CODE_END(handle_kernel_stack_overflow) +ASM_NOKPROBE(handle_kernel_stack_overflow) #endif SYM_CODE_START(ret_from_fork) @@ -229,8 +327,8 @@ SYM_CODE_START(ret_from_fork) jalr s0 1: move a0, sp /* pt_regs */ - la ra, ret_from_exception - tail syscall_exit_to_user_mode + call syscall_exit_to_user_mode + j ret_from_exception SYM_CODE_END(ret_from_fork) #ifdef CONFIG_IRQ_STACKS diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index 2c543f130f93..f74f6b60e347 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -170,7 +170,7 @@ SYM_FUNC_END(__fstate_restore) __access_func(f31) -#ifdef CONFIG_RISCV_MISALIGNED +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED /* * Disable compressed instructions set to keep a constant offset between FP @@ -211,7 +211,7 @@ SYM_FUNC_START(put_f64_reg) SYM_FUNC_END(put_f64_reg) /* - * put_f64_reg - Get a 64 bits FP register value and returned it or store it to + * get_f64_reg - Get a 64 bits FP register value and returned it or store it to * a pointer. * a0 = FP register index to be retrieved * a1 = If xlen == 32, pointer which should be loaded with the FP register value @@ -224,4 +224,4 @@ SYM_FUNC_START(get_f64_reg) fp_access_epilogue SYM_FUNC_END(get_f64_reg) -#endif /* CONFIG_RISCV_MISALIGNED */ +#endif /* CONFIG_RISCV_SCALAR_MISALIGNED */ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index f5aa24d9e1c1..3524db5e4fa0 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -8,8 +8,9 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> #include <linux/memory.h> +#include <linux/stop_machine.h> #include <asm/cacheflush.h> -#include <asm/patch.h> +#include <asm/text-patching.h> #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) @@ -75,8 +76,7 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, make_call_t0(hook_pos, target, call); /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */ - if (patch_text_nosync - ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) return -EPERM; return 0; @@ -88,7 +88,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) make_call_t0(rec->ip, addr, call); - if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)rec->ip, call, MCOUNT_INSN_SIZE)) return -EPERM; return 0; @@ -99,7 +99,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, { unsigned int nops[2] = {NOP4, NOP4}; - if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) return -EPERM; return 0; @@ -127,16 +127,48 @@ int ftrace_update_ftrace_func(ftrace_func_t func) { int ret = __ftrace_modify_call((unsigned long)&ftrace_call, (unsigned long)func, true, true); - if (!ret) { - ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call, - (unsigned long)func, true, true); - } return ret; } + +struct ftrace_modify_param { + int command; + atomic_t cpu_count; +}; + +static int __ftrace_modify_code(void *data) +{ + struct ftrace_modify_param *param = data; + + if (atomic_inc_return(¶m->cpu_count) == num_online_cpus()) { + ftrace_modify_all_code(param->command); + /* + * Make sure the patching store is effective *before* we + * increment the counter which releases all waiting CPUs + * by using the release variant of atomic increment. The + * release pairs with the call to local_flush_icache_all() + * on the waiting CPU. + */ + atomic_inc_return_release(¶m->cpu_count); + } else { + while (atomic_read(¶m->cpu_count) <= num_online_cpus()) + cpu_relax(); + + local_flush_icache_all(); + } + + return 0; +} + +void arch_ftrace_update_code(int command) +{ + struct ftrace_modify_param param = { command, ATOMIC_INIT(0) }; + + stop_machine(__ftrace_modify_code, ¶m, cpu_online_mask); +} #endif -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -178,16 +210,28 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } #ifdef CONFIG_DYNAMIC_FTRACE -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - struct pt_regs *regs = arch_ftrace_get_regs(fregs); - unsigned long *parent = (unsigned long *)®s->ra; + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long frame_pointer = arch_ftrace_regs(fregs)->s0; + unsigned long *parent = &arch_ftrace_regs(fregs)->ra; + unsigned long old; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * We don't suffer access faults, so no extra fault-recovery assembly + * is needed here. + */ + old = *parent; - prepare_ftrace_return(parent, ip, frame_pointer(regs)); + if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs)) + *parent = return_hooker; } -#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ extern void ftrace_graph_call(void); int ftrace_enable_ftrace_graph_caller(void) { @@ -200,6 +244,6 @@ int ftrace_disable_ftrace_graph_caller(void) return __ftrace_modify_call((unsigned long)&ftrace_graph_call, (unsigned long)&prepare_ftrace_return, false, true); } -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 4236a69c35cb..356d5397b2a2 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -165,10 +165,21 @@ secondary_start_sbi: #endif call .Lsetup_trap_vector scs_load_current - tail smp_callin + call smp_callin #endif /* CONFIG_SMP */ .align 2 +.Lsecondary_park: + /* + * Park this hart if we: + * - have too many harts on CONFIG_RISCV_BOOT_SPINWAIT + * - receive an early trap, before setup_trap_vector finished + * - fail in smp_callin(), as a successful one wouldn't return + */ + wfi + j .Lsecondary_park + +.align 2 .Lsetup_trap_vector: /* Set trap vector to exception handler */ la a0, handle_exception @@ -181,12 +192,6 @@ secondary_start_sbi: csrw CSR_SCRATCH, zero ret -.align 2 -.Lsecondary_park: - /* We lack SMP support or have too many harts, so park this hart */ - wfi - j .Lsecondary_park - SYM_CODE_END(_start) SYM_CODE_START(_start_kernel) @@ -300,6 +305,9 @@ SYM_CODE_START(_start_kernel) #else mv a0, a1 #endif /* CONFIG_BUILTIN_DTB */ + /* Set trap vector to spin forever to help debug */ + la a3, .Lsecondary_park + csrw CSR_TVEC, a3 call setup_vm #ifdef CONFIG_MMU la a0, early_pg_dir diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index e6694759dbd0..654ed159c830 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -9,13 +9,14 @@ #include <linux/memory.h> #include <linux/mutex.h> #include <asm/bug.h> -#include <asm/patch.h> +#include <asm/cacheflush.h> +#include <asm/text-patching.h> #define RISCV_INSN_NOP 0x00000013U #define RISCV_INSN_JAL 0x0000006fU -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)jump_entry_code(entry); u32 insn; @@ -24,7 +25,7 @@ void arch_jump_label_transform(struct jump_entry *entry, long offset = jump_entry_target(entry) - jump_entry_code(entry); if (WARN_ON(offset & 1 || offset < -524288 || offset >= 524288)) - return; + return true; insn = RISCV_INSN_JAL | (((u32)offset & GENMASK(19, 12)) << (12 - 12)) | @@ -35,7 +36,20 @@ void arch_jump_label_transform(struct jump_entry *entry, insn = RISCV_INSN_NOP; } - mutex_lock(&text_mutex); - patch_text_nosync(addr, &insn, sizeof(insn)); - mutex_unlock(&text_mutex); + if (early_boot_irqs_disabled) { + riscv_patch_in_stop_machine = 1; + patch_insn_write(addr, &insn, sizeof(insn)); + riscv_patch_in_stop_machine = 0; + } else { + mutex_lock(&text_mutex); + patch_insn_write(addr, &insn, sizeof(insn)); + mutex_unlock(&text_mutex); + } + + return true; +} + +void arch_jump_label_transform_apply(void) +{ + flush_icache_all(); } diff --git a/arch/riscv/kernel/kernel_mode_fpu.c b/arch/riscv/kernel/kernel_mode_fpu.c new file mode 100644 index 000000000000..0ac8348876c4 --- /dev/null +++ b/arch/riscv/kernel/kernel_mode_fpu.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 SiFive + */ + +#include <linux/export.h> +#include <linux/preempt.h> + +#include <asm/csr.h> +#include <asm/fpu.h> +#include <asm/processor.h> +#include <asm/switch_to.h> + +void kernel_fpu_begin(void) +{ + preempt_disable(); + fstate_save(current, task_pt_regs(current)); + csr_set(CSR_SSTATUS, SR_FS); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + csr_clear(CSR_SSTATUS, SR_FS); + fstate_restore(current, task_pt_regs(current)); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c index 6afe80c7f03a..99972a48e86b 100644 --- a/arch/riscv/kernel/kernel_mode_vector.c +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -143,7 +143,7 @@ static int riscv_v_start_kernel_context(bool *is_nested) /* Transfer the ownership of V from user to kernel, then save */ riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); - if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) { uvstate = ¤t->thread.vstate; __riscv_v_vstate_save(uvstate, uvstate->datap); } @@ -160,7 +160,7 @@ asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) return; depth = riscv_v_ctx_get_depth(); - if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY) + if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY)) riscv_preempt_v_set_dirty(); riscv_v_ctx_depth_inc(); @@ -208,7 +208,7 @@ void kernel_vector_begin(void) { bool nested = false; - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; BUG_ON(!may_use_simd()); @@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(kernel_vector_begin); */ void kernel_vector_end(void) { - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; riscv_v_disable(); diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ed9cad20c039..2306ce3e5f22 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -114,37 +114,6 @@ void machine_shutdown(void) #endif } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - int ret; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. - */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index b7561288e8da..745dd4c4a69c 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -56,138 +56,77 @@ addi sp, sp, ABI_SIZE_ON_STACK .endm -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS /** -* SAVE_ABI_REGS - save regs against the pt_regs struct -* -* @all: tell if saving all the regs -* -* If all is set, all the regs will be saved, otherwise only ABI -* related regs (a0-a7,epc,ra and optional s0) will be saved. +* SAVE_ABI_REGS - save regs against the ftrace_regs struct * * After the stack is established, * * 0(sp) stores the PC of the traced function which can be accessed -* by &(fregs)->regs->epc in tracing function. Note that the real +* by &(fregs)->epc in tracing function. Note that the real * function entry address should be computed with -FENTRY_RA_OFFSET. * * 8(sp) stores the function return address (i.e. parent IP) that -* can be accessed by &(fregs)->regs->ra in tracing function. +* can be accessed by &(fregs)->ra in tracing function. * * The other regs are saved at the respective localtion and accessed -* by the respective pt_regs member. +* by the respective ftrace_regs member. * * Here is the layout of stack for your reference. * * PT_SIZE_ON_STACK -> +++++++++ * + ..... + -* + t3-t6 + -* + s2-s11+ * + a0-a7 + --++++-> ftrace_caller saved -* + s1 + + -* + s0 + --+ -* + t0-t2 + + -* + tp + + -* + gp + + +* + t1 + --++++-> direct tramp address +* + s0 + --+ // frame pointer * + sp + + * + ra + --+ // parent IP * sp -> + epc + --+ // PC * +++++++++ **/ - .macro SAVE_ABI_REGS, all=0 - addi sp, sp, -PT_SIZE_ON_STACK - - REG_S t0, PT_EPC(sp) - REG_S x1, PT_RA(sp) - - // save the ABI regs - - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - - // save the leftover regs + .macro SAVE_ABI_REGS + mv t4, sp // Save original SP in T4 + addi sp, sp, -FREGS_SIZE_ON_STACK - .if \all == 1 - REG_S x2, PT_SP(sp) - REG_S x3, PT_GP(sp) - REG_S x4, PT_TP(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) - - // save s0 if FP_TEST defined - - .else + REG_S t0, FREGS_EPC(sp) + REG_S x1, FREGS_RA(sp) + REG_S t4, FREGS_SP(sp) // Put original SP on stack #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - REG_S x8, PT_S0(sp) + REG_S x8, FREGS_S0(sp) #endif - .endif + REG_S x6, FREGS_T1(sp) + + // save the arguments + REG_S x10, FREGS_A0(sp) + REG_S x11, FREGS_A1(sp) + REG_S x12, FREGS_A2(sp) + REG_S x13, FREGS_A3(sp) + REG_S x14, FREGS_A4(sp) + REG_S x15, FREGS_A5(sp) + REG_S x16, FREGS_A6(sp) + REG_S x17, FREGS_A7(sp) .endm .macro RESTORE_ABI_REGS, all=0 - REG_L t0, PT_EPC(sp) - REG_L x1, PT_RA(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - - .if \all == 1 - REG_L x2, PT_SP(sp) - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) - - .else + REG_L t0, FREGS_EPC(sp) + REG_L x1, FREGS_RA(sp) #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - REG_L x8, PT_S0(sp) + REG_L x8, FREGS_S0(sp) #endif - .endif - addi sp, sp, PT_SIZE_ON_STACK + REG_L x6, FREGS_T1(sp) + + // restore the arguments + REG_L x10, FREGS_A0(sp) + REG_L x11, FREGS_A1(sp) + REG_L x12, FREGS_A2(sp) + REG_L x13, FREGS_A3(sp) + REG_L x14, FREGS_A4(sp) + REG_L x15, FREGS_A5(sp) + REG_L x16, FREGS_A6(sp) + REG_L x17, FREGS_A7(sp) + + addi sp, sp, FREGS_SIZE_ON_STACK .endm .macro PREPARE_ARGS @@ -198,9 +137,9 @@ mv a3, sp .endm -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ -#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS SYM_FUNC_START(ftrace_caller) SAVE_ABI @@ -227,33 +166,23 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) jr t0 SYM_FUNC_END(ftrace_caller) -#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -SYM_FUNC_START(ftrace_regs_caller) +#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ +SYM_FUNC_START(ftrace_caller) mv t1, zero - SAVE_ABI_REGS 1 + SAVE_ABI_REGS PREPARE_ARGS -SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub - RESTORE_ABI_REGS 1 + RESTORE_ABI_REGS bnez t1, .Ldirect jr t0 .Ldirect: jr t1 -SYM_FUNC_END(ftrace_regs_caller) - -SYM_FUNC_START(ftrace_caller) - SAVE_ABI_REGS 0 - PREPARE_ARGS - -SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) - call ftrace_stub - - RESTORE_ABI_REGS 0 - jr t0 SYM_FUNC_END(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS SYM_CODE_START(ftrace_stub_direct_tramp) diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index d7ec69ac6910..068168046e0e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -12,6 +12,8 @@ #include <asm/asm-offsets.h> #include <asm/ftrace.h> +#define ABI_SIZE_ON_STACK 80 + .text .macro SAVE_ABI_STATE @@ -26,12 +28,12 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -4*SZREG - REG_S s0, 2*SZREG(sp) - REG_S ra, 3*SZREG(sp) - REG_S a0, 1*SZREG(sp) - REG_S a1, 0*SZREG(sp) - addi s0, sp, 4*SZREG + addi sp, sp, -ABI_SIZE_ON_STACK + REG_S ra, 1*SZREG(sp) + REG_S s0, 8*SZREG(sp) + REG_S a0, 10*SZREG(sp) + REG_S a1, 11*SZREG(sp) + addi s0, sp, ABI_SIZE_ON_STACK .endm .macro RESTORE_ABI_STATE @@ -41,17 +43,17 @@ .endm .macro RESTORE_RET_ABI_STATE - REG_L ra, 3*SZREG(sp) - REG_L s0, 2*SZREG(sp) - REG_L a0, 1*SZREG(sp) - REG_L a1, 0*SZREG(sp) - addi sp, sp, 4*SZREG + REG_L ra, 1*SZREG(sp) + REG_L s0, 8*SZREG(sp) + REG_L a0, 10*SZREG(sp) + REG_L a1, 11*SZREG(sp) + addi sp, sp, ABI_SIZE_ON_STACK .endm SYM_TYPED_FUNC_START(ftrace_stub) #ifdef CONFIG_DYNAMIC_FTRACE - .global MCOUNT_NAME - .set MCOUNT_NAME, ftrace_stub + .global _mcount + .set _mcount, ftrace_stub #endif ret SYM_FUNC_END(ftrace_stub) @@ -80,7 +82,7 @@ SYM_FUNC_END(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE -SYM_FUNC_START(MCOUNT_NAME) +SYM_FUNC_START(_mcount) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return @@ -126,6 +128,6 @@ SYM_FUNC_START(MCOUNT_NAME) jalr t5 RESTORE_ABI_STATE ret -SYM_FUNC_END(MCOUNT_NAME) +SYM_FUNC_END(_mcount) #endif -EXPORT_SYMBOL(MCOUNT_NAME) +EXPORT_SYMBOL(_mcount) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 5e5a82644451..47d0ebeec93c 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/log2.h> #include <linux/moduleloader.h> -#include <linux/vmalloc.h> #include <linux/sizes.h> #include <linux/pgtable.h> #include <asm/alternative.h> @@ -24,7 +23,7 @@ struct used_bucket { struct relocation_head { struct hlist_node node; - struct list_head *rel_entry; + struct list_head rel_entry; void *location; }; @@ -635,7 +634,7 @@ process_accumulated_relocations(struct module *me, location = rel_head_iter->location; list_for_each_entry_safe(rel_entry_iter, rel_entry_iter_tmp, - rel_head_iter->rel_entry, + &rel_head_iter->rel_entry, head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( @@ -705,16 +704,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, return -ENOMEM; } - rel_head->rel_entry = - kmalloc(sizeof(struct list_head), GFP_KERNEL); - - if (!rel_head->rel_entry) { - kfree(entry); - kfree(rel_head); - return -ENOMEM; - } - - INIT_LIST_HEAD(rel_head->rel_entry); + INIT_LIST_HEAD(&rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { @@ -723,7 +713,6 @@ static int add_relocation_to_accumulate(struct module *me, int type, if (!bucket) { kfree(entry); - kfree(rel_head->rel_entry); kfree(rel_head); return -ENOMEM; } @@ -736,7 +725,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, } /* Add relocation to head of discovered rel_head */ - list_add_tail(&entry->head, rel_head->rel_entry); + list_add_tail(&entry->head, &rel_head->rel_entry); return 0; } @@ -788,8 +777,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, int res; unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); struct hlist_head *relocation_hashtable; - struct list_head used_buckets_list; unsigned int hashtable_bits; + LIST_HEAD(used_buckets_list); hashtable_bits = initialize_relocation_hashtable(num_relocations, &relocation_hashtable); @@ -797,8 +786,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, if (!relocation_hashtable) return -ENOMEM; - INIT_LIST_HEAD(&used_buckets_list); - pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -905,17 +892,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } -#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) -void *module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, MODULES_VADDR, - MODULES_END, GFP_KERNEL, - PAGE_KERNEL, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index 0d6225fd3194..fa6b0339a65d 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi, ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM, lo, hi, flags, 0, 0, 0); if (ret.error) { - if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE) + if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE) pr_warn("Failed to disable steal-time shmem"); else pr_warn("Failed to set steal-time shmem"); @@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu) static int pv_time_cpu_down_prepare(unsigned int cpu) { - return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE, - SBI_STA_SHMEM_DISABLE, 0); + return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE, + SBI_SHMEM_DISABLE, 0); } static u64 pv_time_steal_clock(int cpu) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 37e87fdcf6a0..db13c9ddf9e3 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -13,13 +13,13 @@ #include <asm/cacheflush.h> #include <asm/fixmap.h> #include <asm/ftrace.h> -#include <asm/patch.h> +#include <asm/text-patching.h> #include <asm/sections.h> struct patch_insn { void *addr; u32 *insns; - int ninsns; + size_t len; atomic_t cpu_count; }; @@ -54,7 +54,7 @@ static __always_inline void *patch_map(void *addr, const unsigned int fixmap) BUG_ON(!page); return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + - (uintaddr & ~PAGE_MASK)); + offset_in_page(addr)); } static void patch_unmap(int fixmap) @@ -65,8 +65,8 @@ NOKPROBE_SYMBOL(patch_unmap); static int __patch_insn_set(void *addr, u8 c, size_t len) { + bool across_pages = (offset_in_page(addr) + len) > PAGE_SIZE; void *waddr = addr; - bool across_pages = (((uintptr_t)addr & ~PAGE_MASK) + len) > PAGE_SIZE; /* * Only two pages can be mapped at a time for writing. @@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) */ lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -87,19 +89,29 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) memset(waddr, c, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return 0; } NOKPROBE_SYMBOL(__patch_insn_set); static int __patch_insn_write(void *addr, const void *insn, size_t len) { + bool across_pages = (offset_in_page(addr) + len) > PAGE_SIZE; void *waddr = addr; - bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; int ret; /* @@ -122,6 +134,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (!riscv_patch_in_stop_machine) lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -129,11 +143,21 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) ret = copy_to_kernel_nofault(waddr, insn, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return ret; } NOKPROBE_SYMBOL(__patch_insn_write); @@ -155,69 +179,70 @@ NOKPROBE_SYMBOL(__patch_insn_write); static int patch_insn_set(void *addr, u8 c, size_t len) { - size_t patched = 0; size_t size; - int ret = 0; + int ret; /* * __patch_insn_set() can only work on 2 pages at a time so call it in a * loop with len <= 2 * PAGE_SIZE. */ - while (patched < len && !ret) { - size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched); - ret = __patch_insn_set(addr + patched, c, size); - - patched += size; + while (len) { + size = min(len, PAGE_SIZE * 2 - offset_in_page(addr)); + ret = __patch_insn_set(addr, c, size); + if (ret) + return ret; + + addr += size; + len -= size; } - return ret; + return 0; } NOKPROBE_SYMBOL(patch_insn_set); int patch_text_set_nosync(void *addr, u8 c, size_t len) { - u32 *tp = addr; int ret; - ret = patch_insn_set(tp, c, len); - + ret = patch_insn_set(addr, c, len); if (!ret) - flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len); + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } NOKPROBE_SYMBOL(patch_text_set_nosync); -static int patch_insn_write(void *addr, const void *insn, size_t len) +int patch_insn_write(void *addr, const void *insn, size_t len) { - size_t patched = 0; size_t size; - int ret = 0; + int ret; /* * Copy the instructions to the destination address, two pages at a time * because __patch_insn_write() can only handle len <= 2 * PAGE_SIZE. */ - while (patched < len && !ret) { - size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched); - ret = __patch_insn_write(addr + patched, insn + patched, size); - - patched += size; + while (len) { + size = min(len, PAGE_SIZE * 2 - offset_in_page(addr)); + ret = __patch_insn_write(addr, insn, size); + if (ret) + return ret; + + addr += size; + insn += size; + len -= size; } - return ret; + return 0; } NOKPROBE_SYMBOL(patch_insn_write); int patch_text_nosync(void *addr, const void *insns, size_t len) { - u32 *tp = addr; int ret; - ret = patch_insn_write(tp, insns, len); - + ret = patch_insn_write(addr, insns, len); if (!ret) - flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -226,33 +251,36 @@ NOKPROBE_SYMBOL(patch_text_nosync); static int patch_text_cb(void *data) { struct patch_insn *patch = data; - unsigned long len; - int i, ret = 0; + int ret = 0; if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { - for (i = 0; ret == 0 && i < patch->ninsns; i++) { - len = GET_INSN_LENGTH(patch->insns[i]); - ret = patch_text_nosync(patch->addr + i * len, - &patch->insns[i], len); - } - atomic_inc(&patch->cpu_count); + ret = patch_insn_write(patch->addr, patch->insns, patch->len); + /* + * Make sure the patching store is effective *before* we + * increment the counter which releases all waiting CPUs + * by using the release variant of atomic increment. The + * release pairs with the call to local_flush_icache_all() + * on the waiting CPU. + */ + atomic_inc_return_release(&patch->cpu_count); } else { while (atomic_read(&patch->cpu_count) <= num_online_cpus()) cpu_relax(); - smp_mb(); + + local_flush_icache_all(); } return ret; } NOKPROBE_SYMBOL(patch_text_cb); -int patch_text(void *addr, u32 *insns, int ninsns) +int patch_text(void *addr, u32 *insns, size_t len) { int ret; struct patch_insn patch = { .addr = addr, .insns = insns, - .ninsns = ninsns, + .len = len, .cpu_count = ATOMIC_INIT(0), }; diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 3348a61de7d9..b465bc9eb870 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -6,37 +6,9 @@ #include <asm/stacktrace.h> -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. - */ -static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, - unsigned long fp, unsigned long reg_ra) +static bool fill_callchain(void *entry, unsigned long pc) { - struct stackframe buftail; - unsigned long ra = 0; - unsigned long __user *user_frame_tail = - (unsigned long __user *)(fp - sizeof(struct stackframe)); - - /* Check accessibility of one struct frame_tail beyond */ - if (!access_ok(user_frame_tail, sizeof(buftail))) - return 0; - if (__copy_from_user_inatomic(&buftail, user_frame_tail, - sizeof(buftail))) - return 0; - - if (reg_ra != 0) - ra = reg_ra; - else - ra = buftail.ra; - - fp = buftail.fp; - if (ra != 0) - perf_callchain_store(entry, ra); - else - return 0; - - return fp; + return perf_callchain_store(entry, pc) == 0; } /* @@ -56,23 +28,21 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - unsigned long fp = 0; + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } - fp = regs->s0; - perf_callchain_store(entry, regs->epc); - - fp = user_backtrace(entry, fp, regs->ra); - while (fp && !(fp & 0x3) && entry->nr < entry->max_stack) - fp = user_backtrace(entry, fp, 0); -} - -static bool fill_callchain(void *entry, unsigned long pc) -{ - return perf_callchain_store(entry, pc) == 0; + arch_stack_walk_user(fill_callchain, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } + walk_stackframe(NULL, regs, fill_callchain, entry); } diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile index 07915dc9279e..81d69d45c06c 100644 --- a/arch/riscv/kernel/pi/Makefile +++ b/arch/riscv/kernel/pi/Makefile @@ -5,20 +5,23 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ $(call cc-option,-mbranch-protection=none) \ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ + -include $(srctree)/include/linux/hidden.h \ -D__DISABLE_EXPORTS -ffreestanding \ -fno-asynchronous-unwind-tables -fno-unwind-tables \ $(call cc-option,-fno-addrsig) +# Disable LTO +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) + KBUILD_CFLAGS += -mcmodel=medany CFLAGS_cmdline_early.o += -D__NO_FORTIFY +CFLAGS_fdt_early.o += -D__NO_FORTIFY +# lib/string.c already defines __NO_FORTIFY +CFLAGS_ctype.o += -D__NO_FORTIFY +CFLAGS_lib-fdt.o += -D__NO_FORTIFY CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY - -GCOV_PROFILE := n -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n +CFLAGS_archrandom_early.o += -D__NO_FORTIFY $(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ --remove-section=.note.gnu.property \ @@ -35,5 +38,5 @@ $(obj)/string.o: $(srctree)/lib/string.c FORCE $(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE $(call if_changed_rule,cc_o_c) -obj-y := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o +obj-y := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o archrandom_early.pi.o extra-y := $(patsubst %.pi.o,%.o,$(obj-y)) diff --git a/arch/riscv/kernel/pi/archrandom_early.c b/arch/riscv/kernel/pi/archrandom_early.c new file mode 100644 index 000000000000..3f05d3cf3b7b --- /dev/null +++ b/arch/riscv/kernel/pi/archrandom_early.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/csr.h> +#include <linux/processor.h> + +#include "pi.h" + +/* + * To avoid rewriting code include asm/archrandom.h and create macros + * for the functions that won't be included. + */ +#undef riscv_has_extension_unlikely +#define riscv_has_extension_likely(...) false +#undef pr_err_once +#define pr_err_once(...) + +#include <asm/archrandom.h> + +u64 get_kaslr_seed_zkr(const uintptr_t dtb_pa) +{ + unsigned long seed = 0; + + if (!fdt_early_match_extension_isa((const void *)dtb_pa, "zkr")) + return 0; + + if (!csr_seed_long(&seed)) + return 0; + + return seed; +} diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c index f6d4dedffb84..fbcdc9e4e143 100644 --- a/arch/riscv/kernel/pi/cmdline_early.c +++ b/arch/riscv/kernel/pi/cmdline_early.c @@ -6,15 +6,9 @@ #include <asm/pgtable.h> #include <asm/setup.h> -static char early_cmdline[COMMAND_LINE_SIZE]; +#include "pi.h" -/* - * Declare the functions that are exported (but prefixed) here so that LLVM - * does not complain it lacks the 'static' keyword (which, if added, makes - * LLVM complain because the function is actually unused in this file). - */ -u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa); -bool set_nokaslr_from_cmdline(uintptr_t dtb_pa); +static char early_cmdline[COMMAND_LINE_SIZE]; static char *get_early_cmdline(uintptr_t dtb_pa) { diff --git a/arch/riscv/kernel/pi/fdt_early.c b/arch/riscv/kernel/pi/fdt_early.c index 899610e042ab..9bdee2fafe47 100644 --- a/arch/riscv/kernel/pi/fdt_early.c +++ b/arch/riscv/kernel/pi/fdt_early.c @@ -2,13 +2,9 @@ #include <linux/types.h> #include <linux/init.h> #include <linux/libfdt.h> +#include <linux/ctype.h> -/* - * Declare the functions that are exported (but prefixed) here so that LLVM - * does not complain it lacks the 'static' keyword (which, if added, makes - * LLVM complain because the function is actually unused in this file). - */ -u64 get_kaslr_seed(uintptr_t dtb_pa); +#include "pi.h" u64 get_kaslr_seed(uintptr_t dtb_pa) { @@ -28,3 +24,162 @@ u64 get_kaslr_seed(uintptr_t dtb_pa) *prop = 0; return ret; } + +/** + * fdt_device_is_available - check if a device is available for use + * + * @fdt: pointer to the device tree blob + * @node: offset of the node whose property to find + * + * Returns true if the status property is absent or set to "okay" or "ok", + * false otherwise + */ +static bool fdt_device_is_available(const void *fdt, int node) +{ + const char *status; + int statlen; + + status = fdt_getprop(fdt, node, "status", &statlen); + if (!status) + return true; + + if (statlen > 0) { + if (!strcmp(status, "okay") || !strcmp(status, "ok")) + return true; + } + + return false; +} + +/* Copy of fdt_nodename_eq_ */ +static int fdt_node_name_eq(const void *fdt, int offset, + const char *s) +{ + int olen; + int len = strlen(s); + const char *p = fdt_get_name(fdt, offset, &olen); + + if (!p || olen < len) + /* short match */ + return 0; + + if (memcmp(p, s, len) != 0) + return 0; + + if (p[len] == '\0') + return 1; + else if (!memchr(s, '@', len) && (p[len] == '@')) + return 1; + else + return 0; +} + +/** + * isa_string_contains - check if isa string contains an extension + * + * @isa_str: isa string to search + * @ext_name: the extension to search for + * + * Returns true if the extension is in the given isa string, + * false otherwise + */ +static bool isa_string_contains(const char *isa_str, const char *ext_name) +{ + size_t i, single_end, len = strlen(ext_name); + char ext_end; + + /* Error must contain rv32/64 */ + if (strlen(isa_str) < 4) + return false; + + if (len == 1) { + single_end = strcspn(isa_str, "sSxXzZ"); + /* Search for single chars between rv32/64 and multi-letter extensions */ + for (i = 4; i < single_end; i++) { + if (tolower(isa_str[i]) == ext_name[0]) + return true; + } + return false; + } + + /* Skip to start of multi-letter extensions */ + isa_str = strpbrk(isa_str, "sSxXzZ"); + while (isa_str) { + if (strncasecmp(isa_str, ext_name, len) == 0) { + ext_end = isa_str[len]; + /* Check if matches the whole extension. */ + if (ext_end == '\0' || ext_end == '_') + return true; + } + /* Multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + */ + isa_str = strchr(isa_str, '_'); + if (isa_str) + isa_str++; + } + + return false; +} + +/** + * early_cpu_isa_ext_available - check if cpu node has an extension + * + * @fdt: pointer to the device tree blob + * @node: offset of the cpu node + * @ext_name: the extension to search for + * + * Returns true if the cpu node has the extension, + * false otherwise + */ +static bool early_cpu_isa_ext_available(const void *fdt, int node, const char *ext_name) +{ + const void *prop; + int len; + + prop = fdt_getprop(fdt, node, "riscv,isa-extensions", &len); + if (prop && fdt_stringlist_contains(prop, len, ext_name)) + return true; + + prop = fdt_getprop(fdt, node, "riscv,isa", &len); + if (prop && isa_string_contains(prop, ext_name)) + return true; + + return false; +} + +/** + * fdt_early_match_extension_isa - check if all cpu nodes have an extension + * + * @fdt: pointer to the device tree blob + * @ext_name: the extension to search for + * + * Returns true if the all available the cpu nodes have the extension, + * false otherwise + */ +bool fdt_early_match_extension_isa(const void *fdt, const char *ext_name) +{ + int node, parent; + bool ret = false; + + parent = fdt_path_offset(fdt, "/cpus"); + if (parent < 0) + return false; + + fdt_for_each_subnode(node, fdt, parent) { + if (!fdt_node_name_eq(fdt, node, "cpu")) + continue; + + if (!fdt_device_is_available(fdt, node)) + continue; + + if (!early_cpu_isa_ext_available(fdt, node, ext_name)) + return false; + + ret = true; + } + + return ret; +} diff --git a/arch/riscv/kernel/pi/pi.h b/arch/riscv/kernel/pi/pi.h new file mode 100644 index 000000000000..21141d84fea6 --- /dev/null +++ b/arch/riscv/kernel/pi/pi.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _RISCV_PI_H_ +#define _RISCV_PI_H_ + +#include <linux/types.h> + +/* + * The following functions are exported (but prefixed). Declare them here so + * that LLVM does not complain it lacks the 'static' keyword (which, if + * added, makes LLVM complain because the function is unused). + */ + +u64 get_kaslr_seed(uintptr_t dtb_pa); +u64 get_kaslr_seed_zkr(const uintptr_t dtb_pa); +bool set_nokaslr_from_cmdline(uintptr_t dtb_pa); +u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa); + +bool fdt_early_match_extension_isa(const void *fdt, const char *ext_name); + +#endif /* _RISCV_PI_H_ */ diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index 8265ff497977..d2129f2c61b8 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o -obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c deleted file mode 100644 index 7142ec42e889..000000000000 --- a/arch/riscv/kernel/probes/ftrace.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/kprobes.h> - -/* Ftrace callback handler for kprobes -- called under preepmt disabled */ -void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *fregs) -{ - struct kprobe *p; - struct pt_regs *regs; - struct kprobe_ctlblk *kcb; - int bit; - - bit = ftrace_test_recursion_trylock(ip, parent_ip); - if (bit < 0) - return; - - p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) - goto out; - - regs = ftrace_get_regs(fregs); - kcb = get_kprobe_ctlblk(); - if (kprobe_running()) { - kprobes_inc_nmissed_count(p); - } else { - unsigned long orig_ip = instruction_pointer(regs); - - instruction_pointer_set(regs, ip); - - __this_cpu_write(current_kprobe, p); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) { - /* - * Emulate singlestep (and also recover regs->pc) - * as if there is a nop - */ - instruction_pointer_set(regs, - (unsigned long)p->addr + MCOUNT_INSN_SIZE); - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - instruction_pointer_set(regs, orig_ip); - } - - /* - * If pre_handler returns !0, it changes regs->pc. We have to - * skip emulating post_handler. - */ - __this_cpu_write(current_kprobe, NULL); - } -out: - ftrace_test_recursion_unlock(bit); -} -NOKPROBE_SYMBOL(kprobe_ftrace_handler); - -int arch_prepare_kprobe_ftrace(struct kprobe *p) -{ - p->ainsn.api.insn = NULL; - return 0; -} diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 2f08c14a933d..c0738d6c6498 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -6,12 +6,13 @@ #include <linux/extable.h> #include <linux/slab.h> #include <linux/stop_machine.h> +#include <linux/vmalloc.h> #include <asm/ptrace.h> #include <linux/uaccess.h> #include <asm/sections.h> #include <asm/cacheflush.h> #include <asm/bug.h> -#include <asm/patch.h> +#include <asm/text-patching.h> #include "decode-insn.h" @@ -23,14 +24,13 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { + size_t len = GET_INSN_LENGTH(p->opcode); u32 insn = __BUG_INSN_32; - unsigned long offset = GET_INSN_LENGTH(p->opcode); - p->ainsn.api.restore = (unsigned long)p->addr + offset; + p->ainsn.api.restore = (unsigned long)p->addr + len; - patch_text(p->ainsn.api.insn, &p->opcode, 1); - patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset), - &insn, 1); + patch_text_nosync(p->ainsn.api.insn, &p->opcode, len); + patch_text_nosync((void *)p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); } static void __kprobes arch_prepare_simulate(struct kprobe *p) @@ -104,29 +104,21 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -#ifdef CONFIG_MMU -void *alloc_insn_page(void) -{ - return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_READ_EXEC, - VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { - u32 insn = (p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32 ? - __BUG_INSN_32 : __BUG_INSN_16; + size_t len = GET_INSN_LENGTH(p->opcode); + u32 insn = len == 4 ? __BUG_INSN_32 : __BUG_INSN_16; - patch_text(p->addr, &insn, 1); + patch_text(p->addr, &insn, len); } /* remove breakpoint from text */ void __kprobes arch_disarm_kprobe(struct kprobe *p) { - patch_text(p->addr, &p->opcode, 1); + size_t len = GET_INSN_LENGTH(p->opcode); + + patch_text(p->addr, &p->opcode, len); } void __kprobes arch_remove_kprobe(struct kprobe *p) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 92922dbd5b5c..7c244de77180 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/bitfield.h> #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -15,6 +16,7 @@ #include <linux/tick.h> #include <linux/ptrace.h> #include <linux/uaccess.h> +#include <linux/personality.h> #include <asm/unistd.h> #include <asm/processor.h> @@ -26,8 +28,7 @@ #include <asm/cpuidle.h> #include <asm/vector.h> #include <asm/cpufeature.h> - -register unsigned long gp_in_global __asm__("gp"); +#include <asm/exec.h> #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include <linux/stackprotector.h> @@ -37,7 +38,7 @@ EXPORT_SYMBOL(__stack_chk_guard); extern asmlinkage void ret_from_fork(void); -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { cpu_do_idle(); } @@ -101,6 +102,13 @@ void show_regs(struct pt_regs *regs) dump_backtrace(regs, NULL, KERN_DEFAULT); } +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_u32_below(PAGE_SIZE); + return sp & ~0xf; +} + #ifdef CONFIG_COMPAT static bool compat_mode_supported __read_mostly; @@ -173,12 +181,16 @@ void flush_thread(void) memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); #endif +#ifdef CONFIG_RISCV_ISA_SUPM + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + envcfg_update_bits(current, ENVCFG_PMM, ENVCFG_PMM_PMLEN_0); +#endif } void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. */ - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_free(tsk); } @@ -201,13 +213,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + /* Ensure all threads in this mm have the same pointer masking mode. */ + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM) && p->mm && (clone_flags & CLONE_VM)) + set_bit(MM_CONTEXT_LOCK_PMLEN, &p->mm->context.flags); + memset(&p->thread.s, 0, sizeof(p->thread.s)); /* p->thread holds context to be restored by __switch_to() */ if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; @@ -225,7 +240,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[0] = 0; } p->thread.riscv_v_flags = 0; - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_alloc(p); p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ @@ -236,3 +251,148 @@ void __init arch_task_cache_init(void) { riscv_v_setup_ctx_cache(); } + +#ifdef CONFIG_RISCV_ISA_SUPM +enum { + PMLEN_0 = 0, + PMLEN_7 = 7, + PMLEN_16 = 16, +}; + +static bool have_user_pmlen_7; +static bool have_user_pmlen_16; + +/* + * Control the relaxed ABI allowing tagged user addresses into the kernel. + */ +static unsigned int tagged_addr_disabled; + +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) +{ + unsigned long valid_mask = PR_PMLEN_MASK | PR_TAGGED_ADDR_ENABLE; + struct thread_info *ti = task_thread_info(task); + struct mm_struct *mm = task->mm; + unsigned long pmm; + u8 pmlen; + + if (is_compat_thread(ti)) + return -EINVAL; + + if (arg & ~valid_mask) + return -EINVAL; + + /* + * Prefer the smallest PMLEN that satisfies the user's request, + * in case choosing a larger PMLEN has a performance impact. + */ + pmlen = FIELD_GET(PR_PMLEN_MASK, arg); + if (pmlen == PMLEN_0) { + pmm = ENVCFG_PMM_PMLEN_0; + } else if (pmlen <= PMLEN_7 && have_user_pmlen_7) { + pmlen = PMLEN_7; + pmm = ENVCFG_PMM_PMLEN_7; + } else if (pmlen <= PMLEN_16 && have_user_pmlen_16) { + pmlen = PMLEN_16; + pmm = ENVCFG_PMM_PMLEN_16; + } else { + return -EINVAL; + } + + /* + * Do not allow the enabling of the tagged address ABI if globally + * disabled via sysctl abi.tagged_addr_disabled, if pointer masking + * is disabled for userspace. + */ + if (arg & PR_TAGGED_ADDR_ENABLE && (tagged_addr_disabled || !pmlen)) + return -EINVAL; + + if (!(arg & PR_TAGGED_ADDR_ENABLE)) + pmlen = PMLEN_0; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + if (test_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags) && mm->context.pmlen != pmlen) { + mmap_write_unlock(mm); + return -EBUSY; + } + + envcfg_update_bits(task, ENVCFG_PMM, pmm); + mm->context.pmlen = pmlen; + + mmap_write_unlock(mm); + + return 0; +} + +long get_tagged_addr_ctrl(struct task_struct *task) +{ + struct thread_info *ti = task_thread_info(task); + long ret = 0; + + if (is_compat_thread(ti)) + return -EINVAL; + + /* + * The mm context's pmlen is set only when the tagged address ABI is + * enabled, so the effective PMLEN must be extracted from envcfg.PMM. + */ + switch (task->thread.envcfg & ENVCFG_PMM) { + case ENVCFG_PMM_PMLEN_7: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_7); + break; + case ENVCFG_PMM_PMLEN_16: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_16); + break; + } + + if (task->mm->context.pmlen) + ret |= PR_TAGGED_ADDR_ENABLE; + + return ret; +} + +static bool try_to_set_pmm(unsigned long value) +{ + csr_set(CSR_ENVCFG, value); + return (csr_read_clear(CSR_ENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; +} + +/* + * Global sysctl to disable the tagged user addresses support. This control + * only prevents the tagged address ABI enabling via prctl() and does not + * disable it for tasks that already opted in to the relaxed ABI. + */ + +static const struct ctl_table tagged_addr_sysctl_table[] = { + { + .procname = "tagged_addr_disabled", + .mode = 0644, + .data = &tagged_addr_disabled, + .maxlen = sizeof(int), + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +}; + +static int __init tagged_addr_init(void) +{ + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return 0; + + /* + * envcfg.PMM is a WARL field. Detect which values are supported. + * Assume the supported PMLEN values are the same on all harts. + */ + csr_clear(CSR_ENVCFG, ENVCFG_PMM); + have_user_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); + have_user_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); + + if (!register_sysctl("abi", tagged_addr_sysctl_table)) + return -EINVAL; + + return 0; +} +core_initcall(tagged_addr_init); +#endif /* CONFIG_RISCV_ISA_SUPM */ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index e8515aa9d80b..ea67e9fb7a58 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -28,6 +28,9 @@ enum riscv_regset { #ifdef CONFIG_RISCV_ISA_V REGSET_V, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + REGSET_TAGGED_ADDR_CTRL, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -152,6 +155,35 @@ static int riscv_vr_set(struct task_struct *target, } #endif +#ifdef CONFIG_RISCV_ISA_SUPM +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -182,6 +214,16 @@ static const struct user_regset riscv_user_regset[] = { .set = riscv_vr_set, }, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_RISCV_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view riscv_user_native_view = { @@ -377,14 +419,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, return ret; } +#else +static const struct user_regset_view compat_riscv_user_native_view = {}; #endif /* CONFIG_COMPAT */ const struct user_regset_view *task_user_regset_view(struct task_struct *task) { -#ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_32BIT)) + if (is_compat_thread(&task->thread_info)) return &compat_riscv_user_native_view; else -#endif return &riscv_user_native_view; } diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index a72879b4249a..5ab1c7e1a6ed 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,9 +12,6 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index a4559695ce62..0cc5559c08d8 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -13,6 +13,9 @@ #include <linux/irqdomain.h> #include <asm/sbi.h> +DEFINE_STATIC_KEY_FALSE(riscv_sbi_for_rfence); +EXPORT_SYMBOL_GPL(riscv_sbi_for_rfence); + static int sbi_ipi_virq; static void sbi_ipi_handle(struct irq_desc *desc) @@ -68,10 +71,16 @@ void __init sbi_ipi_init(void) * the masking/unmasking of virtual IPIs is done * via generic IPI-Mux */ - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); - riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false); + riscv_ipi_set_virq_range(virq, BITS_PER_BYTE); pr_info("providing IPIs using SBI IPI extension\n"); + + /* + * Use the SBI remote fence extension to avoid + * the extra context switch needed to handle IPIs. + */ + static_branch_enable(&riscv_sbi_for_rfence); } diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index e66e0999a800..1989b8cade1b 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -24,51 +24,6 @@ static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) __ro_after_init; -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, - unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5) -{ - struct sbiret ret; - - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); - register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); - register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); - register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); - register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); - register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); - asm volatile ("ecall" - : "+r" (a0), "+r" (a1) - : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) - : "memory"); - ret.error = a0; - ret.value = a1; - - return ret; -} -EXPORT_SYMBOL(sbi_ecall); - -int sbi_err_map_linux_errno(int err) -{ - switch (err) { - case SBI_SUCCESS: - return 0; - case SBI_ERR_DENIED: - return -EPERM; - case SBI_ERR_INVALID_PARAM: - return -EINVAL; - case SBI_ERR_INVALID_ADDRESS: - return -EFAULT; - case SBI_ERR_NOT_SUPPORTED: - case SBI_ERR_FAILURE: - default: - return -ENOTSUPP; - }; -} -EXPORT_SYMBOL(sbi_err_map_linux_errno); - #ifdef CONFIG_RISCV_SBI_V01 static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask) { @@ -528,17 +483,6 @@ long sbi_probe_extension(int extid) } EXPORT_SYMBOL(sbi_probe_extension); -static long __sbi_base_ecall(int fid) -{ - struct sbiret ret; - - ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); - if (!ret.error) - return ret.value; - else - return sbi_err_map_linux_errno(ret.error); -} - static inline long sbi_get_spec_version(void) { return __sbi_base_ecall(SBI_EXT_BASE_GET_SPEC_VERSION); diff --git a/arch/riscv/kernel/sbi_ecall.c b/arch/riscv/kernel/sbi_ecall.c new file mode 100644 index 000000000000..24aabb4fbde3 --- /dev/null +++ b/arch/riscv/kernel/sbi_ecall.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Rivos Inc. */ + +#include <asm/sbi.h> +#define CREATE_TRACE_POINTS +#include <asm/trace.h> + +long __sbi_base_ecall(int fid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); + if (!ret.error) + return ret.value; + else + return sbi_err_map_linux_errno(ret.error); +} +EXPORT_SYMBOL(__sbi_base_ecall); + +struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, + int fid, int ext) +{ + struct sbiret ret; + + trace_sbi_call(ext, fid); + + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + asm volatile ("ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + trace_sbi_return(ext, ret.error, ret.value); + + return ret; +} +EXPORT_SYMBOL(__sbi_ecall); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4f73c0ae44b2..4fe45daa6281 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -147,9 +147,7 @@ static void __init init_resources(void) res_idx = num_resources - 1; mem_res_sz = num_resources * sizeof(*mem_res); - mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); - if (!mem_res) - panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); + mem_res = memblock_alloc_or_panic(mem_res_sz, SMP_CACHE_BYTES); /* * Start by adding the reserved regions, if they overlap @@ -227,7 +225,7 @@ static void __init init_resources(void) static void __init parse_dtb(void) { /* Early scan of device tree from init memory */ - if (early_init_dt_scan(dtb_early_va)) { + if (early_init_dt_scan(dtb_early_va, dtb_early_pa)) { const char *name = of_flat_dt_get_machine_name(); if (name) { @@ -244,6 +242,42 @@ static void __init parse_dtb(void) #endif } +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) +DEFINE_STATIC_KEY_TRUE(qspinlock_key); +EXPORT_SYMBOL(qspinlock_key); +#endif + +static void __init riscv_spinlock_init(void) +{ + char *using_ext = NULL; + + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) { + pr_info("Ticket spinlock: enabled\n"); + return; + } + + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && + riscv_isa_extension_available(NULL, ZABHA) && + riscv_isa_extension_available(NULL, ZACAS)) { + using_ext = "using Zabha"; + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) { + using_ext = "using Ziccrse"; + } +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) + else { + static_branch_disable(&qspinlock_key); + pr_info("Ticket spinlock: enabled\n"); + return; + } +#endif + + if (!using_ext) + pr_err("Queued spinlock without Zabha or Ziccrse"); + else + pr_info("Queued spinlock %s: enabled\n", using_ext); +} + extern void __init init_rt_signal_env(void); void __init setup_arch(char **cmdline_p) @@ -281,13 +315,15 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif - if (!acpi_disabled) + if (!acpi_disabled) { acpi_init_rintc_map(); + acpi_map_cpus_to_nodes(); + } riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); - init_rt_signal_env(); apply_boot_alternatives(); + init_rt_signal_env(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) @@ -295,6 +331,7 @@ void __init setup_arch(char **cmdline_p) riscv_set_dma_cache_alignment(); riscv_user_isa_enable(); + riscv_spinlock_init(); } bool arch_cpu_is_hotpluggable(int cpu) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 501e66debf69..08378fea3a11 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -84,7 +84,7 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) datap = state + 1; /* datap is designed to be 16 byte aligned for better performance */ - WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); + WARN_ON(!IS_ALIGNED((unsigned long)datap, 16)); get_cpu_vector_context(); riscv_v_vstate_save(¤t->thread.vstate, regs); @@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) struct __sc_riscv_v_state __user *state = sc_vec; void __user *datap; + /* + * Mark the vstate as clean prior performing the actual copy, + * to avoid getting the vstate incorrectly clobbered by the + * discarded vector state. + */ + riscv_v_vstate_set_restore(current, regs); + /* Copy everything of __sc_riscv_v_state except datap. */ err = __copy_from_user(¤t->thread.vstate, &state->v_state, offsetof(struct __riscv_v_ext_state, datap)); @@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) * Copy the whole vector content from user space datap. Use * copy_from_user to prevent information leak. */ - err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); - if (unlikely(err)) - return err; - - riscv_v_vstate_set_restore(current, regs); - - return err; + return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } #else #define save_v_state(task, regs) (0) @@ -188,7 +189,7 @@ static long restore_sigcontext(struct pt_regs *regs, return 0; case RISCV_V_MAGIC: - if (!has_vector() || !riscv_v_vstate_query(regs) || + if (!(has_vector() || has_xtheadvector()) || !riscv_v_vstate_query(regs) || size != riscv_v_sc_size) return -EINVAL; @@ -210,16 +211,10 @@ static size_t get_rt_frame_size(bool cal_all) frame_size = sizeof(*frame); - if (has_vector()) { + if (has_vector() || has_xtheadvector()) { if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) total_context_size += riscv_v_sc_size; } - /* - * Preserved a __riscv_ctx_hdr for END signal context header if an - * extension uses __riscv_extra_ext_header - */ - if (total_context_size) - total_context_size += sizeof(struct __riscv_ctx_hdr); frame_size += total_context_size; @@ -283,7 +278,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, if (has_fpu()) err |= save_fp_state(regs, &sc->sc_fpregs); /* Save the vector state. */ - if (has_vector() && riscv_v_vstate_query(regs)) + if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs)) err |= save_v_state(regs, (void __user **)&sc_ext_ptr); /* Write zero to fp-reserved space and check it on restore_sigcontext */ err |= __put_user(0, &sc->sc_extdesc.reserved); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 45dd4035416e..d58b5e751286 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -13,6 +13,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/kexec.h> +#include <linux/kgdb.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/smp.h> @@ -21,6 +22,7 @@ #include <linux/delay.h> #include <linux/irq.h> #include <linux/irq_work.h> +#include <linux/nmi.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> @@ -33,12 +35,15 @@ enum ipi_message_type { IPI_CPU_CRASH_STOP, IPI_IRQ_WORK, IPI_TIMER, + IPI_CPU_BACKTRACE, + IPI_KGDB_ROUNDUP, IPI_MAX }; unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = INVALID_HARTID }; +EXPORT_SYMBOL_GPL(__cpuid_to_hartid_map); void __init smp_setup_processor_id(void) { @@ -113,6 +118,7 @@ void arch_irq_work_raise(void) static irqreturn_t handle_IPI(int irq, void *data) { + unsigned int cpu = smp_processor_id(); int ipi = irq - ipi_virq_base; switch (ipi) { @@ -126,7 +132,7 @@ static irqreturn_t handle_IPI(int irq, void *data) ipi_stop(); break; case IPI_CPU_CRASH_STOP: - ipi_cpu_crash_stop(smp_processor_id(), get_irq_regs()); + ipi_cpu_crash_stop(cpu, get_irq_regs()); break; case IPI_IRQ_WORK: irq_work_run(); @@ -136,8 +142,14 @@ static irqreturn_t handle_IPI(int irq, void *data) tick_receive_broadcast(); break; #endif + case IPI_CPU_BACKTRACE: + nmi_cpu_backtrace(get_irq_regs()); + break; + case IPI_KGDB_ROUNDUP: + kgdb_nmicallback(cpu, get_irq_regs()); + break; default: - pr_warn("CPU%d: unhandled IPI%d\n", smp_processor_id(), ipi); + pr_warn("CPU%d: unhandled IPI%d\n", cpu, ipi); break; } @@ -171,10 +183,7 @@ bool riscv_ipi_have_virq_range(void) return (ipi_virq_base) ? true : false; } -DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); -EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence); - -void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) +void riscv_ipi_set_virq_range(int virq, int nr) { int i, err; @@ -197,12 +206,6 @@ void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) /* Enabled IPIs for boot CPU immediately */ riscv_ipi_enable(); - - /* Update RFENCE static key */ - if (use_for_rfence) - static_branch_enable(&riscv_ipi_for_rfence); - else - static_branch_disable(&riscv_ipi_for_rfence); } static const char * const ipi_names[] = { @@ -212,6 +215,8 @@ static const char * const ipi_names[] = { [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", }; void show_ipi_stats(struct seq_file *p, int prec) @@ -332,3 +337,29 @@ void arch_smp_send_reschedule(int cpu) send_ipi_single(cpu, IPI_RESCHEDULE); } EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); + +static void riscv_backtrace_ipi(cpumask_t *mask) +{ + send_ipi_mask(mask, IPI_CPU_BACKTRACE); +} + +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, riscv_backtrace_ipi); +} + +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(void) +{ + int this_cpu = raw_smp_processor_id(); + int cpu; + + for_each_online_cpu(cpu) { + /* No need to roundup ourselves */ + if (cpu == this_cpu) + continue; + + send_ipi_single(cpu, IPI_KGDB_ROUNDUP); + } +} +#endif diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 519b6bd946e5..e36d20205bd7 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -26,9 +26,8 @@ #include <linux/sched/task_stack.h> #include <linux/sched/mm.h> -#include <asm/cpufeature.h> +#include <asm/cacheflush.h> #include <asm/cpu_ops.h> -#include <asm/cpufeature.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/numa.h> @@ -42,10 +41,6 @@ static DECLARE_COMPLETION(cpu_running); -void __init smp_prepare_boot_cpu(void) -{ -} - void __init smp_prepare_cpus(unsigned int max_cpus) { int cpuid; @@ -101,7 +96,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un if (hart == cpuid_to_hartid_map(0)) { BUG_ON(found_boot_cpu); found_boot_cpu = true; - early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count)); return 0; } @@ -111,7 +105,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un } cpuid_to_hartid_map(cpu_count) = hart; - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count)); cpu_count++; return 0; @@ -219,6 +212,15 @@ asmlinkage __visible void smp_callin(void) struct mm_struct *mm = &init_mm; unsigned int curr_cpuid = smp_processor_id(); + if (has_vector()) { + /* + * Return as early as possible so the hart with a mismatching + * vlen won't boot. + */ + if (riscv_v_setup_vsize()) + return; + } + /* All kernel threads share the same mm context. */ mmgrab(mm); current->active_mm = mm; @@ -229,19 +231,13 @@ asmlinkage __visible void smp_callin(void) riscv_ipi_enable(); numa_add_cpu(curr_cpuid); - set_cpu_online(curr_cpuid, 1); - - if (has_vector()) { - if (riscv_v_setup_vsize()) - elf_hwcap &= ~COMPAT_HWCAP_ISA_V; - } - - riscv_user_isa_enable(); + set_cpu_online(curr_cpuid, true); /* - * Remote TLB flushes are ignored while the CPU is offline, so emit - * a local TLB flush right now just in case. + * Remote cache and TLB flushes are ignored while the CPU is offline, + * so flush them both right now just in case. */ + local_flush_icache_all(); local_flush_tlb_all(); complete(&cpu_running); /* diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 64a9c093aef9..d4355c770c36 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,12 +16,24 @@ #ifdef CONFIG_FRAME_POINTER -extern asmlinkage void ret_from_exception(void); +extern asmlinkage void handle_exception(void); +extern unsigned long ret_from_exception_end; + +static inline int fp_is_valid(unsigned long fp, unsigned long sp) +{ + unsigned long low, high; + + low = sp + sizeof(struct stackframe); + high = ALIGN(sp, THREAD_SIZE); + + return !(fp < low || fp > high || fp & 0x07); +} void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int graph_idx = 0; int level = 0; if (regs) { @@ -41,28 +53,27 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } for (;;) { - unsigned long low, high; struct stackframe *frame; if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) break; - /* Validate frame pointer */ - low = sp + sizeof(struct stackframe); - high = ALIGN(sp, THREAD_SIZE); - if (unlikely(fp < low || fp > high || fp & 0x7)) + if (unlikely(!fp_is_valid(fp, sp))) break; + /* Unwind stack frame */ frame = (struct stackframe *)fp - 1; sp = fp; - if (regs && (regs->epc == pc) && (frame->fp & 0x7)) { + if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) { + /* We hit function where ra is not saved on the stack */ fp = frame->ra; pc = regs->ra; } else { fp = frame->fp; - pc = ftrace_graph_ret_addr(current, NULL, frame->ra, + pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); - if (pc == (unsigned long)ret_from_exception) { + if (pc >= (unsigned long)handle_exception && + pc < (unsigned long)&ret_from_exception_end) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) break; @@ -148,8 +159,51 @@ unsigned long __get_wchan(struct task_struct *task) return pc; } -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); } + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long unwind_user_frame(stack_trace_consume_fn consume_entry, + void *cookie, unsigned long fp, + unsigned long reg_ra) +{ + struct stackframe buftail; + unsigned long ra = 0; + unsigned long __user *user_frame_tail = + (unsigned long __user *)(fp - sizeof(struct stackframe)); + + /* Check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + if (__copy_from_user_inatomic(&buftail, user_frame_tail, + sizeof(buftail))) + return 0; + + ra = reg_ra ? : buftail.ra; + + fp = buftail.fp; + if (!ra || !consume_entry(cookie, ra)) + return 0; + + return fp; +} + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + unsigned long fp = 0; + + fp = regs->s0; + if (!consume_entry(cookie, regs->epc)) + return; + + fp = unwind_user_frame(consume_entry, cookie, fp, regs->ra); + while (fp && !(fp & 0x7)) + fp = unwind_user_frame(consume_entry, cookie, fp, 0); +} diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 299795341e8a..9a8a0dc035b2 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -14,8 +14,7 @@ void suspend_save_csrs(struct suspend_context *context) { - context->scratch = csr_read(CSR_SCRATCH); - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) context->envcfg = csr_read(CSR_ENVCFG); context->tvec = csr_read(CSR_TVEC); context->ie = csr_read(CSR_IE); @@ -37,8 +36,8 @@ void suspend_save_csrs(struct suspend_context *context) void suspend_restore_csrs(struct suspend_context *context) { - csr_write(CSR_SCRATCH, context->scratch); - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + csr_write(CSR_SCRATCH, 0); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) csr_write(CSR_ENVCFG, context->envcfg); csr_write(CSR_TVEC, context->tvec); csr_write(CSR_IE, context->ie); @@ -132,4 +131,53 @@ static int __init sbi_system_suspend_init(void) } arch_initcall(sbi_system_suspend_init); + +static int sbi_suspend_finisher(unsigned long suspend_type, + unsigned long resume_addr, + unsigned long opaque) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND, + suspend_type, resume_addr, opaque, 0, 0, 0); + + return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0; +} + +int riscv_sbi_hart_suspend(u32 state) +{ + if (state & SBI_HSM_SUSP_NON_RET_BIT) + return cpu_suspend(state, sbi_suspend_finisher); + else + return sbi_suspend_finisher(state, 0, 0); +} + +bool riscv_sbi_suspend_state_is_valid(u32 state) +{ + if (state > SBI_HSM_SUSPEND_RET_DEFAULT && + state < SBI_HSM_SUSPEND_RET_PLATFORM) + return false; + + if (state > SBI_HSM_SUSPEND_NON_RET_DEFAULT && + state < SBI_HSM_SUSPEND_NON_RET_PLATFORM) + return false; + + return true; +} + +bool riscv_sbi_hsm_is_supported(void) +{ + /* + * The SBI HSM suspend function is only available when: + * 1) SBI version is 0.3 or higher + * 2) SBI HSM extension is available + */ + if (sbi_spec_version < sbi_mk_version(0, 3) || + !sbi_probe_extension(SBI_EXT_HSM)) { + pr_info("HSM suspend not available\n"); + return false; + } + + return true; +} #endif /* CONFIG_RISCV_SBI */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index a7c56b41efd2..bcd3b816306c 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -8,11 +8,14 @@ #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwprobe.h> +#include <asm/processor.h> +#include <asm/delay.h> #include <asm/sbi.h> #include <asm/switch_to.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/vector.h> +#include <asm/vendor_extensions/thead_hwprobe.h> #include <vdso/vsyscall.h> @@ -69,7 +72,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, if (riscv_isa_extension_available(NULL, c)) pair->value |= RISCV_HWPROBE_IMA_C; - if (has_vector()) + if (has_vector() && riscv_isa_extension_available(NULL, v)) pair->value |= RISCV_HWPROBE_IMA_V; /* @@ -92,29 +95,45 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * regardless of the kernel's configuration, as no other checks, besides * presence in the hart_isa bitmap, are made. */ + EXT_KEY(ZACAS); + EXT_KEY(ZAWRS); EXT_KEY(ZBA); EXT_KEY(ZBB); - EXT_KEY(ZBS); - EXT_KEY(ZICBOZ); EXT_KEY(ZBC); - EXT_KEY(ZBKB); EXT_KEY(ZBKC); EXT_KEY(ZBKX); + EXT_KEY(ZBS); + EXT_KEY(ZCA); + EXT_KEY(ZCB); + EXT_KEY(ZCMOP); + EXT_KEY(ZICBOZ); + EXT_KEY(ZICOND); + EXT_KEY(ZIHINTNTL); + EXT_KEY(ZIHINTPAUSE); + EXT_KEY(ZIMOP); EXT_KEY(ZKND); EXT_KEY(ZKNE); EXT_KEY(ZKNH); EXT_KEY(ZKSED); EXT_KEY(ZKSH); EXT_KEY(ZKT); - EXT_KEY(ZIHINTNTL); EXT_KEY(ZTSO); - EXT_KEY(ZACAS); - EXT_KEY(ZICOND); + /* + * All the following extensions must depend on the kernel + * support of V. + */ if (has_vector()) { EXT_KEY(ZVBB); EXT_KEY(ZVBC); + EXT_KEY(ZVE32F); + EXT_KEY(ZVE32X); + EXT_KEY(ZVE64D); + EXT_KEY(ZVE64F); + EXT_KEY(ZVE64X); + EXT_KEY(ZVFH); + EXT_KEY(ZVFHMIN); EXT_KEY(ZVKB); EXT_KEY(ZVKG); EXT_KEY(ZVKNED); @@ -123,15 +142,18 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZVKSED); EXT_KEY(ZVKSH); EXT_KEY(ZVKT); - EXT_KEY(ZVFH); - EXT_KEY(ZVFHMIN); } if (has_fpu()) { + EXT_KEY(ZCD); + EXT_KEY(ZCF); + EXT_KEY(ZFA); EXT_KEY(ZFH); EXT_KEY(ZFHMIN); - EXT_KEY(ZFA); } + + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM)) + EXT_KEY(SUPM); #undef EXT_KEY } @@ -147,6 +169,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) return (pair.value & ext); } +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) static u64 hwprobe_misaligned(const struct cpumask *cpus) { int cpu; @@ -159,16 +182,65 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) perf = this_perf; if (perf != this_perf) { - perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + perf = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; break; } } if (perf == -1ULL) - return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + return RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; return perf; } +#else +static u64 hwprobe_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + + if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) + return RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; + + return RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; +} +#endif + +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + int cpu; + u64 perf = -1ULL; + + /* Return if supported or not even if speed wasn't probed */ + for_each_cpu(cpu, cpus) { + int this_perf = per_cpu(vector_misaligned_access, cpu); + + if (perf == -1ULL) + perf = this_perf; + + if (perf != this_perf) { + perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + break; + } + } + + if (perf == -1ULL) + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + return perf; +} +#else +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + if (IS_ENABLED(CONFIG_RISCV_SLOW_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; +} +#endif static void hwprobe_one_pair(struct riscv_hwprobe *pair, const struct cpumask *cpus) @@ -194,14 +266,30 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, break; case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: pair->value = hwprobe_misaligned(cpus); break; + case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: + pair->value = hwprobe_vec_misaligned(cpus); + break; + case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE: pair->value = 0; if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) pair->value = riscv_cboz_block_size; break; + case RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS: + pair->value = user_max_virt_addr(); + break; + + case RISCV_HWPROBE_KEY_TIME_CSR_FREQ: + pair->value = riscv_timebase; + break; + + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: + hwprobe_isa_vendor_ext_thead_0(pair, cpus); + break; /* * For forward compatibility, unknown keys don't fail the whole @@ -363,7 +451,7 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, static int __init init_hwprobe_vdso_data(void) { struct vdso_data *vd = __arch_get_k_vdso_data(); - struct arch_vdso_data *avd = &vd->arch_data; + struct arch_vdso_time_data *avd = &vd->arch_data; u64 id_bitsmash = 0; struct riscv_hwprobe pair; int key; diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f1c1416a9f1e..d77afe05578f 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -7,7 +7,6 @@ #include <linux/syscalls.h> #include <asm/cacheflush.h> -#include <asm-generic/mman-common.h> static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -24,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, #ifdef CONFIG_64BIT SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0); } @@ -33,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { /* * Note that the shift for mmap2 is constant (12), diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index dda913764903..6f1a36cb0f3f 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -9,14 +9,16 @@ #include <asm-generic/syscalls.h> #include <asm/syscall.h> +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) + #undef __SYSCALL #define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *); -#include <asm/unistd.h> +#include <asm/syscall_table.h> #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = __riscv_##call, void * const sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall, -#include <asm/unistd.h> +#include <asm/syscall_table.h> }; diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug index 5dba64e8e977..78cea5d2c270 100644 --- a/arch/riscv/kernel/tests/Kconfig.debug +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -6,7 +6,7 @@ config AS_HAS_ULEB128 menuconfig RUNTIME_KERNEL_TESTING_MENU bool "arch/riscv/kernel runtime Testing" - def_bool y + default y help Enable riscv kernel runtime testing. diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index a1b9be3c4332..8ff8e8b36524 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -6,6 +6,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/randomize_kstack.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/signal.h> @@ -34,7 +35,7 @@ int show_unhandled_signals = 1; -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) { @@ -80,7 +81,7 @@ void die(struct pt_regs *regs, const char *str) oops_enter(); - spin_lock_irqsave(&die_lock, flags); + raw_spin_lock_irqsave(&die_lock, flags); console_verbose(); bust_spinlocks(1); @@ -99,7 +100,7 @@ void die(struct pt_regs *regs, const char *str) bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irqrestore(&die_lock, flags); + raw_spin_unlock_irqrestore(&die_lock, flags); oops_exit(); if (in_interrupt()) @@ -121,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); - dump_instr(KERN_EMERG, regs); + dump_instr(KERN_INFO, regs); } force_sig_fault(signo, code, (void __user *)addr); @@ -310,22 +311,36 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) } } -asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) +asmlinkage __visible __trap_section __no_stack_protector +void do_trap_ecall_u(struct pt_regs *regs) { if (user_mode(regs)) { long syscall = regs->a7; regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); syscall = syscall_enter_from_user_mode(regs, syscall); + add_random_kstack_offset(); + if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * so the maximum stack offset is 1k bytes (10 bits). + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned + * for RV32I or RV64I. + * + * The resulting 6 bits of entropy is seen in SP[9:4]. + */ + choose_random_kstack_offset(get_random_u16()); syscall_exit_to_user_mode(regs); } else { diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 8ded225e8c5b..7cc108aed74e 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -16,6 +16,7 @@ #include <asm/entry-common.h> #include <asm/hwprobe.h> #include <asm/cpufeature.h> +#include <asm/vector.h> #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -136,8 +137,6 @@ #define REG_PTR(insn, pos, regs) \ (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) -#define GET_RM(insn) (((insn) >> 12) & 7) - #define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) #define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) #define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) @@ -264,86 +263,14 @@ static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, #define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs)) #define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs)) -#ifdef CONFIG_RISCV_M_MODE -static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) -{ - u8 val; - - asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr)); - *r_val = val; - - return 0; -} - -static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) -{ - asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr)); - - return 0; -} - -static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn) -{ - register ulong __mepc asm ("a2") = mepc; - ulong val, rvc_mask = 3, tmp; - - asm ("and %[tmp], %[addr], 2\n" - "bnez %[tmp], 1f\n" -#if defined(CONFIG_64BIT) - __stringify(LWU) " %[insn], (%[addr])\n" -#else - __stringify(LW) " %[insn], (%[addr])\n" -#endif - "and %[tmp], %[insn], %[rvc_mask]\n" - "beq %[tmp], %[rvc_mask], 2f\n" - "sll %[insn], %[insn], %[xlen_minus_16]\n" - "srl %[insn], %[insn], %[xlen_minus_16]\n" - "j 2f\n" - "1:\n" - "lhu %[insn], (%[addr])\n" - "and %[tmp], %[insn], %[rvc_mask]\n" - "bne %[tmp], %[rvc_mask], 2f\n" - "lhu %[tmp], 2(%[addr])\n" - "sll %[tmp], %[tmp], 16\n" - "add %[insn], %[insn], %[tmp]\n" - "2:" - : [insn] "=&r" (val), [tmp] "=&r" (tmp) - : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), - [xlen_minus_16] "i" (XLEN_MINUS_16)); - - *r_insn = val; - - return 0; -} -#else -static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) -{ - if (user_mode(regs)) { - return __get_user(*r_val, (u8 __user *)addr); - } else { - *r_val = *addr; - return 0; - } -} - -static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) -{ - if (user_mode(regs)) { - return __put_user(val, (u8 __user *)addr); - } else { - *addr = val; - return 0; - } -} - -#define __read_insn(regs, insn, insn_addr) \ +#define __read_insn(regs, insn, insn_addr, type) \ ({ \ int __ret; \ \ if (user_mode(regs)) { \ - __ret = __get_user(insn, insn_addr); \ + __ret = __get_user(insn, (type __user *) insn_addr); \ } else { \ - insn = *(__force u16 *)insn_addr; \ + insn = *(type *)insn_addr; \ __ret = 0; \ } \ \ @@ -356,9 +283,8 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) if (epc & 0x2) { ulong tmp = 0; - u16 __user *insn_addr = (u16 __user *)epc; - if (__read_insn(regs, insn, insn_addr)) + if (__read_insn(regs, insn, epc, u16)) return -EFAULT; /* __get_user() uses regular "lw" which sign extend the loaded * value make sure to clear higher order bits in case we "or" it @@ -369,16 +295,14 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) *r_insn = insn; return 0; } - insn_addr++; - if (__read_insn(regs, tmp, insn_addr)) + epc += sizeof(u16); + if (__read_insn(regs, tmp, epc, u16)) return -EFAULT; *r_insn = (tmp << 16) | insn; return 0; } else { - u32 __user *insn_addr = (u32 __user *)epc; - - if (__read_insn(regs, insn, insn_addr)) + if (__read_insn(regs, insn, epc, u32)) return -EFAULT; if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) { *r_insn = insn; @@ -390,7 +314,6 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) return 0; } } -#endif union reg_data { u8 data_bytes[8]; @@ -398,22 +321,49 @@ union reg_data { u64 data_u64; }; -static bool unaligned_ctl __read_mostly; - /* sysctl hooks */ int unaligned_enabled __read_mostly = 1; /* Enabled by default */ -int handle_misaligned_load(struct pt_regs *regs) +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (get_insn(regs, epc, &insn)) + return -1; + + /* Only return 0 when in check_vector_unaligned_access_emulated */ + if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + regs->epc = epc + INSN_LEN(insn); + return 0; + } + + /* If vector instruction we don't emulate it yet */ + regs->epc = epc; + return -1; +} +#else +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + return -1; +} +#endif + +static int handle_scalar_misaligned_load(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; unsigned long insn; unsigned long addr = regs->badaddr; - int i, fp = 0, shift = 0, len = 0; + int fp = 0, shift = 0, len = 0; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); - *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; +#endif if (!unaligned_enabled) return -1; @@ -490,9 +440,11 @@ int handle_misaligned_load(struct pt_regs *regs) return -EOPNOTSUPP; val.data_u64 = 0; - for (i = 0; i < len; i++) { - if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) + if (user_mode(regs)) { + if (copy_from_user(&val, (u8 __user *)addr, len)) return -1; + } else { + memcpy(&val, (u8 *)addr, len); } if (!fp) @@ -507,13 +459,13 @@ int handle_misaligned_load(struct pt_regs *regs) return 0; } -int handle_misaligned_store(struct pt_regs *regs) +static int handle_scalar_misaligned_store(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; unsigned long insn; unsigned long addr = regs->badaddr; - int i, len = 0, fp = 0; + int len = 0, fp = 0; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); @@ -586,9 +538,11 @@ int handle_misaligned_store(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_FPU) && fp) return -EOPNOTSUPP; - for (i = 0; i < len; i++) { - if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) + if (user_mode(regs)) { + if (copy_to_user((u8 __user *)addr, &val, len)) return -1; + } else { + memcpy((u8 *)addr, &val, len); } regs->epc = epc + INSN_LEN(insn); @@ -596,34 +550,116 @@ int handle_misaligned_store(struct pt_regs *regs) return 0; } -bool check_unaligned_access_emulated(int cpu) +int handle_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) { + if (get_insn(regs, epc, &insn)) + return -1; + + if (insn_is_vector(insn)) + return handle_vector_misaligned_load(regs); + } + + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_load(regs); + + return -1; +} + +int handle_misaligned_store(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_store(regs); + + return -1; +} + +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused) +{ + long *mas_ptr = this_cpu_ptr(&vector_misaligned_access); + unsigned long tmp_var; + + *mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + kernel_vector_begin(); + /* + * In pre-13.0.0 versions of GCC, vector registers cannot appear in + * the clobber list. This inline asm clobbers v0, but since we do not + * currently build the kernel with V enabled, the v0 clobber arg is not + * needed (as the compiler will not emit vector code itself). If the kernel + * is changed to build with V enabled, the clobber arg will need to be + * added here. + */ + __asm__ __volatile__ ( + ".balign 4\n\t" + ".option push\n\t" + ".option arch, +zve32x\n\t" + " vsetivli zero, 1, e16, m1, ta, ma\n\t" // Vectors of 16b + " vle16.v v0, (%[ptr])\n\t" // Load bytes + ".option pop\n\t" + : : [ptr] "r" ((u8 *)&tmp_var + 1)); + kernel_vector_end(); +} + +bool check_vector_unaligned_access_emulated_all_cpus(void) +{ + int cpu; + + if (!has_vector()) { + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + return false; + } + + schedule_on_each_cpu(check_vector_unaligned_access_emulated); + + for_each_online_cpu(cpu) + if (per_cpu(vector_misaligned_access, cpu) + == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return false; + + return true; +} +#else +bool check_vector_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +#endif + +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED + +static bool unaligned_ctl __read_mostly; + +void check_unaligned_access_emulated(struct work_struct *work __always_unused) { + int cpu = smp_processor_id(); long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); unsigned long tmp_var, tmp_val; - bool misaligned_emu_detected; - *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUS uses emulated misaligned access at boot time. If that changed * when hotplugging the new cpu, this is something we don't handle. */ - if (unlikely(unaligned_ctl && !misaligned_emu_detected)) { + if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) { pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); while (true) cpu_relax(); } - - return misaligned_emu_detected; } -void unaligned_emulation_finish(void) +bool check_unaligned_access_emulated_all_cpus(void) { int cpu; @@ -632,16 +668,24 @@ void unaligned_emulation_finish(void) * accesses emulated since tasks requesting such control can run on any * CPU. */ - for_each_present_cpu(cpu) { - if (per_cpu(misaligned_access_speed, cpu) != - RISCV_HWPROBE_MISALIGNED_EMULATED) { - return; - } - } + schedule_on_each_cpu(check_unaligned_access_emulated); + + for_each_online_cpu(cpu) + if (per_cpu(misaligned_access_speed, cpu) + != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED) + return false; + unaligned_ctl = true; + return true; } bool unaligned_ctl_available(void) { return unaligned_ctl; } +#else +bool check_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +#endif diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c new file mode 100644 index 000000000000..91f189cf1611 --- /dev/null +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/jump_label.h> +#include <linux/kthread.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/types.h> +#include <asm/cpufeature.h> +#include <asm/hwprobe.h> +#include <asm/vector.h> + +#include "copy-unaligned.h" + +#define MISALIGNED_ACCESS_JIFFIES_LG2 1 +#define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) +#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) + +DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS +static cpumask_t fast_misaligned_access; +static int check_unaligned_access(void *param) +{ + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page = param; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; + + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) + return 0; + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + /* Do a warmup. */ + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + preempt_disable(); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + mb(); + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + preempt_enable(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", + cpu); + + return 0; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow"); + + per_cpu(misaligned_access_speed, cpu) = speed; + + /* + * Set the value of fast_misaligned_access of a CPU. These operations + * are atomic to avoid race conditions. + */ + if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) + cpumask_set_cpu(cpu, &fast_misaligned_access); + else + cpumask_clear_cpu(cpu, &fast_misaligned_access); + + return 0; +} + +static void check_unaligned_access_nonboot_cpu(void *param) +{ + unsigned int cpu = smp_processor_id(); + struct page **pages = param; + + if (smp_processor_id() != 0) + check_unaligned_access(pages[cpu]); +} + +DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); + +static void modify_unaligned_access_branches(cpumask_t *mask, int weight) +{ + if (cpumask_weight(mask) == weight) + static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key); + else + static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key); +} + +static void set_unaligned_access_static_branches_except_cpu(int cpu) +{ + /* + * Same as set_unaligned_access_static_branches, except excludes the + * given CPU from the result. When a CPU is hotplugged into an offline + * state, this function is called before the CPU is set to offline in + * the cpumask, and thus the CPU needs to be explicitly excluded. + */ + + cpumask_t fast_except_me; + + cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); + cpumask_clear_cpu(cpu, &fast_except_me); + + modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); +} + +static void set_unaligned_access_static_branches(void) +{ + /* + * This will be called after check_unaligned_access_all_cpus so the + * result of unaligned access speed for all CPUs will be available. + * + * To avoid the number of online cpus changing between reading + * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be + * held before calling this function. + */ + + cpumask_t fast_and_online; + + cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); + + modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); +} + +static int lock_and_set_unaligned_access_static_branch(void) +{ + cpus_read_lock(); + set_unaligned_access_static_branches(); + cpus_read_unlock(); + + return 0; +} + +arch_initcall_sync(lock_and_set_unaligned_access_static_branch); + +static int riscv_online_cpu(unsigned int cpu) +{ + static struct page *buf; + + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) + goto exit; + + check_unaligned_access_emulated(NULL); + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); + +exit: + set_unaligned_access_static_branches(); + + return 0; +} + +static int riscv_offline_cpu(unsigned int cpu) +{ + set_unaligned_access_static_branches_except_cpu(cpu); + + return 0; +} + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int check_unaligned_access_speed_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return 0; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, riscv_offline_cpu); + +out: + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); + return 0; +} +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static int check_unaligned_access_speed_all_cpus(void) +{ + return 0; +} +#endif + +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +static void check_vector_unaligned_access(struct work_struct *work __always_unused) +{ + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return; + + page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!page) { + pr_warn("Allocation failure, not measuring vector misaligned performance\n"); + return; + } + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + + /* Do a warmup. */ + kernel_vector_begin(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + kernel_vector_end(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n", + cpu); + + return; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow"); + + per_cpu(vector_misaligned_access, cpu) = speed; +} + +static int riscv_online_cpu_vec(unsigned int cpu) +{ + if (!has_vector()) + return 0; + + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) + return 0; + + check_vector_unaligned_access_emulated(NULL); + check_vector_unaligned_access(NULL); + return 0; +} + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + schedule_on_each_cpu(check_vector_unaligned_access); + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu_vec, NULL); + + return 0; +} +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + return 0; +} +#endif + +static int check_unaligned_access_all_cpus(void) +{ + bool all_cpus_emulated, all_cpus_vec_unsupported; + + all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus(); + + if (!all_cpus_vec_unsupported && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"); + } + + if (!all_cpus_emulated) + return check_unaligned_access_speed_all_cpus(); + + return 0; +} + +arch_initcall(check_unaligned_access_all_cpus); diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 2cf76218a5bd..3ca3ae4277e1 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -23,29 +23,16 @@ enum vvar_pages { VVAR_NR_PAGES, }; -enum rv_vdso_map { - RV_VDSO_MAP_VVAR, - RV_VDSO_MAP_VDSO, -}; - #define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) -/* - * The vDSO data page. - */ -static union { - struct vdso_data data; - u8 page[PAGE_SIZE]; -} vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +static union vdso_data_store vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = vdso_data_store.data; struct __vdso_info { const char *name; const char *vdso_code_start; const char *vdso_code_end; unsigned long vdso_pages; - /* Data Mapping */ - struct vm_special_mapping *dm; /* Code Mapping */ struct vm_special_mapping *cm; }; @@ -98,6 +85,8 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page) return (struct vdso_data *)(vvar_page); } +static const struct vm_special_mapping rv_vvar_map; + /* * The vvar mapping contains data for a specific time namespace, so when a task * changes namespace we must unmap its vvar data for the old namespace. @@ -114,12 +103,8 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) mmap_read_lock(mm); for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, vdso_info.dm)) + if (vma_is_special_mapping(vma, &rv_vvar_map)) zap_vma_pages(vma); -#ifdef CONFIG_COMPAT - if (vma_is_special_mapping(vma, compat_vdso_info.dm)) - zap_vma_pages(vma); -#endif } mmap_read_unlock(mm); @@ -161,43 +146,34 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, return vmf_insert_pfn(vma, vmf->address, pfn); } -static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = { - [RV_VDSO_MAP_VVAR] = { - .name = "[vvar]", - .fault = vvar_fault, - }, - [RV_VDSO_MAP_VDSO] = { - .name = "[vdso]", - .mremap = vdso_mremap, - }, +static const struct vm_special_mapping rv_vvar_map = { + .name = "[vvar]", + .fault = vvar_fault, +}; + +static struct vm_special_mapping rv_vdso_map __ro_after_init = { + .name = "[vdso]", + .mremap = vdso_mremap, }; static struct __vdso_info vdso_info __ro_after_init = { .name = "vdso", .vdso_code_start = vdso_start, .vdso_code_end = vdso_end, - .dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR], - .cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO], + .cm = &rv_vdso_map, }; #ifdef CONFIG_COMPAT -static struct vm_special_mapping rv_compat_vdso_maps[] __ro_after_init = { - [RV_VDSO_MAP_VVAR] = { - .name = "[vvar]", - .fault = vvar_fault, - }, - [RV_VDSO_MAP_VDSO] = { - .name = "[vdso]", - .mremap = vdso_mremap, - }, +static struct vm_special_mapping rv_compat_vdso_map __ro_after_init = { + .name = "[vdso]", + .mremap = vdso_mremap, }; static struct __vdso_info compat_vdso_info __ro_after_init = { .name = "compat_vdso", .vdso_code_start = compat_vdso_start, .vdso_code_end = compat_vdso_end, - .dm = &rv_compat_vdso_maps[RV_VDSO_MAP_VVAR], - .cm = &rv_compat_vdso_maps[RV_VDSO_MAP_VDSO], + .cm = &rv_compat_vdso_map, }; #endif @@ -233,7 +209,7 @@ static int __setup_additional_pages(struct mm_struct *mm, } ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, - (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info->dm); + (VM_READ | VM_MAYREAD | VM_PFNMAP), &rv_vvar_map); if (IS_ERR(ret)) goto up_fail; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9b517fe1b8a8..9a1b555e8733 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -fno-builtin ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) @@ -37,20 +38,15 @@ endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) - -# Disable profiling and instrumentation for VDSO code -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n +CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Force dependency $(obj)/vdso.o: $(obj)/vdso.so # link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,vdsold) -LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \ + $(call if_changed,vdsold_and_check) +LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \ --build-id=sha1 --hash-style=both --eh-frame-hdr # strip rule for the .so file @@ -59,7 +55,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +gen-vdsosym := $(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ @@ -69,7 +65,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE # actual build commands # The DSO images are built using a special linker script # Make sure only to export the intended __vdso_xxx symbol offsets. -quiet_cmd_vdsold = VDSOLD $@ - cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ +quiet_cmd_vdsold_and_check = VDSOLD $@ + cmd_vdsold_and_check = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ - rm $@.tmp + rm $@.tmp && \ + $(cmd_vdso_check) diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index 1e926e4b5881..a158c029344f 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -17,7 +17,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, unsigned int flags) { const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_data *avd = &vd->arch_data; + const struct arch_vdso_time_data *avd = &vd->arch_data; bool all_cpus = !cpusetsize && !cpus; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; @@ -52,7 +52,7 @@ static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count, unsigned int flags) { const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_data *avd = &vd->arch_data; + const struct arch_vdso_time_data *avd = &vd->arch_data; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; unsigned char *c = (unsigned char *)cpus; diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S new file mode 100644 index 000000000000..d16f19f1b3b6 --- /dev/null +++ b/arch/riscv/kernel/vec-copy-unaligned.S @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2024 Rivos Inc. */ + +#include <linux/args.h> +#include <linux/linkage.h> +#include <asm/asm.h> + + .text + +#define WORD_EEW 32 + +#define WORD_SEW CONCATENATE(e, WORD_EEW) +#define VEC_L CONCATENATE(vle, WORD_EEW).v +#define VEC_S CONCATENATE(vle, WORD_EEW).v + +/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using word loads and stores. */ +/* Note: The size is truncated to a multiple of WORD_EEW */ +SYM_FUNC_START(__riscv_copy_vec_words_unaligned) + andi a4, a2, ~(WORD_EEW-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, WORD_SEW, m8, ta, ma + VEC_L v0, (a1) + VEC_S v0, (a0) + addi a0, a0, WORD_EEW + addi a1, a1, WORD_EEW + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_words_unaligned) + +/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using only byte accesses. */ +/* Note: The size is truncated to a multiple of 8 */ +SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned) + andi a4, a2, ~(8-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, e8, m8, ta, ma + vle8.v v0, (a1) + vse8.v v0, (a0) + addi a0, a0, 8 + addi a1, a1, 8 + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned) diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 6727d1d3b8f2..184f780c932d 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -33,7 +33,17 @@ int riscv_v_setup_vsize(void) { unsigned long this_vsize; - /* There are 32 vector registers with vlenb length. */ + /* + * There are 32 vector registers with vlenb length. + * + * If the thead,vlenb property was provided by the firmware, use that + * instead of probing the CSRs. + */ + if (thead_vlenb_of) { + riscv_v_vsize = thead_vlenb_of * 32; + return 0; + } + riscv_v_enable(); this_vsize = csr_read(CSR_VLENB) * 32; riscv_v_disable(); @@ -53,7 +63,7 @@ int riscv_v_setup_vsize(void) void __init riscv_v_setup_ctx_cache(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", @@ -66,7 +76,7 @@ void __init riscv_v_setup_ctx_cache(void) #endif } -static bool insn_is_vector(u32 insn_buf) +bool insn_is_vector(u32 insn_buf) { u32 opcode = insn_buf & __INSN_OPCODE_MASK; u32 width, csr; @@ -173,8 +183,11 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) u32 __user *epc = (u32 __user *)regs->epc; u32 insn = (u32)regs->badaddr; + if (!(has_vector() || has_xtheadvector())) + return false; + /* Do not handle if V is not supported, or disabled */ - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) + if (!riscv_v_vstate_ctrl_user_allowed()) return false; /* If V has been enabled then it is not the first-use trap */ @@ -213,7 +226,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; next = riscv_v_ctrl_get_next(tsk); @@ -235,7 +248,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) long riscv_v_vstate_ctrl_get_current(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK; @@ -246,7 +259,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK) @@ -284,7 +297,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) #ifdef CONFIG_SYSCTL -static struct ctl_table riscv_v_default_vstate_table[] = { +static const struct ctl_table riscv_v_default_vstate_table[] = { { .procname = "riscv_v_default_allow", .data = &riscv_v_implicit_uacc, @@ -296,7 +309,7 @@ static struct ctl_table riscv_v_default_vstate_table[] = { static int __init riscv_v_sysctl_init(void) { - if (has_vector()) + if (has_vector() || has_xtheadvector()) if (!register_sysctl("abi", riscv_v_default_vstate_table)) return -EINVAL; return 0; @@ -306,7 +319,7 @@ static int __init riscv_v_sysctl_init(void) static int __init riscv_v_sysctl_init(void) { return 0; } #endif /* ! CONFIG_SYSCTL */ -static int riscv_v_init(void) +static int __init riscv_v_init(void) { return riscv_v_sysctl_init(); } diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c new file mode 100644 index 000000000000..a31ff84740eb --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos, Inc + */ + +#include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> +#include <asm/vendor_extensions/thead.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + &riscv_isa_vendor_ext_list_andes, +#endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + &riscv_isa_vendor_ext_list_thead, +#endif +}; + +const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); + +/** + * __riscv_isa_vendor_extension_available() - Check whether given vendor + * extension is available or not. + * + * @cpu: check if extension is available on this cpu + * @vendor: vendor that the extension is a member of + * @bit: bit position of the desired extension + * Return: true or false + * + * NOTE: When cpu is -1, will check if extension is available on all cpus + */ +bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit) +{ + struct riscv_isavendorinfo *bmap; + struct riscv_isavendorinfo *cpu_bmap; + + switch (vendor) { + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + case ANDES_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; + break; + #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + case THEAD_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap; + break; + #endif + default: + return false; + } + + if (cpu != -1) + bmap = &cpu_bmap[cpu]; + + if (bit >= RISCV_ISA_VENDOR_EXT_MAX) + return false; + + return test_bit(bit, bmap->isa) ? true : false; +} +EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available); diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile new file mode 100644 index 000000000000..866414c81a9f --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/andes.c b/arch/riscv/kernel/vendor_extensions/andes.c new file mode 100644 index 000000000000..51f302b6d503 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/andes.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +/* All Andes vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_andes[] = { + __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_VENDOR_EXT_XANDESPMU), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_andes), + .ext_data = riscv_isa_vendor_ext_andes, +}; diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c new file mode 100644 index 000000000000..519dbf70710a --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> + +#include <linux/array_size.h> +#include <linux/cpumask.h> +#include <linux/types.h> + +/* All T-Head vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_thead[] = { + __RISCV_ISA_EXT_DATA(xtheadvector, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead), + .ext_data = riscv_isa_vendor_ext_thead, +}; + +void disable_xtheadvector(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap[cpu].isa); + + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap.isa); +} diff --git a/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c new file mode 100644 index 000000000000..2eba34011786 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/vendor_extensions/thead.h> +#include <asm/vendor_extensions/thead_hwprobe.h> +#include <asm/vendor_extensions/vendor_hwprobe.h> + +#include <linux/cpumask.h> +#include <linux/types.h> + +#include <uapi/asm/hwprobe.h> +#include <uapi/asm/vendor/thead.h> + +void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XTHEADVECTOR); + }); +} diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/vmcore_info.c index 8706736fd4e2..d5e448aa90e7 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/vmcore_info.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/crash_core.h> +#include <linux/vmcore_info.h> #include <linux/pagemap.h> void arch_crash_save_vmcoreinfo(void) @@ -8,7 +8,6 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_NUMBER(phys_ram_base); vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); - vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); #ifdef CONFIG_MMU VMCOREINFO_NUMBER(VA_BITS); @@ -20,6 +19,13 @@ void arch_crash_save_vmcoreinfo(void) #endif #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); +#ifdef CONFIG_XIP_KERNEL + /* TODO: Communicate with crash-utility developers on the information to + * export. The XIP case is more complicated, because the virtual-physical + * address offset depends on whether the address is in ROM or in RAM. + */ +#else vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", kernel_map.va_kernel_pa_offset); +#endif } diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 8c3daa1b0531..a7611789bad5 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -14,6 +14,7 @@ #include <asm/page.h> #include <asm/cache.h> #include <asm/thread_info.h> +#include <asm/set_memory.h> OUTPUT_ARCH(riscv) ENTRY(_start) @@ -65,10 +66,10 @@ SECTIONS * From this point, stuff is considered writable and will be copied to RAM */ __data_loc = ALIGN(PAGE_SIZE); /* location in file */ - . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */ + . = ALIGN(SECTION_ALIGN); /* location in memory */ #undef LOAD_OFFSET -#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK)) +#define LOAD_OFFSET (KERNEL_LINK_ADDR + _sdata - __data_loc) _sdata = .; /* Start of data section */ _data = .; |