diff options
Diffstat (limited to 'arch/riscv/kernel')
78 files changed, 4331 insertions, 1480 deletions
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index fee22a3d1b53..8d186bfced45 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -7,11 +7,12 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) -ifdef CONFIG_KEXEC +ifdef CONFIG_KEXEC_CORE AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) endif @@ -19,17 +20,26 @@ endif ifdef CONFIG_RISCV_ALTERNATIVE_EARLY CFLAGS_alternative.o := -mcmodel=medany CFLAGS_cpufeature.o := -mcmodel=medany +CFLAGS_sbi_ecall.o := -mcmodel=medany ifdef CONFIG_FTRACE CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_sbi_ecall.o = $(CC_FLAGS_FTRACE) endif ifdef CONFIG_RELOCATABLE CFLAGS_alternative.o += -fno-pie CFLAGS_cpufeature.o += -fno-pie +CFLAGS_sbi_ecall.o += -fno-pie endif ifdef CONFIG_KASAN KASAN_SANITIZE_alternative.o := n KASAN_SANITIZE_cpufeature.o := n +KASAN_SANITIZE_sbi_ecall.o := n +endif +ifdef CONFIG_FORTIFY_SOURCE +CFLAGS_alternative.o += -D__NO_FORTIFY +CFLAGS_cpufeature.o += -D__NO_FORTIFY +CFLAGS_sbi_ecall.o += -D__NO_FORTIFY endif endif @@ -38,7 +48,6 @@ extra-y += vmlinux.lds obj-y += head.o obj-y += soc.o obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o -obj-y += copy-unaligned.o obj-y += cpu.o obj-y += cpufeature.o obj-y += entry.o @@ -46,23 +55,33 @@ obj-y += irq.o obj-y += process.o obj-y += ptrace.o obj-y += reset.o +obj-y += return_address.o obj-y += setup.o obj-y += signal.o obj-y += syscall_table.o obj-y += sys_riscv.o +obj-y += sys_hwprobe.o obj-y += time.o obj-y += traps.o obj-y += riscv_ksyms.o obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o +obj-y += vendor_extensions.o +obj-y += vendor_extensions/ obj-y += probes/ obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o +obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o +obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o +obj-$(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS) += vec-copy-unaligned.o + obj-$(CONFIG_FPU) += fpu.o +obj-$(CONFIG_FPU) += kernel_mode_fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o +obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += cpu_ops.o @@ -79,17 +98,18 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o -obj-$(CONFIG_RISCV_SBI) += sbi.o +obj-$(CONFIG_RISCV_SBI) += sbi.o sbi_ecall.o ifeq ($(CONFIG_RISCV_SBI), y) obj-$(CONFIG_SMP) += sbi-ipi.o obj-$(CONFIG_SMP) += cpu_ops_sbi.o endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o +obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -obj-$(CONFIG_CRASH_CORE) += crash_core.o +obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o @@ -102,3 +122,6 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o +obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o + +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o diff --git a/arch/riscv/kernel/Makefile.syscalls b/arch/riscv/kernel/Makefile.syscalls new file mode 100644 index 000000000000..9668fd1faf60 --- /dev/null +++ b/arch/riscv/kernel/Makefile.syscalls @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +syscall_abis_32 += riscv memfd_secret +syscall_abis_64 += riscv rlimit memfd_secret diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index e619edc8b0cc..2fd29695a788 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -17,7 +17,9 @@ #include <linux/efi.h> #include <linux/io.h> #include <linux/memblock.h> +#include <linux/of_fdt.h> #include <linux/pci.h> +#include <linux/serial_core.h> int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -131,7 +133,7 @@ void __init acpi_boot_table_init(void) if (param_acpi_off || (!param_acpi_on && !param_acpi_force && efi.acpi20 == EFI_INVALID_TABLE_ADDR)) - return; + goto done; /* * ACPI is disabled at this point. Enable it in order to parse @@ -151,6 +153,14 @@ void __init acpi_boot_table_init(void) if (!param_acpi_force) disable_acpi(); } + +done: + if (acpi_disabled) { + if (earlycon_acpi_spcr_enable) + early_init_dt_scan_chosen_stdout(); + } else { + acpi_parse_spcr(earlycon_acpi_spcr_enable, true); + } } static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end) @@ -191,11 +201,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) return &cpu_madt_rintc[cpu]; } -u32 get_acpi_id_for_cpu(int cpu) -{ - return acpi_cpu_get_madt_rintc(cpu)->uid; -} - /* * __acpi_map_table() will be called before paging_init(), so early_ioremap() * or early_memremap() should be called here to for ACPI table mapping. @@ -205,7 +210,7 @@ void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size) if (!size) return NULL; - return early_ioremap(phys, size); + return early_memremap(phys, size); } void __init __acpi_unmap_table(void __iomem *map, unsigned long size) @@ -213,7 +218,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) if (!map || !size) return; - early_iounmap(map, size); + early_memunmap(map, size); } void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) @@ -306,29 +311,26 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) #ifdef CONFIG_PCI /* - * These interfaces are defined just to enable building ACPI core. - * TODO: Update it with actual implementation when external interrupt - * controller support is added in RISC-V ACPI. + * raw_pci_read/write - Platform-specific PCI config space access. */ -int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 *val) +int raw_pci_read(unsigned int domain, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *val) { - return PCIBIOS_DEVICE_NOT_FOUND; -} + struct pci_bus *b = pci_find_bus(domain, bus); -int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 val) -{ - return PCIBIOS_DEVICE_NOT_FOUND; + if (!b) + return PCIBIOS_DEVICE_NOT_FOUND; + return b->ops->read(b, devfn, reg, len, val); } -int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) +int raw_pci_write(unsigned int domain, unsigned int bus, + unsigned int devfn, int reg, int len, u32 val) { - return -1; -} + struct pci_bus *b = pci_find_bus(domain, bus); -struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) -{ - return NULL; + if (!b) + return PCIBIOS_DEVICE_NOT_FOUND; + return b->ops->write(b, devfn, reg, len, val); } + #endif /* CONFIG_PCI */ diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c new file mode 100644 index 000000000000..130769e3a99c --- /dev/null +++ b/arch/riscv/kernel/acpi_numa.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACPI 6.6 based NUMA setup for RISCV + * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2016, Linaro Ltd. + * Author: Hanjun Guo <hanjun.guo@linaro.org> + * Copyright (C) 2024 Intel Corporation. + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: NUMA: " fmt + +#include <linux/acpi.h> +#include <linux/bitmap.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/topology.h> + +#include <asm/numa.h> + +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; + +static int __init acpi_numa_get_nid(unsigned int cpu) +{ + return acpi_early_node_map[cpu]; +} + +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + +static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_rintc_affinity *pa; + int cpu, pxm, node; + + if (srat_disabled()) + return -EINVAL; + + pa = (struct acpi_srat_rintc_affinity *)header; + if (!pa) + return -EINVAL; + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return 0; + + pxm = pa->proximity_domain; + node = pxm_to_node(pxm); + + /* + * If we can't map the UID to a logical cpu this + * means that the UID is not part of possible cpus + * so we do not need a NUMA mapping for it, skip + * the SRAT entry and keep parsing. + */ + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); + if (cpu < 0) + return 0; + + acpi_early_node_map[cpu] = node; + pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm, + cpuid_to_hartid_map(cpu), node); + + return 0; +} + +void __init acpi_map_cpus_to_nodes(void) +{ + int i; + + /* + * In ACPI, SMP and CPU NUMA information is provided in separate + * static tables, namely the MADT and the SRAT. + * + * Thus, it is simpler to first create the cpu logical map through + * an MADT walk and then map the logical cpus to their node ids + * as separate steps. + */ + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_RINTC_AFFINITY, acpi_parse_rintc_pxm, 0); + + for (i = 0; i < nr_cpu_ids; i++) + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); +} + +/* Callback for Proximity Domain -> logical node ID mapping */ +void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + + if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) { + pr_err("SRAT: Invalid SRAT header length: %d\n", pa->header.length); + bad_srat(); + return; + } + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return; + + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE) { + pr_err("SRAT: Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + node_set(node, numa_nodes_parsed); +} diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c index 319a1da0358b..7eb3cb1215c6 100644 --- a/arch/riscv/kernel/alternative.c +++ b/arch/riscv/kernel/alternative.c @@ -18,7 +18,7 @@ #include <asm/sbi.h> #include <asm/csr.h> #include <asm/insn.h> -#include <asm/patch.h> +#include <asm/text-patching.h> struct cpu_manufacturer_info_t { unsigned long vendor_id; @@ -43,7 +43,7 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info switch (cpu_mfr_info->vendor_id) { #ifdef CONFIG_ERRATA_ANDES - case ANDESTECH_VENDOR_ID: + case ANDES_VENDOR_ID: cpu_mfr_info->patch_func = andes_errata_patch_func; break; #endif diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index a03129f40c46..e89455a6a0e5 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -4,11 +4,10 @@ * Copyright (C) 2017 SiFive */ -#define GENERATING_ASM_OFFSETS - #include <linux/kbuild.h> #include <linux/mm.h> #include <linux/sched.h> +#include <linux/ftrace.h> #include <linux/suspend.h> #include <asm/kvm_host.h> #include <asm/thread_info.h> @@ -35,6 +34,8 @@ void asm_offsets(void) OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]); OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]); OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); + + OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); @@ -42,6 +43,11 @@ void asm_offsets(void) #ifdef CONFIG_SHADOW_CALL_STACK OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp); #endif +#ifdef CONFIG_64BIT + OFFSET(TASK_TI_A0, task_struct, thread_info.a0); + OFFSET(TASK_TI_A1, task_struct, thread_info.a1); + OFFSET(TASK_TI_A2, task_struct, thread_info.a2); +#endif OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); @@ -488,4 +494,21 @@ void asm_offsets(void) DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); OFFSET(STACKFRAME_FP, stackframe, fp); OFFSET(STACKFRAME_RA, stackframe, ra); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS + DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN)); + DEFINE(FREGS_EPC, offsetof(struct __arch_ftrace_regs, epc)); + DEFINE(FREGS_RA, offsetof(struct __arch_ftrace_regs, ra)); + DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); + DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0)); + DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1)); + DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0)); + DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1)); + DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2)); + DEFINE(FREGS_A3, offsetof(struct __arch_ftrace_regs, a3)); + DEFINE(FREGS_A4, offsetof(struct __arch_ftrace_regs, a4)); + DEFINE(FREGS_A5, offsetof(struct __arch_ftrace_regs, a5)); + DEFINE(FREGS_A6, offsetof(struct __arch_ftrace_regs, a6)); + DEFINE(FREGS_A7, offsetof(struct __arch_ftrace_regs, a7)); +#endif } diff --git a/arch/riscv/kernel/bugs.c b/arch/riscv/kernel/bugs.c new file mode 100644 index 000000000000..3655fe7d678c --- /dev/null +++ b/arch/riscv/kernel/bugs.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/sprintf.h> + +#include <asm/bugs.h> +#include <asm/vendor_extensions/thead.h> + +static enum mitigation_state ghostwrite_state; + +void ghostwrite_set_vulnerable(void) +{ + ghostwrite_state = VULNERABLE; +} + +/* + * Vendor extension alternatives will use the value set at the time of boot + * alternative patching, thus this must be called before boot alternatives are + * patched (and after extension probing) to be effective. + * + * Returns true if mitgated, false otherwise. + */ +bool ghostwrite_enable_mitigation(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR) && + ghostwrite_state == VULNERABLE && !cpu_mitigations_off()) { + disable_xtheadvector(); + ghostwrite_state = MITIGATED; + return true; + } + + return false; +} + +enum mitigation_state ghostwrite_get_state(void) +{ + return ghostwrite_state; +} + +ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) { + switch (ghostwrite_state) { + case UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case MITIGATED: + return sprintf(buf, "Mitigation: xtheadvector disabled\n"); + case VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } + } else { + return sprintf(buf, "Not affected\n"); + } +} diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 09e9b88110d1..26b085dbdd07 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -3,6 +3,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/acpi.h> #include <linux/cpu.h> #include <linux/of.h> #include <asm/cacheinfo.h> @@ -64,27 +65,55 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type) } static void ci_leaf_init(struct cacheinfo *this_leaf, - struct device_node *node, enum cache_type type, unsigned int level) { this_leaf->level = level; this_leaf->type = type; } +int init_cache_level(unsigned int cpu) +{ + return init_of_cache_level(cpu); +} + int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; - struct device_node *np = of_cpu_device_node_get(cpu); - struct device_node *prev = NULL; + struct device_node *np, *prev; int levels = 1, level = 1; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + if (!acpi_disabled) { + int ret, fw_levels, split_levels; + + ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels); + if (ret) + return ret; + + BUG_ON((split_levels > fw_levels) || + (split_levels + fw_levels > this_cpu_ci->num_leaves)); + + for (; level <= this_cpu_ci->num_levels; level++) { + if (level <= split_levels) { + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + } else { + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + } + } + return 0; + } + + np = of_cpu_device_node_get(cpu); + if (!np) + return -ENOENT; + + if (of_property_present(np, "cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + if (of_property_present(np, "i-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (of_property_present(np, "d-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); prev = np; while ((np = of_find_next_cache_node(np))) { @@ -96,12 +125,12 @@ int populate_cache_leaves(unsigned int cpu) break; if (level <= levels) break; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + if (of_property_present(np, "cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + if (of_property_present(np, "i-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (of_property_present(np, "d-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); levels = level; } of_node_put(np); diff --git a/arch/riscv/kernel/cfi.c b/arch/riscv/kernel/cfi.c index 820158d7a291..64bdd3e1ab8c 100644 --- a/arch/riscv/kernel/cfi.c +++ b/arch/riscv/kernel/cfi.c @@ -4,7 +4,7 @@ * * Copyright (C) 2023 Google LLC */ -#include <asm/cfi.h> +#include <linux/cfi.h> #include <asm/insn.h> /* @@ -75,3 +75,56 @@ enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) return report_cfi_failure(regs, regs->epc, &target, type); } + +#ifdef CONFIG_CFI_CLANG +struct bpf_insn; + +/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */ +extern unsigned int __bpf_prog_runX(const void *ctx, + const struct bpf_insn *insn); + +/* + * Force a reference to the external symbol so the compiler generates + * __kcfi_typid. + */ +__ADDRESSABLE(__bpf_prog_runX); + +/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */ +asm ( +" .pushsection .data..ro_after_init,\"aw\",@progbits \n" +" .type cfi_bpf_hash,@object \n" +" .globl cfi_bpf_hash \n" +" .p2align 2, 0x0 \n" +"cfi_bpf_hash: \n" +" .word __kcfi_typeid___bpf_prog_runX \n" +" .size cfi_bpf_hash, 4 \n" +" .popsection \n" +); + +/* Must match bpf_callback_t */ +extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64); + +__ADDRESSABLE(__bpf_callback_fn); + +/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */ +asm ( +" .pushsection .data..ro_after_init,\"aw\",@progbits \n" +" .type cfi_bpf_subprog_hash,@object \n" +" .globl cfi_bpf_subprog_hash \n" +" .p2align 2, 0x0 \n" +"cfi_bpf_subprog_hash: \n" +" .word __kcfi_typeid___bpf_callback_fn \n" +" .size cfi_bpf_subprog_hash, 4 \n" +" .popsection \n" +); + +u32 cfi_get_func_hash(void *func) +{ + u32 hash; + + if (get_kernel_nofault(hash, func - cfi_get_offset())) + return 0; + + return hash; +} +#endif diff --git a/arch/riscv/kernel/compat_syscall_table.c b/arch/riscv/kernel/compat_syscall_table.c index ad7f2d712f5f..e884c069e88f 100644 --- a/arch/riscv/kernel/compat_syscall_table.c +++ b/arch/riscv/kernel/compat_syscall_table.c @@ -8,9 +8,11 @@ #include <asm-generic/syscalls.h> #include <asm/syscall.h> +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) + #undef __SYSCALL #define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *); -#include <asm/unistd.h> +#include <asm/syscall_table_32.h> #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = __riscv_##call, @@ -19,5 +21,5 @@ asmlinkage long compat_sys_rt_sigreturn(void); void * const compat_sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall, -#include <asm/unistd.h> +#include <asm/syscall_table_32.h> }; diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 62fa393b2eb2..24e37d1ef7ec 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -34,12 +34,6 @@ obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso)) obj-y += compat_vdso.o CPPFLAGS_compat_vdso.lds += -P -C -DCOMPAT_VDSO -U$(ARCH) -# Disable profiling and instrumentation for VDSO code -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n - # Force dependency $(obj)/compat_vdso.o: $(obj)/compat_vdso.so @@ -58,7 +52,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # Generate VDSO offsets using helper script -gen-compat_vdsosym := $(srctree)/$(src)/gen_compat_vdso_offsets.sh +gen-compat_vdsosym := $(src)/gen_compat_vdso_offsets.sh quiet_cmd_compat_vdsosym = VDSOSYM $@ cmd_compat_vdsosym = $(NM) $< | $(gen-compat_vdsosym) | LC_ALL=C sort > $@ @@ -74,5 +68,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@ rm $@.tmp # actual build commands -quiet_cmd_compat_vdsoas = VDSOAS $@ +quiet_cmd_compat_vdsoas = VDSOAS $@ cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< diff --git a/arch/riscv/kernel/copy-unaligned.h b/arch/riscv/kernel/copy-unaligned.h index e3d70d35b708..85d4d11450cb 100644 --- a/arch/riscv/kernel/copy-unaligned.h +++ b/arch/riscv/kernel/copy-unaligned.h @@ -10,4 +10,9 @@ void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size); void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size); +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +void __riscv_copy_vec_words_unaligned(void *dst, const void *src, size_t size); +void __riscv_copy_vec_bytes_unaligned(void *dst, const void *src, size_t size); +#endif + #endif /* __RISCV_KERNEL_COPY_UNALIGNED_H */ diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index 457a18efcb11..a1e38ecfc8be 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -18,7 +18,7 @@ bool cpu_has_hotplug(unsigned int cpu) { - if (cpu_ops[cpu]->cpu_stop) + if (cpu_ops->cpu_stop) return true; return false; @@ -29,25 +29,18 @@ bool cpu_has_hotplug(unsigned int cpu) */ int __cpu_disable(void) { - int ret = 0; unsigned int cpu = smp_processor_id(); - if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_stop) + if (!cpu_ops->cpu_stop) return -EOPNOTSUPP; - if (cpu_ops[cpu]->cpu_disable) - ret = cpu_ops[cpu]->cpu_disable(cpu); - - if (ret) - return ret; - remove_cpu_topology(cpu); numa_remove_cpu(cpu); set_cpu_online(cpu, false); riscv_ipi_disable(); irq_migrate_all_off_this_cpu(); - return ret; + return 0; } #ifdef CONFIG_HOTPLUG_CPU @@ -62,10 +55,10 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) pr_notice("CPU%u: off\n", cpu); /* Verify from the firmware if the cpu is really stopped*/ - if (cpu_ops[cpu]->cpu_is_stopped) - ret = cpu_ops[cpu]->cpu_is_stopped(cpu); + if (cpu_ops->cpu_is_stopped) + ret = cpu_ops->cpu_is_stopped(cpu); if (ret) - pr_warn("CPU%d may not have stopped: %d\n", cpu, ret); + pr_warn("CPU%u may not have stopped: %d\n", cpu, ret); } /* @@ -77,7 +70,7 @@ void __noreturn arch_cpu_idle_dead(void) cpuhp_ap_report_dead(); - cpu_ops[smp_processor_id()]->cpu_stop(); + cpu_ops->cpu_stop(); /* It should never reach here */ BUG(); } diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index d11d6320fb0d..f6b13e9f5e6c 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -16,6 +16,7 @@ #include <asm/sbi.h> #include <asm/smp.h> #include <asm/pgtable.h> +#include <asm/vendor_extensions.h> bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { @@ -139,6 +140,34 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) return -1; } +unsigned long __init riscv_get_marchid(void) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->marchid = csr_read(CSR_MARCHID); +#else + ci->marchid = 0; +#endif + return ci->marchid; +} + +unsigned long __init riscv_get_mvendorid(void) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->mvendorid = csr_read(CSR_MVENDORID); +#else + ci->mvendorid = 0; +#endif + return ci->mvendorid; +} + DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); unsigned long riscv_cached_mvendorid(unsigned int cpu_id) @@ -170,12 +199,16 @@ static int riscv_cpuinfo_starting(unsigned int cpu) struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); #if IS_ENABLED(CONFIG_RISCV_SBI) - ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); - ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); + if (!ci->mvendorid) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); + if (!ci->marchid) + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); ci->mimpid = sbi_spec_is_0_1() ? 0 : sbi_get_mimpid(); #elif IS_ENABLED(CONFIG_RISCV_M_MODE) - ci->mvendorid = csr_read(CSR_MVENDORID); - ci->marchid = csr_read(CSR_MARCHID); + if (!ci->mvendorid) + ci->mvendorid = csr_read(CSR_MVENDORID); + if (!ci->marchid) + ci->marchid = csr_read(CSR_MARCHID); ci->mimpid = csr_read(CSR_MIMPID); #else ci->mvendorid = 0; @@ -203,7 +236,33 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) +#define ALL_CPUS -1 + +static void print_vendor_isa(struct seq_file *f, int cpu) +{ + struct riscv_isavendorinfo *vendor_bitmap; + struct riscv_isa_vendor_ext_data_list *ext_list; + const struct riscv_isa_ext_data *ext_data; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + ext_list = riscv_isa_vendor_ext_list[i]; + ext_data = riscv_isa_vendor_ext_list[i]->ext_data; + + if (cpu == ALL_CPUS) + vendor_bitmap = &ext_list->all_harts_isa_bitmap; + else + vendor_bitmap = &ext_list->per_hart_isa_bitmap[cpu]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + if (!__riscv_isa_extension_available(vendor_bitmap->isa, ext_data[j].id)) + continue; + + seq_printf(f, "_%s", ext_data[j].name); + } + } +} + +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap, int cpu) { if (IS_ENABLED(CONFIG_32BIT)) @@ -222,6 +281,8 @@ static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) seq_printf(f, "%s", riscv_isa_ext[i].name); } + print_vendor_isa(f, cpu); + seq_puts(f, "\n"); } @@ -284,7 +345,7 @@ static int c_show(struct seq_file *m, void *v) * line. */ seq_puts(m, "isa\t\t: "); - print_isa(m, NULL); + print_isa(m, NULL, ALL_CPUS); print_mmu(m); if (acpi_disabled) { @@ -306,7 +367,7 @@ static int c_show(struct seq_file *m, void *v) * additional extensions not present across all harts. */ seq_puts(m, "hart isa\t: "); - print_isa(m, hart_isa[cpu_id].isa); + print_isa(m, hart_isa[cpu_id].isa, cpu_id); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c index eb479a88a954..6a8bd8f4db07 100644 --- a/arch/riscv/kernel/cpu_ops.c +++ b/arch/riscv/kernel/cpu_ops.c @@ -13,25 +13,21 @@ #include <asm/sbi.h> #include <asm/smp.h> -const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; +const struct cpu_operations *cpu_ops __ro_after_init = &cpu_ops_spinwait; extern const struct cpu_operations cpu_ops_sbi; #ifndef CONFIG_RISCV_BOOT_SPINWAIT const struct cpu_operations cpu_ops_spinwait = { - .name = "", - .cpu_prepare = NULL, .cpu_start = NULL, }; #endif -void __init cpu_set_ops(int cpuid) +void __init cpu_set_ops(void) { #if IS_ENABLED(CONFIG_RISCV_SBI) if (sbi_probe_extension(SBI_EXT_HSM)) { - if (!cpuid) - pr_info("SBI HSM extension detected\n"); - cpu_ops[cpuid] = &cpu_ops_sbi; - } else + pr_info("SBI HSM extension detected\n"); + cpu_ops = &cpu_ops_sbi; + } #endif - cpu_ops[cpuid] = &cpu_ops_spinwait; } diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index efa0f0816634..e6fbaaf54956 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -72,30 +72,14 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) /* Make sure tidle is updated */ smp_mb(); bdata->task_ptr = tidle; - bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE; + bdata->stack_ptr = task_pt_regs(tidle); /* Make sure boot data is updated */ smp_mb(); hsm_data = __pa(bdata); return sbi_hsm_hart_start(hartid, boot_addr, hsm_data); } -static int sbi_cpu_prepare(unsigned int cpuid) -{ - if (!cpu_ops_sbi.cpu_start) { - pr_err("cpu start method not defined for CPU [%d]\n", cpuid); - return -ENODEV; - } - return 0; -} - #ifdef CONFIG_HOTPLUG_CPU -static int sbi_cpu_disable(unsigned int cpuid) -{ - if (!cpu_ops_sbi.cpu_stop) - return -EOPNOTSUPP; - return 0; -} - static void sbi_cpu_stop(void) { int ret; @@ -118,11 +102,8 @@ static int sbi_cpu_is_stopped(unsigned int cpuid) #endif const struct cpu_operations cpu_ops_sbi = { - .name = "sbi", - .cpu_prepare = sbi_cpu_prepare, .cpu_start = sbi_cpu_start, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = sbi_cpu_disable, .cpu_stop = sbi_cpu_stop, .cpu_is_stopped = sbi_cpu_is_stopped, #endif diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c index d98d19226b5f..24869eb88908 100644 --- a/arch/riscv/kernel/cpu_ops_spinwait.c +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -34,20 +34,10 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid, /* Make sure tidle is updated */ smp_mb(); - WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], - task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle)); WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); } -static int spinwait_cpu_prepare(unsigned int cpuid) -{ - if (!cpu_ops_spinwait.cpu_start) { - pr_err("cpu start method not defined for CPU [%d]\n", cpuid); - return -ENODEV; - } - return 0; -} - static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle) { /* @@ -64,7 +54,5 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle) } const struct cpu_operations cpu_ops_spinwait = { - .name = "spinwait", - .cpu_prepare = spinwait_cpu_prepare, .cpu_start = spinwait_cpu_start, }; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index b3785ffc1570..40ac72e407b6 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -8,6 +8,7 @@ #include <linux/acpi.h> #include <linux/bitmap.h> +#include <linux/cpu.h> #include <linux/cpuhotplug.h> #include <linux/ctype.h> #include <linux/log2.h> @@ -16,22 +17,21 @@ #include <linux/of.h> #include <asm/acpi.h> #include <asm/alternative.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwcap.h> +#include <asm/text-patching.h> #include <asm/hwprobe.h> -#include <asm/patch.h> #include <asm/processor.h> +#include <asm/sbi.h> #include <asm/vector.h> - -#include "copy-unaligned.h" +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) -#define MISALIGNED_ACCESS_JIFFIES_LG2 1 -#define MISALIGNED_BUFFER_SIZE 0x4000 -#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) -#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) +static bool any_cpu_has_zicboz; unsigned long elf_hwcap __read_mostly; @@ -41,8 +41,7 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. */ struct riscv_isainfo hart_isa[NR_CPUS]; -/* Performance information */ -DEFINE_PER_CPU(long, misaligned_access_speed); +u32 thead_vlenb_of; /** * riscv_isa_extension_base() - Get base extension word @@ -70,7 +69,7 @@ EXPORT_SYMBOL_GPL(riscv_isa_extension_base); * * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used. */ -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit) +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit) { const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa; @@ -81,38 +80,205 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit) } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); -static bool riscv_isa_extension_check(int id) +static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) { - switch (id) { - case RISCV_ISA_EXT_ZICBOM: - if (!riscv_cbom_block_size) { - pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n"); - return false; - } else if (!is_power_of_2(riscv_cbom_block_size)) { - pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); - return false; - } - return true; - case RISCV_ISA_EXT_ZICBOZ: - if (!riscv_cboz_block_size) { - pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n"); - return false; - } else if (!is_power_of_2(riscv_cboz_block_size)) { - pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); - return false; - } - return true; + if (!riscv_cbom_block_size) { + pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n"); + return -EINVAL; } + if (!is_power_of_2(riscv_cbom_block_size)) { + pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); + return -EINVAL; + } + return 0; +} + +static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!riscv_cboz_block_size) { + pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n"); + return -EINVAL; + } + if (!is_power_of_2(riscv_cboz_block_size)) { + pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); + return -EINVAL; + } + any_cpu_has_zicboz = true; + return 0; +} + +static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA)) + return 0; - return true; + return -EPROBE_DEFER; } +static int riscv_ext_zcd_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) + return 0; -#define __RISCV_ISA_EXT_DATA(_name, _id) { \ - .name = #_name, \ - .property = #_name, \ - .id = _id, \ + return -EPROBE_DEFER; } +static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return -EINVAL; + + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f)) + return 0; + + return -EPROBE_DEFER; +} + +static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + /* SVADE has already been detected, use SVADE only */ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_SVADE)) + return -EOPNOTSUPP; + + return 0; +} + +static const unsigned int riscv_zk_bundled_exts[] = { + RISCV_ISA_EXT_ZBKB, + RISCV_ISA_EXT_ZBKC, + RISCV_ISA_EXT_ZBKX, + RISCV_ISA_EXT_ZKND, + RISCV_ISA_EXT_ZKNE, + RISCV_ISA_EXT_ZKR, + RISCV_ISA_EXT_ZKT, +}; + +static const unsigned int riscv_zkn_bundled_exts[] = { + RISCV_ISA_EXT_ZBKB, + RISCV_ISA_EXT_ZBKC, + RISCV_ISA_EXT_ZBKX, + RISCV_ISA_EXT_ZKND, + RISCV_ISA_EXT_ZKNE, + RISCV_ISA_EXT_ZKNH, +}; + +static const unsigned int riscv_zks_bundled_exts[] = { + RISCV_ISA_EXT_ZBKB, + RISCV_ISA_EXT_ZBKC, + RISCV_ISA_EXT_ZKSED, + RISCV_ISA_EXT_ZKSH +}; + +#define RISCV_ISA_EXT_ZVKN \ + RISCV_ISA_EXT_ZVKNED, \ + RISCV_ISA_EXT_ZVKNHB, \ + RISCV_ISA_EXT_ZVKB, \ + RISCV_ISA_EXT_ZVKT + +static const unsigned int riscv_zvkn_bundled_exts[] = { + RISCV_ISA_EXT_ZVKN +}; + +static const unsigned int riscv_zvknc_bundled_exts[] = { + RISCV_ISA_EXT_ZVKN, + RISCV_ISA_EXT_ZVBC +}; + +static const unsigned int riscv_zvkng_bundled_exts[] = { + RISCV_ISA_EXT_ZVKN, + RISCV_ISA_EXT_ZVKG +}; + +#define RISCV_ISA_EXT_ZVKS \ + RISCV_ISA_EXT_ZVKSED, \ + RISCV_ISA_EXT_ZVKSH, \ + RISCV_ISA_EXT_ZVKB, \ + RISCV_ISA_EXT_ZVKT + +static const unsigned int riscv_zvks_bundled_exts[] = { + RISCV_ISA_EXT_ZVKS +}; + +static const unsigned int riscv_zvksc_bundled_exts[] = { + RISCV_ISA_EXT_ZVKS, + RISCV_ISA_EXT_ZVBC +}; + +static const unsigned int riscv_zvksg_bundled_exts[] = { + RISCV_ISA_EXT_ZVKS, + RISCV_ISA_EXT_ZVKG +}; + +static const unsigned int riscv_zvbb_exts[] = { + RISCV_ISA_EXT_ZVKB +}; + +#define RISCV_ISA_EXT_ZVE64F_IMPLY_LIST \ + RISCV_ISA_EXT_ZVE64X, \ + RISCV_ISA_EXT_ZVE32F, \ + RISCV_ISA_EXT_ZVE32X + +#define RISCV_ISA_EXT_ZVE64D_IMPLY_LIST \ + RISCV_ISA_EXT_ZVE64F, \ + RISCV_ISA_EXT_ZVE64F_IMPLY_LIST + +#define RISCV_ISA_EXT_V_IMPLY_LIST \ + RISCV_ISA_EXT_ZVE64D, \ + RISCV_ISA_EXT_ZVE64D_IMPLY_LIST + +static const unsigned int riscv_zve32f_exts[] = { + RISCV_ISA_EXT_ZVE32X +}; + +static const unsigned int riscv_zve64f_exts[] = { + RISCV_ISA_EXT_ZVE64F_IMPLY_LIST +}; + +static const unsigned int riscv_zve64d_exts[] = { + RISCV_ISA_EXT_ZVE64D_IMPLY_LIST +}; + +static const unsigned int riscv_v_exts[] = { + RISCV_ISA_EXT_V_IMPLY_LIST +}; + +static const unsigned int riscv_zve64x_exts[] = { + RISCV_ISA_EXT_ZVE32X, + RISCV_ISA_EXT_ZVE64X +}; + +/* + * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V + * privileged ISA, the existence of the CSRs is implied by any extension which + * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the + * existence of the CSR, and treat it as a subset of those other extensions. + */ +static const unsigned int riscv_xlinuxenvcfg_exts[] = { + RISCV_ISA_EXT_XLINUXENVCFG +}; + +/* + * Zc* spec states that: + * - C always implies Zca + * - C+F implies Zcf (RV32 only) + * - C+D implies Zcd + * + * These extensions will be enabled and then validated depending on the + * availability of F/D RV32. + */ +static const unsigned int riscv_c_exts[] = { + RISCV_ISA_EXT_ZCA, + RISCV_ISA_EXT_ZCF, + RISCV_ISA_EXT_ZCD, +}; + /* * The canonical order of ISA extension names in the ISA string is defined in * chapter 27 of the unprivileged specification. @@ -159,38 +325,177 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f), __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d), __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q), - __RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c), - __RISCV_ISA_EXT_DATA(b, RISCV_ISA_EXT_b), - __RISCV_ISA_EXT_DATA(k, RISCV_ISA_EXT_k), - __RISCV_ISA_EXT_DATA(j, RISCV_ISA_EXT_j), - __RISCV_ISA_EXT_DATA(p, RISCV_ISA_EXT_p), - __RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v), + __RISCV_ISA_EXT_SUPERSET(c, RISCV_ISA_EXT_c, riscv_c_exts), + __RISCV_ISA_EXT_SUPERSET(v, RISCV_ISA_EXT_v, riscv_v_exts), __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), - __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), - __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, + riscv_ext_zicbom_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, + riscv_ext_zicboz_validate), + __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), + __RISCV_ISA_EXT_DATA(zihintntl, RISCV_ISA_EXT_ZIHINTNTL), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), + __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP), + __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), + __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), + __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), + __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), + __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH), + __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN), + __RISCV_ISA_EXT_DATA(zca, RISCV_ISA_EXT_ZCA), + __RISCV_ISA_EXT_DATA_VALIDATE(zcb, RISCV_ISA_EXT_ZCB, riscv_ext_zca_depends), + __RISCV_ISA_EXT_DATA_VALIDATE(zcd, RISCV_ISA_EXT_ZCD, riscv_ext_zcd_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zcf, RISCV_ISA_EXT_ZCF, riscv_ext_zcf_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zcmop, RISCV_ISA_EXT_ZCMOP, riscv_ext_zca_depends), __RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA), __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), + __RISCV_ISA_EXT_DATA(zbc, RISCV_ISA_EXT_ZBC), + __RISCV_ISA_EXT_DATA(zbkb, RISCV_ISA_EXT_ZBKB), + __RISCV_ISA_EXT_DATA(zbkc, RISCV_ISA_EXT_ZBKC), + __RISCV_ISA_EXT_DATA(zbkx, RISCV_ISA_EXT_ZBKX), __RISCV_ISA_EXT_DATA(zbs, RISCV_ISA_EXT_ZBS), + __RISCV_ISA_EXT_BUNDLE(zk, riscv_zk_bundled_exts), + __RISCV_ISA_EXT_BUNDLE(zkn, riscv_zkn_bundled_exts), + __RISCV_ISA_EXT_DATA(zknd, RISCV_ISA_EXT_ZKND), + __RISCV_ISA_EXT_DATA(zkne, RISCV_ISA_EXT_ZKNE), + __RISCV_ISA_EXT_DATA(zknh, RISCV_ISA_EXT_ZKNH), + __RISCV_ISA_EXT_DATA(zkr, RISCV_ISA_EXT_ZKR), + __RISCV_ISA_EXT_BUNDLE(zks, riscv_zks_bundled_exts), + __RISCV_ISA_EXT_DATA(zkt, RISCV_ISA_EXT_ZKT), + __RISCV_ISA_EXT_DATA(zksed, RISCV_ISA_EXT_ZKSED), + __RISCV_ISA_EXT_DATA(zksh, RISCV_ISA_EXT_ZKSH), + __RISCV_ISA_EXT_DATA(ztso, RISCV_ISA_EXT_ZTSO), + __RISCV_ISA_EXT_SUPERSET(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts), + __RISCV_ISA_EXT_DATA(zvbc, RISCV_ISA_EXT_ZVBC), + __RISCV_ISA_EXT_SUPERSET(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts), + __RISCV_ISA_EXT_DATA(zve32x, RISCV_ISA_EXT_ZVE32X), + __RISCV_ISA_EXT_SUPERSET(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts), + __RISCV_ISA_EXT_SUPERSET(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts), + __RISCV_ISA_EXT_SUPERSET(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts), + __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH), + __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN), + __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB), + __RISCV_ISA_EXT_DATA(zvkg, RISCV_ISA_EXT_ZVKG), + __RISCV_ISA_EXT_BUNDLE(zvkn, riscv_zvkn_bundled_exts), + __RISCV_ISA_EXT_BUNDLE(zvknc, riscv_zvknc_bundled_exts), + __RISCV_ISA_EXT_DATA(zvkned, RISCV_ISA_EXT_ZVKNED), + __RISCV_ISA_EXT_BUNDLE(zvkng, riscv_zvkng_bundled_exts), + __RISCV_ISA_EXT_DATA(zvknha, RISCV_ISA_EXT_ZVKNHA), + __RISCV_ISA_EXT_DATA(zvknhb, RISCV_ISA_EXT_ZVKNHB), + __RISCV_ISA_EXT_BUNDLE(zvks, riscv_zvks_bundled_exts), + __RISCV_ISA_EXT_BUNDLE(zvksc, riscv_zvksc_bundled_exts), + __RISCV_ISA_EXT_DATA(zvksed, RISCV_ISA_EXT_ZVKSED), + __RISCV_ISA_EXT_DATA(zvksh, RISCV_ISA_EXT_ZVKSH), + __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts), + __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT), __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), + __RISCV_ISA_EXT_DATA(smmpm, RISCV_ISA_EXT_SMMPM), + __RISCV_ISA_EXT_SUPERSET(smnpm, RISCV_ISA_EXT_SMNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(smstateen, RISCV_ISA_EXT_SMSTATEEN), __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_SUPERSET(ssnpm, RISCV_ISA_EXT_SSNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svade, RISCV_ISA_EXT_SVADE), + __RISCV_ISA_EXT_DATA_VALIDATE(svadu, RISCV_ISA_EXT_SVADU, riscv_ext_svadu_validate), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), + __RISCV_ISA_EXT_DATA(svvptc, RISCV_ISA_EXT_SVVPTC), }; const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext); -static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct riscv_isainfo *isainfo, - unsigned long *isa2hwcap, const char *isa) +static void riscv_isa_set_ext(const struct riscv_isa_ext_data *ext, unsigned long *bitmap) +{ + if (ext->id != RISCV_ISA_EXT_INVALID) + set_bit(ext->id, bitmap); + + for (int i = 0; i < ext->subset_ext_size; i++) { + if (ext->subset_ext_ids[i] != RISCV_ISA_EXT_INVALID) + set_bit(ext->subset_ext_ids[i], bitmap); + } +} + +static const struct riscv_isa_ext_data *riscv_get_isa_ext_data(unsigned int ext_id) +{ + for (int i = 0; i < riscv_isa_ext_count; i++) { + if (riscv_isa_ext[i].id == ext_id) + return &riscv_isa_ext[i]; + } + + return NULL; +} + +/* + * "Resolve" a source ISA bitmap into one that matches kernel configuration as + * well as correct extension dependencies. Some extensions depends on specific + * kernel configuration to be usable (V needs CONFIG_RISCV_ISA_V for instance) + * and this function will actually validate all the extensions provided in + * source_isa into the resolved_isa based on extensions validate() callbacks. + */ +static void __init riscv_resolve_isa(unsigned long *source_isa, + unsigned long *resolved_isa, unsigned long *this_hwcap, + unsigned long *isa2hwcap) +{ + bool loop; + const struct riscv_isa_ext_data *ext; + DECLARE_BITMAP(prev_resolved_isa, RISCV_ISA_EXT_MAX); + int max_loop_count = riscv_isa_ext_count, ret; + unsigned int bit; + + do { + loop = false; + if (max_loop_count-- < 0) { + pr_err("Failed to reach a stable ISA state\n"); + return; + } + bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX); + for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) { + ext = riscv_get_isa_ext_data(bit); + + if (ext && ext->validate) { + ret = ext->validate(ext, resolved_isa); + if (ret == -EPROBE_DEFER) { + loop = true; + continue; + } else if (ret) { + /* Disable the extension entirely */ + clear_bit(bit, source_isa); + continue; + } + } + + set_bit(bit, resolved_isa); + /* No need to keep it in source isa now that it is enabled */ + clear_bit(bit, source_isa); + + /* Single letter extensions get set in hwcap */ + if (bit < RISCV_ISA_EXT_BASE) + *this_hwcap |= isa2hwcap[bit]; + } + } while (loop && !bitmap_equal(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX)); +} + +static void __init match_isa_ext(const char *name, const char *name_end, unsigned long *bitmap) +{ + for (int i = 0; i < riscv_isa_ext_count; i++) { + const struct riscv_isa_ext_data *ext = &riscv_isa_ext[i]; + + if ((name_end - name == strlen(ext->name)) && + !strncasecmp(name, ext->name, name_end - name)) { + riscv_isa_set_ext(ext, bitmap); + break; + } + } +} + +static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap) { /* * For all possible cpus, we have already validated in @@ -203,9 +508,24 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc while (*isa) { const char *ext = isa++; const char *ext_end = isa; - bool ext_long = false, ext_err = false; + bool ext_err = false; switch (*ext) { + case 'x': + case 'X': + if (acpi_disabled) + pr_warn_once("Vendor extensions are ignored in riscv,isa. Use riscv,isa-extensions instead."); + /* + * To skip an extension, we find its end. + * As multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + */ + for (; *isa && *isa != '_'; ++isa) + ; + ext_err = true; + break; case 's': /* * Workaround for invalid single-letter 's' & 'u' (QEMU). @@ -221,8 +541,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc } fallthrough; case 'S': - case 'x': - case 'X': case 'z': case 'Z': /* @@ -243,7 +561,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc * character itself while eliminating the extensions version number. * A simple re-increment solves this problem. */ - ext_long = true; for (; *isa && *isa != '_'; ++isa) if (unlikely(!isalnum(*isa))) ext_err = true; @@ -321,29 +638,10 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc if (*isa == '_') ++isa; -#define SET_ISA_EXT_MAP(name, bit) \ - do { \ - if ((ext_end - ext == strlen(name)) && \ - !strncasecmp(ext, name, strlen(name)) && \ - riscv_isa_extension_check(bit)) \ - set_bit(bit, isainfo->isa); \ - } while (false) \ - if (unlikely(ext_err)) continue; - if (!ext_long) { - int nr = tolower(*ext) - 'a'; - if (riscv_isa_extension_check(nr)) { - *this_hwcap |= isa2hwcap[nr]; - set_bit(nr, isainfo->isa); - } - } else { - for (int i = 0; i < riscv_isa_ext_count; i++) - SET_ISA_EXT_MAP(riscv_isa_ext[i].name, - riscv_isa_ext[i].id); - } -#undef SET_ISA_EXT_MAP + match_isa_ext(ext, ext_end, bitmap); } } @@ -355,6 +653,8 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) struct acpi_table_header *rhct; acpi_status status; unsigned int cpu; + u64 boot_vendorid; + u64 boot_archid; if (!acpi_disabled) { status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); @@ -362,9 +662,13 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) return; } + boot_vendorid = riscv_get_mvendorid(); + boot_archid = riscv_get_marchid(); + for_each_possible_cpu(cpu) { struct riscv_isainfo *isainfo = &hart_isa[cpu]; unsigned long this_hwcap = 0; + DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 }; if (acpi_disabled) { node = of_cpu_device_node_get(cpu); @@ -387,7 +691,7 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) } } - riscv_parse_isa_string(&this_hwcap, isainfo, isa2hwcap, isa); + riscv_parse_isa_string(isa, source_isa); /* * These ones were as they were part of the base ISA when the @@ -395,12 +699,27 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) * unconditionally where `i` is in riscv,isa on DT systems. */ if (acpi_disabled) { - set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZICNTR, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa); + set_bit(RISCV_ISA_EXT_ZICSR, source_isa); + set_bit(RISCV_ISA_EXT_ZIFENCEI, source_isa); + set_bit(RISCV_ISA_EXT_ZICNTR, source_isa); + set_bit(RISCV_ISA_EXT_ZIHPM, source_isa); + } + + /* + * "V" in ISA strings is ambiguous in practice: it should mean + * just the standard V-1.0 but vendors aren't well behaved. + * Many vendors with T-Head CPU cores which implement the 0.7.1 + * version of the vector specification put "v" into their DTs. + * CPU cores with the ratified spec will contain non-zero + * marchid. + */ + if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0) { + this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v]; + clear_bit(RISCV_ISA_EXT_v, source_isa); } + riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); + /* * All "okay" hart should have same isa. Set HWCAP based on * common capabilities of every "okay" hart, in case they don't @@ -421,14 +740,111 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) acpi_put_table((struct acpi_table_header *)rhct); } +static void __init riscv_fill_cpu_vendor_ext(struct device_node *cpu_node, int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + const struct riscv_isa_ext_data ext = ext_list->ext_data[j]; + struct riscv_isavendorinfo *isavendorinfo = &ext_list->per_hart_isa_bitmap[cpu]; + + if (of_property_match_string(cpu_node, "riscv,isa-extensions", + ext.property) < 0) + continue; + + /* + * Assume that subset extensions are all members of the + * same vendor. + */ + if (ext.subset_ext_size) + for (int k = 0; k < ext.subset_ext_size; k++) + set_bit(ext.subset_ext_ids[k], isavendorinfo->isa); + + set_bit(ext.id, isavendorinfo->isa); + } + } +} + +/* + * Populate all_harts_isa_bitmap for each vendor with all of the extensions that + * are shared across CPUs for that vendor. + */ +static void __init riscv_fill_vendor_ext_list(int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + if (!ext_list->is_initialized) { + bitmap_copy(ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + ext_list->is_initialized = true; + } else { + bitmap_and(ext_list->all_harts_isa_bitmap.isa, + ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + } + } +} + +static int has_thead_homogeneous_vlenb(void) +{ + int cpu; + u32 prev_vlenb = 0; + u32 vlenb; + + /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return 0; + + for_each_possible_cpu(cpu) { + struct device_node *cpu_node; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) { + pr_warn("Unable to find cpu node\n"); + return -ENOENT; + } + + if (of_property_read_u32(cpu_node, "thead,vlenb", &vlenb)) { + of_node_put(cpu_node); + + if (prev_vlenb) + return -ENOENT; + continue; + } + + if (prev_vlenb && vlenb != prev_vlenb) { + of_node_put(cpu_node); + return -ENOENT; + } + + prev_vlenb = vlenb; + of_node_put(cpu_node); + } + + thead_vlenb_of = vlenb; + return 0; +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; + bool mitigated; for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; struct device_node *cpu_node; struct riscv_isainfo *isainfo = &hart_isa[cpu]; + DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 }; cpu_node = of_cpu_device_node_get(cpu); if (!cpu_node) { @@ -442,20 +858,18 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) } for (int i = 0; i < riscv_isa_ext_count; i++) { - if (of_property_match_string(cpu_node, "riscv,isa-extensions", - riscv_isa_ext[i].property) < 0) - continue; + const struct riscv_isa_ext_data *ext = &riscv_isa_ext[i]; - if (!riscv_isa_extension_check(riscv_isa_ext[i].id)) + if (of_property_match_string(cpu_node, "riscv,isa-extensions", + ext->property) < 0) continue; - /* Only single letter extensions get set in hwcap */ - if (strnlen(riscv_isa_ext[i].name, 2) == 1) - this_hwcap |= isa2hwcap[riscv_isa_ext[i].id]; - - set_bit(riscv_isa_ext[i].id, isainfo->isa); + riscv_isa_set_ext(ext, source_isa); } + riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); + riscv_fill_cpu_vendor_ext(cpu_node, cpu); + of_node_put(cpu_node); /* @@ -471,6 +885,19 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); else bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); + + riscv_fill_vendor_ext_list(cpu); + } + + /* + * Execute ghostwrite mitigation immediately after detecting extensions + * to disable xtheadvector if necessary. + */ + mitigated = ghostwrite_enable_mitigation(); + + if (!mitigated && has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) { + pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n"); + disable_xtheadvector(); } if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) @@ -525,8 +952,15 @@ void __init riscv_fill_hwcap(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_F; } - if (elf_hwcap & COMPAT_HWCAP_ISA_V) { + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) || + has_xtheadvector_no_alternatives()) { + /* + * This cannot fail when called on the boot hart + */ riscv_v_setup_vsize(); + } + + if (elf_hwcap & COMPAT_HWCAP_ISA_V) { /* * ISA string in device tree might have 'v' flag, but * CONFIG_RISCV_ISA_V is disabled in kernel. @@ -561,171 +995,12 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -static int check_unaligned_access(void *param) +void __init riscv_user_isa_enable(void) { - int cpu = smp_processor_id(); - u64 start_cycles, end_cycles; - u64 word_cycles; - u64 byte_cycles; - int ratio; - unsigned long start_jiffies, now; - struct page *page = param; - void *dst; - void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; - - if (check_unaligned_access_emulated(cpu)) - return 0; - - /* Make an unaligned destination buffer. */ - dst = (void *)((unsigned long)page_address(page) | 0x1); - /* Unalign src as well, but differently (off by 1 + 2 = 3). */ - src = dst + (MISALIGNED_BUFFER_SIZE / 2); - src += 2; - word_cycles = -1ULL; - /* Do a warmup. */ - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - preempt_disable(); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - /* - * For a fixed amount of time, repeatedly try the function, and take - * the best time in cycles as the measurement. - */ - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - /* Ensure the CSR read can't reorder WRT to the copy. */ - mb(); - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - /* Ensure the copy ends before the end time is snapped. */ - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < word_cycles) - word_cycles = end_cycles - start_cycles; - } - - byte_cycles = -1ULL; - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - mb(); - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < byte_cycles) - byte_cycles = end_cycles - start_cycles; - } - - preempt_enable(); - - /* Don't divide by zero. */ - if (!word_cycles || !byte_cycles) { - pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", - cpu); - - return 0; - } - - if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; - - ratio = div_u64((byte_cycles * 100), word_cycles); - pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", - cpu, - ratio / 100, - ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); - - per_cpu(misaligned_access_speed, cpu) = speed; - return 0; -} - -static void check_unaligned_access_nonboot_cpu(void *param) -{ - unsigned int cpu = smp_processor_id(); - struct page **pages = param; - - if (smp_processor_id() != 0) - check_unaligned_access(pages[cpu]); -} - -static int riscv_online_cpu(unsigned int cpu) -{ - static struct page *buf; - - /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) - return 0; - - buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!buf) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return -ENOMEM; - } - - check_unaligned_access(buf); - __free_pages(buf, MISALIGNED_BUFFER_ORDER); - return 0; -} - -/* Measure unaligned access on all CPUs present at boot in parallel. */ -static int check_unaligned_access_all_cpus(void) -{ - unsigned int cpu; - unsigned int cpu_count = num_possible_cpus(); - struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), - GFP_KERNEL); - - if (!bufs) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return 0; - } - - /* - * Allocate separate buffers for each CPU so there's no fighting over - * cache lines. - */ - for_each_cpu(cpu, cpu_online_mask) { - bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!bufs[cpu]) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - goto out; - } - } - - /* Check everybody except 0, who stays behind to tend jiffies. */ - on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); - - /* Check core 0. */ - smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - - /* Setup hotplug callback for any new CPUs that come online. */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, NULL); - -out: - unaligned_emulation_finish(); - for_each_cpu(cpu, cpu_online_mask) { - if (bufs[cpu]) - __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); - } - - kfree(bufs); - return 0; -} - -arch_initcall(check_unaligned_access_all_cpus); - -void riscv_user_isa_enable(void) -{ - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) - csr_set(CSR_SENVCFG, ENVCFG_CBZE); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ)) + current->thread.envcfg |= ENVCFG_CBZE; + else if (any_cpu_has_zicboz) + pr_warn("Zicboz disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE @@ -768,28 +1043,45 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, { struct alt_entry *alt; void *oldptr, *altptr; - u16 id, value; + u16 id, value, vendor; if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; for (alt = begin; alt < end; alt++) { - if (alt->vendor_id != 0) - continue; - id = PATCH_ID_CPUFEATURE_ID(alt->patch_id); + vendor = PATCH_ID_CPUFEATURE_ID(alt->vendor_id); - if (id >= RISCV_ISA_EXT_MAX) { - WARN(1, "This extension id:%d is not in ISA extension list", id); - continue; - } + /* + * Any alternative with a patch_id that is less than + * RISCV_ISA_EXT_MAX is interpreted as a standard extension. + * + * Any alternative with patch_id that is greater than or equal + * to RISCV_VENDOR_EXT_ALTERNATIVES_BASE is interpreted as a + * vendor extension. + */ + if (id < RISCV_ISA_EXT_MAX) { + /* + * This patch should be treated as errata so skip + * processing here. + */ + if (alt->vendor_id != 0) + continue; - if (!__riscv_isa_extension_available(NULL, id)) - continue; + if (!__riscv_isa_extension_available(NULL, id)) + continue; - value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); - if (!riscv_cpufeature_patch_check(id, value)) + value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); + if (!riscv_cpufeature_patch_check(id, value)) + continue; + } else if (id >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE) { + if (!__riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, + id - RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + continue; + } else { + WARN(1, "This extension id:%d is not in ISA extension list", id); continue; + } oldptr = ALT_OLD_PTR(alt); altptr = ALT_ALT_PTR(alt); diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S index 515b2dfbca75..c5f17c2710b5 100644 --- a/arch/riscv/kernel/efi-header.S +++ b/arch/riscv/kernel/efi-header.S @@ -64,7 +64,7 @@ extra_header_fields: .long efi_header_end - _start // SizeOfHeaders .long 0 // CheckSum .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem - .short 0 // DllCharacteristics + .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics .quad 0 // SizeOfStackReserve .quad 0 // SizeOfStackCommit .quad 0 // SizeOfHeapReserve diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c index aa6209a74c83..b64bf1624a05 100644 --- a/arch/riscv/kernel/efi.c +++ b/arch/riscv/kernel/efi.c @@ -60,7 +60,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) { efi_memory_desc_t *md = data; - pte_t pte = READ_ONCE(*ptep); + pte_t pte = ptep_get(ptep); unsigned long val; if (md->attribute & EFI_MEMORY_RO) { diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index e60fbd8660c4..3c37661801f9 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -19,6 +19,7 @@ #include <linux/libfdt.h> #include <linux/types.h> #include <linux/memblock.h> +#include <linux/vmalloc.h> #include <asm/setup.h> int arch_kimage_file_post_load_cleanup(struct kimage *image) @@ -117,6 +118,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, return ret; } +#ifdef CONFIG_CRASH_DUMP static int get_nr_ram_ranges_callback(struct resource *res, void *arg) { unsigned int *nr_ranges = arg; @@ -189,6 +191,7 @@ static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; return cmdline_ptr; } +#endif static void *elf_kexec_load(struct kimage *image, char *kernel_buf, unsigned long kernel_len, char *initrd, @@ -196,12 +199,11 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, unsigned long cmdline_len) { int ret; + void *fdt; unsigned long old_kernel_pbase = ULONG_MAX; unsigned long new_kernel_pbase = 0UL; unsigned long initrd_pbase = 0UL; - unsigned long headers_sz; unsigned long kernel_start; - void *fdt, *headers; struct elfhdr ehdr; struct kexec_buf kbuf; struct kexec_elf_info elf_info; @@ -216,7 +218,6 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, if (ret) goto out; kernel_start = image->start; - pr_notice("The entry point of kernel at 0x%lx\n", image->start); /* Add the kernel binary to the image */ ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, @@ -228,8 +229,11 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, kbuf.buf_min = new_kernel_pbase + kernel_len; kbuf.buf_max = ULONG_MAX; +#ifdef CONFIG_CRASH_DUMP /* Add elfcorehdr */ if (image->type == KEXEC_TYPE_CRASH) { + void *headers; + unsigned long headers_sz; ret = prepare_elf_headers(&headers, &headers_sz); if (ret) { pr_err("Preparing elf core header failed\n"); @@ -252,8 +256,8 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, image->elf_load_addr = kbuf.mem; image->elf_headers_sz = headers_sz; - pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - image->elf_load_addr, kbuf.bufsz, kbuf.memsz); + kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); /* Setup cmdline for kdump kernel case */ modified_cmdline = setup_kdump_cmdline(image, cmdline, @@ -265,6 +269,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, } cmdline = modified_cmdline; } +#endif #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY /* Add purgatory to the image */ @@ -275,6 +280,8 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, pr_err("Error loading purgatory ret=%d\n", ret); goto out; } + kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem); + ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", &kernel_start, sizeof(kernel_start), 0); @@ -293,7 +300,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, if (ret) goto out; initrd_pbase = kbuf.mem; - pr_notice("Loaded initrd at 0x%lx\n", initrd_pbase); + kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase); } /* Add the DTB to the image */ @@ -318,7 +325,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, } /* Cache the fdt buffer address for memory cleanup */ image->arch.fdt = fdt; - pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem); + kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem); goto out; out_free_fdt: @@ -444,6 +451,12 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | ENCODE_CJTYPE_IMM(val - addr); break; + case R_RISCV_ADD16: + *(u16 *)loc += val; + break; + case R_RISCV_SUB16: + *(u16 *)loc -= val; + break; case R_RISCV_ADD32: *(u32 *)loc += val; break; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 54ca4564a926..33a5a9f2a0d4 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -19,6 +19,79 @@ .section .irqentry.text, "ax" +.macro new_vmalloc_check + REG_S a0, TASK_TI_A0(tp) + csrr a0, CSR_CAUSE + /* Exclude IRQs */ + blt a0, zero, .Lnew_vmalloc_restore_context_a0 + + REG_S a1, TASK_TI_A1(tp) + /* Only check new_vmalloc if we are in page/protection fault */ + li a1, EXC_LOAD_PAGE_FAULT + beq a0, a1, .Lnew_vmalloc_kernel_address + li a1, EXC_STORE_PAGE_FAULT + beq a0, a1, .Lnew_vmalloc_kernel_address + li a1, EXC_INST_PAGE_FAULT + bne a0, a1, .Lnew_vmalloc_restore_context_a1 + +.Lnew_vmalloc_kernel_address: + /* Is it a kernel address? */ + csrr a0, CSR_TVAL + bge a0, zero, .Lnew_vmalloc_restore_context_a1 + + /* Check if a new vmalloc mapping appeared that could explain the trap */ + REG_S a2, TASK_TI_A2(tp) + /* + * Computes: + * a0 = &new_vmalloc[BIT_WORD(cpu)] + * a1 = BIT_MASK(cpu) + */ + REG_L a2, TASK_TI_CPU(tp) + /* + * Compute the new_vmalloc element position: + * (cpu / 64) * 8 = (cpu >> 6) << 3 + */ + srli a1, a2, 6 + slli a1, a1, 3 + la a0, new_vmalloc + add a0, a0, a1 + /* + * Compute the bit position in the new_vmalloc element: + * bit_pos = cpu % 64 = cpu - (cpu / 64) * 64 = cpu - (cpu >> 6) << 6 + * = cpu - ((cpu >> 6) << 3) << 3 + */ + slli a1, a1, 3 + sub a1, a2, a1 + /* Compute the "get mask": 1 << bit_pos */ + li a2, 1 + sll a1, a2, a1 + + /* Check the value of new_vmalloc for this cpu */ + REG_L a2, 0(a0) + and a2, a2, a1 + beq a2, zero, .Lnew_vmalloc_restore_context + + /* Atomically reset the current cpu bit in new_vmalloc */ + amoxor.d a0, a1, (a0) + + /* Only emit a sfence.vma if the uarch caches invalid entries */ + ALTERNATIVE("sfence.vma", "nop", 0, RISCV_ISA_EXT_SVVPTC, 1) + + REG_L a0, TASK_TI_A0(tp) + REG_L a1, TASK_TI_A1(tp) + REG_L a2, TASK_TI_A2(tp) + csrw CSR_SCRATCH, x0 + sret + +.Lnew_vmalloc_restore_context: + REG_L a2, TASK_TI_A2(tp) +.Lnew_vmalloc_restore_context_a1: + REG_L a1, TASK_TI_A1(tp) +.Lnew_vmalloc_restore_context_a0: + REG_L a0, TASK_TI_A0(tp) +.endm + + SYM_CODE_START(handle_exception) /* * If coming from userspace, preserve the user thread pointer and load @@ -30,6 +103,20 @@ SYM_CODE_START(handle_exception) .Lrestore_kernel_tpsp: csrr tp, CSR_SCRATCH + +#ifdef CONFIG_64BIT + /* + * The RISC-V kernel does not eagerly emit a sfence.vma after each + * new vmalloc mapping, which may result in exceptions: + * - if the uarch caches invalid entries, the new mapping would not be + * observed by the page table walker and an invalidation is needed. + * - if the uarch does not cache invalid entries, a reordered access + * could "miss" the new mapping and traps: in that case, we only need + * to retry the access, no sfence.vma is required. + */ + new_vmalloc_check +#endif + REG_S sp, TASK_TI_KERNEL_SP(tp) #ifdef CONFIG_VMAP_STACK @@ -83,8 +170,11 @@ SYM_CODE_START(handle_exception) /* Load the kernel shadow call stack pointer if coming from userspace */ scs_load_current_if_task_changed s5 +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + move a0, sp + call riscv_v_context_nesting_start +#endif move a0, sp /* pt_regs */ - la ra, ret_from_exception /* * MSB of cause differentiates between @@ -93,7 +183,8 @@ SYM_CODE_START(handle_exception) bge s4, zero, 1f /* Handle interrupts */ - tail do_irq + call do_irq + j ret_from_exception 1: /* Handle other exceptions */ slli t0, s4, RISCV_LGPTR @@ -101,12 +192,16 @@ SYM_CODE_START(handle_exception) la t2, excp_vect_table_end add t0, t1, t0 /* Check if exception code lies within bounds */ - bgeu t0, t2, 1f - REG_L t0, 0(t0) - jr t0 -1: - tail do_trap_unknown + bgeu t0, t2, 3f + REG_L t1, 0(t0) +2: jalr t1 + j ret_from_exception +3: + + la t1, do_trap_unknown + j 2b SYM_CODE_END(handle_exception) +ASM_NOKPROBE(handle_exception) /* * The ret_from_exception must be called with interrupt disabled. Here is the @@ -125,6 +220,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #endif bnez s0, 1f +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + call stackleak_erase_on_task_stack +#endif + /* Save unwound kernel stack pointer in thread_info */ addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) @@ -138,6 +237,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) */ csrw CSR_SCRATCH, tp 1: +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + move a0, sp + call riscv_v_context_nesting_end +#endif REG_L a0, PT_STATUS(sp) /* * The current load reservation is effectively part of the processor's @@ -175,7 +278,9 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #else sret #endif +SYM_INNER_LABEL(ret_from_exception_end, SYM_L_GLOBAL) SYM_CODE_END(ret_from_exception) +ASM_NOKPROBE(ret_from_exception) #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) @@ -211,6 +316,7 @@ SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) move a0, sp tail handle_bad_stack SYM_CODE_END(handle_kernel_stack_overflow) +ASM_NOKPROBE(handle_kernel_stack_overflow) #endif SYM_CODE_START(ret_from_fork) @@ -221,8 +327,8 @@ SYM_CODE_START(ret_from_fork) jalr s0 1: move a0, sp /* pt_regs */ - la ra, ret_from_exception - tail syscall_exit_to_user_mode + call syscall_exit_to_user_mode + j ret_from_exception SYM_CODE_END(ret_from_fork) #ifdef CONFIG_IRQ_STACKS diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index 2c543f130f93..f74f6b60e347 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -170,7 +170,7 @@ SYM_FUNC_END(__fstate_restore) __access_func(f31) -#ifdef CONFIG_RISCV_MISALIGNED +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED /* * Disable compressed instructions set to keep a constant offset between FP @@ -211,7 +211,7 @@ SYM_FUNC_START(put_f64_reg) SYM_FUNC_END(put_f64_reg) /* - * put_f64_reg - Get a 64 bits FP register value and returned it or store it to + * get_f64_reg - Get a 64 bits FP register value and returned it or store it to * a pointer. * a0 = FP register index to be retrieved * a1 = If xlen == 32, pointer which should be loaded with the FP register value @@ -224,4 +224,4 @@ SYM_FUNC_START(get_f64_reg) fp_access_epilogue SYM_FUNC_END(get_f64_reg) -#endif /* CONFIG_RISCV_MISALIGNED */ +#endif /* CONFIG_RISCV_SCALAR_MISALIGNED */ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 03a6434a8cdd..3524db5e4fa0 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -8,8 +8,9 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> #include <linux/memory.h> +#include <linux/stop_machine.h> #include <asm/cacheflush.h> -#include <asm/patch.h> +#include <asm/text-patching.h> #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) @@ -75,8 +76,7 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, make_call_t0(hook_pos, target, call); /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */ - if (patch_text_nosync - ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) return -EPERM; return 0; @@ -88,7 +88,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) make_call_t0(rec->ip, addr, call); - if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)rec->ip, call, MCOUNT_INSN_SIZE)) return -EPERM; return 0; @@ -99,7 +99,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, { unsigned int nops[2] = {NOP4, NOP4}; - if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) return -EPERM; return 0; @@ -127,16 +127,48 @@ int ftrace_update_ftrace_func(ftrace_func_t func) { int ret = __ftrace_modify_call((unsigned long)&ftrace_call, (unsigned long)func, true, true); - if (!ret) { - ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call, - (unsigned long)func, true, true); - } return ret; } + +struct ftrace_modify_param { + int command; + atomic_t cpu_count; +}; + +static int __ftrace_modify_code(void *data) +{ + struct ftrace_modify_param *param = data; + + if (atomic_inc_return(¶m->cpu_count) == num_online_cpus()) { + ftrace_modify_all_code(param->command); + /* + * Make sure the patching store is effective *before* we + * increment the counter which releases all waiting CPUs + * by using the release variant of atomic increment. The + * release pairs with the call to local_flush_icache_all() + * on the waiting CPU. + */ + atomic_inc_return_release(¶m->cpu_count); + } else { + while (atomic_read(¶m->cpu_count) <= num_online_cpus()) + cpu_relax(); + + local_flush_icache_all(); + } + + return 0; +} + +void arch_ftrace_update_code(int command) +{ + struct ftrace_modify_param param = { command, ATOMIC_INIT(0) }; + + stop_machine(__ftrace_modify_code, ¶m, cpu_online_mask); +} #endif -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -178,32 +210,40 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } #ifdef CONFIG_DYNAMIC_FTRACE -extern void ftrace_graph_call(void); -extern void ftrace_graph_regs_call(void); -int ftrace_enable_ftrace_graph_caller(void) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { - int ret; + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long frame_pointer = arch_ftrace_regs(fregs)->s0; + unsigned long *parent = &arch_ftrace_regs(fregs)->ra; + unsigned long old; - ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, true, true); - if (ret) - return ret; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * We don't suffer access faults, so no extra fault-recovery assembly + * is needed here. + */ + old = *parent; - return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs)) + *parent = return_hooker; +} +#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ +extern void ftrace_graph_call(void); +int ftrace_enable_ftrace_graph_caller(void) +{ + return __ftrace_modify_call((unsigned long)&ftrace_graph_call, (unsigned long)&prepare_ftrace_return, true, true); } int ftrace_disable_ftrace_graph_caller(void) { - int ret; - - ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, false, true); - if (ret) - return ret; - - return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + return __ftrace_modify_call((unsigned long)&ftrace_graph_call, (unsigned long)&prepare_ftrace_return, false, true); } +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 76ace1e0b46f..356d5397b2a2 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -11,7 +11,6 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/csr.h> -#include <asm/cpu_ops_sbi.h> #include <asm/hwcap.h> #include <asm/image.h> #include <asm/scs.h> @@ -89,6 +88,7 @@ relocate_enable_mmu: /* Compute satp for kernel page tables, but don't load it yet */ srl a2, a0, PAGE_SHIFT la a1, satp_mode + XIP_FIXUP_OFFSET a1 REG_L a1, 0(a1) or a2, a2, a1 @@ -165,10 +165,21 @@ secondary_start_sbi: #endif call .Lsetup_trap_vector scs_load_current - tail smp_callin + call smp_callin #endif /* CONFIG_SMP */ .align 2 +.Lsecondary_park: + /* + * Park this hart if we: + * - have too many harts on CONFIG_RISCV_BOOT_SPINWAIT + * - receive an early trap, before setup_trap_vector finished + * - fail in smp_callin(), as a successful one wouldn't return + */ + wfi + j .Lsecondary_park + +.align 2 .Lsetup_trap_vector: /* Set trap vector to exception handler */ la a0, handle_exception @@ -181,12 +192,6 @@ secondary_start_sbi: csrw CSR_SCRATCH, zero ret -.align 2 -.Lsecondary_park: - /* We lack SMP support or have too many harts, so park this hart */ - wfi - j .Lsecondary_park - SYM_CODE_END(_start) SYM_CODE_START(_start_kernel) @@ -265,10 +270,12 @@ SYM_CODE_START(_start_kernel) la sp, _end + THREAD_SIZE XIP_FIXUP_OFFSET sp mv s0, a0 + mv s1, a1 call __copy_data - /* Restore a0 copy */ + /* Restore a0 & a1 copy */ mv a0, s0 + mv a1, s1 #endif #ifndef CONFIG_XIP_KERNEL @@ -298,6 +305,9 @@ SYM_CODE_START(_start_kernel) #else mv a0, a1 #endif /* CONFIG_BUILTIN_DTB */ + /* Set trap vector to spin forever to help debug */ + la a3, .Lsecondary_park + csrw CSR_TVEC, a3 call setup_vm #ifdef CONFIG_MMU la a0, early_pg_dir diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index e6694759dbd0..654ed159c830 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -9,13 +9,14 @@ #include <linux/memory.h> #include <linux/mutex.h> #include <asm/bug.h> -#include <asm/patch.h> +#include <asm/cacheflush.h> +#include <asm/text-patching.h> #define RISCV_INSN_NOP 0x00000013U #define RISCV_INSN_JAL 0x0000006fU -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)jump_entry_code(entry); u32 insn; @@ -24,7 +25,7 @@ void arch_jump_label_transform(struct jump_entry *entry, long offset = jump_entry_target(entry) - jump_entry_code(entry); if (WARN_ON(offset & 1 || offset < -524288 || offset >= 524288)) - return; + return true; insn = RISCV_INSN_JAL | (((u32)offset & GENMASK(19, 12)) << (12 - 12)) | @@ -35,7 +36,20 @@ void arch_jump_label_transform(struct jump_entry *entry, insn = RISCV_INSN_NOP; } - mutex_lock(&text_mutex); - patch_text_nosync(addr, &insn, sizeof(insn)); - mutex_unlock(&text_mutex); + if (early_boot_irqs_disabled) { + riscv_patch_in_stop_machine = 1; + patch_insn_write(addr, &insn, sizeof(insn)); + riscv_patch_in_stop_machine = 0; + } else { + mutex_lock(&text_mutex); + patch_insn_write(addr, &insn, sizeof(insn)); + mutex_unlock(&text_mutex); + } + + return true; +} + +void arch_jump_label_transform_apply(void) +{ + flush_icache_all(); } diff --git a/arch/riscv/kernel/kernel_mode_fpu.c b/arch/riscv/kernel/kernel_mode_fpu.c new file mode 100644 index 000000000000..0ac8348876c4 --- /dev/null +++ b/arch/riscv/kernel/kernel_mode_fpu.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 SiFive + */ + +#include <linux/export.h> +#include <linux/preempt.h> + +#include <asm/csr.h> +#include <asm/fpu.h> +#include <asm/processor.h> +#include <asm/switch_to.h> + +void kernel_fpu_begin(void) +{ + preempt_disable(); + fstate_save(current, task_pt_regs(current)); + csr_set(CSR_SSTATUS, SR_FS); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + csr_clear(CSR_SSTATUS, SR_FS); + fstate_restore(current, task_pt_regs(current)); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c new file mode 100644 index 000000000000..99972a48e86b --- /dev/null +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2021 SiFive + */ +#include <linux/compiler.h> +#include <linux/irqflags.h> +#include <linux/percpu.h> +#include <linux/preempt.h> +#include <linux/types.h> + +#include <asm/vector.h> +#include <asm/switch_to.h> +#include <asm/simd.h> +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +#include <asm/asm-prototypes.h> +#endif + +static inline void riscv_v_flags_set(u32 flags) +{ + WRITE_ONCE(current->thread.riscv_v_flags, flags); +} + +static inline void riscv_v_start(u32 flags) +{ + int orig; + + orig = riscv_v_flags(); + BUG_ON((orig & flags) != 0); + riscv_v_flags_set(orig | flags); + barrier(); +} + +static inline void riscv_v_stop(u32 flags) +{ + int orig; + + barrier(); + orig = riscv_v_flags(); + BUG_ON((orig & flags) == 0); + riscv_v_flags_set(orig & ~flags); +} + +/* + * Claim ownership of the CPU vector context for use by the calling context. + * + * The caller may freely manipulate the vector context metadata until + * put_cpu_vector_context() is called. + */ +void get_cpu_vector_context(void) +{ + /* + * disable softirqs so it is impossible for softirqs to nest + * get_cpu_vector_context() when kernel is actively using Vector. + */ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); + + riscv_v_start(RISCV_KERNEL_MODE_V); +} + +/* + * Release the CPU vector context. + * + * Must be called from a context in which get_cpu_vector_context() was + * previously called, with no call to put_cpu_vector_context() in the + * meantime. + */ +void put_cpu_vector_context(void) +{ + riscv_v_stop(RISCV_KERNEL_MODE_V); + + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); +} + +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static __always_inline u32 *riscv_v_flags_ptr(void) +{ + return ¤t->thread.riscv_v_flags; +} + +static inline void riscv_preempt_v_set_dirty(void) +{ + *riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY; +} + +static inline void riscv_preempt_v_reset_flags(void) +{ + *riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE); +} + +static inline void riscv_v_ctx_depth_inc(void) +{ + *riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH; +} + +static inline void riscv_v_ctx_depth_dec(void) +{ + *riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH; +} + +static inline u32 riscv_v_ctx_get_depth(void) +{ + return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK; +} + +static int riscv_v_stop_kernel_context(void) +{ + if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_started(current)) + return 1; + + riscv_preempt_v_clear_dirty(current); + riscv_v_stop(RISCV_PREEMPT_V); + return 0; +} + +static int riscv_v_start_kernel_context(bool *is_nested) +{ + struct __riscv_v_ext_state *kvstate, *uvstate; + + kvstate = ¤t->thread.kernel_vstate; + if (!kvstate->datap) + return -ENOENT; + + if (riscv_preempt_v_started(current)) { + WARN_ON(riscv_v_ctx_get_depth() == 0); + *is_nested = true; + get_cpu_vector_context(); + if (riscv_preempt_v_dirty(current)) { + __riscv_v_vstate_save(kvstate, kvstate->datap); + riscv_preempt_v_clear_dirty(current); + } + riscv_preempt_v_set_restore(current); + return 0; + } + + /* Transfer the ownership of V from user to kernel, then save */ + riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); + if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) { + uvstate = ¤t->thread.vstate; + __riscv_v_vstate_save(uvstate, uvstate->datap); + } + riscv_preempt_v_clear_dirty(current); + return 0; +} + +/* low-level V context handling code, called with irq disabled */ +asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) +{ + int depth; + + if (!riscv_preempt_v_started(current)) + return; + + depth = riscv_v_ctx_get_depth(); + if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY)) + riscv_preempt_v_set_dirty(); + + riscv_v_ctx_depth_inc(); +} + +asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs) +{ + struct __riscv_v_ext_state *vstate = ¤t->thread.kernel_vstate; + u32 depth; + + WARN_ON(!irqs_disabled()); + + if (!riscv_preempt_v_started(current)) + return; + + riscv_v_ctx_depth_dec(); + depth = riscv_v_ctx_get_depth(); + if (depth == 0) { + if (riscv_preempt_v_restore(current)) { + __riscv_v_vstate_restore(vstate, vstate->datap); + __riscv_v_vstate_clean(regs); + riscv_preempt_v_reset_flags(); + } + } +} +#else +#define riscv_v_start_kernel_context(nested) (-ENOENT) +#define riscv_v_stop_kernel_context() (-ENOENT) +#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ + +/* + * kernel_vector_begin(): obtain the CPU vector registers for use by the calling + * context + * + * Must not be called unless may_use_simd() returns true. + * Task context in the vector registers is saved back to memory as necessary. + * + * A matching call to kernel_vector_end() must be made before returning from the + * calling context. + * + * The caller may freely use the vector registers until kernel_vector_end() is + * called. + */ +void kernel_vector_begin(void) +{ + bool nested = false; + + if (WARN_ON(!(has_vector() || has_xtheadvector()))) + return; + + BUG_ON(!may_use_simd()); + + if (riscv_v_start_kernel_context(&nested)) { + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, task_pt_regs(current)); + } + + if (!nested) + riscv_v_vstate_set_restore(current, task_pt_regs(current)); + + riscv_v_enable(); +} +EXPORT_SYMBOL_GPL(kernel_vector_begin); + +/* + * kernel_vector_end(): give the CPU vector registers back to the current task + * + * Must be called from a context in which kernel_vector_begin() was previously + * called, with no call to kernel_vector_end() in the meantime. + * + * The caller must not use the vector registers after this function is called, + * unless kernel_vector_begin() is called again in the meantime. + */ +void kernel_vector_end(void) +{ + if (WARN_ON(!(has_vector() || has_xtheadvector()))) + return; + + riscv_v_disable(); + + if (riscv_v_stop_kernel_context()) + put_cpu_vector_context(); +} +EXPORT_SYMBOL_GPL(kernel_vector_end); diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index 2d139b724bc8..2306ce3e5f22 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -19,30 +19,6 @@ #include <linux/irq.h> /* - * kexec_image_info - Print received image details - */ -static void -kexec_image_info(const struct kimage *image) -{ - unsigned long i; - - pr_debug("Kexec image info:\n"); - pr_debug("\ttype: %d\n", image->type); - pr_debug("\tstart: %lx\n", image->start); - pr_debug("\thead: %lx\n", image->head); - pr_debug("\tnr_segments: %lu\n", image->nr_segments); - - for (i = 0; i < image->nr_segments; i++) { - pr_debug("\t segment[%lu]: %016lx - %016lx", i, - image->segment[i].mem, - image->segment[i].mem + image->segment[i].memsz); - pr_debug("\t\t0x%lx bytes, %lu pages\n", - (unsigned long) image->segment[i].memsz, - (unsigned long) image->segment[i].memsz / PAGE_SIZE); - } -} - -/* * machine_kexec_prepare - Initialize kexec * * This function is called from do_kexec_load, when the user has @@ -60,8 +36,6 @@ machine_kexec_prepare(struct kimage *image) unsigned int control_code_buffer_sz = 0; int i = 0; - kexec_image_info(image); - /* Find the Flattened Device Tree and save its physical address */ for (i = 0; i < image->nr_segments; i++) { if (image->segment[i].memsz <= sizeof(fdt)) @@ -140,37 +114,6 @@ void machine_shutdown(void) #endif } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - int ret; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. - */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 58dd96a2a153..745dd4c4a69c 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -3,12 +3,12 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <linux/export.h> #include <asm/asm.h> #include <asm/csr.h> #include <asm/unistd.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> -#include <asm-generic/export.h> #include <asm/ftrace.h> .text @@ -56,32 +56,90 @@ addi sp, sp, ABI_SIZE_ON_STACK .endm -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - .macro SAVE_ALL - addi sp, sp, -PT_SIZE_ON_STACK - - REG_S t0, PT_EPC(sp) - REG_S x1, PT_RA(sp) - REG_S x2, PT_SP(sp) - REG_S x3, PT_GP(sp) - REG_S x4, PT_TP(sp) - REG_S x5, PT_T0(sp) - save_from_x6_to_x31 +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS + +/** +* SAVE_ABI_REGS - save regs against the ftrace_regs struct +* +* After the stack is established, +* +* 0(sp) stores the PC of the traced function which can be accessed +* by &(fregs)->epc in tracing function. Note that the real +* function entry address should be computed with -FENTRY_RA_OFFSET. +* +* 8(sp) stores the function return address (i.e. parent IP) that +* can be accessed by &(fregs)->ra in tracing function. +* +* The other regs are saved at the respective localtion and accessed +* by the respective ftrace_regs member. +* +* Here is the layout of stack for your reference. +* +* PT_SIZE_ON_STACK -> +++++++++ +* + ..... + +* + a0-a7 + --++++-> ftrace_caller saved +* + t1 + --++++-> direct tramp address +* + s0 + --+ // frame pointer +* + sp + + +* + ra + --+ // parent IP +* sp -> + epc + --+ // PC +* +++++++++ +**/ + .macro SAVE_ABI_REGS + mv t4, sp // Save original SP in T4 + addi sp, sp, -FREGS_SIZE_ON_STACK + + REG_S t0, FREGS_EPC(sp) + REG_S x1, FREGS_RA(sp) + REG_S t4, FREGS_SP(sp) // Put original SP on stack +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + REG_S x8, FREGS_S0(sp) +#endif + REG_S x6, FREGS_T1(sp) + + // save the arguments + REG_S x10, FREGS_A0(sp) + REG_S x11, FREGS_A1(sp) + REG_S x12, FREGS_A2(sp) + REG_S x13, FREGS_A3(sp) + REG_S x14, FREGS_A4(sp) + REG_S x15, FREGS_A5(sp) + REG_S x16, FREGS_A6(sp) + REG_S x17, FREGS_A7(sp) .endm - .macro RESTORE_ALL - REG_L x1, PT_RA(sp) - REG_L x2, PT_SP(sp) - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - /* Restore t0 with PT_EPC */ - REG_L x5, PT_EPC(sp) - restore_from_x6_to_x31 + .macro RESTORE_ABI_REGS, all=0 + REG_L t0, FREGS_EPC(sp) + REG_L x1, FREGS_RA(sp) +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + REG_L x8, FREGS_S0(sp) +#endif + REG_L x6, FREGS_T1(sp) + + // restore the arguments + REG_L x10, FREGS_A0(sp) + REG_L x11, FREGS_A1(sp) + REG_L x12, FREGS_A2(sp) + REG_L x13, FREGS_A3(sp) + REG_L x14, FREGS_A4(sp) + REG_L x15, FREGS_A5(sp) + REG_L x16, FREGS_A6(sp) + REG_L x17, FREGS_A7(sp) + + addi sp, sp, FREGS_SIZE_ON_STACK + .endm - addi sp, sp, PT_SIZE_ON_STACK + .macro PREPARE_ARGS + addi a0, t0, -FENTRY_RA_OFFSET + la a1, function_trace_op + REG_L a2, 0(a1) + mv a1, ra + mv a3, sp .endm -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ + +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS SYM_FUNC_START(ftrace_caller) SAVE_ABI @@ -105,34 +163,29 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ABI - jr t0 + jr t0 SYM_FUNC_END(ftrace_caller) -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -SYM_FUNC_START(ftrace_regs_caller) - SAVE_ALL - - addi a0, t0, -FENTRY_RA_OFFSET - la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp +#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ +SYM_FUNC_START(ftrace_caller) + mv t1, zero + SAVE_ABI_REGS + PREPARE_ARGS -SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - addi a0, sp, PT_RA - REG_L a1, PT_EPC(sp) - addi a1, a1, -FENTRY_RA_OFFSET -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST - mv a2, s0 -#endif -SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL) - call ftrace_stub -#endif + RESTORE_ABI_REGS + bnez t1, .Ldirect + jr t0 +.Ldirect: + jr t1 +SYM_FUNC_END(ftrace_caller) + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ - RESTORE_ALL - jr t0 -SYM_FUNC_END(ftrace_regs_caller) -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_CODE_START(ftrace_stub_direct_tramp) + jr t0 +SYM_CODE_END(ftrace_stub_direct_tramp) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index b4dd9ed6849e..068168046e0e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -4,14 +4,16 @@ #include <linux/init.h> #include <linux/linkage.h> #include <linux/cfi_types.h> +#include <linux/export.h> #include <asm/asm.h> #include <asm/csr.h> #include <asm/unistd.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> -#include <asm-generic/export.h> #include <asm/ftrace.h> +#define ABI_SIZE_ON_STACK 80 + .text .macro SAVE_ABI_STATE @@ -26,12 +28,12 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -4*SZREG - REG_S s0, 2*SZREG(sp) - REG_S ra, 3*SZREG(sp) - REG_S a0, 1*SZREG(sp) - REG_S a1, 0*SZREG(sp) - addi s0, sp, 4*SZREG + addi sp, sp, -ABI_SIZE_ON_STACK + REG_S ra, 1*SZREG(sp) + REG_S s0, 8*SZREG(sp) + REG_S a0, 10*SZREG(sp) + REG_S a1, 11*SZREG(sp) + addi s0, sp, ABI_SIZE_ON_STACK .endm .macro RESTORE_ABI_STATE @@ -41,17 +43,17 @@ .endm .macro RESTORE_RET_ABI_STATE - REG_L ra, 3*SZREG(sp) - REG_L s0, 2*SZREG(sp) - REG_L a0, 1*SZREG(sp) - REG_L a1, 0*SZREG(sp) - addi sp, sp, 4*SZREG + REG_L ra, 1*SZREG(sp) + REG_L s0, 8*SZREG(sp) + REG_L a0, 10*SZREG(sp) + REG_L a1, 11*SZREG(sp) + addi sp, sp, ABI_SIZE_ON_STACK .endm SYM_TYPED_FUNC_START(ftrace_stub) #ifdef CONFIG_DYNAMIC_FTRACE - .global MCOUNT_NAME - .set MCOUNT_NAME, ftrace_stub + .global _mcount + .set _mcount, ftrace_stub #endif ret SYM_FUNC_END(ftrace_stub) @@ -80,7 +82,7 @@ SYM_FUNC_END(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE -SYM_FUNC_START(MCOUNT_NAME) +SYM_FUNC_START(_mcount) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return @@ -126,6 +128,6 @@ SYM_FUNC_START(MCOUNT_NAME) jalr t5 RESTORE_ABI_STATE ret -SYM_FUNC_END(MCOUNT_NAME) +SYM_FUNC_END(_mcount) #endif -EXPORT_SYMBOL(MCOUNT_NAME) +EXPORT_SYMBOL(_mcount) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index aac019ed63b1..47d0ebeec93c 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/log2.h> #include <linux/moduleloader.h> -#include <linux/vmalloc.h> #include <linux/sizes.h> #include <linux/pgtable.h> #include <asm/alternative.h> @@ -24,7 +23,7 @@ struct used_bucket { struct relocation_head { struct hlist_node node; - struct list_head *rel_entry; + struct list_head rel_entry; void *location; }; @@ -635,7 +634,7 @@ process_accumulated_relocations(struct module *me, location = rel_head_iter->location; list_for_each_entry_safe(rel_entry_iter, rel_entry_iter_tmp, - rel_head_iter->rel_entry, + &rel_head_iter->rel_entry, head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( @@ -705,16 +704,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, return -ENOMEM; } - rel_head->rel_entry = - kmalloc(sizeof(struct list_head), GFP_KERNEL); - - if (!rel_head->rel_entry) { - kfree(entry); - kfree(rel_head); - return -ENOMEM; - } - - INIT_LIST_HEAD(rel_head->rel_entry); + INIT_LIST_HEAD(&rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { @@ -724,7 +714,6 @@ static int add_relocation_to_accumulate(struct module *me, int type, if (!bucket) { kfree(entry); kfree(rel_head); - kfree(rel_head->rel_entry); return -ENOMEM; } @@ -736,7 +725,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, } /* Add relocation to head of discovered rel_head */ - list_add_tail(&entry->head, rel_head->rel_entry); + list_add_tail(&entry->head, &rel_head->rel_entry); return 0; } @@ -747,6 +736,10 @@ initialize_relocation_hashtable(unsigned int num_relocations, { /* Can safely assume that bits is not greater than sizeof(long) */ unsigned long hashtable_size = roundup_pow_of_two(num_relocations); + /* + * When hashtable_size == 1, hashtable_bits == 0. + * This is valid because the hashing algorithm returns 0 in this case. + */ unsigned int hashtable_bits = ilog2(hashtable_size); /* @@ -760,10 +753,10 @@ initialize_relocation_hashtable(unsigned int num_relocations, hashtable_size <<= should_double_size; *relocation_hashtable = kmalloc_array(hashtable_size, - sizeof(*relocation_hashtable), + sizeof(**relocation_hashtable), GFP_KERNEL); if (!*relocation_hashtable) - return -ENOMEM; + return 0; __hash_init(*relocation_hashtable, hashtable_size); @@ -779,20 +772,19 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf_Sym *sym; void *location; unsigned int i, type; + unsigned int j_idx = 0; Elf_Addr v; int res; unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); struct hlist_head *relocation_hashtable; - struct list_head used_buckets_list; unsigned int hashtable_bits; + LIST_HEAD(used_buckets_list); hashtable_bits = initialize_relocation_hashtable(num_relocations, &relocation_hashtable); - if (hashtable_bits < 0) - return hashtable_bits; - - INIT_LIST_HEAD(&used_buckets_list); + if (!relocation_hashtable) + return -ENOMEM; pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -829,9 +821,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, v = sym->st_value + rel[i].r_addend; if (type == R_RISCV_PCREL_LO12_I || type == R_RISCV_PCREL_LO12_S) { - unsigned int j; + unsigned int j = j_idx; + bool found = false; - for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) { + do { unsigned long hi20_loc = sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[j].r_offset; @@ -860,16 +853,26 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, hi20 = (offset + 0x800) & 0xfffff000; lo12 = offset - hi20; v = lo12; + found = true; break; } - } - if (j == sechdrs[relsec].sh_size / sizeof(*rel)) { + + j++; + if (j > sechdrs[relsec].sh_size / sizeof(*rel)) + j = 0; + + } while (j_idx != j); + + if (!found) { pr_err( "%s: Can not find HI20 relocation information\n", me->name); return -EINVAL; } + + /* Record the previous j-loop end index */ + j_idx = j; } if (reloc_handlers[type].accumulate_handler) @@ -889,16 +892,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } -#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) -void *module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, MODULES_VADDR, - MODULES_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c new file mode 100644 index 000000000000..fa6b0339a65d --- /dev/null +++ b/arch/riscv/kernel/paravirt.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. + */ + +#define pr_fmt(fmt) "riscv-pv: " fmt + +#include <linux/cpuhotplug.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/jump_label.h> +#include <linux/kconfig.h> +#include <linux/kernel.h> +#include <linux/percpu-defs.h> +#include <linux/printk.h> +#include <linux/static_call.h> +#include <linux/types.h> + +#include <asm/barrier.h> +#include <asm/page.h> +#include <asm/paravirt.h> +#include <asm/sbi.h> + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); + +static DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64); + +static bool __init has_pv_steal_clock(void) +{ + if (sbi_spec_version >= sbi_mk_version(2, 0) && + sbi_probe_extension(SBI_EXT_STA) > 0) { + pr_info("SBI STA extension detected\n"); + return true; + } + + return false; +} + +static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi, + unsigned long flags) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM, + lo, hi, flags, 0, 0, 0); + if (ret.error) { + if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE) + pr_warn("Failed to disable steal-time shmem"); + else + pr_warn("Failed to set steal-time shmem"); + return sbi_err_map_linux_errno(ret.error); + } + + return 0; +} + +static int pv_time_cpu_online(unsigned int cpu) +{ + struct sbi_sta_struct *st = this_cpu_ptr(&steal_time); + phys_addr_t pa = __pa(st); + unsigned long lo = (unsigned long)pa; + unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0; + + return sbi_sta_steal_time_set_shmem(lo, hi, 0); +} + +static int pv_time_cpu_down_prepare(unsigned int cpu) +{ + return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE, + SBI_SHMEM_DISABLE, 0); +} + +static u64 pv_time_steal_clock(int cpu) +{ + struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu); + __le32 sequence; + __le64 steal; + + /* + * Check the sequence field before and after reading the steal + * field. Repeat the read if it is different or odd. + */ + do { + sequence = READ_ONCE(st->sequence); + virt_rmb(); + steal = READ_ONCE(st->steal); + virt_rmb(); + } while ((le32_to_cpu(sequence) & 1) || + sequence != READ_ONCE(st->sequence)); + + return le64_to_cpu(steal); +} + +int __init pv_time_init(void) +{ + int ret; + + if (!has_pv_steal_clock()) + return 0; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "riscv/pv_time:online", + pv_time_cpu_online, + pv_time_cpu_down_prepare); + if (ret < 0) + return ret; + + static_call_update(pv_steal_clock, pv_time_steal_clock); + + static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); + + pr_info("Computing paravirt steal-time\n"); + + return 0; +} diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 13ee7bf589a1..db13c9ddf9e3 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -13,18 +13,27 @@ #include <asm/cacheflush.h> #include <asm/fixmap.h> #include <asm/ftrace.h> -#include <asm/patch.h> +#include <asm/text-patching.h> +#include <asm/sections.h> struct patch_insn { void *addr; u32 *insns; - int ninsns; + size_t len; atomic_t cpu_count; }; int riscv_patch_in_stop_machine = false; #ifdef CONFIG_MMU + +static inline bool is_kernel_exittext(uintptr_t addr) +{ + return system_state < SYSTEM_RUNNING && + addr >= (uintptr_t)__exittext_begin && + addr < (uintptr_t)__exittext_end; +} + /* * The fix_to_virt(, idx) needs a const value (not a dynamic variable of * reg-a0) or BUILD_BUG_ON failed with "idx >= __end_of_fixed_addresses". @@ -35,7 +44,7 @@ static __always_inline void *patch_map(void *addr, const unsigned int fixmap) uintptr_t uintaddr = (uintptr_t) addr; struct page *page; - if (core_kernel_text(uintaddr)) + if (core_kernel_text(uintaddr) || is_kernel_exittext(uintaddr)) page = phys_to_page(__pa_symbol(addr)); else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) page = vmalloc_to_page(addr); @@ -45,7 +54,7 @@ static __always_inline void *patch_map(void *addr, const unsigned int fixmap) BUG_ON(!page); return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + - (uintaddr & ~PAGE_MASK)); + offset_in_page(addr)); } static void patch_unmap(int fixmap) @@ -56,8 +65,8 @@ NOKPROBE_SYMBOL(patch_unmap); static int __patch_insn_set(void *addr, u8 c, size_t len) { + bool across_pages = (offset_in_page(addr) + len) > PAGE_SIZE; void *waddr = addr; - bool across_pages = (((uintptr_t)addr & ~PAGE_MASK) + len) > PAGE_SIZE; /* * Only two pages can be mapped at a time for writing. @@ -71,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) */ lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -78,19 +89,29 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) memset(waddr, c, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return 0; } NOKPROBE_SYMBOL(__patch_insn_set); static int __patch_insn_write(void *addr, const void *insn, size_t len) { + bool across_pages = (offset_in_page(addr) + len) > PAGE_SIZE; void *waddr = addr; - bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; int ret; /* @@ -113,6 +134,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (!riscv_patch_in_stop_machine) lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -120,11 +143,21 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) ret = copy_to_kernel_nofault(waddr, insn, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return ret; } NOKPROBE_SYMBOL(__patch_insn_write); @@ -146,69 +179,70 @@ NOKPROBE_SYMBOL(__patch_insn_write); static int patch_insn_set(void *addr, u8 c, size_t len) { - size_t patched = 0; size_t size; - int ret = 0; + int ret; /* * __patch_insn_set() can only work on 2 pages at a time so call it in a * loop with len <= 2 * PAGE_SIZE. */ - while (patched < len && !ret) { - size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched); - ret = __patch_insn_set(addr + patched, c, size); - - patched += size; + while (len) { + size = min(len, PAGE_SIZE * 2 - offset_in_page(addr)); + ret = __patch_insn_set(addr, c, size); + if (ret) + return ret; + + addr += size; + len -= size; } - return ret; + return 0; } NOKPROBE_SYMBOL(patch_insn_set); int patch_text_set_nosync(void *addr, u8 c, size_t len) { - u32 *tp = addr; int ret; - ret = patch_insn_set(tp, c, len); - + ret = patch_insn_set(addr, c, len); if (!ret) - flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len); + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } NOKPROBE_SYMBOL(patch_text_set_nosync); -static int patch_insn_write(void *addr, const void *insn, size_t len) +int patch_insn_write(void *addr, const void *insn, size_t len) { - size_t patched = 0; size_t size; - int ret = 0; + int ret; /* * Copy the instructions to the destination address, two pages at a time * because __patch_insn_write() can only handle len <= 2 * PAGE_SIZE. */ - while (patched < len && !ret) { - size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched); - ret = __patch_insn_write(addr + patched, insn + patched, size); - - patched += size; + while (len) { + size = min(len, PAGE_SIZE * 2 - offset_in_page(addr)); + ret = __patch_insn_write(addr, insn, size); + if (ret) + return ret; + + addr += size; + insn += size; + len -= size; } - return ret; + return 0; } NOKPROBE_SYMBOL(patch_insn_write); int patch_text_nosync(void *addr, const void *insns, size_t len) { - u32 *tp = addr; int ret; - ret = patch_insn_write(tp, insns, len); - + ret = patch_insn_write(addr, insns, len); if (!ret) - flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -217,33 +251,36 @@ NOKPROBE_SYMBOL(patch_text_nosync); static int patch_text_cb(void *data) { struct patch_insn *patch = data; - unsigned long len; - int i, ret = 0; + int ret = 0; if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { - for (i = 0; ret == 0 && i < patch->ninsns; i++) { - len = GET_INSN_LENGTH(patch->insns[i]); - ret = patch_text_nosync(patch->addr + i * len, - &patch->insns[i], len); - } - atomic_inc(&patch->cpu_count); + ret = patch_insn_write(patch->addr, patch->insns, patch->len); + /* + * Make sure the patching store is effective *before* we + * increment the counter which releases all waiting CPUs + * by using the release variant of atomic increment. The + * release pairs with the call to local_flush_icache_all() + * on the waiting CPU. + */ + atomic_inc_return_release(&patch->cpu_count); } else { while (atomic_read(&patch->cpu_count) <= num_online_cpus()) cpu_relax(); - smp_mb(); + + local_flush_icache_all(); } return ret; } NOKPROBE_SYMBOL(patch_text_cb); -int patch_text(void *addr, u32 *insns, int ninsns) +int patch_text(void *addr, u32 *insns, size_t len) { int ret; struct patch_insn patch = { .addr = addr, .insns = insns, - .ninsns = ninsns, + .len = len, .cpu_count = ATOMIC_INIT(0), }; diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 3348a61de7d9..b465bc9eb870 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -6,37 +6,9 @@ #include <asm/stacktrace.h> -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. - */ -static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, - unsigned long fp, unsigned long reg_ra) +static bool fill_callchain(void *entry, unsigned long pc) { - struct stackframe buftail; - unsigned long ra = 0; - unsigned long __user *user_frame_tail = - (unsigned long __user *)(fp - sizeof(struct stackframe)); - - /* Check accessibility of one struct frame_tail beyond */ - if (!access_ok(user_frame_tail, sizeof(buftail))) - return 0; - if (__copy_from_user_inatomic(&buftail, user_frame_tail, - sizeof(buftail))) - return 0; - - if (reg_ra != 0) - ra = reg_ra; - else - ra = buftail.ra; - - fp = buftail.fp; - if (ra != 0) - perf_callchain_store(entry, ra); - else - return 0; - - return fp; + return perf_callchain_store(entry, pc) == 0; } /* @@ -56,23 +28,21 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - unsigned long fp = 0; + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } - fp = regs->s0; - perf_callchain_store(entry, regs->epc); - - fp = user_backtrace(entry, fp, regs->ra); - while (fp && !(fp & 0x3) && entry->nr < entry->max_stack) - fp = user_backtrace(entry, fp, 0); -} - -static bool fill_callchain(void *entry, unsigned long pc) -{ - return perf_callchain_store(entry, pc) == 0; + arch_stack_walk_user(fill_callchain, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } + walk_stackframe(NULL, regs, fill_callchain, entry); } diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile index 07915dc9279e..81d69d45c06c 100644 --- a/arch/riscv/kernel/pi/Makefile +++ b/arch/riscv/kernel/pi/Makefile @@ -5,20 +5,23 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ $(call cc-option,-mbranch-protection=none) \ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ + -include $(srctree)/include/linux/hidden.h \ -D__DISABLE_EXPORTS -ffreestanding \ -fno-asynchronous-unwind-tables -fno-unwind-tables \ $(call cc-option,-fno-addrsig) +# Disable LTO +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) + KBUILD_CFLAGS += -mcmodel=medany CFLAGS_cmdline_early.o += -D__NO_FORTIFY +CFLAGS_fdt_early.o += -D__NO_FORTIFY +# lib/string.c already defines __NO_FORTIFY +CFLAGS_ctype.o += -D__NO_FORTIFY +CFLAGS_lib-fdt.o += -D__NO_FORTIFY CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY - -GCOV_PROFILE := n -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n +CFLAGS_archrandom_early.o += -D__NO_FORTIFY $(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ --remove-section=.note.gnu.property \ @@ -35,5 +38,5 @@ $(obj)/string.o: $(srctree)/lib/string.c FORCE $(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE $(call if_changed_rule,cc_o_c) -obj-y := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o +obj-y := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o archrandom_early.pi.o extra-y := $(patsubst %.pi.o,%.o,$(obj-y)) diff --git a/arch/riscv/kernel/pi/archrandom_early.c b/arch/riscv/kernel/pi/archrandom_early.c new file mode 100644 index 000000000000..3f05d3cf3b7b --- /dev/null +++ b/arch/riscv/kernel/pi/archrandom_early.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/csr.h> +#include <linux/processor.h> + +#include "pi.h" + +/* + * To avoid rewriting code include asm/archrandom.h and create macros + * for the functions that won't be included. + */ +#undef riscv_has_extension_unlikely +#define riscv_has_extension_likely(...) false +#undef pr_err_once +#define pr_err_once(...) + +#include <asm/archrandom.h> + +u64 get_kaslr_seed_zkr(const uintptr_t dtb_pa) +{ + unsigned long seed = 0; + + if (!fdt_early_match_extension_isa((const void *)dtb_pa, "zkr")) + return 0; + + if (!csr_seed_long(&seed)) + return 0; + + return seed; +} diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c index 68e786c84c94..fbcdc9e4e143 100644 --- a/arch/riscv/kernel/pi/cmdline_early.c +++ b/arch/riscv/kernel/pi/cmdline_early.c @@ -6,15 +6,9 @@ #include <asm/pgtable.h> #include <asm/setup.h> -static char early_cmdline[COMMAND_LINE_SIZE]; +#include "pi.h" -/* - * Declare the functions that are exported (but prefixed) here so that LLVM - * does not complain it lacks the 'static' keyword (which, if added, makes - * LLVM complain because the function is actually unused in this file). - */ -u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa); -bool set_nokaslr_from_cmdline(uintptr_t dtb_pa); +static char early_cmdline[COMMAND_LINE_SIZE]; static char *get_early_cmdline(uintptr_t dtb_pa) { @@ -38,8 +32,7 @@ static char *get_early_cmdline(uintptr_t dtb_pa) if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || IS_ENABLED(CONFIG_CMDLINE_FORCE) || fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) { - strncat(early_cmdline, CONFIG_CMDLINE, - COMMAND_LINE_SIZE - fdt_cmdline_size); + strlcat(early_cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE); } return early_cmdline; diff --git a/arch/riscv/kernel/pi/fdt_early.c b/arch/riscv/kernel/pi/fdt_early.c index 899610e042ab..9bdee2fafe47 100644 --- a/arch/riscv/kernel/pi/fdt_early.c +++ b/arch/riscv/kernel/pi/fdt_early.c @@ -2,13 +2,9 @@ #include <linux/types.h> #include <linux/init.h> #include <linux/libfdt.h> +#include <linux/ctype.h> -/* - * Declare the functions that are exported (but prefixed) here so that LLVM - * does not complain it lacks the 'static' keyword (which, if added, makes - * LLVM complain because the function is actually unused in this file). - */ -u64 get_kaslr_seed(uintptr_t dtb_pa); +#include "pi.h" u64 get_kaslr_seed(uintptr_t dtb_pa) { @@ -28,3 +24,162 @@ u64 get_kaslr_seed(uintptr_t dtb_pa) *prop = 0; return ret; } + +/** + * fdt_device_is_available - check if a device is available for use + * + * @fdt: pointer to the device tree blob + * @node: offset of the node whose property to find + * + * Returns true if the status property is absent or set to "okay" or "ok", + * false otherwise + */ +static bool fdt_device_is_available(const void *fdt, int node) +{ + const char *status; + int statlen; + + status = fdt_getprop(fdt, node, "status", &statlen); + if (!status) + return true; + + if (statlen > 0) { + if (!strcmp(status, "okay") || !strcmp(status, "ok")) + return true; + } + + return false; +} + +/* Copy of fdt_nodename_eq_ */ +static int fdt_node_name_eq(const void *fdt, int offset, + const char *s) +{ + int olen; + int len = strlen(s); + const char *p = fdt_get_name(fdt, offset, &olen); + + if (!p || olen < len) + /* short match */ + return 0; + + if (memcmp(p, s, len) != 0) + return 0; + + if (p[len] == '\0') + return 1; + else if (!memchr(s, '@', len) && (p[len] == '@')) + return 1; + else + return 0; +} + +/** + * isa_string_contains - check if isa string contains an extension + * + * @isa_str: isa string to search + * @ext_name: the extension to search for + * + * Returns true if the extension is in the given isa string, + * false otherwise + */ +static bool isa_string_contains(const char *isa_str, const char *ext_name) +{ + size_t i, single_end, len = strlen(ext_name); + char ext_end; + + /* Error must contain rv32/64 */ + if (strlen(isa_str) < 4) + return false; + + if (len == 1) { + single_end = strcspn(isa_str, "sSxXzZ"); + /* Search for single chars between rv32/64 and multi-letter extensions */ + for (i = 4; i < single_end; i++) { + if (tolower(isa_str[i]) == ext_name[0]) + return true; + } + return false; + } + + /* Skip to start of multi-letter extensions */ + isa_str = strpbrk(isa_str, "sSxXzZ"); + while (isa_str) { + if (strncasecmp(isa_str, ext_name, len) == 0) { + ext_end = isa_str[len]; + /* Check if matches the whole extension. */ + if (ext_end == '\0' || ext_end == '_') + return true; + } + /* Multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + */ + isa_str = strchr(isa_str, '_'); + if (isa_str) + isa_str++; + } + + return false; +} + +/** + * early_cpu_isa_ext_available - check if cpu node has an extension + * + * @fdt: pointer to the device tree blob + * @node: offset of the cpu node + * @ext_name: the extension to search for + * + * Returns true if the cpu node has the extension, + * false otherwise + */ +static bool early_cpu_isa_ext_available(const void *fdt, int node, const char *ext_name) +{ + const void *prop; + int len; + + prop = fdt_getprop(fdt, node, "riscv,isa-extensions", &len); + if (prop && fdt_stringlist_contains(prop, len, ext_name)) + return true; + + prop = fdt_getprop(fdt, node, "riscv,isa", &len); + if (prop && isa_string_contains(prop, ext_name)) + return true; + + return false; +} + +/** + * fdt_early_match_extension_isa - check if all cpu nodes have an extension + * + * @fdt: pointer to the device tree blob + * @ext_name: the extension to search for + * + * Returns true if the all available the cpu nodes have the extension, + * false otherwise + */ +bool fdt_early_match_extension_isa(const void *fdt, const char *ext_name) +{ + int node, parent; + bool ret = false; + + parent = fdt_path_offset(fdt, "/cpus"); + if (parent < 0) + return false; + + fdt_for_each_subnode(node, fdt, parent) { + if (!fdt_node_name_eq(fdt, node, "cpu")) + continue; + + if (!fdt_device_is_available(fdt, node)) + continue; + + if (!early_cpu_isa_ext_available(fdt, node, ext_name)) + return false; + + ret = true; + } + + return ret; +} diff --git a/arch/riscv/kernel/pi/pi.h b/arch/riscv/kernel/pi/pi.h new file mode 100644 index 000000000000..21141d84fea6 --- /dev/null +++ b/arch/riscv/kernel/pi/pi.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _RISCV_PI_H_ +#define _RISCV_PI_H_ + +#include <linux/types.h> + +/* + * The following functions are exported (but prefixed). Declare them here so + * that LLVM does not complain it lacks the 'static' keyword (which, if + * added, makes LLVM complain because the function is unused). + */ + +u64 get_kaslr_seed(uintptr_t dtb_pa); +u64 get_kaslr_seed_zkr(const uintptr_t dtb_pa); +bool set_nokaslr_from_cmdline(uintptr_t dtb_pa); +u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa); + +bool fdt_early_match_extension_isa(const void *fdt, const char *ext_name); + +#endif /* _RISCV_PI_H_ */ diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index 8265ff497977..d2129f2c61b8 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o -obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c deleted file mode 100644 index 7142ec42e889..000000000000 --- a/arch/riscv/kernel/probes/ftrace.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/kprobes.h> - -/* Ftrace callback handler for kprobes -- called under preepmt disabled */ -void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *fregs) -{ - struct kprobe *p; - struct pt_regs *regs; - struct kprobe_ctlblk *kcb; - int bit; - - bit = ftrace_test_recursion_trylock(ip, parent_ip); - if (bit < 0) - return; - - p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) - goto out; - - regs = ftrace_get_regs(fregs); - kcb = get_kprobe_ctlblk(); - if (kprobe_running()) { - kprobes_inc_nmissed_count(p); - } else { - unsigned long orig_ip = instruction_pointer(regs); - - instruction_pointer_set(regs, ip); - - __this_cpu_write(current_kprobe, p); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) { - /* - * Emulate singlestep (and also recover regs->pc) - * as if there is a nop - */ - instruction_pointer_set(regs, - (unsigned long)p->addr + MCOUNT_INSN_SIZE); - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - instruction_pointer_set(regs, orig_ip); - } - - /* - * If pre_handler returns !0, it changes regs->pc. We have to - * skip emulating post_handler. - */ - __this_cpu_write(current_kprobe, NULL); - } -out: - ftrace_test_recursion_unlock(bit); -} -NOKPROBE_SYMBOL(kprobe_ftrace_handler); - -int arch_prepare_kprobe_ftrace(struct kprobe *p) -{ - p->ainsn.api.insn = NULL; - return 0; -} diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 2f08c14a933d..c0738d6c6498 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -6,12 +6,13 @@ #include <linux/extable.h> #include <linux/slab.h> #include <linux/stop_machine.h> +#include <linux/vmalloc.h> #include <asm/ptrace.h> #include <linux/uaccess.h> #include <asm/sections.h> #include <asm/cacheflush.h> #include <asm/bug.h> -#include <asm/patch.h> +#include <asm/text-patching.h> #include "decode-insn.h" @@ -23,14 +24,13 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { + size_t len = GET_INSN_LENGTH(p->opcode); u32 insn = __BUG_INSN_32; - unsigned long offset = GET_INSN_LENGTH(p->opcode); - p->ainsn.api.restore = (unsigned long)p->addr + offset; + p->ainsn.api.restore = (unsigned long)p->addr + len; - patch_text(p->ainsn.api.insn, &p->opcode, 1); - patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset), - &insn, 1); + patch_text_nosync(p->ainsn.api.insn, &p->opcode, len); + patch_text_nosync((void *)p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); } static void __kprobes arch_prepare_simulate(struct kprobe *p) @@ -104,29 +104,21 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -#ifdef CONFIG_MMU -void *alloc_insn_page(void) -{ - return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_READ_EXEC, - VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { - u32 insn = (p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32 ? - __BUG_INSN_32 : __BUG_INSN_16; + size_t len = GET_INSN_LENGTH(p->opcode); + u32 insn = len == 4 ? __BUG_INSN_32 : __BUG_INSN_16; - patch_text(p->addr, &insn, 1); + patch_text(p->addr, &insn, len); } /* remove breakpoint from text */ void __kprobes arch_disarm_kprobe(struct kprobe *p) { - patch_text(p->addr, &p->opcode, 1); + size_t len = GET_INSN_LENGTH(p->opcode); + + patch_text(p->addr, &p->opcode, len); } void __kprobes arch_remove_kprobe(struct kprobe *p) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 4f21d970a129..7c244de77180 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/bitfield.h> #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -15,6 +16,7 @@ #include <linux/tick.h> #include <linux/ptrace.h> #include <linux/uaccess.h> +#include <linux/personality.h> #include <asm/unistd.h> #include <asm/processor.h> @@ -26,8 +28,7 @@ #include <asm/cpuidle.h> #include <asm/vector.h> #include <asm/cpufeature.h> - -register unsigned long gp_in_global __asm__("gp"); +#include <asm/exec.h> #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include <linux/stackprotector.h> @@ -37,7 +38,7 @@ EXPORT_SYMBOL(__stack_chk_guard); extern asmlinkage void ret_from_fork(void); -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { cpu_do_idle(); } @@ -101,6 +102,13 @@ void show_regs(struct pt_regs *regs) dump_backtrace(regs, NULL, KERN_DEFAULT); } +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_u32_below(PAGE_SIZE); + return sp & ~0xf; +} + #ifdef CONFIG_COMPAT static bool compat_mode_supported __read_mostly; @@ -171,14 +179,19 @@ void flush_thread(void) riscv_v_vstate_off(task_pt_regs(current)); kfree(current->thread.vstate.datap); memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); + clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); +#endif +#ifdef CONFIG_RISCV_ISA_SUPM + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + envcfg_update_bits(current, ENVCFG_PMM, ENVCFG_PMM_PMLEN_0); #endif } void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. */ - if (has_vector()) - kfree(tsk->thread.vstate.datap); + if (has_vector() || has_xtheadvector()) + riscv_v_thread_free(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) @@ -187,6 +200,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) *dst = *src; /* clear entire V context, including datap for a new task */ memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); + memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state)); + clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE); return 0; } @@ -198,13 +213,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + /* Ensure all threads in this mm have the same pointer masking mode. */ + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM) && p->mm && (clone_flags & CLONE_VM)) + set_bit(MM_CONTEXT_LOCK_PMLEN, &p->mm->context.flags); + memset(&p->thread.s, 0, sizeof(p->thread.s)); /* p->thread holds context to be restored by __switch_to() */ if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; @@ -221,7 +239,160 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->a0 = 0; /* Return value of fork() */ p->thread.s[0] = 0; } + p->thread.riscv_v_flags = 0; + if (has_vector() || has_xtheadvector()) + riscv_v_thread_alloc(p); p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } + +void __init arch_task_cache_init(void) +{ + riscv_v_setup_ctx_cache(); +} + +#ifdef CONFIG_RISCV_ISA_SUPM +enum { + PMLEN_0 = 0, + PMLEN_7 = 7, + PMLEN_16 = 16, +}; + +static bool have_user_pmlen_7; +static bool have_user_pmlen_16; + +/* + * Control the relaxed ABI allowing tagged user addresses into the kernel. + */ +static unsigned int tagged_addr_disabled; + +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) +{ + unsigned long valid_mask = PR_PMLEN_MASK | PR_TAGGED_ADDR_ENABLE; + struct thread_info *ti = task_thread_info(task); + struct mm_struct *mm = task->mm; + unsigned long pmm; + u8 pmlen; + + if (is_compat_thread(ti)) + return -EINVAL; + + if (arg & ~valid_mask) + return -EINVAL; + + /* + * Prefer the smallest PMLEN that satisfies the user's request, + * in case choosing a larger PMLEN has a performance impact. + */ + pmlen = FIELD_GET(PR_PMLEN_MASK, arg); + if (pmlen == PMLEN_0) { + pmm = ENVCFG_PMM_PMLEN_0; + } else if (pmlen <= PMLEN_7 && have_user_pmlen_7) { + pmlen = PMLEN_7; + pmm = ENVCFG_PMM_PMLEN_7; + } else if (pmlen <= PMLEN_16 && have_user_pmlen_16) { + pmlen = PMLEN_16; + pmm = ENVCFG_PMM_PMLEN_16; + } else { + return -EINVAL; + } + + /* + * Do not allow the enabling of the tagged address ABI if globally + * disabled via sysctl abi.tagged_addr_disabled, if pointer masking + * is disabled for userspace. + */ + if (arg & PR_TAGGED_ADDR_ENABLE && (tagged_addr_disabled || !pmlen)) + return -EINVAL; + + if (!(arg & PR_TAGGED_ADDR_ENABLE)) + pmlen = PMLEN_0; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + if (test_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags) && mm->context.pmlen != pmlen) { + mmap_write_unlock(mm); + return -EBUSY; + } + + envcfg_update_bits(task, ENVCFG_PMM, pmm); + mm->context.pmlen = pmlen; + + mmap_write_unlock(mm); + + return 0; +} + +long get_tagged_addr_ctrl(struct task_struct *task) +{ + struct thread_info *ti = task_thread_info(task); + long ret = 0; + + if (is_compat_thread(ti)) + return -EINVAL; + + /* + * The mm context's pmlen is set only when the tagged address ABI is + * enabled, so the effective PMLEN must be extracted from envcfg.PMM. + */ + switch (task->thread.envcfg & ENVCFG_PMM) { + case ENVCFG_PMM_PMLEN_7: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_7); + break; + case ENVCFG_PMM_PMLEN_16: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_16); + break; + } + + if (task->mm->context.pmlen) + ret |= PR_TAGGED_ADDR_ENABLE; + + return ret; +} + +static bool try_to_set_pmm(unsigned long value) +{ + csr_set(CSR_ENVCFG, value); + return (csr_read_clear(CSR_ENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; +} + +/* + * Global sysctl to disable the tagged user addresses support. This control + * only prevents the tagged address ABI enabling via prctl() and does not + * disable it for tasks that already opted in to the relaxed ABI. + */ + +static const struct ctl_table tagged_addr_sysctl_table[] = { + { + .procname = "tagged_addr_disabled", + .mode = 0644, + .data = &tagged_addr_disabled, + .maxlen = sizeof(int), + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +}; + +static int __init tagged_addr_init(void) +{ + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return 0; + + /* + * envcfg.PMM is a WARL field. Detect which values are supported. + * Assume the supported PMLEN values are the same on all harts. + */ + csr_clear(CSR_ENVCFG, ENVCFG_PMM); + have_user_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); + have_user_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); + + if (!register_sysctl("abi", tagged_addr_sysctl_table)) + return -EINVAL; + + return 0; +} +core_initcall(tagged_addr_init); +#endif /* CONFIG_RISCV_ISA_SUPM */ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 2afe460de16a..ea67e9fb7a58 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -28,6 +28,9 @@ enum riscv_regset { #ifdef CONFIG_RISCV_ISA_V REGSET_V, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + REGSET_TAGGED_ADDR_CTRL, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -99,8 +102,11 @@ static int riscv_vr_get(struct task_struct *target, * Ensure the vector registers have been saved to the memory before * copying them to membuf. */ - if (target == current) - riscv_v_vstate_save(current, task_pt_regs(current)); + if (target == current) { + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, task_pt_regs(current)); + put_cpu_vector_context(); + } ptrace_vstate.vstart = vstate->vstart; ptrace_vstate.vl = vstate->vl; @@ -149,6 +155,35 @@ static int riscv_vr_set(struct task_struct *target, } #endif +#ifdef CONFIG_RISCV_ISA_SUPM +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -179,6 +214,16 @@ static const struct user_regset riscv_user_regset[] = { .set = riscv_vr_set, }, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_RISCV_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view riscv_user_native_view = { @@ -374,14 +419,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, return ret; } +#else +static const struct user_regset_view compat_riscv_user_native_view = {}; #endif /* CONFIG_COMPAT */ const struct user_regset_view *task_user_regset_view(struct task_struct *task) { -#ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_32BIT)) + if (is_compat_thread(&task->thread_info)) return &compat_riscv_user_native_view; else -#endif return &riscv_user_native_view; } diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c new file mode 100644 index 000000000000..c8115ec8fb30 --- /dev/null +++ b/arch/riscv/kernel/return_address.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code come from arch/arm64/kernel/return_address.c + * + * Copyright (C) 2023 SiFive. + */ + +#include <linux/export.h> +#include <linux/kprobes.h> +#include <linux/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static bool save_return_addr(void *d, unsigned long pc) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)pc; + return false; + } + + --data->level; + + return true; +} +NOKPROBE_SYMBOL(save_return_addr); + +noinline void *return_address(unsigned int level) +{ + struct return_address_data data; + + data.level = level + 3; + data.addr = NULL; + + arch_stack_walk(save_return_addr, &data, current, NULL); + + if (!data.level) + return data.addr; + else + return NULL; + +} +EXPORT_SYMBOL_GPL(return_address); +NOKPROBE_SYMBOL(return_address); diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index a72879b4249a..5ab1c7e1a6ed 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,9 +12,6 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index a4559695ce62..0cc5559c08d8 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -13,6 +13,9 @@ #include <linux/irqdomain.h> #include <asm/sbi.h> +DEFINE_STATIC_KEY_FALSE(riscv_sbi_for_rfence); +EXPORT_SYMBOL_GPL(riscv_sbi_for_rfence); + static int sbi_ipi_virq; static void sbi_ipi_handle(struct irq_desc *desc) @@ -68,10 +71,16 @@ void __init sbi_ipi_init(void) * the masking/unmasking of virtual IPIs is done * via generic IPI-Mux */ - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); - riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false); + riscv_ipi_set_virq_range(virq, BITS_PER_BYTE); pr_info("providing IPIs using SBI IPI extension\n"); + + /* + * Use the SBI remote fence extension to avoid + * the extra context switch needed to handle IPIs. + */ + static_branch_enable(&riscv_sbi_for_rfence); } diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 5a62ed1da453..1989b8cade1b 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -7,6 +7,7 @@ #include <linux/bits.h> #include <linux/init.h> +#include <linux/mm.h> #include <linux/pm.h> #include <linux/reboot.h> #include <asm/sbi.h> @@ -23,51 +24,6 @@ static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) __ro_after_init; -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, - unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5) -{ - struct sbiret ret; - - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); - register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); - register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); - register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); - register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); - register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); - asm volatile ("ecall" - : "+r" (a0), "+r" (a1) - : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) - : "memory"); - ret.error = a0; - ret.value = a1; - - return ret; -} -EXPORT_SYMBOL(sbi_ecall); - -int sbi_err_map_linux_errno(int err) -{ - switch (err) { - case SBI_SUCCESS: - return 0; - case SBI_ERR_DENIED: - return -EPERM; - case SBI_ERR_INVALID_PARAM: - return -EINVAL; - case SBI_ERR_INVALID_ADDRESS: - return -EFAULT; - case SBI_ERR_NOT_SUPPORTED: - case SBI_ERR_FAILURE: - default: - return -ENOTSUPP; - }; -} -EXPORT_SYMBOL(sbi_err_map_linux_errno); - #ifdef CONFIG_RISCV_SBI_V01 static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask) { @@ -527,17 +483,6 @@ long sbi_probe_extension(int extid) } EXPORT_SYMBOL(sbi_probe_extension); -static long __sbi_base_ecall(int fid) -{ - struct sbiret ret; - - ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); - if (!ret.error) - return ret.value; - else - return sbi_err_map_linux_errno(ret.error); -} - static inline long sbi_get_spec_version(void) { return __sbi_base_ecall(SBI_EXT_BASE_GET_SPEC_VERSION); @@ -571,6 +516,66 @@ long sbi_get_mimpid(void) } EXPORT_SYMBOL_GPL(sbi_get_mimpid); +bool sbi_debug_console_available; + +int sbi_debug_console_write(const char *bytes, unsigned int num_bytes) +{ + phys_addr_t base_addr; + struct sbiret ret; + + if (!sbi_debug_console_available) + return -EOPNOTSUPP; + + if (is_vmalloc_addr(bytes)) + base_addr = page_to_phys(vmalloc_to_page(bytes)) + + offset_in_page(bytes); + else + base_addr = __pa(bytes); + if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes)) + num_bytes = PAGE_SIZE - offset_in_page(bytes); + + if (IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE, + num_bytes, lower_32_bits(base_addr), + upper_32_bits(base_addr), 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE, + num_bytes, base_addr, 0, 0, 0, 0); + + if (ret.error == SBI_ERR_FAILURE) + return -EIO; + return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value; +} + +int sbi_debug_console_read(char *bytes, unsigned int num_bytes) +{ + phys_addr_t base_addr; + struct sbiret ret; + + if (!sbi_debug_console_available) + return -EOPNOTSUPP; + + if (is_vmalloc_addr(bytes)) + base_addr = page_to_phys(vmalloc_to_page(bytes)) + + offset_in_page(bytes); + else + base_addr = __pa(bytes); + if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes)) + num_bytes = PAGE_SIZE - offset_in_page(bytes); + + if (IS_ENABLED(CONFIG_32BIT)) + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ, + num_bytes, lower_32_bits(base_addr), + upper_32_bits(base_addr), 0, 0, 0); + else + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ, + num_bytes, base_addr, 0, 0, 0, 0); + + if (ret.error == SBI_ERR_FAILURE) + return -EIO; + return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value; +} + void __init sbi_init(void) { int ret; @@ -612,6 +617,11 @@ void __init sbi_init(void) sbi_srst_reboot_nb.priority = 192; register_restart_handler(&sbi_srst_reboot_nb); } + if ((sbi_spec_version >= sbi_mk_version(2, 0)) && + (sbi_probe_extension(SBI_EXT_DBCN) > 0)) { + pr_info("SBI DBCN extension detected\n"); + sbi_debug_console_available = true; + } } else { __sbi_set_timer = __sbi_set_timer_v01; __sbi_send_ipi = __sbi_send_ipi_v01; diff --git a/arch/riscv/kernel/sbi_ecall.c b/arch/riscv/kernel/sbi_ecall.c new file mode 100644 index 000000000000..24aabb4fbde3 --- /dev/null +++ b/arch/riscv/kernel/sbi_ecall.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Rivos Inc. */ + +#include <asm/sbi.h> +#define CREATE_TRACE_POINTS +#include <asm/trace.h> + +long __sbi_base_ecall(int fid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); + if (!ret.error) + return ret.value; + else + return sbi_err_map_linux_errno(ret.error); +} +EXPORT_SYMBOL(__sbi_base_ecall); + +struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, + int fid, int ext) +{ + struct sbiret ret; + + trace_sbi_call(ext, fid); + + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + asm volatile ("ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + trace_sbi_return(ext, ret.error, ret.value); + + return ret; +} +EXPORT_SYMBOL(__sbi_ecall); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 535a837de55d..4fe45daa6281 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -26,7 +26,6 @@ #include <asm/alternative.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> -#include <asm/cpu_ops.h> #include <asm/early_ioremap.h> #include <asm/pgtable.h> #include <asm/setup.h> @@ -51,7 +50,6 @@ atomic_t hart_lottery __section(".sdata") #endif ; unsigned long boot_cpu_hartid; -static DEFINE_PER_CPU(struct cpu, cpu_devices); /* * Place kernel memory regions on the resource tree so that @@ -149,9 +147,7 @@ static void __init init_resources(void) res_idx = num_resources - 1; mem_res_sz = num_resources * sizeof(*mem_res); - mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); - if (!mem_res) - panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); + mem_res = memblock_alloc_or_panic(mem_res_sz, SMP_CACHE_BYTES); /* * Start by adding the reserved regions, if they overlap @@ -229,7 +225,7 @@ static void __init init_resources(void) static void __init parse_dtb(void) { /* Early scan of device tree from init memory */ - if (early_init_dt_scan(dtb_early_va)) { + if (early_init_dt_scan(dtb_early_va, dtb_early_pa)) { const char *name = of_flat_dt_get_machine_name(); if (name) { @@ -246,6 +242,42 @@ static void __init parse_dtb(void) #endif } +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) +DEFINE_STATIC_KEY_TRUE(qspinlock_key); +EXPORT_SYMBOL(qspinlock_key); +#endif + +static void __init riscv_spinlock_init(void) +{ + char *using_ext = NULL; + + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) { + pr_info("Ticket spinlock: enabled\n"); + return; + } + + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && + riscv_isa_extension_available(NULL, ZABHA) && + riscv_isa_extension_available(NULL, ZACAS)) { + using_ext = "using Zabha"; + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) { + using_ext = "using Ziccrse"; + } +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) + else { + static_branch_disable(&qspinlock_key); + pr_info("Ticket spinlock: enabled\n"); + return; + } +#endif + + if (!using_ext) + pr_err("Queued spinlock without Zabha or Ziccrse"); + else + pr_info("Queued spinlock %s: enabled\n", using_ext); +} + extern void __init init_rt_signal_env(void); void __init setup_arch(char **cmdline_p) @@ -283,13 +315,15 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif - if (!acpi_disabled) + if (!acpi_disabled) { acpi_init_rintc_map(); + acpi_map_cpus_to_nodes(); + } riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); - init_rt_signal_env(); apply_boot_alternatives(); + init_rt_signal_env(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) @@ -297,25 +331,13 @@ void __init setup_arch(char **cmdline_p) riscv_set_dma_cache_alignment(); riscv_user_isa_enable(); + riscv_spinlock_init(); } -static int __init topology_init(void) +bool arch_cpu_is_hotpluggable(int cpu) { - int i, ret; - - for_each_possible_cpu(i) { - struct cpu *cpu = &per_cpu(cpu_devices, i); - - cpu->hotpluggable = cpu_has_hotplug(i); - ret = register_cpu(cpu, i); - if (unlikely(ret)) - pr_warn("Warning: %s: register_cpu %d failed (%d)\n", - __func__, i, ret); - } - - return 0; + return cpu_has_hotplug(cpu); } -subsys_initcall(topology_init); void free_initmem(void) { diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 88b6220b2608..08378fea3a11 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -84,14 +84,17 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) datap = state + 1; /* datap is designed to be 16 byte aligned for better performance */ - WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); + WARN_ON(!IS_ALIGNED((unsigned long)datap, 16)); + + get_cpu_vector_context(); + riscv_v_vstate_save(¤t->thread.vstate, regs); + put_cpu_vector_context(); - riscv_v_vstate_save(current, regs); /* Copy everything of vstate but datap. */ err = __copy_to_user(&state->v_state, ¤t->thread.vstate, offsetof(struct __riscv_v_ext_state, datap)); /* Copy the pointer datap itself. */ - err |= __put_user(datap, &state->v_state.datap); + err |= __put_user((__force void *)datap, &state->v_state.datap); /* Copy the whole vector content to user space datap. */ err |= __copy_to_user(datap, current->thread.vstate.datap, riscv_v_vsize); /* Copy magic to the user space after saving all vector conetext */ @@ -116,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) struct __sc_riscv_v_state __user *state = sc_vec; void __user *datap; + /* + * Mark the vstate as clean prior performing the actual copy, + * to avoid getting the vstate incorrectly clobbered by the + * discarded vector state. + */ + riscv_v_vstate_set_restore(current, regs); + /* Copy everything of __sc_riscv_v_state except datap. */ err = __copy_from_user(¤t->thread.vstate, &state->v_state, offsetof(struct __riscv_v_ext_state, datap)); @@ -130,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) * Copy the whole vector content from user space datap. Use * copy_from_user to prevent information leak. */ - err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); - if (unlikely(err)) - return err; - - riscv_v_vstate_restore(current, regs); - - return err; + return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } #else #define save_v_state(task, regs) (0) @@ -185,7 +189,7 @@ static long restore_sigcontext(struct pt_regs *regs, return 0; case RISCV_V_MAGIC: - if (!has_vector() || !riscv_v_vstate_query(regs) || + if (!(has_vector() || has_xtheadvector()) || !riscv_v_vstate_query(regs) || size != riscv_v_sc_size) return -EINVAL; @@ -207,16 +211,10 @@ static size_t get_rt_frame_size(bool cal_all) frame_size = sizeof(*frame); - if (has_vector()) { + if (has_vector() || has_xtheadvector()) { if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) total_context_size += riscv_v_sc_size; } - /* - * Preserved a __riscv_ctx_hdr for END signal context header if an - * extension uses __riscv_extra_ext_header - */ - if (total_context_size) - total_context_size += sizeof(struct __riscv_ctx_hdr); frame_size += total_context_size; @@ -280,7 +278,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, if (has_fpu()) err |= save_fp_state(regs, &sc->sc_fpregs); /* Save the vector state. */ - if (has_vector() && riscv_v_vstate_query(regs)) + if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs)) err |= save_v_state(regs, (void __user **)&sc_ext_ptr); /* Write zero to fp-reserved space and check it on restore_sigcontext */ err |= __put_user(0, &sc->sc_extdesc.reserved); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 40420afbb1a0..d58b5e751286 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -13,6 +13,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/kexec.h> +#include <linux/kgdb.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/smp.h> @@ -21,6 +22,7 @@ #include <linux/delay.h> #include <linux/irq.h> #include <linux/irq_work.h> +#include <linux/nmi.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> @@ -33,12 +35,15 @@ enum ipi_message_type { IPI_CPU_CRASH_STOP, IPI_IRQ_WORK, IPI_TIMER, + IPI_CPU_BACKTRACE, + IPI_KGDB_ROUNDUP, IPI_MAX }; unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = INVALID_HARTID }; +EXPORT_SYMBOL_GPL(__cpuid_to_hartid_map); void __init smp_setup_processor_id(void) { @@ -81,7 +86,7 @@ static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU if (cpu_has_hotplug(cpu)) - cpu_ops[cpu]->cpu_stop(); + cpu_ops->cpu_stop(); #endif for(;;) @@ -113,6 +118,7 @@ void arch_irq_work_raise(void) static irqreturn_t handle_IPI(int irq, void *data) { + unsigned int cpu = smp_processor_id(); int ipi = irq - ipi_virq_base; switch (ipi) { @@ -126,7 +132,7 @@ static irqreturn_t handle_IPI(int irq, void *data) ipi_stop(); break; case IPI_CPU_CRASH_STOP: - ipi_cpu_crash_stop(smp_processor_id(), get_irq_regs()); + ipi_cpu_crash_stop(cpu, get_irq_regs()); break; case IPI_IRQ_WORK: irq_work_run(); @@ -136,8 +142,14 @@ static irqreturn_t handle_IPI(int irq, void *data) tick_receive_broadcast(); break; #endif + case IPI_CPU_BACKTRACE: + nmi_cpu_backtrace(get_irq_regs()); + break; + case IPI_KGDB_ROUNDUP: + kgdb_nmicallback(cpu, get_irq_regs()); + break; default: - pr_warn("CPU%d: unhandled IPI%d\n", smp_processor_id(), ipi); + pr_warn("CPU%d: unhandled IPI%d\n", cpu, ipi); break; } @@ -171,10 +183,7 @@ bool riscv_ipi_have_virq_range(void) return (ipi_virq_base) ? true : false; } -DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); -EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence); - -void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) +void riscv_ipi_set_virq_range(int virq, int nr) { int i, err; @@ -197,12 +206,6 @@ void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) /* Enabled IPIs for boot CPU immediately */ riscv_ipi_enable(); - - /* Update RFENCE static key */ - if (use_for_rfence) - static_branch_enable(&riscv_ipi_for_rfence); - else - static_branch_disable(&riscv_ipi_for_rfence); } static const char * const ipi_names[] = { @@ -212,6 +215,8 @@ static const char * const ipi_names[] = { [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", }; void show_ipi_stats(struct seq_file *p, int prec) @@ -332,3 +337,29 @@ void arch_smp_send_reschedule(int cpu) send_ipi_single(cpu, IPI_RESCHEDULE); } EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); + +static void riscv_backtrace_ipi(cpumask_t *mask) +{ + send_ipi_mask(mask, IPI_CPU_BACKTRACE); +} + +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, riscv_backtrace_ipi); +} + +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(void) +{ + int this_cpu = raw_smp_processor_id(); + int cpu; + + for_each_online_cpu(cpu) { + /* No need to roundup ourselves */ + if (cpu == this_cpu) + continue; + + send_ipi_single(cpu, IPI_KGDB_ROUNDUP); + } +} +#endif diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index d162bf339beb..e36d20205bd7 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -26,9 +26,8 @@ #include <linux/sched/task_stack.h> #include <linux/sched/mm.h> -#include <asm/cpufeature.h> +#include <asm/cacheflush.h> #include <asm/cpu_ops.h> -#include <asm/cpufeature.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/numa.h> @@ -42,14 +41,9 @@ static DECLARE_COMPLETION(cpu_running); -void __init smp_prepare_boot_cpu(void) -{ -} - void __init smp_prepare_cpus(unsigned int max_cpus) { int cpuid; - int ret; unsigned int curr_cpuid; init_cpu_topology(); @@ -66,11 +60,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(cpuid) { if (cpuid == curr_cpuid) continue; - if (cpu_ops[cpuid]->cpu_prepare) { - ret = cpu_ops[cpuid]->cpu_prepare(cpuid); - if (ret) - continue; - } set_cpu_present(cpuid, true); numa_store_cpu_info(cpuid); } @@ -107,7 +96,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un if (hart == cpuid_to_hartid_map(0)) { BUG_ON(found_boot_cpu); found_boot_cpu = true; - early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count)); return 0; } @@ -117,7 +105,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un } cpuid_to_hartid_map(cpu_count) = hart; - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count)); cpu_count++; return 0; @@ -125,18 +112,7 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un static void __init acpi_parse_and_init_cpus(void) { - int cpuid; - - cpu_set_ops(0); - acpi_table_parse_madt(ACPI_MADT_TYPE_RINTC, acpi_parse_rintc, 0); - - for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) { - if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) { - cpu_set_ops(cpuid); - set_cpu_possible(cpuid, true); - } - } } #else #define acpi_parse_and_init_cpus(...) do { } while (0) @@ -150,8 +126,6 @@ static void __init of_parse_and_init_cpus(void) int cpuid = 1; int rc; - cpu_set_ops(0); - for_each_of_cpu_node(dn) { rc = riscv_early_of_processor_hartid(dn, &hart); if (rc < 0) @@ -179,27 +153,28 @@ static void __init of_parse_and_init_cpus(void) if (cpuid > nr_cpu_ids) pr_warn("Total number of cpus [%d] is greater than nr_cpus option value [%d]\n", cpuid, nr_cpu_ids); - - for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) { - if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) { - cpu_set_ops(cpuid); - set_cpu_possible(cpuid, true); - } - } } void __init setup_smp(void) { + int cpuid; + + cpu_set_ops(); + if (acpi_disabled) of_parse_and_init_cpus(); else acpi_parse_and_init_cpus(); + + for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) + if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) + set_cpu_possible(cpuid, true); } static int start_secondary_cpu(int cpu, struct task_struct *tidle) { - if (cpu_ops[cpu]->cpu_start) - return cpu_ops[cpu]->cpu_start(cpu, tidle); + if (cpu_ops->cpu_start) + return cpu_ops->cpu_start(cpu, tidle); return -EOPNOTSUPP; } @@ -237,6 +212,15 @@ asmlinkage __visible void smp_callin(void) struct mm_struct *mm = &init_mm; unsigned int curr_cpuid = smp_processor_id(); + if (has_vector()) { + /* + * Return as early as possible so the hart with a mismatching + * vlen won't boot. + */ + if (riscv_v_setup_vsize()) + return; + } + /* All kernel threads share the same mm context. */ mmgrab(mm); current->active_mm = mm; @@ -247,19 +231,13 @@ asmlinkage __visible void smp_callin(void) riscv_ipi_enable(); numa_add_cpu(curr_cpuid); - set_cpu_online(curr_cpuid, 1); - - if (has_vector()) { - if (riscv_v_setup_vsize()) - elf_hwcap &= ~COMPAT_HWCAP_ISA_V; - } - - riscv_user_isa_enable(); + set_cpu_online(curr_cpuid, true); /* - * Remote TLB flushes are ignored while the CPU is offline, so emit - * a local TLB flush right now just in case. + * Remote cache and TLB flushes are ignored while the CPU is offline, + * so flush them both right now just in case. */ + local_flush_icache_all(); local_flush_tlb_all(); complete(&cpu_running); /* diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 64a9c093aef9..d4355c770c36 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,12 +16,24 @@ #ifdef CONFIG_FRAME_POINTER -extern asmlinkage void ret_from_exception(void); +extern asmlinkage void handle_exception(void); +extern unsigned long ret_from_exception_end; + +static inline int fp_is_valid(unsigned long fp, unsigned long sp) +{ + unsigned long low, high; + + low = sp + sizeof(struct stackframe); + high = ALIGN(sp, THREAD_SIZE); + + return !(fp < low || fp > high || fp & 0x07); +} void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int graph_idx = 0; int level = 0; if (regs) { @@ -41,28 +53,27 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } for (;;) { - unsigned long low, high; struct stackframe *frame; if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) break; - /* Validate frame pointer */ - low = sp + sizeof(struct stackframe); - high = ALIGN(sp, THREAD_SIZE); - if (unlikely(fp < low || fp > high || fp & 0x7)) + if (unlikely(!fp_is_valid(fp, sp))) break; + /* Unwind stack frame */ frame = (struct stackframe *)fp - 1; sp = fp; - if (regs && (regs->epc == pc) && (frame->fp & 0x7)) { + if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) { + /* We hit function where ra is not saved on the stack */ fp = frame->ra; pc = regs->ra; } else { fp = frame->fp; - pc = ftrace_graph_ret_addr(current, NULL, frame->ra, + pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); - if (pc == (unsigned long)ret_from_exception) { + if (pc >= (unsigned long)handle_exception && + pc < (unsigned long)&ret_from_exception_end) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) break; @@ -148,8 +159,51 @@ unsigned long __get_wchan(struct task_struct *task) return pc; } -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); } + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long unwind_user_frame(stack_trace_consume_fn consume_entry, + void *cookie, unsigned long fp, + unsigned long reg_ra) +{ + struct stackframe buftail; + unsigned long ra = 0; + unsigned long __user *user_frame_tail = + (unsigned long __user *)(fp - sizeof(struct stackframe)); + + /* Check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + if (__copy_from_user_inatomic(&buftail, user_frame_tail, + sizeof(buftail))) + return 0; + + ra = reg_ra ? : buftail.ra; + + fp = buftail.fp; + if (!ra || !consume_entry(cookie, ra)) + return 0; + + return fp; +} + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + unsigned long fp = 0; + + fp = regs->s0; + if (!consume_entry(cookie, regs->epc)) + return; + + fp = unwind_user_frame(consume_entry, cookie, fp, regs->ra); + while (fp && !(fp & 0x7)) + fp = unwind_user_frame(consume_entry, cookie, fp, 0); +} diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 3c89b8ec69c4..9a8a0dc035b2 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -4,13 +4,18 @@ * Copyright (c) 2022 Ventana Micro Systems Inc. */ +#define pr_fmt(fmt) "suspend: " fmt + #include <linux/ftrace.h> +#include <linux/suspend.h> #include <asm/csr.h> +#include <asm/sbi.h> #include <asm/suspend.h> void suspend_save_csrs(struct suspend_context *context) { - context->scratch = csr_read(CSR_SCRATCH); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) + context->envcfg = csr_read(CSR_ENVCFG); context->tvec = csr_read(CSR_TVEC); context->ie = csr_read(CSR_IE); @@ -31,7 +36,9 @@ void suspend_save_csrs(struct suspend_context *context) void suspend_restore_csrs(struct suspend_context *context) { - csr_write(CSR_SCRATCH, context->scratch); + csr_write(CSR_SCRATCH, 0); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) + csr_write(CSR_ENVCFG, context->envcfg); csr_write(CSR_TVEC, context->tvec); csr_write(CSR_IE, context->ie); @@ -85,3 +92,92 @@ int cpu_suspend(unsigned long arg, return rc; } + +#ifdef CONFIG_RISCV_SBI +static int sbi_system_suspend(unsigned long sleep_type, + unsigned long resume_addr, + unsigned long opaque) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_SUSP, SBI_EXT_SUSP_SYSTEM_SUSPEND, + sleep_type, resume_addr, opaque, 0, 0, 0); + if (ret.error) + return sbi_err_map_linux_errno(ret.error); + + return ret.value; +} + +static int sbi_system_suspend_enter(suspend_state_t state) +{ + return cpu_suspend(SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM, sbi_system_suspend); +} + +static const struct platform_suspend_ops sbi_system_suspend_ops = { + .valid = suspend_valid_only_mem, + .enter = sbi_system_suspend_enter, +}; + +static int __init sbi_system_suspend_init(void) +{ + if (sbi_spec_version >= sbi_mk_version(2, 0) && + sbi_probe_extension(SBI_EXT_SUSP) > 0) { + pr_info("SBI SUSP extension detected\n"); + if (IS_ENABLED(CONFIG_SUSPEND)) + suspend_set_ops(&sbi_system_suspend_ops); + } + + return 0; +} + +arch_initcall(sbi_system_suspend_init); + +static int sbi_suspend_finisher(unsigned long suspend_type, + unsigned long resume_addr, + unsigned long opaque) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND, + suspend_type, resume_addr, opaque, 0, 0, 0); + + return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0; +} + +int riscv_sbi_hart_suspend(u32 state) +{ + if (state & SBI_HSM_SUSP_NON_RET_BIT) + return cpu_suspend(state, sbi_suspend_finisher); + else + return sbi_suspend_finisher(state, 0, 0); +} + +bool riscv_sbi_suspend_state_is_valid(u32 state) +{ + if (state > SBI_HSM_SUSPEND_RET_DEFAULT && + state < SBI_HSM_SUSPEND_RET_PLATFORM) + return false; + + if (state > SBI_HSM_SUSPEND_NON_RET_DEFAULT && + state < SBI_HSM_SUSPEND_NON_RET_PLATFORM) + return false; + + return true; +} + +bool riscv_sbi_hsm_is_supported(void) +{ + /* + * The SBI HSM suspend function is only available when: + * 1) SBI version is 0.3 or higher + * 2) SBI HSM extension is available + */ + if (sbi_spec_version < sbi_mk_version(0, 3) || + !sbi_probe_extension(SBI_EXT_HSM)) { + pr_info("HSM suspend not available\n"); + return false; + } + + return true; +} +#endif /* CONFIG_RISCV_SBI */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c new file mode 100644 index 000000000000..bcd3b816306c --- /dev/null +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * The hwprobe interface, for allowing userspace to probe to see which features + * are supported by the hardware. See Documentation/arch/riscv/hwprobe.rst for + * more details. + */ +#include <linux/syscalls.h> +#include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/hwprobe.h> +#include <asm/processor.h> +#include <asm/delay.h> +#include <asm/sbi.h> +#include <asm/switch_to.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> +#include <asm/vector.h> +#include <asm/vendor_extensions/thead_hwprobe.h> +#include <vdso/vsyscall.h> + + +static void hwprobe_arch_id(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + u64 id = -1ULL; + bool first = true; + int cpu; + + for_each_cpu(cpu, cpus) { + u64 cpu_id; + + switch (pair->key) { + case RISCV_HWPROBE_KEY_MVENDORID: + cpu_id = riscv_cached_mvendorid(cpu); + break; + case RISCV_HWPROBE_KEY_MIMPID: + cpu_id = riscv_cached_mimpid(cpu); + break; + case RISCV_HWPROBE_KEY_MARCHID: + cpu_id = riscv_cached_marchid(cpu); + break; + } + + if (first) { + id = cpu_id; + first = false; + } + + /* + * If there's a mismatch for the given set, return -1 in the + * value. + */ + if (id != cpu_id) { + id = -1ULL; + break; + } + } + + pair->value = id; +} + +static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + int cpu; + u64 missing = 0; + + pair->value = 0; + if (has_fpu()) + pair->value |= RISCV_HWPROBE_IMA_FD; + + if (riscv_isa_extension_available(NULL, c)) + pair->value |= RISCV_HWPROBE_IMA_C; + + if (has_vector() && riscv_isa_extension_available(NULL, v)) + pair->value |= RISCV_HWPROBE_IMA_V; + + /* + * Loop through and record extensions that 1) anyone has, and 2) anyone + * doesn't have. + */ + for_each_cpu(cpu, cpus) { + struct riscv_isainfo *isainfo = &hart_isa[cpu]; + +#define EXT_KEY(ext) \ + do { \ + if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_EXT_##ext)) \ + pair->value |= RISCV_HWPROBE_EXT_##ext; \ + else \ + missing |= RISCV_HWPROBE_EXT_##ext; \ + } while (false) + + /* + * Only use EXT_KEY() for extensions which can be exposed to userspace, + * regardless of the kernel's configuration, as no other checks, besides + * presence in the hart_isa bitmap, are made. + */ + EXT_KEY(ZACAS); + EXT_KEY(ZAWRS); + EXT_KEY(ZBA); + EXT_KEY(ZBB); + EXT_KEY(ZBC); + EXT_KEY(ZBKB); + EXT_KEY(ZBKC); + EXT_KEY(ZBKX); + EXT_KEY(ZBS); + EXT_KEY(ZCA); + EXT_KEY(ZCB); + EXT_KEY(ZCMOP); + EXT_KEY(ZICBOZ); + EXT_KEY(ZICOND); + EXT_KEY(ZIHINTNTL); + EXT_KEY(ZIHINTPAUSE); + EXT_KEY(ZIMOP); + EXT_KEY(ZKND); + EXT_KEY(ZKNE); + EXT_KEY(ZKNH); + EXT_KEY(ZKSED); + EXT_KEY(ZKSH); + EXT_KEY(ZKT); + EXT_KEY(ZTSO); + + /* + * All the following extensions must depend on the kernel + * support of V. + */ + if (has_vector()) { + EXT_KEY(ZVBB); + EXT_KEY(ZVBC); + EXT_KEY(ZVE32F); + EXT_KEY(ZVE32X); + EXT_KEY(ZVE64D); + EXT_KEY(ZVE64F); + EXT_KEY(ZVE64X); + EXT_KEY(ZVFH); + EXT_KEY(ZVFHMIN); + EXT_KEY(ZVKB); + EXT_KEY(ZVKG); + EXT_KEY(ZVKNED); + EXT_KEY(ZVKNHA); + EXT_KEY(ZVKNHB); + EXT_KEY(ZVKSED); + EXT_KEY(ZVKSH); + EXT_KEY(ZVKT); + } + + if (has_fpu()) { + EXT_KEY(ZCD); + EXT_KEY(ZCF); + EXT_KEY(ZFA); + EXT_KEY(ZFH); + EXT_KEY(ZFHMIN); + } + + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM)) + EXT_KEY(SUPM); +#undef EXT_KEY + } + + /* Now turn off reporting features if any CPU is missing it. */ + pair->value &= ~missing; +} + +static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +{ + struct riscv_hwprobe pair; + + hwprobe_isa_ext0(&pair, cpus); + return (pair.value & ext); +} + +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) +static u64 hwprobe_misaligned(const struct cpumask *cpus) +{ + int cpu; + u64 perf = -1ULL; + + for_each_cpu(cpu, cpus) { + int this_perf = per_cpu(misaligned_access_speed, cpu); + + if (perf == -1ULL) + perf = this_perf; + + if (perf != this_perf) { + perf = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; + break; + } + } + + if (perf == -1ULL) + return RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; + + return perf; +} +#else +static u64 hwprobe_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + + if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) + return RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; + + return RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; +} +#endif + +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + int cpu; + u64 perf = -1ULL; + + /* Return if supported or not even if speed wasn't probed */ + for_each_cpu(cpu, cpus) { + int this_perf = per_cpu(vector_misaligned_access, cpu); + + if (perf == -1ULL) + perf = this_perf; + + if (perf != this_perf) { + perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + break; + } + } + + if (perf == -1ULL) + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + return perf; +} +#else +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + if (IS_ENABLED(CONFIG_RISCV_SLOW_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; +} +#endif + +static void hwprobe_one_pair(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + switch (pair->key) { + case RISCV_HWPROBE_KEY_MVENDORID: + case RISCV_HWPROBE_KEY_MARCHID: + case RISCV_HWPROBE_KEY_MIMPID: + hwprobe_arch_id(pair, cpus); + break; + /* + * The kernel already assumes that the base single-letter ISA + * extensions are supported on all harts, and only supports the + * IMA base, so just cheat a bit here and tell that to + * userspace. + */ + case RISCV_HWPROBE_KEY_BASE_BEHAVIOR: + pair->value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA; + break; + + case RISCV_HWPROBE_KEY_IMA_EXT_0: + hwprobe_isa_ext0(pair, cpus); + break; + + case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: + pair->value = hwprobe_misaligned(cpus); + break; + + case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: + pair->value = hwprobe_vec_misaligned(cpus); + break; + + case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE: + pair->value = 0; + if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) + pair->value = riscv_cboz_block_size; + break; + case RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS: + pair->value = user_max_virt_addr(); + break; + + case RISCV_HWPROBE_KEY_TIME_CSR_FREQ: + pair->value = riscv_timebase; + break; + + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: + hwprobe_isa_vendor_ext_thead_0(pair, cpus); + break; + + /* + * For forward compatibility, unknown keys don't fail the whole + * call, but get their element key set to -1 and value set to 0 + * indicating they're unrecognized. + */ + default: + pair->key = -1; + pair->value = 0; + break; + } +} + +static int hwprobe_get_values(struct riscv_hwprobe __user *pairs, + size_t pair_count, size_t cpusetsize, + unsigned long __user *cpus_user, + unsigned int flags) +{ + size_t out; + int ret; + cpumask_t cpus; + + /* Check the reserved flags. */ + if (flags != 0) + return -EINVAL; + + /* + * The interface supports taking in a CPU mask, and returns values that + * are consistent across that mask. Allow userspace to specify NULL and + * 0 as a shortcut to all online CPUs. + */ + cpumask_clear(&cpus); + if (!cpusetsize && !cpus_user) { + cpumask_copy(&cpus, cpu_online_mask); + } else { + if (cpusetsize > cpumask_size()) + cpusetsize = cpumask_size(); + + ret = copy_from_user(&cpus, cpus_user, cpusetsize); + if (ret) + return -EFAULT; + + /* + * Userspace must provide at least one online CPU, without that + * there's no way to define what is supported. + */ + cpumask_and(&cpus, &cpus, cpu_online_mask); + if (cpumask_empty(&cpus)) + return -EINVAL; + } + + for (out = 0; out < pair_count; out++, pairs++) { + struct riscv_hwprobe pair; + + if (get_user(pair.key, &pairs->key)) + return -EFAULT; + + pair.value = 0; + hwprobe_one_pair(&pair, &cpus); + ret = put_user(pair.key, &pairs->key); + if (ret == 0) + ret = put_user(pair.value, &pairs->value); + + if (ret) + return -EFAULT; + } + + return 0; +} + +static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs, + size_t pair_count, size_t cpusetsize, + unsigned long __user *cpus_user, + unsigned int flags) +{ + cpumask_t cpus, one_cpu; + bool clear_all = false; + size_t i; + int ret; + + if (flags != RISCV_HWPROBE_WHICH_CPUS) + return -EINVAL; + + if (!cpusetsize || !cpus_user) + return -EINVAL; + + if (cpusetsize > cpumask_size()) + cpusetsize = cpumask_size(); + + ret = copy_from_user(&cpus, cpus_user, cpusetsize); + if (ret) + return -EFAULT; + + if (cpumask_empty(&cpus)) + cpumask_copy(&cpus, cpu_online_mask); + + cpumask_and(&cpus, &cpus, cpu_online_mask); + + cpumask_clear(&one_cpu); + + for (i = 0; i < pair_count; i++) { + struct riscv_hwprobe pair, tmp; + int cpu; + + ret = copy_from_user(&pair, &pairs[i], sizeof(pair)); + if (ret) + return -EFAULT; + + if (!riscv_hwprobe_key_is_valid(pair.key)) { + clear_all = true; + pair = (struct riscv_hwprobe){ .key = -1, }; + ret = copy_to_user(&pairs[i], &pair, sizeof(pair)); + if (ret) + return -EFAULT; + } + + if (clear_all) + continue; + + tmp = (struct riscv_hwprobe){ .key = pair.key, }; + + for_each_cpu(cpu, &cpus) { + cpumask_set_cpu(cpu, &one_cpu); + + hwprobe_one_pair(&tmp, &one_cpu); + + if (!riscv_hwprobe_pair_cmp(&tmp, &pair)) + cpumask_clear_cpu(cpu, &cpus); + + cpumask_clear_cpu(cpu, &one_cpu); + } + } + + if (clear_all) + cpumask_clear(&cpus); + + ret = copy_to_user(cpus_user, &cpus, cpusetsize); + if (ret) + return -EFAULT; + + return 0; +} + +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, + size_t pair_count, size_t cpusetsize, + unsigned long __user *cpus_user, + unsigned int flags) +{ + if (flags & RISCV_HWPROBE_WHICH_CPUS) + return hwprobe_get_cpus(pairs, pair_count, cpusetsize, + cpus_user, flags); + + return hwprobe_get_values(pairs, pair_count, cpusetsize, + cpus_user, flags); +} + +#ifdef CONFIG_MMU + +static int __init init_hwprobe_vdso_data(void) +{ + struct vdso_data *vd = __arch_get_k_vdso_data(); + struct arch_vdso_time_data *avd = &vd->arch_data; + u64 id_bitsmash = 0; + struct riscv_hwprobe pair; + int key; + + /* + * Initialize vDSO data with the answers for the "all CPUs" case, to + * save a syscall in the common case. + */ + for (key = 0; key <= RISCV_HWPROBE_MAX_KEY; key++) { + pair.key = key; + hwprobe_one_pair(&pair, cpu_online_mask); + + WARN_ON_ONCE(pair.key < 0); + + avd->all_cpu_hwprobe_values[key] = pair.value; + /* + * Smash together the vendor, arch, and impl IDs to see if + * they're all 0 or any negative. + */ + if (key <= RISCV_HWPROBE_KEY_MIMPID) + id_bitsmash |= pair.value; + } + + /* + * If the arch, vendor, and implementation ID are all the same across + * all harts, then assume all CPUs are the same, and allow the vDSO to + * answer queries for arbitrary masks. However if all values are 0 (not + * populated) or any value returns -1 (varies across CPUs), then the + * vDSO should defer to the kernel for exotic cpu masks. + */ + avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1; + return 0; +} + +arch_initcall_sync(init_hwprobe_vdso_data); + +#endif /* CONFIG_MMU */ + +SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs, + size_t, pair_count, size_t, cpusetsize, unsigned long __user *, + cpus, unsigned int, flags) +{ + return do_riscv_hwprobe(pairs, pair_count, cpusetsize, + cpus, flags); +} diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index a2ca5b7756a5..d77afe05578f 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -7,15 +7,6 @@ #include <linux/syscalls.h> #include <asm/cacheflush.h> -#include <asm/cpufeature.h> -#include <asm/hwprobe.h> -#include <asm/sbi.h> -#include <asm/vector.h> -#include <asm/switch_to.h> -#include <asm/uaccess.h> -#include <asm/unistd.h> -#include <asm-generic/mman-common.h> -#include <vdso/vsyscall.h> static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -32,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, #ifdef CONFIG_64BIT SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0); } @@ -41,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { /* * Note that the shift for mmap2 is constant (12), @@ -77,283 +68,6 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, return 0; } -/* - * The hwprobe interface, for allowing userspace to probe to see which features - * are supported by the hardware. See Documentation/arch/riscv/hwprobe.rst for more - * details. - */ -static void hwprobe_arch_id(struct riscv_hwprobe *pair, - const struct cpumask *cpus) -{ - u64 id = -1ULL; - bool first = true; - int cpu; - - for_each_cpu(cpu, cpus) { - u64 cpu_id; - - switch (pair->key) { - case RISCV_HWPROBE_KEY_MVENDORID: - cpu_id = riscv_cached_mvendorid(cpu); - break; - case RISCV_HWPROBE_KEY_MIMPID: - cpu_id = riscv_cached_mimpid(cpu); - break; - case RISCV_HWPROBE_KEY_MARCHID: - cpu_id = riscv_cached_marchid(cpu); - break; - } - - if (first) { - id = cpu_id; - first = false; - } - - /* - * If there's a mismatch for the given set, return -1 in the - * value. - */ - if (id != cpu_id) { - id = -1ULL; - break; - } - } - - pair->value = id; -} - -static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, - const struct cpumask *cpus) -{ - int cpu; - u64 missing = 0; - - pair->value = 0; - if (has_fpu()) - pair->value |= RISCV_HWPROBE_IMA_FD; - - if (riscv_isa_extension_available(NULL, c)) - pair->value |= RISCV_HWPROBE_IMA_C; - - if (has_vector()) - pair->value |= RISCV_HWPROBE_IMA_V; - - /* - * Loop through and record extensions that 1) anyone has, and 2) anyone - * doesn't have. - */ - for_each_cpu(cpu, cpus) { - struct riscv_isainfo *isainfo = &hart_isa[cpu]; - -#define EXT_KEY(ext) \ - do { \ - if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_EXT_##ext)) \ - pair->value |= RISCV_HWPROBE_EXT_##ext; \ - else \ - missing |= RISCV_HWPROBE_EXT_##ext; \ - } while (false) - - /* - * Only use EXT_KEY() for extensions which can be exposed to userspace, - * regardless of the kernel's configuration, as no other checks, besides - * presence in the hart_isa bitmap, are made. - */ - EXT_KEY(ZBA); - EXT_KEY(ZBB); - EXT_KEY(ZBS); - EXT_KEY(ZICBOZ); -#undef EXT_KEY - } - - /* Now turn off reporting features if any CPU is missing it. */ - pair->value &= ~missing; -} - -static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext) -{ - struct riscv_hwprobe pair; - - hwprobe_isa_ext0(&pair, cpus); - return (pair.value & ext); -} - -static u64 hwprobe_misaligned(const struct cpumask *cpus) -{ - int cpu; - u64 perf = -1ULL; - - for_each_cpu(cpu, cpus) { - int this_perf = per_cpu(misaligned_access_speed, cpu); - - if (perf == -1ULL) - perf = this_perf; - - if (perf != this_perf) { - perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; - break; - } - } - - if (perf == -1ULL) - return RISCV_HWPROBE_MISALIGNED_UNKNOWN; - - return perf; -} - -static void hwprobe_one_pair(struct riscv_hwprobe *pair, - const struct cpumask *cpus) -{ - switch (pair->key) { - case RISCV_HWPROBE_KEY_MVENDORID: - case RISCV_HWPROBE_KEY_MARCHID: - case RISCV_HWPROBE_KEY_MIMPID: - hwprobe_arch_id(pair, cpus); - break; - /* - * The kernel already assumes that the base single-letter ISA - * extensions are supported on all harts, and only supports the - * IMA base, so just cheat a bit here and tell that to - * userspace. - */ - case RISCV_HWPROBE_KEY_BASE_BEHAVIOR: - pair->value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA; - break; - - case RISCV_HWPROBE_KEY_IMA_EXT_0: - hwprobe_isa_ext0(pair, cpus); - break; - - case RISCV_HWPROBE_KEY_CPUPERF_0: - pair->value = hwprobe_misaligned(cpus); - break; - - case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE: - pair->value = 0; - if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) - pair->value = riscv_cboz_block_size; - break; - - /* - * For forward compatibility, unknown keys don't fail the whole - * call, but get their element key set to -1 and value set to 0 - * indicating they're unrecognized. - */ - default: - pair->key = -1; - pair->value = 0; - break; - } -} - -static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, - size_t pair_count, size_t cpu_count, - unsigned long __user *cpus_user, - unsigned int flags) -{ - size_t out; - int ret; - cpumask_t cpus; - - /* Check the reserved flags. */ - if (flags != 0) - return -EINVAL; - - /* - * The interface supports taking in a CPU mask, and returns values that - * are consistent across that mask. Allow userspace to specify NULL and - * 0 as a shortcut to all online CPUs. - */ - cpumask_clear(&cpus); - if (!cpu_count && !cpus_user) { - cpumask_copy(&cpus, cpu_online_mask); - } else { - if (cpu_count > cpumask_size()) - cpu_count = cpumask_size(); - - ret = copy_from_user(&cpus, cpus_user, cpu_count); - if (ret) - return -EFAULT; - - /* - * Userspace must provide at least one online CPU, without that - * there's no way to define what is supported. - */ - cpumask_and(&cpus, &cpus, cpu_online_mask); - if (cpumask_empty(&cpus)) - return -EINVAL; - } - - for (out = 0; out < pair_count; out++, pairs++) { - struct riscv_hwprobe pair; - - if (get_user(pair.key, &pairs->key)) - return -EFAULT; - - pair.value = 0; - hwprobe_one_pair(&pair, &cpus); - ret = put_user(pair.key, &pairs->key); - if (ret == 0) - ret = put_user(pair.value, &pairs->value); - - if (ret) - return -EFAULT; - } - - return 0; -} - -#ifdef CONFIG_MMU - -static int __init init_hwprobe_vdso_data(void) -{ - struct vdso_data *vd = __arch_get_k_vdso_data(); - struct arch_vdso_data *avd = &vd->arch_data; - u64 id_bitsmash = 0; - struct riscv_hwprobe pair; - int key; - - /* - * Initialize vDSO data with the answers for the "all CPUs" case, to - * save a syscall in the common case. - */ - for (key = 0; key <= RISCV_HWPROBE_MAX_KEY; key++) { - pair.key = key; - hwprobe_one_pair(&pair, cpu_online_mask); - - WARN_ON_ONCE(pair.key < 0); - - avd->all_cpu_hwprobe_values[key] = pair.value; - /* - * Smash together the vendor, arch, and impl IDs to see if - * they're all 0 or any negative. - */ - if (key <= RISCV_HWPROBE_KEY_MIMPID) - id_bitsmash |= pair.value; - } - - /* - * If the arch, vendor, and implementation ID are all the same across - * all harts, then assume all CPUs are the same, and allow the vDSO to - * answer queries for arbitrary masks. However if all values are 0 (not - * populated) or any value returns -1 (varies across CPUs), then the - * vDSO should defer to the kernel for exotic cpu masks. - */ - avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1; - return 0; -} - -arch_initcall_sync(init_hwprobe_vdso_data); - -#endif /* CONFIG_MMU */ - -SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs, - size_t, pair_count, size_t, cpu_count, unsigned long __user *, - cpus, unsigned int, flags) -{ - return do_riscv_hwprobe(pairs, pair_count, cpu_count, - cpus, flags); -} - /* Not defined using SYSCALL_DEFINE0 to avoid error injection */ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *__unused) { diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index dda913764903..6f1a36cb0f3f 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -9,14 +9,16 @@ #include <asm-generic/syscalls.h> #include <asm/syscall.h> +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) + #undef __SYSCALL #define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *); -#include <asm/unistd.h> +#include <asm/syscall_table.h> #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = __riscv_##call, void * const sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall, -#include <asm/unistd.h> +#include <asm/syscall_table.h> }; diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug index 5dba64e8e977..78cea5d2c270 100644 --- a/arch/riscv/kernel/tests/Kconfig.debug +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -6,7 +6,7 @@ config AS_HAS_ULEB128 menuconfig RUNTIME_KERNEL_TESTING_MENU bool "arch/riscv/kernel runtime Testing" - def_bool y + default y help Enable riscv kernel runtime testing. diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 23641e82a9df..ba3477197789 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -12,6 +12,7 @@ #include <asm/sbi.h> #include <asm/processor.h> #include <asm/timex.h> +#include <asm/paravirt.h> unsigned long riscv_timebase __ro_after_init; EXPORT_SYMBOL_GPL(riscv_timebase); @@ -45,4 +46,6 @@ void __init time_init(void) timer_probe(); tick_setup_hrtimer_broadcast(); + + pv_time_init(); } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index a1b9be3c4332..8ff8e8b36524 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -6,6 +6,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/randomize_kstack.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/signal.h> @@ -34,7 +35,7 @@ int show_unhandled_signals = 1; -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) { @@ -80,7 +81,7 @@ void die(struct pt_regs *regs, const char *str) oops_enter(); - spin_lock_irqsave(&die_lock, flags); + raw_spin_lock_irqsave(&die_lock, flags); console_verbose(); bust_spinlocks(1); @@ -99,7 +100,7 @@ void die(struct pt_regs *regs, const char *str) bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irqrestore(&die_lock, flags); + raw_spin_unlock_irqrestore(&die_lock, flags); oops_exit(); if (in_interrupt()) @@ -121,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); - dump_instr(KERN_EMERG, regs); + dump_instr(KERN_INFO, regs); } force_sig_fault(signo, code, (void __user *)addr); @@ -310,22 +311,36 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) } } -asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) +asmlinkage __visible __trap_section __no_stack_protector +void do_trap_ecall_u(struct pt_regs *regs) { if (user_mode(regs)) { long syscall = regs->a7; regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); syscall = syscall_enter_from_user_mode(regs, syscall); + add_random_kstack_offset(); + if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * so the maximum stack offset is 1k bytes (10 bits). + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned + * for RV32I or RV64I. + * + * The resulting 6 bits of entropy is seen in SP[9:4]. + */ + choose_random_kstack_offset(get_random_u16()); syscall_exit_to_user_mode(regs); } else { diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 5255f8134aef..7cc108aed74e 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -16,6 +16,7 @@ #include <asm/entry-common.h> #include <asm/hwprobe.h> #include <asm/cpufeature.h> +#include <asm/vector.h> #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -136,8 +137,6 @@ #define REG_PTR(insn, pos, regs) \ (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) -#define GET_RM(insn) (((insn) >> 12) & 7) - #define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) #define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) #define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) @@ -264,86 +263,14 @@ static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, #define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs)) #define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs)) -#ifdef CONFIG_RISCV_M_MODE -static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) -{ - u8 val; - - asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr)); - *r_val = val; - - return 0; -} - -static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) -{ - asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr)); - - return 0; -} - -static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn) -{ - register ulong __mepc asm ("a2") = mepc; - ulong val, rvc_mask = 3, tmp; - - asm ("and %[tmp], %[addr], 2\n" - "bnez %[tmp], 1f\n" -#if defined(CONFIG_64BIT) - __stringify(LWU) " %[insn], (%[addr])\n" -#else - __stringify(LW) " %[insn], (%[addr])\n" -#endif - "and %[tmp], %[insn], %[rvc_mask]\n" - "beq %[tmp], %[rvc_mask], 2f\n" - "sll %[insn], %[insn], %[xlen_minus_16]\n" - "srl %[insn], %[insn], %[xlen_minus_16]\n" - "j 2f\n" - "1:\n" - "lhu %[insn], (%[addr])\n" - "and %[tmp], %[insn], %[rvc_mask]\n" - "bne %[tmp], %[rvc_mask], 2f\n" - "lhu %[tmp], 2(%[addr])\n" - "sll %[tmp], %[tmp], 16\n" - "add %[insn], %[insn], %[tmp]\n" - "2:" - : [insn] "=&r" (val), [tmp] "=&r" (tmp) - : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), - [xlen_minus_16] "i" (XLEN_MINUS_16)); - - *r_insn = val; - - return 0; -} -#else -static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) -{ - if (user_mode(regs)) { - return __get_user(*r_val, addr); - } else { - *r_val = *addr; - return 0; - } -} - -static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) -{ - if (user_mode(regs)) { - return __put_user(val, addr); - } else { - *addr = val; - return 0; - } -} - -#define __read_insn(regs, insn, insn_addr) \ +#define __read_insn(regs, insn, insn_addr, type) \ ({ \ int __ret; \ \ if (user_mode(regs)) { \ - __ret = __get_user(insn, insn_addr); \ + __ret = __get_user(insn, (type __user *) insn_addr); \ } else { \ - insn = *insn_addr; \ + insn = *(type *)insn_addr; \ __ret = 0; \ } \ \ @@ -356,9 +283,8 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) if (epc & 0x2) { ulong tmp = 0; - u16 __user *insn_addr = (u16 __user *)epc; - if (__read_insn(regs, insn, insn_addr)) + if (__read_insn(regs, insn, epc, u16)) return -EFAULT; /* __get_user() uses regular "lw" which sign extend the loaded * value make sure to clear higher order bits in case we "or" it @@ -369,16 +295,14 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) *r_insn = insn; return 0; } - insn_addr++; - if (__read_insn(regs, tmp, insn_addr)) + epc += sizeof(u16); + if (__read_insn(regs, tmp, epc, u16)) return -EFAULT; *r_insn = (tmp << 16) | insn; return 0; } else { - u32 __user *insn_addr = (u32 __user *)epc; - - if (__read_insn(regs, insn, insn_addr)) + if (__read_insn(regs, insn, epc, u32)) return -EFAULT; if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) { *r_insn = insn; @@ -390,7 +314,6 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) return 0; } } -#endif union reg_data { u8 data_bytes[8]; @@ -398,22 +321,49 @@ union reg_data { u64 data_u64; }; -static bool unaligned_ctl __read_mostly; - /* sysctl hooks */ int unaligned_enabled __read_mostly = 1; /* Enabled by default */ -int handle_misaligned_load(struct pt_regs *regs) +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (get_insn(regs, epc, &insn)) + return -1; + + /* Only return 0 when in check_vector_unaligned_access_emulated */ + if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + regs->epc = epc + INSN_LEN(insn); + return 0; + } + + /* If vector instruction we don't emulate it yet */ + regs->epc = epc; + return -1; +} +#else +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + return -1; +} +#endif + +static int handle_scalar_misaligned_load(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; unsigned long insn; unsigned long addr = regs->badaddr; - int i, fp = 0, shift = 0, len = 0; + int fp = 0, shift = 0, len = 0; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); - *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; +#endif if (!unaligned_enabled) return -1; @@ -490,9 +440,11 @@ int handle_misaligned_load(struct pt_regs *regs) return -EOPNOTSUPP; val.data_u64 = 0; - for (i = 0; i < len; i++) { - if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) + if (user_mode(regs)) { + if (copy_from_user(&val, (u8 __user *)addr, len)) return -1; + } else { + memcpy(&val, (u8 *)addr, len); } if (!fp) @@ -507,13 +459,13 @@ int handle_misaligned_load(struct pt_regs *regs) return 0; } -int handle_misaligned_store(struct pt_regs *regs) +static int handle_scalar_misaligned_store(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; unsigned long insn; unsigned long addr = regs->badaddr; - int i, len = 0, fp = 0; + int len = 0, fp = 0; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); @@ -586,9 +538,11 @@ int handle_misaligned_store(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_FPU) && fp) return -EOPNOTSUPP; - for (i = 0; i < len; i++) { - if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) + if (user_mode(regs)) { + if (copy_to_user((u8 __user *)addr, &val, len)) return -1; + } else { + memcpy((u8 *)addr, &val, len); } regs->epc = epc + INSN_LEN(insn); @@ -596,34 +550,116 @@ int handle_misaligned_store(struct pt_regs *regs) return 0; } -bool check_unaligned_access_emulated(int cpu) +int handle_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) { + if (get_insn(regs, epc, &insn)) + return -1; + + if (insn_is_vector(insn)) + return handle_vector_misaligned_load(regs); + } + + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_load(regs); + + return -1; +} + +int handle_misaligned_store(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_store(regs); + + return -1; +} + +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused) +{ + long *mas_ptr = this_cpu_ptr(&vector_misaligned_access); + unsigned long tmp_var; + + *mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + kernel_vector_begin(); + /* + * In pre-13.0.0 versions of GCC, vector registers cannot appear in + * the clobber list. This inline asm clobbers v0, but since we do not + * currently build the kernel with V enabled, the v0 clobber arg is not + * needed (as the compiler will not emit vector code itself). If the kernel + * is changed to build with V enabled, the clobber arg will need to be + * added here. + */ + __asm__ __volatile__ ( + ".balign 4\n\t" + ".option push\n\t" + ".option arch, +zve32x\n\t" + " vsetivli zero, 1, e16, m1, ta, ma\n\t" // Vectors of 16b + " vle16.v v0, (%[ptr])\n\t" // Load bytes + ".option pop\n\t" + : : [ptr] "r" ((u8 *)&tmp_var + 1)); + kernel_vector_end(); +} + +bool check_vector_unaligned_access_emulated_all_cpus(void) +{ + int cpu; + + if (!has_vector()) { + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + return false; + } + + schedule_on_each_cpu(check_vector_unaligned_access_emulated); + + for_each_online_cpu(cpu) + if (per_cpu(vector_misaligned_access, cpu) + == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return false; + + return true; +} +#else +bool check_vector_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +#endif + +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED + +static bool unaligned_ctl __read_mostly; + +void check_unaligned_access_emulated(struct work_struct *work __always_unused) { + int cpu = smp_processor_id(); long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); unsigned long tmp_var, tmp_val; - bool misaligned_emu_detected; - *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUS uses emulated misaligned access at boot time. If that changed * when hotplugging the new cpu, this is something we don't handle. */ - if (unlikely(unaligned_ctl && !misaligned_emu_detected)) { + if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) { pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); while (true) cpu_relax(); } - - return misaligned_emu_detected; } -void unaligned_emulation_finish(void) +bool check_unaligned_access_emulated_all_cpus(void) { int cpu; @@ -632,16 +668,24 @@ void unaligned_emulation_finish(void) * accesses emulated since tasks requesting such control can run on any * CPU. */ - for_each_present_cpu(cpu) { - if (per_cpu(misaligned_access_speed, cpu) != - RISCV_HWPROBE_MISALIGNED_EMULATED) { - return; - } - } + schedule_on_each_cpu(check_unaligned_access_emulated); + + for_each_online_cpu(cpu) + if (per_cpu(misaligned_access_speed, cpu) + != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED) + return false; + unaligned_ctl = true; + return true; } bool unaligned_ctl_available(void) { return unaligned_ctl; } +#else +bool check_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +#endif diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c new file mode 100644 index 000000000000..91f189cf1611 --- /dev/null +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/jump_label.h> +#include <linux/kthread.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/types.h> +#include <asm/cpufeature.h> +#include <asm/hwprobe.h> +#include <asm/vector.h> + +#include "copy-unaligned.h" + +#define MISALIGNED_ACCESS_JIFFIES_LG2 1 +#define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) +#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) + +DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS +static cpumask_t fast_misaligned_access; +static int check_unaligned_access(void *param) +{ + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page = param; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; + + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) + return 0; + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + /* Do a warmup. */ + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + preempt_disable(); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + mb(); + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + preempt_enable(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", + cpu); + + return 0; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow"); + + per_cpu(misaligned_access_speed, cpu) = speed; + + /* + * Set the value of fast_misaligned_access of a CPU. These operations + * are atomic to avoid race conditions. + */ + if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) + cpumask_set_cpu(cpu, &fast_misaligned_access); + else + cpumask_clear_cpu(cpu, &fast_misaligned_access); + + return 0; +} + +static void check_unaligned_access_nonboot_cpu(void *param) +{ + unsigned int cpu = smp_processor_id(); + struct page **pages = param; + + if (smp_processor_id() != 0) + check_unaligned_access(pages[cpu]); +} + +DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); + +static void modify_unaligned_access_branches(cpumask_t *mask, int weight) +{ + if (cpumask_weight(mask) == weight) + static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key); + else + static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key); +} + +static void set_unaligned_access_static_branches_except_cpu(int cpu) +{ + /* + * Same as set_unaligned_access_static_branches, except excludes the + * given CPU from the result. When a CPU is hotplugged into an offline + * state, this function is called before the CPU is set to offline in + * the cpumask, and thus the CPU needs to be explicitly excluded. + */ + + cpumask_t fast_except_me; + + cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); + cpumask_clear_cpu(cpu, &fast_except_me); + + modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); +} + +static void set_unaligned_access_static_branches(void) +{ + /* + * This will be called after check_unaligned_access_all_cpus so the + * result of unaligned access speed for all CPUs will be available. + * + * To avoid the number of online cpus changing between reading + * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be + * held before calling this function. + */ + + cpumask_t fast_and_online; + + cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); + + modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); +} + +static int lock_and_set_unaligned_access_static_branch(void) +{ + cpus_read_lock(); + set_unaligned_access_static_branches(); + cpus_read_unlock(); + + return 0; +} + +arch_initcall_sync(lock_and_set_unaligned_access_static_branch); + +static int riscv_online_cpu(unsigned int cpu) +{ + static struct page *buf; + + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) + goto exit; + + check_unaligned_access_emulated(NULL); + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); + +exit: + set_unaligned_access_static_branches(); + + return 0; +} + +static int riscv_offline_cpu(unsigned int cpu) +{ + set_unaligned_access_static_branches_except_cpu(cpu); + + return 0; +} + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int check_unaligned_access_speed_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return 0; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, riscv_offline_cpu); + +out: + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); + return 0; +} +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static int check_unaligned_access_speed_all_cpus(void) +{ + return 0; +} +#endif + +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +static void check_vector_unaligned_access(struct work_struct *work __always_unused) +{ + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return; + + page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!page) { + pr_warn("Allocation failure, not measuring vector misaligned performance\n"); + return; + } + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + + /* Do a warmup. */ + kernel_vector_begin(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + kernel_vector_end(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n", + cpu); + + return; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow"); + + per_cpu(vector_misaligned_access, cpu) = speed; +} + +static int riscv_online_cpu_vec(unsigned int cpu) +{ + if (!has_vector()) + return 0; + + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) + return 0; + + check_vector_unaligned_access_emulated(NULL); + check_vector_unaligned_access(NULL); + return 0; +} + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + schedule_on_each_cpu(check_vector_unaligned_access); + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu_vec, NULL); + + return 0; +} +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + return 0; +} +#endif + +static int check_unaligned_access_all_cpus(void) +{ + bool all_cpus_emulated, all_cpus_vec_unsupported; + + all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus(); + + if (!all_cpus_vec_unsupported && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"); + } + + if (!all_cpus_emulated) + return check_unaligned_access_speed_all_cpus(); + + return 0; +} + +arch_initcall(check_unaligned_access_all_cpus); diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 2cf76218a5bd..3ca3ae4277e1 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -23,29 +23,16 @@ enum vvar_pages { VVAR_NR_PAGES, }; -enum rv_vdso_map { - RV_VDSO_MAP_VVAR, - RV_VDSO_MAP_VDSO, -}; - #define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) -/* - * The vDSO data page. - */ -static union { - struct vdso_data data; - u8 page[PAGE_SIZE]; -} vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +static union vdso_data_store vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = vdso_data_store.data; struct __vdso_info { const char *name; const char *vdso_code_start; const char *vdso_code_end; unsigned long vdso_pages; - /* Data Mapping */ - struct vm_special_mapping *dm; /* Code Mapping */ struct vm_special_mapping *cm; }; @@ -98,6 +85,8 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page) return (struct vdso_data *)(vvar_page); } +static const struct vm_special_mapping rv_vvar_map; + /* * The vvar mapping contains data for a specific time namespace, so when a task * changes namespace we must unmap its vvar data for the old namespace. @@ -114,12 +103,8 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) mmap_read_lock(mm); for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, vdso_info.dm)) + if (vma_is_special_mapping(vma, &rv_vvar_map)) zap_vma_pages(vma); -#ifdef CONFIG_COMPAT - if (vma_is_special_mapping(vma, compat_vdso_info.dm)) - zap_vma_pages(vma); -#endif } mmap_read_unlock(mm); @@ -161,43 +146,34 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, return vmf_insert_pfn(vma, vmf->address, pfn); } -static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = { - [RV_VDSO_MAP_VVAR] = { - .name = "[vvar]", - .fault = vvar_fault, - }, - [RV_VDSO_MAP_VDSO] = { - .name = "[vdso]", - .mremap = vdso_mremap, - }, +static const struct vm_special_mapping rv_vvar_map = { + .name = "[vvar]", + .fault = vvar_fault, +}; + +static struct vm_special_mapping rv_vdso_map __ro_after_init = { + .name = "[vdso]", + .mremap = vdso_mremap, }; static struct __vdso_info vdso_info __ro_after_init = { .name = "vdso", .vdso_code_start = vdso_start, .vdso_code_end = vdso_end, - .dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR], - .cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO], + .cm = &rv_vdso_map, }; #ifdef CONFIG_COMPAT -static struct vm_special_mapping rv_compat_vdso_maps[] __ro_after_init = { - [RV_VDSO_MAP_VVAR] = { - .name = "[vvar]", - .fault = vvar_fault, - }, - [RV_VDSO_MAP_VDSO] = { - .name = "[vdso]", - .mremap = vdso_mremap, - }, +static struct vm_special_mapping rv_compat_vdso_map __ro_after_init = { + .name = "[vdso]", + .mremap = vdso_mremap, }; static struct __vdso_info compat_vdso_info __ro_after_init = { .name = "compat_vdso", .vdso_code_start = compat_vdso_start, .vdso_code_end = compat_vdso_end, - .dm = &rv_compat_vdso_maps[RV_VDSO_MAP_VVAR], - .cm = &rv_compat_vdso_maps[RV_VDSO_MAP_VDSO], + .cm = &rv_compat_vdso_map, }; #endif @@ -233,7 +209,7 @@ static int __setup_additional_pages(struct mm_struct *mm, } ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, - (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info->dm); + (VM_READ | VM_MAYREAD | VM_PFNMAP), &rv_vvar_map); if (IS_ERR(ret)) goto up_fail; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9b517fe1b8a8..9a1b555e8733 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -fno-builtin ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) @@ -37,20 +38,15 @@ endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) - -# Disable profiling and instrumentation for VDSO code -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n +CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Force dependency $(obj)/vdso.o: $(obj)/vdso.so # link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,vdsold) -LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \ + $(call if_changed,vdsold_and_check) +LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \ --build-id=sha1 --hash-style=both --eh-frame-hdr # strip rule for the .so file @@ -59,7 +55,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +gen-vdsosym := $(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ @@ -69,7 +65,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE # actual build commands # The DSO images are built using a special linker script # Make sure only to export the intended __vdso_xxx symbol offsets. -quiet_cmd_vdsold = VDSOLD $@ - cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ +quiet_cmd_vdsold_and_check = VDSOLD $@ + cmd_vdsold_and_check = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ - rm $@.tmp + rm $@.tmp && \ + $(cmd_vdso_check) diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index cadf725ef798..a158c029344f 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -3,26 +3,22 @@ * Copyright 2023 Rivos, Inc */ +#include <linux/string.h> #include <linux/types.h> #include <vdso/datapage.h> #include <vdso/helpers.h> extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, - size_t cpu_count, unsigned long *cpus, + size_t cpusetsize, unsigned long *cpus, unsigned int flags); -/* Add a prototype to avoid -Wmissing-prototypes warning. */ -int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, - size_t cpu_count, unsigned long *cpus, - unsigned int flags); - -int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, - size_t cpu_count, unsigned long *cpus, - unsigned int flags) +static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpusetsize, unsigned long *cpus, + unsigned int flags) { const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_data *avd = &vd->arch_data; - bool all_cpus = !cpu_count && !cpus; + const struct arch_vdso_time_data *avd = &vd->arch_data; + bool all_cpus = !cpusetsize && !cpus; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; @@ -33,7 +29,7 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, * masks. */ if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus)) - return riscv_hwprobe(pairs, pair_count, cpu_count, cpus, flags); + return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags); /* This is something we can handle, fill out the pairs. */ while (p < end) { @@ -50,3 +46,71 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, return 0; } + +static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpusetsize, unsigned long *cpus, + unsigned int flags) +{ + const struct vdso_data *vd = __arch_get_vdso_data(); + const struct arch_vdso_time_data *avd = &vd->arch_data; + struct riscv_hwprobe *p = pairs; + struct riscv_hwprobe *end = pairs + pair_count; + unsigned char *c = (unsigned char *)cpus; + bool empty_cpus = true; + bool clear_all = false; + int i; + + if (!cpusetsize || !cpus) + return -EINVAL; + + for (i = 0; i < cpusetsize; i++) { + if (c[i]) { + empty_cpus = false; + break; + } + } + + if (empty_cpus || flags != RISCV_HWPROBE_WHICH_CPUS || !avd->homogeneous_cpus) + return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags); + + while (p < end) { + if (riscv_hwprobe_key_is_valid(p->key)) { + struct riscv_hwprobe t = { + .key = p->key, + .value = avd->all_cpu_hwprobe_values[p->key], + }; + + if (!riscv_hwprobe_pair_cmp(&t, p)) + clear_all = true; + } else { + clear_all = true; + p->key = -1; + p->value = 0; + } + p++; + } + + if (clear_all) { + for (i = 0; i < cpusetsize; i++) + c[i] = 0; + } + + return 0; +} + +/* Add a prototype to avoid -Wmissing-prototypes warning. */ +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpusetsize, unsigned long *cpus, + unsigned int flags); + +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpusetsize, unsigned long *cpus, + unsigned int flags) +{ + if (flags & RISCV_HWPROBE_WHICH_CPUS) + return riscv_vdso_get_cpus(pairs, pair_count, cpusetsize, + cpus, flags); + + return riscv_vdso_get_values(pairs, pair_count, cpusetsize, + cpus, flags); +} diff --git a/arch/riscv/kernel/vdso/vgettimeofday.c b/arch/riscv/kernel/vdso/vgettimeofday.c index cc0d80699c31..b35057802584 100644 --- a/arch/riscv/kernel/vdso/vgettimeofday.c +++ b/arch/riscv/kernel/vdso/vgettimeofday.c @@ -8,23 +8,18 @@ #include <linux/time.h> #include <linux/types.h> +#include <vdso/gettime.h> -extern -int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { return __cvdso_clock_gettime(clock, ts); } -extern -int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } -extern -int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res); int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) { return __cvdso_clock_getres(clock_id, res); diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S new file mode 100644 index 000000000000..d16f19f1b3b6 --- /dev/null +++ b/arch/riscv/kernel/vec-copy-unaligned.S @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2024 Rivos Inc. */ + +#include <linux/args.h> +#include <linux/linkage.h> +#include <asm/asm.h> + + .text + +#define WORD_EEW 32 + +#define WORD_SEW CONCATENATE(e, WORD_EEW) +#define VEC_L CONCATENATE(vle, WORD_EEW).v +#define VEC_S CONCATENATE(vle, WORD_EEW).v + +/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using word loads and stores. */ +/* Note: The size is truncated to a multiple of WORD_EEW */ +SYM_FUNC_START(__riscv_copy_vec_words_unaligned) + andi a4, a2, ~(WORD_EEW-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, WORD_SEW, m8, ta, ma + VEC_L v0, (a1) + VEC_S v0, (a0) + addi a0, a0, WORD_EEW + addi a1, a1, WORD_EEW + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_words_unaligned) + +/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using only byte accesses. */ +/* Note: The size is truncated to a multiple of 8 */ +SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned) + andi a4, a2, ~(8-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, e8, m8, ta, ma + vle8.v v0, (a1) + vse8.v v0, (a0) + addi a0, a0, 8 + addi a1, a1, 8 + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned) diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 578b6292487e..184f780c932d 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -21,6 +21,10 @@ #include <asm/bug.h> static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE); +static struct kmem_cache *riscv_v_user_cachep; +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE +static struct kmem_cache *riscv_v_kernel_cachep; +#endif unsigned long riscv_v_vsize __read_mostly; EXPORT_SYMBOL_GPL(riscv_v_vsize); @@ -29,7 +33,17 @@ int riscv_v_setup_vsize(void) { unsigned long this_vsize; - /* There are 32 vector registers with vlenb length. */ + /* + * There are 32 vector registers with vlenb length. + * + * If the thead,vlenb property was provided by the firmware, use that + * instead of probing the CSRs. + */ + if (thead_vlenb_of) { + riscv_v_vsize = thead_vlenb_of * 32; + return 0; + } + riscv_v_enable(); this_vsize = csr_read(CSR_VLENB) * 32; riscv_v_disable(); @@ -47,7 +61,22 @@ int riscv_v_setup_vsize(void) return 0; } -static bool insn_is_vector(u32 insn_buf) +void __init riscv_v_setup_ctx_cache(void) +{ + if (!(has_vector() || has_xtheadvector())) + return; + + riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", + riscv_v_vsize, 16, SLAB_PANIC, + 0, riscv_v_vsize, NULL); +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx", + riscv_v_vsize, 16, + SLAB_PANIC, NULL); +#endif +} + +bool insn_is_vector(u32 insn_buf) { u32 opcode = insn_buf & __INSN_OPCODE_MASK; u32 width, csr; @@ -80,20 +109,37 @@ static bool insn_is_vector(u32 insn_buf) return false; } -static int riscv_v_thread_zalloc(void) +static int riscv_v_thread_zalloc(struct kmem_cache *cache, + struct __riscv_v_ext_state *ctx) { void *datap; - datap = kzalloc(riscv_v_vsize, GFP_KERNEL); + datap = kmem_cache_zalloc(cache, GFP_KERNEL); if (!datap) return -ENOMEM; - current->thread.vstate.datap = datap; - memset(¤t->thread.vstate, 0, offsetof(struct __riscv_v_ext_state, - datap)); + ctx->datap = datap; + memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap)); return 0; } +void riscv_v_thread_alloc(struct task_struct *tsk) +{ +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate); +#endif +} + +void riscv_v_thread_free(struct task_struct *tsk) +{ + if (tsk->thread.vstate.datap) + kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap); +#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE + if (tsk->thread.kernel_vstate.datap) + kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap); +#endif +} + #define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) #define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2) #define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) @@ -122,7 +168,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt, ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt); if (inherit) ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT; - tsk->thread.vstate_ctrl = ctrl; + tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK; + tsk->thread.vstate_ctrl |= ctrl; } bool riscv_v_vstate_ctrl_user_allowed(void) @@ -136,8 +183,11 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) u32 __user *epc = (u32 __user *)regs->epc; u32 insn = (u32)regs->badaddr; + if (!(has_vector() || has_xtheadvector())) + return false; + /* Do not handle if V is not supported, or disabled */ - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) + if (!riscv_v_vstate_ctrl_user_allowed()) return false; /* If V has been enabled then it is not the first-use trap */ @@ -162,12 +212,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) * context where VS has been off. So, try to allocate the user's V * context and resume execution. */ - if (riscv_v_thread_zalloc()) { + if (riscv_v_thread_zalloc(riscv_v_user_cachep, ¤t->thread.vstate)) { force_sig(SIGBUS); return true; } riscv_v_vstate_on(regs); - riscv_v_vstate_restore(current, regs); + riscv_v_vstate_set_restore(current, regs); return true; } @@ -176,7 +226,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; next = riscv_v_ctrl_get_next(tsk); @@ -198,7 +248,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) long riscv_v_vstate_ctrl_get_current(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK; @@ -209,7 +259,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK) @@ -247,7 +297,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) #ifdef CONFIG_SYSCTL -static struct ctl_table riscv_v_default_vstate_table[] = { +static const struct ctl_table riscv_v_default_vstate_table[] = { { .procname = "riscv_v_default_allow", .data = &riscv_v_implicit_uacc, @@ -259,7 +309,7 @@ static struct ctl_table riscv_v_default_vstate_table[] = { static int __init riscv_v_sysctl_init(void) { - if (has_vector()) + if (has_vector() || has_xtheadvector()) if (!register_sysctl("abi", riscv_v_default_vstate_table)) return -EINVAL; return 0; @@ -269,7 +319,7 @@ static int __init riscv_v_sysctl_init(void) static int __init riscv_v_sysctl_init(void) { return 0; } #endif /* ! CONFIG_SYSCTL */ -static int riscv_v_init(void) +static int __init riscv_v_init(void) { return riscv_v_sysctl_init(); } diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c new file mode 100644 index 000000000000..a31ff84740eb --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos, Inc + */ + +#include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> +#include <asm/vendor_extensions/thead.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + &riscv_isa_vendor_ext_list_andes, +#endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + &riscv_isa_vendor_ext_list_thead, +#endif +}; + +const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); + +/** + * __riscv_isa_vendor_extension_available() - Check whether given vendor + * extension is available or not. + * + * @cpu: check if extension is available on this cpu + * @vendor: vendor that the extension is a member of + * @bit: bit position of the desired extension + * Return: true or false + * + * NOTE: When cpu is -1, will check if extension is available on all cpus + */ +bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit) +{ + struct riscv_isavendorinfo *bmap; + struct riscv_isavendorinfo *cpu_bmap; + + switch (vendor) { + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + case ANDES_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; + break; + #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + case THEAD_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap; + break; + #endif + default: + return false; + } + + if (cpu != -1) + bmap = &cpu_bmap[cpu]; + + if (bit >= RISCV_ISA_VENDOR_EXT_MAX) + return false; + + return test_bit(bit, bmap->isa) ? true : false; +} +EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available); diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile new file mode 100644 index 000000000000..866414c81a9f --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/andes.c b/arch/riscv/kernel/vendor_extensions/andes.c new file mode 100644 index 000000000000..51f302b6d503 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/andes.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +/* All Andes vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_andes[] = { + __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_VENDOR_EXT_XANDESPMU), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_andes), + .ext_data = riscv_isa_vendor_ext_andes, +}; diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c new file mode 100644 index 000000000000..519dbf70710a --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> + +#include <linux/array_size.h> +#include <linux/cpumask.h> +#include <linux/types.h> + +/* All T-Head vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_thead[] = { + __RISCV_ISA_EXT_DATA(xtheadvector, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead), + .ext_data = riscv_isa_vendor_ext_thead, +}; + +void disable_xtheadvector(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap[cpu].isa); + + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap.isa); +} diff --git a/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c new file mode 100644 index 000000000000..2eba34011786 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/vendor_extensions/thead.h> +#include <asm/vendor_extensions/thead_hwprobe.h> +#include <asm/vendor_extensions/vendor_hwprobe.h> + +#include <linux/cpumask.h> +#include <linux/types.h> + +#include <uapi/asm/hwprobe.h> +#include <uapi/asm/vendor/thead.h> + +void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XTHEADVECTOR); + }); +} diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/vmcore_info.c index 8706736fd4e2..d5e448aa90e7 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/vmcore_info.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/crash_core.h> +#include <linux/vmcore_info.h> #include <linux/pagemap.h> void arch_crash_save_vmcoreinfo(void) @@ -8,7 +8,6 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_NUMBER(phys_ram_base); vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); - vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); #ifdef CONFIG_MMU VMCOREINFO_NUMBER(VA_BITS); @@ -20,6 +19,13 @@ void arch_crash_save_vmcoreinfo(void) #endif #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); +#ifdef CONFIG_XIP_KERNEL + /* TODO: Communicate with crash-utility developers on the information to + * export. The XIP case is more complicated, because the virtual-physical + * address offset depends on whether the address is in ROM or in RAM. + */ +#else vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", kernel_map.va_kernel_pa_offset); +#endif } diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 50767647fbc6..a7611789bad5 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -14,6 +14,7 @@ #include <asm/page.h> #include <asm/cache.h> #include <asm/thread_info.h> +#include <asm/set_memory.h> OUTPUT_ARCH(riscv) ENTRY(_start) @@ -29,10 +30,12 @@ SECTIONS HEAD_TEXT_SECTION INIT_TEXT_SECTION(PAGE_SIZE) /* we have to discard exit text and such at runtime, not link time */ + __exittext_begin = .; .exit.text : { EXIT_TEXT } + __exittext_end = .; .text : { _text = .; @@ -63,10 +66,10 @@ SECTIONS * From this point, stuff is considered writable and will be copied to RAM */ __data_loc = ALIGN(PAGE_SIZE); /* location in file */ - . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */ + . = ALIGN(SECTION_ALIGN); /* location in memory */ #undef LOAD_OFFSET -#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK)) +#define LOAD_OFFSET (KERNEL_LINK_ADDR + _sdata - __data_loc) _sdata = .; /* Start of data section */ _data = .; diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 492dd4b8f3d6..002ca58dd998 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -69,10 +69,12 @@ SECTIONS __soc_builtin_dtb_table_end = .; } /* we have to discard exit text and such at runtime, not link time */ + __exittext_begin = .; .exit.text : { EXIT_TEXT } + __exittext_end = .; __init_text_end = .; . = ALIGN(SECTION_ALIGN); |