summaryrefslogtreecommitdiff
path: root/arch/riscv/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv/kernel')
-rw-r--r--arch/riscv/kernel/Makefile4
-rw-r--r--arch/riscv/kernel/alternative.c2
-rw-r--r--arch/riscv/kernel/cpufeature.c256
-rw-r--r--arch/riscv/kernel/entry.S3
-rw-r--r--arch/riscv/kernel/pi/Makefile3
-rw-r--r--arch/riscv/kernel/ptrace.c6
-rw-r--r--arch/riscv/kernel/smpboot.c1
-rw-r--r--arch/riscv/kernel/suspend.c49
-rw-r--r--arch/riscv/kernel/sys_hwprobe.c13
-rw-r--r--arch/riscv/kernel/traps.c17
-rw-r--r--arch/riscv/kernel/traps_misaligned.c17
-rw-r--r--arch/riscv/kernel/unaligned_access_speed.c281
12 files changed, 382 insertions, 270 deletions
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 5e591f831638..81d94a8ee10f 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -39,7 +39,6 @@ extra-y += vmlinux.lds
obj-y += head.o
obj-y += soc.o
obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o
-obj-y += copy-unaligned.o
obj-y += cpu.o
obj-y += cpufeature.o
obj-y += entry.o
@@ -64,6 +63,9 @@ obj-y += tests/
obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o
+obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o
+
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_RISCV_ISA_V) += vector.o
obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
index 319a1da0358b..0128b161bfda 100644
--- a/arch/riscv/kernel/alternative.c
+++ b/arch/riscv/kernel/alternative.c
@@ -43,7 +43,7 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info
switch (cpu_mfr_info->vendor_id) {
#ifdef CONFIG_ERRATA_ANDES
- case ANDESTECH_VENDOR_ID:
+ case ANDES_VENDOR_ID:
cpu_mfr_info->patch_func = andes_errata_patch_func;
break;
#endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 79a5a35fab96..3ed2359eae35 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -11,7 +11,6 @@
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/ctype.h>
-#include <linux/jump_label.h>
#include <linux/log2.h>
#include <linux/memory.h>
#include <linux/module.h>
@@ -21,21 +20,13 @@
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwcap.h>
-#include <asm/hwprobe.h>
#include <asm/patch.h>
#include <asm/processor.h>
#include <asm/sbi.h>
#include <asm/vector.h>
-#include "copy-unaligned.h"
-
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
-#define MISALIGNED_ACCESS_JIFFIES_LG2 1
-#define MISALIGNED_BUFFER_SIZE 0x4000
-#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
-#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
-
unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */
@@ -44,11 +35,6 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
/* Per-cpu ISA extensions. */
struct riscv_isainfo hart_isa[NR_CPUS];
-/* Performance information */
-DEFINE_PER_CPU(long, misaligned_access_speed);
-
-static cpumask_t fast_misaligned_access;
-
/**
* riscv_isa_extension_base() - Get base extension word
*
@@ -318,6 +304,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+ __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU),
};
const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
@@ -731,247 +718,6 @@ unsigned long riscv_get_elf_hwcap(void)
return hwcap;
}
-static int check_unaligned_access(void *param)
-{
- int cpu = smp_processor_id();
- u64 start_cycles, end_cycles;
- u64 word_cycles;
- u64 byte_cycles;
- int ratio;
- unsigned long start_jiffies, now;
- struct page *page = param;
- void *dst;
- void *src;
- long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
-
- if (check_unaligned_access_emulated(cpu))
- return 0;
-
- /* Make an unaligned destination buffer. */
- dst = (void *)((unsigned long)page_address(page) | 0x1);
- /* Unalign src as well, but differently (off by 1 + 2 = 3). */
- src = dst + (MISALIGNED_BUFFER_SIZE / 2);
- src += 2;
- word_cycles = -1ULL;
- /* Do a warmup. */
- __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
- preempt_disable();
- start_jiffies = jiffies;
- while ((now = jiffies) == start_jiffies)
- cpu_relax();
-
- /*
- * For a fixed amount of time, repeatedly try the function, and take
- * the best time in cycles as the measurement.
- */
- while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
- start_cycles = get_cycles64();
- /* Ensure the CSR read can't reorder WRT to the copy. */
- mb();
- __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
- /* Ensure the copy ends before the end time is snapped. */
- mb();
- end_cycles = get_cycles64();
- if ((end_cycles - start_cycles) < word_cycles)
- word_cycles = end_cycles - start_cycles;
- }
-
- byte_cycles = -1ULL;
- __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
- start_jiffies = jiffies;
- while ((now = jiffies) == start_jiffies)
- cpu_relax();
-
- while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
- start_cycles = get_cycles64();
- mb();
- __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
- mb();
- end_cycles = get_cycles64();
- if ((end_cycles - start_cycles) < byte_cycles)
- byte_cycles = end_cycles - start_cycles;
- }
-
- preempt_enable();
-
- /* Don't divide by zero. */
- if (!word_cycles || !byte_cycles) {
- pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
- cpu);
-
- return 0;
- }
-
- if (word_cycles < byte_cycles)
- speed = RISCV_HWPROBE_MISALIGNED_FAST;
-
- ratio = div_u64((byte_cycles * 100), word_cycles);
- pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
- cpu,
- ratio / 100,
- ratio % 100,
- (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
-
- per_cpu(misaligned_access_speed, cpu) = speed;
-
- /*
- * Set the value of fast_misaligned_access of a CPU. These operations
- * are atomic to avoid race conditions.
- */
- if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
- cpumask_set_cpu(cpu, &fast_misaligned_access);
- else
- cpumask_clear_cpu(cpu, &fast_misaligned_access);
-
- return 0;
-}
-
-static void check_unaligned_access_nonboot_cpu(void *param)
-{
- unsigned int cpu = smp_processor_id();
- struct page **pages = param;
-
- if (smp_processor_id() != 0)
- check_unaligned_access(pages[cpu]);
-}
-
-DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
-
-static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
-{
- if (cpumask_weight(mask) == weight)
- static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
- else
- static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
-}
-
-static void set_unaligned_access_static_branches_except_cpu(int cpu)
-{
- /*
- * Same as set_unaligned_access_static_branches, except excludes the
- * given CPU from the result. When a CPU is hotplugged into an offline
- * state, this function is called before the CPU is set to offline in
- * the cpumask, and thus the CPU needs to be explicitly excluded.
- */
-
- cpumask_t fast_except_me;
-
- cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
- cpumask_clear_cpu(cpu, &fast_except_me);
-
- modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
-}
-
-static void set_unaligned_access_static_branches(void)
-{
- /*
- * This will be called after check_unaligned_access_all_cpus so the
- * result of unaligned access speed for all CPUs will be available.
- *
- * To avoid the number of online cpus changing between reading
- * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
- * held before calling this function.
- */
-
- cpumask_t fast_and_online;
-
- cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
-
- modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
-}
-
-static int lock_and_set_unaligned_access_static_branch(void)
-{
- cpus_read_lock();
- set_unaligned_access_static_branches();
- cpus_read_unlock();
-
- return 0;
-}
-
-arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
-
-static int riscv_online_cpu(unsigned int cpu)
-{
- static struct page *buf;
-
- /* We are already set since the last check */
- if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
- goto exit;
-
- buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
- if (!buf) {
- pr_warn("Allocation failure, not measuring misaligned performance\n");
- return -ENOMEM;
- }
-
- check_unaligned_access(buf);
- __free_pages(buf, MISALIGNED_BUFFER_ORDER);
-
-exit:
- set_unaligned_access_static_branches();
-
- return 0;
-}
-
-static int riscv_offline_cpu(unsigned int cpu)
-{
- set_unaligned_access_static_branches_except_cpu(cpu);
-
- return 0;
-}
-
-/* Measure unaligned access on all CPUs present at boot in parallel. */
-static int check_unaligned_access_all_cpus(void)
-{
- unsigned int cpu;
- unsigned int cpu_count = num_possible_cpus();
- struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
- GFP_KERNEL);
-
- if (!bufs) {
- pr_warn("Allocation failure, not measuring misaligned performance\n");
- return 0;
- }
-
- /*
- * Allocate separate buffers for each CPU so there's no fighting over
- * cache lines.
- */
- for_each_cpu(cpu, cpu_online_mask) {
- bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
- if (!bufs[cpu]) {
- pr_warn("Allocation failure, not measuring misaligned performance\n");
- goto out;
- }
- }
-
- /* Check everybody except 0, who stays behind to tend jiffies. */
- on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
-
- /* Check core 0. */
- smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
-
- /*
- * Setup hotplug callbacks for any new CPUs that come online or go
- * offline.
- */
- cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
- riscv_online_cpu, riscv_offline_cpu);
-
-out:
- unaligned_emulation_finish();
- for_each_cpu(cpu, cpu_online_mask) {
- if (bufs[cpu])
- __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
- }
-
- kfree(bufs);
- return 0;
-}
-
-arch_initcall(check_unaligned_access_all_cpus);
-
void riscv_user_isa_enable(void)
{
if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 9d1a305d5508..68a24cf9481a 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -111,6 +111,7 @@ SYM_CODE_START(handle_exception)
1:
tail do_trap_unknown
SYM_CODE_END(handle_exception)
+ASM_NOKPROBE(handle_exception)
/*
* The ret_from_exception must be called with interrupt disabled. Here is the
@@ -184,6 +185,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
sret
#endif
SYM_CODE_END(ret_from_exception)
+ASM_NOKPROBE(ret_from_exception)
#ifdef CONFIG_VMAP_STACK
SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
@@ -219,6 +221,7 @@ SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
move a0, sp
tail handle_bad_stack
SYM_CODE_END(handle_kernel_stack_overflow)
+ASM_NOKPROBE(handle_kernel_stack_overflow)
#endif
SYM_CODE_START(ret_from_fork)
diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile
index 07915dc9279e..b75f150b923d 100644
--- a/arch/riscv/kernel/pi/Makefile
+++ b/arch/riscv/kernel/pi/Makefile
@@ -9,6 +9,9 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-fno-asynchronous-unwind-tables -fno-unwind-tables \
$(call cc-option,-fno-addrsig)
+# Disable LTO
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
+
KBUILD_CFLAGS += -mcmodel=medany
CFLAGS_cmdline_early.o += -D__NO_FORTIFY
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index e8515aa9d80b..92731ff8c79a 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -377,14 +377,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
return ret;
}
+#else
+static const struct user_regset_view compat_riscv_user_native_view = {};
#endif /* CONFIG_COMPAT */
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_32BIT))
+ if (is_compat_thread(&task->thread_info))
return &compat_riscv_user_native_view;
else
-#endif
return &riscv_user_native_view;
}
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index c4ed7d977f57..d41090fc3203 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -28,7 +28,6 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
-#include <asm/cpufeature.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index 299795341e8a..8a327b485b90 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -132,4 +132,53 @@ static int __init sbi_system_suspend_init(void)
}
arch_initcall(sbi_system_suspend_init);
+
+static int sbi_suspend_finisher(unsigned long suspend_type,
+ unsigned long resume_addr,
+ unsigned long opaque)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND,
+ suspend_type, resume_addr, opaque, 0, 0, 0);
+
+ return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0;
+}
+
+int riscv_sbi_hart_suspend(u32 state)
+{
+ if (state & SBI_HSM_SUSP_NON_RET_BIT)
+ return cpu_suspend(state, sbi_suspend_finisher);
+ else
+ return sbi_suspend_finisher(state, 0, 0);
+}
+
+bool riscv_sbi_suspend_state_is_valid(u32 state)
+{
+ if (state > SBI_HSM_SUSPEND_RET_DEFAULT &&
+ state < SBI_HSM_SUSPEND_RET_PLATFORM)
+ return false;
+
+ if (state > SBI_HSM_SUSPEND_NON_RET_DEFAULT &&
+ state < SBI_HSM_SUSPEND_NON_RET_PLATFORM)
+ return false;
+
+ return true;
+}
+
+bool riscv_sbi_hsm_is_supported(void)
+{
+ /*
+ * The SBI HSM suspend function is only available when:
+ * 1) SBI version is 0.3 or higher
+ * 2) SBI HSM extension is available
+ */
+ if (sbi_spec_version < sbi_mk_version(0, 3) ||
+ !sbi_probe_extension(SBI_EXT_HSM)) {
+ pr_info("HSM suspend not available\n");
+ return false;
+ }
+
+ return true;
+}
#endif /* CONFIG_RISCV_SBI */
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index a7c56b41efd2..8cae41a502dd 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -147,6 +147,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
return (pair.value & ext);
}
+#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
static u64 hwprobe_misaligned(const struct cpumask *cpus)
{
int cpu;
@@ -169,6 +170,18 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
return perf;
}
+#else
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+ if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS))
+ return RISCV_HWPROBE_MISALIGNED_FAST;
+
+ if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available())
+ return RISCV_HWPROBE_MISALIGNED_EMULATED;
+
+ return RISCV_HWPROBE_MISALIGNED_SLOW;
+}
+#endif
static void hwprobe_one_pair(struct riscv_hwprobe *pair,
const struct cpumask *cpus)
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index a1b9be3c4332..868d6280cf66 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -6,6 +6,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/randomize_kstack.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/signal.h>
@@ -310,7 +311,8 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
}
}
-asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
+asmlinkage __visible __trap_section __no_stack_protector
+void do_trap_ecall_u(struct pt_regs *regs)
{
if (user_mode(regs)) {
long syscall = regs->a7;
@@ -322,10 +324,23 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
syscall = syscall_enter_from_user_mode(regs, syscall);
+ add_random_kstack_offset();
+
if (syscall >= 0 && syscall < NR_syscalls)
syscall_handler(regs, syscall);
else if (syscall != -1)
regs->a0 = -ENOSYS;
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * so the maximum stack offset is 1k bytes (10 bits).
+ *
+ * The actual entropy will be further reduced by the compiler when
+ * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned
+ * for RV32I or RV64I.
+ *
+ * The resulting 6 bits of entropy is seen in SP[9:4].
+ */
+ choose_random_kstack_offset(get_random_u16());
syscall_exit_to_user_mode(regs);
} else {
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 8ded225e8c5b..2adb7c3e4dd5 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
*this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
+#endif
if (!unaligned_enabled)
return -1;
@@ -596,7 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs)
return 0;
}
-bool check_unaligned_access_emulated(int cpu)
+static bool check_unaligned_access_emulated(int cpu)
{
long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
unsigned long tmp_var, tmp_val;
@@ -623,7 +625,7 @@ bool check_unaligned_access_emulated(int cpu)
return misaligned_emu_detected;
}
-void unaligned_emulation_finish(void)
+bool check_unaligned_access_emulated_all_cpus(void)
{
int cpu;
@@ -632,13 +634,12 @@ void unaligned_emulation_finish(void)
* accesses emulated since tasks requesting such control can run on any
* CPU.
*/
- for_each_present_cpu(cpu) {
- if (per_cpu(misaligned_access_speed, cpu) !=
- RISCV_HWPROBE_MISALIGNED_EMULATED) {
- return;
- }
- }
+ for_each_online_cpu(cpu)
+ if (!check_unaligned_access_emulated(cpu))
+ return false;
+
unaligned_ctl = true;
+ return true;
}
bool unaligned_ctl_available(void)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
new file mode 100644
index 000000000000..a9a6bcb02acf
--- /dev/null
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024 Rivos Inc.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/jump_label.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <asm/cpufeature.h>
+#include <asm/hwprobe.h>
+
+#include "copy-unaligned.h"
+
+#define MISALIGNED_ACCESS_JIFFIES_LG2 1
+#define MISALIGNED_BUFFER_SIZE 0x4000
+#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
+#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
+
+DEFINE_PER_CPU(long, misaligned_access_speed);
+
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+static cpumask_t fast_misaligned_access;
+static int check_unaligned_access(void *param)
+{
+ int cpu = smp_processor_id();
+ u64 start_cycles, end_cycles;
+ u64 word_cycles;
+ u64 byte_cycles;
+ int ratio;
+ unsigned long start_jiffies, now;
+ struct page *page = param;
+ void *dst;
+ void *src;
+ long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
+
+ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
+ return 0;
+
+ /* Make an unaligned destination buffer. */
+ dst = (void *)((unsigned long)page_address(page) | 0x1);
+ /* Unalign src as well, but differently (off by 1 + 2 = 3). */
+ src = dst + (MISALIGNED_BUFFER_SIZE / 2);
+ src += 2;
+ word_cycles = -1ULL;
+ /* Do a warmup. */
+ __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ preempt_disable();
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ /*
+ * For a fixed amount of time, repeatedly try the function, and take
+ * the best time in cycles as the measurement.
+ */
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ /* Ensure the CSR read can't reorder WRT to the copy. */
+ mb();
+ __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ /* Ensure the copy ends before the end time is snapped. */
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < word_cycles)
+ word_cycles = end_cycles - start_cycles;
+ }
+
+ byte_cycles = -1ULL;
+ __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ mb();
+ __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < byte_cycles)
+ byte_cycles = end_cycles - start_cycles;
+ }
+
+ preempt_enable();
+
+ /* Don't divide by zero. */
+ if (!word_cycles || !byte_cycles) {
+ pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
+ cpu);
+
+ return 0;
+ }
+
+ if (word_cycles < byte_cycles)
+ speed = RISCV_HWPROBE_MISALIGNED_FAST;
+
+ ratio = div_u64((byte_cycles * 100), word_cycles);
+ pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
+ cpu,
+ ratio / 100,
+ ratio % 100,
+ (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
+
+ per_cpu(misaligned_access_speed, cpu) = speed;
+
+ /*
+ * Set the value of fast_misaligned_access of a CPU. These operations
+ * are atomic to avoid race conditions.
+ */
+ if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
+ cpumask_set_cpu(cpu, &fast_misaligned_access);
+ else
+ cpumask_clear_cpu(cpu, &fast_misaligned_access);
+
+ return 0;
+}
+
+static void check_unaligned_access_nonboot_cpu(void *param)
+{
+ unsigned int cpu = smp_processor_id();
+ struct page **pages = param;
+
+ if (smp_processor_id() != 0)
+ check_unaligned_access(pages[cpu]);
+}
+
+DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
+
+static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
+{
+ if (cpumask_weight(mask) == weight)
+ static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
+ else
+ static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
+}
+
+static void set_unaligned_access_static_branches_except_cpu(int cpu)
+{
+ /*
+ * Same as set_unaligned_access_static_branches, except excludes the
+ * given CPU from the result. When a CPU is hotplugged into an offline
+ * state, this function is called before the CPU is set to offline in
+ * the cpumask, and thus the CPU needs to be explicitly excluded.
+ */
+
+ cpumask_t fast_except_me;
+
+ cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
+ cpumask_clear_cpu(cpu, &fast_except_me);
+
+ modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
+}
+
+static void set_unaligned_access_static_branches(void)
+{
+ /*
+ * This will be called after check_unaligned_access_all_cpus so the
+ * result of unaligned access speed for all CPUs will be available.
+ *
+ * To avoid the number of online cpus changing between reading
+ * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
+ * held before calling this function.
+ */
+
+ cpumask_t fast_and_online;
+
+ cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
+
+ modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
+}
+
+static int lock_and_set_unaligned_access_static_branch(void)
+{
+ cpus_read_lock();
+ set_unaligned_access_static_branches();
+ cpus_read_unlock();
+
+ return 0;
+}
+
+arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
+
+static int riscv_online_cpu(unsigned int cpu)
+{
+ static struct page *buf;
+
+ /* We are already set since the last check */
+ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
+ goto exit;
+
+ buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+ if (!buf) {
+ pr_warn("Allocation failure, not measuring misaligned performance\n");
+ return -ENOMEM;
+ }
+
+ check_unaligned_access(buf);
+ __free_pages(buf, MISALIGNED_BUFFER_ORDER);
+
+exit:
+ set_unaligned_access_static_branches();
+
+ return 0;
+}
+
+static int riscv_offline_cpu(unsigned int cpu)
+{
+ set_unaligned_access_static_branches_except_cpu(cpu);
+
+ return 0;
+}
+
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static int check_unaligned_access_speed_all_cpus(void)
+{
+ unsigned int cpu;
+ unsigned int cpu_count = num_possible_cpus();
+ struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);
+
+ if (!bufs) {
+ pr_warn("Allocation failure, not measuring misaligned performance\n");
+ return 0;
+ }
+
+ /*
+ * Allocate separate buffers for each CPU so there's no fighting over
+ * cache lines.
+ */
+ for_each_cpu(cpu, cpu_online_mask) {
+ bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+ if (!bufs[cpu]) {
+ pr_warn("Allocation failure, not measuring misaligned performance\n");
+ goto out;
+ }
+ }
+
+ /* Check everybody except 0, who stays behind to tend jiffies. */
+ on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
+
+ /* Check core 0. */
+ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
+
+ /*
+ * Setup hotplug callbacks for any new CPUs that come online or go
+ * offline.
+ */
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+ riscv_online_cpu, riscv_offline_cpu);
+
+out:
+ for_each_cpu(cpu, cpu_online_mask) {
+ if (bufs[cpu])
+ __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
+ }
+
+ kfree(bufs);
+ return 0;
+}
+
+static int check_unaligned_access_all_cpus(void)
+{
+ bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+
+ if (!all_cpus_emulated)
+ return check_unaligned_access_speed_all_cpus();
+
+ return 0;
+}
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static int check_unaligned_access_all_cpus(void)
+{
+ check_unaligned_access_emulated_all_cpus();
+
+ return 0;
+}
+#endif
+
+arch_initcall(check_unaligned_access_all_cpus);