Diffstat (limited to 'arch/riscv')
55 files changed, 2270 insertions, 393 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index ff1e353b0d6f..cc63aef41e94 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -83,6 +83,7 @@ config RISCV select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_NO_INSTR select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU select BUILDTIME_TABLE_SORT if MMU select CLINT_TIMER if RISCV_M_MODE @@ -116,6 +117,7 @@ config RISCV select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAS_IOPORT if MMU + select HAVE_ALIGNED_STRUCT_PAGE select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT @@ -507,6 +509,39 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accommodate various tables. +choice + prompt "RISC-V spinlock type" + default RISCV_COMBO_SPINLOCKS + +config RISCV_TICKET_SPINLOCKS + bool "Using ticket spinlock" + +config RISCV_QUEUED_SPINLOCKS + bool "Using queued spinlock" + depends on SMP && MMU && NONPORTABLE + select ARCH_USE_QUEUED_SPINLOCKS + help + The queued spinlock implementation requires the forward progress + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or + LR/SC with Ziccrse provide such guarantee. + + Select this if and only if Zabha or Ziccrse is available on your + platform, RISCV_QUEUED_SPINLOCKS must not be selected for platforms + without one of those extensions. + + If unsure, select RISCV_COMBO_SPINLOCKS, which will use qspinlocks + when supported and otherwise ticket spinlocks. + +config RISCV_COMBO_SPINLOCKS + bool "Using combo spinlock" + depends on SMP && MMU + select ARCH_USE_QUEUED_SPINLOCKS + help + Embed both queued spinlock and ticket lock so that the spinlock + implementation can be chosen at runtime. + +endchoice + config RISCV_ALTERNATIVE bool depends on !XIP_KERNEL @@ -532,6 +567,17 @@ config RISCV_ISA_C If you don't know what to do here, say Y. +config RISCV_ISA_SUPM + bool "Supm extension for userspace pointer masking" + depends on 64BIT + default y + help + Add support for pointer masking in userspace (Supm) when the + underlying hardware extension (Smnpm or Ssnpm) is detected at boot. + + If this option is disabled, userspace will be unable to use + the prctl(PR_{SET,GET}_TAGGED_ADDR_CTRL) API. + config RISCV_ISA_SVNAPOT bool "Svnapot extension support for supervisor mode NAPOT pages" depends on 64BIT && MMU @@ -633,6 +679,40 @@ config RISCV_ISA_ZAWRS use of these instructions in the kernel when the Zawrs extension is detected at boot. +config TOOLCHAIN_HAS_ZABHA + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha) + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZABHA + bool "Zabha extension support for atomic byte/halfword operations" + depends on TOOLCHAIN_HAS_ZABHA + depends on RISCV_ALTERNATIVE + default y + help + Enable the use of the Zabha ISA-extension to implement kernel + byte/halfword atomic memory operations when it is detected at boot. + + If you don't know what to do here, say Y. 
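As a point of reference for how this option is consumed, the cmpxchg changes later in this diff gate byte/halfword AMO usage on both the Kconfig symbol and runtime extension detection. A minimal sketch of that gating, assuming the cpufeature helpers introduced by this series (the helper name is illustrative):

#include <linux/kconfig.h>
#include <asm/cpufeature.h>
#include <asm/hwcap.h>

/* Illustrative helper: true when Zabha byte/halfword AMOs may be emitted. */
static __always_inline bool zabha_usable(void)
{
	return IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
	       riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA);
}

When the extension is absent at runtime, the same call sites fall back to the LR/SC-based masked sequences, so enabling the option remains safe on hardware without Zabha.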
+ +config TOOLCHAIN_HAS_ZACAS + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZACAS + bool "Zacas extension support for atomic CAS" + depends on TOOLCHAIN_HAS_ZACAS + depends on RISCV_ALTERNATIVE + default y + help + Enable the use of the Zacas ISA-extension to implement kernel atomic + cmpxchg operations when it is detected at boot. + If you don't know what to do here, say Y. config TOOLCHAIN_HAS_ZBB @@ -786,10 +866,24 @@ config THREAD_SIZE_ORDER config RISCV_MISALIGNED bool + help + Embed support for detecting and emulating misaligned + scalar or vector loads and stores. + +config RISCV_SCALAR_MISALIGNED + bool + select RISCV_MISALIGNED select SYSCTL_ARCH_UNALIGN_ALLOW help Embed support for emulating misaligned loads and stores. +config RISCV_VECTOR_MISALIGNED + bool + select RISCV_MISALIGNED + depends on RISCV_ISA_V + help + Enable detecting support for vector misaligned loads and stores. + choice prompt "Unaligned Accesses Support" default RISCV_PROBE_UNALIGNED_ACCESS @@ -801,7 +895,7 @@ choice config RISCV_PROBE_UNALIGNED_ACCESS bool "Probe for hardware unaligned access support" - select RISCV_MISALIGNED + select RISCV_SCALAR_MISALIGNED help During boot, the kernel will run a series of tests to determine the speed of unaligned accesses. This probing will dynamically determine @@ -812,7 +906,7 @@ config RISCV_PROBE_UNALIGNED_ACCESS config RISCV_EMULATED_UNALIGNED_ACCESS bool "Emulate unaligned access where system support is missing" - select RISCV_MISALIGNED + select RISCV_SCALAR_MISALIGNED help If unaligned memory accesses trap into the kernel as they are not supported by the system, the kernel will emulate the unaligned @@ -841,6 +935,46 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS endchoice +choice + prompt "Vector unaligned Accesses Support" + depends on RISCV_ISA_V + default RISCV_PROBE_VECTOR_UNALIGNED_ACCESS + help + This determines the level of support for vector unaligned accesses. This + information is used by the kernel to perform optimizations. It is also + exposed to user space via the hwprobe syscall. The hardware will be + probed at boot by default. + +config RISCV_PROBE_VECTOR_UNALIGNED_ACCESS + bool "Probe speed of vector unaligned accesses" + select RISCV_VECTOR_MISALIGNED + depends on RISCV_ISA_V + help + During boot, the kernel will run a series of tests to determine the + speed of vector unaligned accesses if they are supported. This probing + will dynamically determine the speed of vector unaligned accesses on + the underlying system if they are supported. + +config RISCV_SLOW_VECTOR_UNALIGNED_ACCESS + bool "Assume the system supports slow vector unaligned memory accesses" + depends on NONPORTABLE + help + Assume that the system supports slow vector unaligned memory accesses. The + kernel and userspace programs may not be able to run at all on systems + that do not support unaligned memory accesses. + +config RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS + bool "Assume the system supports fast vector unaligned memory accesses" + depends on NONPORTABLE + help + Assume that the system supports fast vector unaligned memory accesses. When + enabled, this option improves the performance of the kernel on such + systems. However, the kernel and userspace programs will run much more + slowly, or will not be able to run at all, on systems that do not + support efficient unaligned memory accesses. 
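Because the probed vector result is exposed through the hwprobe syscall (see the new RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF key further down in this diff), user space can query it directly. A hedged userspace sketch; the helper name is illustrative:

#include <asm/hwprobe.h>	/* struct riscv_hwprobe, RISCV_HWPROBE_* */
#include <sys/syscall.h>
#include <unistd.h>

/* Returns one of RISCV_HWPROBE_MISALIGNED_VECTOR_{UNKNOWN,SLOW,FAST,UNSUPPORTED}. */
static long query_vector_unaligned_perf(void)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
	};

	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
		return -1;

	return (long)pair.value;
}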
+ +endchoice + source "arch/riscv/Kconfig.vendor" endmenu # "Platform type" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index d469db9f46f4..9fe1ee740dda 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -82,6 +82,12 @@ else riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei endif +# Check if the toolchain supports Zacas +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas + +# Check if the toolchain supports Zabha +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha + # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 1d5e13b148f2..b4a37345703e 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -167,6 +167,7 @@ CONFIG_PINCTRL_SOPHGO_CV1800B=y CONFIG_PINCTRL_SOPHGO_CV1812H=y CONFIG_PINCTRL_SOPHGO_SG2000=y CONFIG_PINCTRL_SOPHGO_SG2002=y +CONFIG_GPIO_DWAPB=y CONFIG_GPIO_SIFIVE=y CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_SENSORS_SFCTEMP=m diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 1461af12da6e..de13d5a234f8 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -6,10 +6,12 @@ generic-y += early_ioremap.h generic-y += flat.h generic-y += kvm_para.h generic-y += mmzone.h +generic-y += mcs_spinlock.h generic-y += parport.h -generic-y += spinlock.h generic-y += spinlock_types.h +generic-y += ticket_spinlock.h generic-y += qrwlock.h generic-y += qrwlock_types.h +generic-y += qspinlock.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index ebbce134917c..4cadc56220fe 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -12,30 +12,43 @@ #include <asm/fence.h> #include <asm/hwcap.h> #include <asm/insn-def.h> - -#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ -({ \ - u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ - ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ - ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ - << __s; \ - ulong __newx = (ulong)(n) << __s; \ - ulong __retx; \ - ulong __rc; \ - \ - __asm__ __volatile__ ( \ - prepend \ - "0: lr.w %0, %2\n" \ - " and %1, %0, %z4\n" \ - " or %1, %1, %z3\n" \ - " sc.w" sc_sfx " %1, %1, %2\n" \ - " bnez %1, 0b\n" \ - append \ - : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ - : "rJ" (__newx), "rJ" (~__mask) \ - : "memory"); \ - \ - r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +#include <asm/cpufeature-macros.h> + +#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \ + swap_append, r, p, n) \ +({ \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \ + __asm__ __volatile__ ( \ + prepend \ + " amoswap" swap_sfx " %0, %z2, %1\n" \ + swap_append \ + : "=&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } else { \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr.w %0, %2\n" \ + " 
and %1, %0, %z4\n" \ + " or %1, %1, %z3\n" \ + " sc.w" sc_sfx " %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + sc_append \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" (__newx), "rJ" (~__mask) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ + } \ }) #define __arch_xchg(sfx, prepend, append, r, p, n) \ @@ -58,8 +71,13 @@ \ switch (sizeof(*__ptr)) { \ case 1: \ + __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \ + prepend, sc_append, swap_append, \ + __ret, __ptr, __new); \ + break; \ case 2: \ - __arch_xchg_masked(sc_sfx, prepend, sc_append, \ + __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \ + prepend, sc_append, swap_append, \ __ret, __ptr, __new); \ break; \ case 4: \ @@ -106,55 +124,90 @@ * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ - -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \ -({ \ - u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ - ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ - ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ - << __s; \ - ulong __newx = (ulong)(n) << __s; \ - ulong __oldx = (ulong)(o) << __s; \ - ulong __retx; \ - ulong __rc; \ - \ - __asm__ __volatile__ ( \ - prepend \ - "0: lr.w %0, %2\n" \ - " and %1, %0, %z5\n" \ - " bne %1, %z3, 1f\n" \ - " and %1, %0, %z6\n" \ - " or %1, %1, %z4\n" \ - " sc.w" sc_sfx " %1, %1, %2\n" \ - " bnez %1, 0b\n" \ - append \ - "1:\n" \ - : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ - : "rJ" ((long)__oldx), "rJ" (__newx), \ - "rJ" (__mask), "rJ" (~__mask) \ - : "memory"); \ - \ - r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + r, p, o, n) \ +({ \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \ + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \ + r = o; \ + \ + __asm__ __volatile__ ( \ + cas_prepend \ + " amocas" cas_sfx " %0, %z2, %1\n" \ + cas_append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } else { \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __oldx = (ulong)(o) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + sc_prepend \ + "0: lr.w %0, %2\n" \ + " and %1, %0, %z5\n" \ + " bne %1, %z3, 1f\n" \ + " and %1, %0, %z6\n" \ + " or %1, %1, %z4\n" \ + " sc.w" sc_sfx " %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + sc_append \ + "1:\n" \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" ((long)__oldx), "rJ" (__newx), \ + "rJ" (__mask), "rJ" (~__mask) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ + } \ }) -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ +#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + r, p, co, o, n) \ ({ \ - register unsigned int __rc; \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \ + r = o; \ \ - __asm__ __volatile__ ( \ - prepend \ - "0: lr" lr_sfx " %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc" sc_sfx " %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - append \ - "1:\n" \ - : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ - : "rJ" (co o), "rJ" (n) \ - : 
"memory"); \ + __asm__ __volatile__ ( \ + cas_prepend \ + " amocas" cas_sfx " %0, %z2, %1\n" \ + cas_append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } else { \ + register unsigned int __rc; \ + \ + __asm__ __volatile__ ( \ + sc_prepend \ + "0: lr" lr_sfx " %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc" sc_sfx " %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + sc_append \ + "1:\n" \ + : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ + : "rJ" (co o), "rJ" (n) \ + : "memory"); \ + } \ }) -#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ +#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(__ptr)) __old = (old); \ @@ -163,17 +216,28 @@ \ switch (sizeof(*__ptr)) { \ case 1: \ + __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, __old, __new); \ + break; \ case 2: \ - __arch_cmpxchg_masked(sc_sfx, prepend, append, \ - __ret, __ptr, __old, __new); \ + __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, __old, __new); \ break; \ case 4: \ - __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ - __ret, __ptr, (long), __old, __new); \ + __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, (long), __old, __new); \ break; \ case 8: \ - __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \ - __ret, __ptr, /**/, __old, __new); \ + __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, /**/, __old, __new); \ break; \ default: \ BUILD_BUG(); \ @@ -181,17 +245,40 @@ (__typeof__(*(__ptr)))__ret; \ }) +/* + * These macros are here to improve the readability of the arch_cmpxchg_XXX() + * macros. 
+ */ +#define SC_SFX(x) x +#define CAS_SFX(x) x +#define SC_PREPEND(x) x +#define SC_APPEND(x) x +#define CAS_PREPEND(x) x +#define CAS_APPEND(x) x + #define arch_cmpxchg_relaxed(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), "", "", "") + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(""), SC_APPEND(""), \ + CAS_PREPEND(""), CAS_APPEND("")) #define arch_cmpxchg_acquire(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER) + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \ + CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER)) #define arch_cmpxchg_release(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "") + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \ + CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND("")) #define arch_cmpxchg(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(".rl"), CAS_SFX(".aqrl"), \ + SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \ + CAS_PREPEND(""), CAS_APPEND("")) #define arch_cmpxchg_local(ptr, o, n) \ arch_cmpxchg_relaxed((ptr), (o), (n)) @@ -226,6 +313,44 @@ arch_cmpxchg_release((ptr), (o), (n)); \ }) +#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) + +#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS) + +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __arch_cmpxchg128(p, o, n, cas_sfx) \ +({ \ + __typeof__(*(p)) __o = (o); \ + union __u128_halves __hn = { .full = (n) }; \ + union __u128_halves __ho = { .full = (__o) }; \ + register unsigned long t1 asm ("t1") = __hn.low; \ + register unsigned long t2 asm ("t2") = __hn.high; \ + register unsigned long t3 asm ("t3") = __ho.low; \ + register unsigned long t4 asm ("t4") = __ho.high; \ + \ + __asm__ __volatile__ ( \ + " amocas.q" cas_sfx " %0, %z3, %2" \ + : "+&r" (t3), "+&r" (t4), "+A" (*(p)) \ + : "rJ" (t1), "rJ" (t2) \ + : "memory"); \ + \ + ((u128)t4 << 64) | t3; \ +}) + +#define arch_cmpxchg128(ptr, o, n) \ + __arch_cmpxchg128((ptr), (o), (n), ".aqrl") + +#define arch_cmpxchg128_local(ptr, o, n) \ + __arch_cmpxchg128((ptr), (o), (n), "") + +#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */ + #ifdef CONFIG_RISCV_ISA_ZAWRS /* * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to @@ -245,6 +370,11 @@ static __always_inline void __cmpwait(volatile void *ptr, : : : : no_zawrs); switch (size) { + case 1: + fallthrough; + case 2: + /* RISC-V doesn't have lr instructions on byte and half-word. 
*/ + goto no_zawrs; case 4: asm volatile( " lr.w %0, %1\n" diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h index aa103530a5c8..6081327e55f5 100644 --- a/arch/riscv/include/asm/compat.h +++ b/arch/riscv/include/asm/compat.h @@ -9,7 +9,6 @@ */ #include <linux/types.h> #include <linux/sched.h> -#include <linux/sched/task_stack.h> #include <asm-generic/compat.h> static inline int is_compat_task(void) diff --git a/arch/riscv/include/asm/cpufeature-macros.h b/arch/riscv/include/asm/cpufeature-macros.h new file mode 100644 index 000000000000..a8103edbf51f --- /dev/null +++ b/arch/riscv/include/asm/cpufeature-macros.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022-2024 Rivos, Inc + */ + +#ifndef _ASM_CPUFEATURE_MACROS_H +#define _ASM_CPUFEATURE_MACROS_H + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> + +#define STANDARD_EXT 0 + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + +static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor, + const unsigned long ext) +{ + asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_no); + + return true; +l_no: + return false; +} + +static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor, + const unsigned long ext) +{ + asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_unlikely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + +static __always_inline bool riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_likely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + +#endif /* _ASM_CPUFEATURE_MACROS_H */ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 45f9c1171a48..4bd054c54c21 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -8,9 +8,12 @@ #include <linux/bitmap.h> #include <linux/jump_label.h> +#include <linux/workqueue.h> +#include <linux/kconfig.h> +#include <linux/percpu-defs.h> +#include <linux/threads.h> #include <asm/hwcap.h> -#include <asm/alternative-macros.h> -#include <asm/errno.h> +#include <asm/cpufeature-macros.h> /* * These are probed via a device_initcall(), via either the SBI or directly @@ -31,7 +34,7 @@ DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); /* Per-cpu ISA extensions. 
*/ extern struct riscv_isainfo hart_isa[NR_CPUS]; -void riscv_user_isa_enable(void); +void __init riscv_user_isa_enable(void); #define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ .name = #_name, \ @@ -58,8 +61,9 @@ void riscv_user_isa_enable(void); #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) -#if defined(CONFIG_RISCV_MISALIGNED) bool check_unaligned_access_emulated_all_cpus(void); +#if defined(CONFIG_RISCV_SCALAR_MISALIGNED) +void check_unaligned_access_emulated(struct work_struct *work __always_unused); void unaligned_emulation_finish(void); bool unaligned_ctl_available(void); DECLARE_PER_CPU(long, misaligned_access_speed); @@ -70,6 +74,12 @@ static inline bool unaligned_ctl_available(void) } #endif +bool check_vector_unaligned_access_emulated_all_cpus(void); +#if defined(CONFIG_RISCV_VECTOR_MISALIGNED) +void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused); +DECLARE_PER_CPU(long, vector_misaligned_access); +#endif + #if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); @@ -103,61 +113,6 @@ extern const size_t riscv_isa_ext_count; extern bool riscv_isa_fallback; unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -#define STANDARD_EXT 0 - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - -static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor, - const unsigned long ext) -{ - asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1) - : - : [vendor] "i" (vendor), [ext] "i" (ext) - : - : l_no); - - return true; -l_no: - return false; -} - -static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor, - const unsigned long ext) -{ - asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1) - : - : [vendor] "i" (vendor), [ext] "i" (ext) - : - : l_yes); - - return false; -l_yes: - return true; -} - -static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) - return __riscv_has_extension_unlikely(STANDARD_EXT, ext); - - return __riscv_isa_extension_available(NULL, ext); -} - -static __always_inline bool riscv_has_extension_likely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) - return __riscv_has_extension_likely(STANDARD_EXT, ext); - - return __riscv_isa_extension_available(NULL, ext); -} - static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) { compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 25966995da04..fe5d4eb9adea 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -119,6 +119,10 @@ /* HSTATUS flags */ #ifdef CONFIG_64BIT +#define HSTATUS_HUPMM _AC(0x3000000000000, UL) +#define HSTATUS_HUPMM_PMLEN_0 _AC(0x0000000000000, UL) +#define HSTATUS_HUPMM_PMLEN_7 _AC(0x2000000000000, UL) +#define HSTATUS_HUPMM_PMLEN_16 _AC(0x3000000000000, UL) #define HSTATUS_VSXL _AC(0x300000000, 
UL) #define HSTATUS_VSXL_SHIFT 32 #endif @@ -195,6 +199,10 @@ /* xENVCFG flags */ #define ENVCFG_STCE (_AC(1, ULL) << 63) #define ENVCFG_PBMTE (_AC(1, ULL) << 62) +#define ENVCFG_PMM (_AC(0x3, ULL) << 32) +#define ENVCFG_PMM_PMLEN_0 (_AC(0x0, ULL) << 32) +#define ENVCFG_PMM_PMLEN_7 (_AC(0x2, ULL) << 32) +#define ENVCFG_PMM_PMLEN_16 (_AC(0x3, ULL) << 32) #define ENVCFG_CBZE (_AC(1, UL) << 7) #define ENVCFG_CBCFE (_AC(1, UL) << 6) #define ENVCFG_CBIE_SHIFT 4 @@ -216,6 +224,12 @@ #define SMSTATEEN0_SSTATEEN0_SHIFT 63 #define SMSTATEEN0_SSTATEEN0 (_ULL(1) << SMSTATEEN0_SSTATEEN0_SHIFT) +/* mseccfg bits */ +#define MSECCFG_PMM ENVCFG_PMM +#define MSECCFG_PMM_PMLEN_0 ENVCFG_PMM_PMLEN_0 +#define MSECCFG_PMM_PMLEN_7 ENVCFG_PMM_PMLEN_7 +#define MSECCFG_PMM_PMLEN_16 ENVCFG_PMM_PMLEN_16 + /* symbolic CSR names: */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 @@ -382,6 +396,8 @@ #define CSR_MIP 0x344 #define CSR_PMPCFG0 0x3a0 #define CSR_PMPADDR0 0x3b0 +#define CSR_MSECCFG 0x747 +#define CSR_MSECCFGH 0x757 #define CSR_MVENDORID 0xf11 #define CSR_MARCHID 0xf12 #define CSR_MIMPID 0xf13 diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index 2293e535f865..b28ccc6cdeea 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -33,6 +33,7 @@ static inline int handle_misaligned_load(struct pt_regs *regs) { return -1; } + static inline int handle_misaligned_store(struct pt_regs *regs) { return -1; diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 46d9de54179e..08d2a5697466 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -93,6 +93,11 @@ #define RISCV_ISA_EXT_ZCMOP 84 #define RISCV_ISA_EXT_ZAWRS 85 #define RISCV_ISA_EXT_SVVPTC 86 +#define RISCV_ISA_EXT_SMMPM 87 +#define RISCV_ISA_EXT_SMNPM 88 +#define RISCV_ISA_EXT_SSNPM 89 +#define RISCV_ISA_EXT_ZABHA 90 +#define RISCV_ISA_EXT_ZICCRSE 91 #define RISCV_ISA_EXT_XLINUXENVCFG 127 @@ -101,8 +106,10 @@ #ifdef CONFIG_RISCV_M_MODE #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SMAIA +#define RISCV_ISA_EXT_SUPM RISCV_ISA_EXT_SMNPM #else #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA +#define RISCV_ISA_EXT_SUPM RISCV_ISA_EXT_SSNPM #endif #endif /* _ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index ffb9484531af..1ce1df6d0ff3 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 9 +#define RISCV_HWPROBE_MAX_KEY 10 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h index 1c768d02bd0c..87a71cc6d146 100644 --- a/arch/riscv/include/asm/jump_label.h +++ b/arch/riscv/include/asm/jump_label.h @@ -16,21 +16,28 @@ #define JUMP_LABEL_NOP_SIZE 4 +#define JUMP_TABLE_ENTRY(key, label) \ + ".pushsection __jump_table, \"aw\" \n\t" \ + ".align " RISCV_LGPTR " \n\t" \ + ".long 1b - ., " label " - . \n\t" \ + "" RISCV_PTR " " key " - . \n\t" \ + ".popsection \n\t" + +/* This macro is also expanded on the Rust side. 
*/ +#define ARCH_STATIC_BRANCH_ASM(key, label) \ + " .align 2 \n\t" \ + " .option push \n\t" \ + " .option norelax \n\t" \ + " .option norvc \n\t" \ + "1: nop \n\t" \ + " .option pop \n\t" \ + JUMP_TABLE_ENTRY(key, label) + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { asm goto( - " .align 2 \n\t" - " .option push \n\t" - " .option norelax \n\t" - " .option norvc \n\t" - "1: nop \n\t" - " .option pop \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align " RISCV_LGPTR " \n\t" - " .long 1b - ., %l[label] - . \n\t" - " " RISCV_PTR " %0 - . \n\t" - " .popsection \n\t" + ARCH_STATIC_BRANCH_ASM("%0", "%l[label]") : : "i"(&((char *)key)[branch]) : : label); return false; @@ -38,21 +45,20 @@ label: return true; } +#define ARCH_STATIC_BRANCH_JUMP_ASM(key, label) \ + " .align 2 \n\t" \ + " .option push \n\t" \ + " .option norelax \n\t" \ + " .option norvc \n\t" \ + "1: j " label " \n\t" \ + " .option pop \n\t" \ + JUMP_TABLE_ENTRY(key, label) + static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { asm goto( - " .align 2 \n\t" - " .option push \n\t" - " .option norelax \n\t" - " .option norvc \n\t" - "1: j %l[label] \n\t" - " .option pop \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align " RISCV_LGPTR " \n\t" - " .long 1b - ., %l[label] - . \n\t" - " " RISCV_PTR " %0 - . \n\t" - " .popsection \n\t" + ARCH_STATIC_BRANCH_JUMP_ASM("%0", "%l[label]") : : "i"(&((char *)key)[branch]) : : label); return false; diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 2e2254fd2a2a..35eab6e0f4ae 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -286,6 +286,16 @@ struct kvm_vcpu_arch { } sta; }; +/* + * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event, + * arrived in guest context. For riscv, any event that arrives while a vCPU is + * loaded is considered to be "in guest". + */ +static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) +{ + return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; +} + static inline void kvm_arch_sync_events(struct kvm *kvm) {} #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 diff --git a/arch/riscv/include/asm/kvm_nacl.h b/arch/riscv/include/asm/kvm_nacl.h new file mode 100644 index 000000000000..4124d5e06a0f --- /dev/null +++ b/arch/riscv/include/asm/kvm_nacl.h @@ -0,0 +1,245 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Ventana Micro Systems Inc. 
+ */ + +#ifndef __KVM_NACL_H +#define __KVM_NACL_H + +#include <linux/jump_label.h> +#include <linux/percpu.h> +#include <asm/byteorder.h> +#include <asm/csr.h> +#include <asm/sbi.h> + +struct kvm_vcpu_arch; + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_available); +#define kvm_riscv_nacl_available() \ + static_branch_unlikely(&kvm_riscv_nacl_available) + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available); +#define kvm_riscv_nacl_sync_csr_available() \ + static_branch_unlikely(&kvm_riscv_nacl_sync_csr_available) + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available); +#define kvm_riscv_nacl_sync_hfence_available() \ + static_branch_unlikely(&kvm_riscv_nacl_sync_hfence_available) + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available); +#define kvm_riscv_nacl_sync_sret_available() \ + static_branch_unlikely(&kvm_riscv_nacl_sync_sret_available) + +DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available); +#define kvm_riscv_nacl_autoswap_csr_available() \ + static_branch_unlikely(&kvm_riscv_nacl_autoswap_csr_available) + +struct kvm_riscv_nacl { + void *shmem; + phys_addr_t shmem_phys; +}; +DECLARE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl); + +void __kvm_riscv_nacl_hfence(void *shmem, + unsigned long control, + unsigned long page_num, + unsigned long page_count); + +void __kvm_riscv_nacl_switch_to(struct kvm_vcpu_arch *vcpu_arch, + unsigned long sbi_ext_id, + unsigned long sbi_func_id); + +int kvm_riscv_nacl_enable(void); + +void kvm_riscv_nacl_disable(void); + +void kvm_riscv_nacl_exit(void); + +int kvm_riscv_nacl_init(void); + +#ifdef CONFIG_32BIT +#define lelong_to_cpu(__x) le32_to_cpu(__x) +#define cpu_to_lelong(__x) cpu_to_le32(__x) +#else +#define lelong_to_cpu(__x) le64_to_cpu(__x) +#define cpu_to_lelong(__x) cpu_to_le64(__x) +#endif + +#define nacl_shmem() \ + this_cpu_ptr(&kvm_riscv_nacl)->shmem + +#define nacl_scratch_read_long(__shmem, __offset) \ +({ \ + unsigned long *__p = (__shmem) + \ + SBI_NACL_SHMEM_SCRATCH_OFFSET + \ + (__offset); \ + lelong_to_cpu(*__p); \ +}) + +#define nacl_scratch_write_long(__shmem, __offset, __val) \ +do { \ + unsigned long *__p = (__shmem) + \ + SBI_NACL_SHMEM_SCRATCH_OFFSET + \ + (__offset); \ + *__p = cpu_to_lelong(__val); \ +} while (0) + +#define nacl_scratch_write_longs(__shmem, __offset, __array, __count) \ +do { \ + unsigned int __i; \ + unsigned long *__p = (__shmem) + \ + SBI_NACL_SHMEM_SCRATCH_OFFSET + \ + (__offset); \ + for (__i = 0; __i < (__count); __i++) \ + __p[__i] = cpu_to_lelong((__array)[__i]); \ +} while (0) + +#define nacl_sync_hfence(__e) \ + sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_HFENCE, \ + (__e), 0, 0, 0, 0, 0) + +#define nacl_hfence_mkconfig(__type, __order, __vmid, __asid) \ +({ \ + unsigned long __c = SBI_NACL_SHMEM_HFENCE_CONFIG_PEND; \ + __c |= ((__type) & SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_MASK) \ + << SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_SHIFT; \ + __c |= (((__order) - SBI_NACL_SHMEM_HFENCE_ORDER_BASE) & \ + SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_MASK) \ + << SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_SHIFT; \ + __c |= ((__vmid) & SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_MASK) \ + << SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_SHIFT; \ + __c |= ((__asid) & SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_MASK); \ + __c; \ +}) + +#define nacl_hfence_mkpnum(__order, __addr) \ + ((__addr) >> (__order)) + +#define nacl_hfence_mkpcount(__order, __size) \ + ((__size) >> (__order)) + +#define nacl_hfence_gvma(__shmem, __gpa, __gpsz, __order) \ +__kvm_riscv_nacl_hfence(__shmem, \ + 
nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA, \ + __order, 0, 0), \ + nacl_hfence_mkpnum(__order, __gpa), \ + nacl_hfence_mkpcount(__order, __gpsz)) + +#define nacl_hfence_gvma_all(__shmem) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_ALL, \ + 0, 0, 0), 0, 0) + +#define nacl_hfence_gvma_vmid(__shmem, __vmid, __gpa, __gpsz, __order) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID, \ + __order, __vmid, 0), \ + nacl_hfence_mkpnum(__order, __gpa), \ + nacl_hfence_mkpcount(__order, __gpsz)) + +#define nacl_hfence_gvma_vmid_all(__shmem, __vmid) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID_ALL, \ + 0, __vmid, 0), 0, 0) + +#define nacl_hfence_vvma(__shmem, __vmid, __gva, __gvsz, __order) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA, \ + __order, __vmid, 0), \ + nacl_hfence_mkpnum(__order, __gva), \ + nacl_hfence_mkpcount(__order, __gvsz)) + +#define nacl_hfence_vvma_all(__shmem, __vmid) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ALL, \ + 0, __vmid, 0), 0, 0) + +#define nacl_hfence_vvma_asid(__shmem, __vmid, __asid, __gva, __gvsz, __order)\ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID, \ + __order, __vmid, __asid), \ + nacl_hfence_mkpnum(__order, __gva), \ + nacl_hfence_mkpcount(__order, __gvsz)) + +#define nacl_hfence_vvma_asid_all(__shmem, __vmid, __asid) \ +__kvm_riscv_nacl_hfence(__shmem, \ + nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID_ALL, \ + 0, __vmid, __asid), 0, 0) + +#define nacl_csr_read(__shmem, __csr) \ +({ \ + unsigned long *__a = (__shmem) + SBI_NACL_SHMEM_CSR_OFFSET; \ + lelong_to_cpu(__a[SBI_NACL_SHMEM_CSR_INDEX(__csr)]); \ +}) + +#define nacl_csr_write(__shmem, __csr, __val) \ +do { \ + void *__s = (__shmem); \ + unsigned int __i = SBI_NACL_SHMEM_CSR_INDEX(__csr); \ + unsigned long *__a = (__s) + SBI_NACL_SHMEM_CSR_OFFSET; \ + u8 *__b = (__s) + SBI_NACL_SHMEM_DBITMAP_OFFSET; \ + __a[__i] = cpu_to_lelong(__val); \ + __b[__i >> 3] |= 1U << (__i & 0x7); \ +} while (0) + +#define nacl_csr_swap(__shmem, __csr, __val) \ +({ \ + void *__s = (__shmem); \ + unsigned int __i = SBI_NACL_SHMEM_CSR_INDEX(__csr); \ + unsigned long *__a = (__s) + SBI_NACL_SHMEM_CSR_OFFSET; \ + u8 *__b = (__s) + SBI_NACL_SHMEM_DBITMAP_OFFSET; \ + unsigned long __r = lelong_to_cpu(__a[__i]); \ + __a[__i] = cpu_to_lelong(__val); \ + __b[__i >> 3] |= 1U << (__i & 0x7); \ + __r; \ +}) + +#define nacl_sync_csr(__csr) \ + sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_CSR, \ + (__csr), 0, 0, 0, 0, 0) + +/* + * Each ncsr_xyz() macro defined below has it's own static-branch so every + * use of ncsr_xyz() macro emits a patchable direct jump. This means multiple + * back-to-back ncsr_xyz() macro usage will emit multiple patchable direct + * jumps which is sub-optimal. + * + * Based on the above, it is recommended to avoid multiple back-to-back + * ncsr_xyz() macro usage. 
+ */ + +#define ncsr_read(__csr) \ +({ \ + unsigned long __r; \ + if (kvm_riscv_nacl_available()) \ + __r = nacl_csr_read(nacl_shmem(), __csr); \ + else \ + __r = csr_read(__csr); \ + __r; \ +}) + +#define ncsr_write(__csr, __val) \ +do { \ + if (kvm_riscv_nacl_sync_csr_available()) \ + nacl_csr_write(nacl_shmem(), __csr, __val); \ + else \ + csr_write(__csr, __val); \ +} while (0) + +#define ncsr_swap(__csr, __val) \ +({ \ + unsigned long __r; \ + if (kvm_riscv_nacl_sync_csr_available()) \ + __r = nacl_csr_swap(nacl_shmem(), __csr, __val); \ + else \ + __r = csr_swap(__csr, __val); \ + __r; \ +}) + +#define nsync_csr(__csr) \ +do { \ + if (kvm_riscv_nacl_sync_csr_available()) \ + nacl_sync_csr(__csr); \ +} while (0) + +#endif diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index c9e03e9da3dc..1cc90465d75b 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -26,8 +26,15 @@ typedef struct { unsigned long exec_fdpic_loadmap; unsigned long interp_fdpic_loadmap; #endif + unsigned long flags; +#ifdef CONFIG_RISCV_ISA_SUPM + u8 pmlen; +#endif } mm_context_t; +/* Lock the pointer masking mode because this mm is multithreaded */ +#define MM_CONTEXT_LOCK_PMLEN 0 + #define cntx2asid(cntx) ((cntx) & SATP_ASID_MASK) #define cntx2version(cntx) ((cntx) & ~SATP_ASID_MASK) diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index 7030837adc1a..8c4bc49a3a0f 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -20,6 +20,9 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { +#ifdef CONFIG_RISCV_ISA_SUPM + next->context.pmlen = 0; +#endif switch_mm(prev, next, NULL); } @@ -30,11 +33,21 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_MMU atomic_long_set(&mm->context.id, 0); #endif + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM)) + clear_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags); return 0; } DECLARE_STATIC_KEY_FALSE(use_asid_allocator); +#ifdef CONFIG_RISCV_ISA_SUPM +#define mm_untag_mask mm_untag_mask +static inline unsigned long mm_untag_mask(struct mm_struct *mm) +{ + return -1UL >> mm->context.pmlen; +} +#endif + #include <asm-generic/mmu_context.h> #endif /* _ASM_RISCV_MMU_CONTEXT_H */ diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index 665bbc9b2f84..bcc928fd3785 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -8,6 +8,7 @@ #ifndef _ASM_RISCV_PERF_EVENT_H #define _ASM_RISCV_PERF_EVENT_H +#ifdef CONFIG_PERF_EVENTS #include <linux/perf_event.h> #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs @@ -17,4 +18,6 @@ (regs)->sp = current_stack_pointer; \ (regs)->status = SR_PP; \ } +#endif + #endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index efa1b3519b23..5f56eb9d114a 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -102,6 +102,7 @@ struct thread_struct { unsigned long s[12]; /* s[0]: frame pointer */ struct __riscv_d_ext_state fstate; unsigned long bad_cause; + unsigned long envcfg; u32 riscv_v_flags; u32 vstate_ctrl; struct __riscv_v_ext_state vstate; @@ -177,6 +178,14 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); #define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2) extern int 
riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread); +#ifdef CONFIG_RISCV_ISA_SUPM +/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg); +long get_tagged_addr_ctrl(struct task_struct *task); +#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg) +#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current) +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 98f631b051db..6c82318065cf 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -34,6 +34,7 @@ enum sbi_ext_id { SBI_EXT_PMU = 0x504D55, SBI_EXT_DBCN = 0x4442434E, SBI_EXT_STA = 0x535441, + SBI_EXT_NACL = 0x4E41434C, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x08000000, @@ -281,6 +282,125 @@ struct sbi_sta_struct { #define SBI_SHMEM_DISABLE -1 +enum sbi_ext_nacl_fid { + SBI_EXT_NACL_PROBE_FEATURE = 0x0, + SBI_EXT_NACL_SET_SHMEM = 0x1, + SBI_EXT_NACL_SYNC_CSR = 0x2, + SBI_EXT_NACL_SYNC_HFENCE = 0x3, + SBI_EXT_NACL_SYNC_SRET = 0x4, +}; + +enum sbi_ext_nacl_feature { + SBI_NACL_FEAT_SYNC_CSR = 0x0, + SBI_NACL_FEAT_SYNC_HFENCE = 0x1, + SBI_NACL_FEAT_SYNC_SRET = 0x2, + SBI_NACL_FEAT_AUTOSWAP_CSR = 0x3, +}; + +#define SBI_NACL_SHMEM_ADDR_SHIFT 12 +#define SBI_NACL_SHMEM_SCRATCH_OFFSET 0x0000 +#define SBI_NACL_SHMEM_SCRATCH_SIZE 0x1000 +#define SBI_NACL_SHMEM_SRET_OFFSET 0x0000 +#define SBI_NACL_SHMEM_SRET_SIZE 0x0200 +#define SBI_NACL_SHMEM_AUTOSWAP_OFFSET (SBI_NACL_SHMEM_SRET_OFFSET + \ + SBI_NACL_SHMEM_SRET_SIZE) +#define SBI_NACL_SHMEM_AUTOSWAP_SIZE 0x0080 +#define SBI_NACL_SHMEM_UNUSED_OFFSET (SBI_NACL_SHMEM_AUTOSWAP_OFFSET + \ + SBI_NACL_SHMEM_AUTOSWAP_SIZE) +#define SBI_NACL_SHMEM_UNUSED_SIZE 0x0580 +#define SBI_NACL_SHMEM_HFENCE_OFFSET (SBI_NACL_SHMEM_UNUSED_OFFSET + \ + SBI_NACL_SHMEM_UNUSED_SIZE) +#define SBI_NACL_SHMEM_HFENCE_SIZE 0x0780 +#define SBI_NACL_SHMEM_DBITMAP_OFFSET (SBI_NACL_SHMEM_HFENCE_OFFSET + \ + SBI_NACL_SHMEM_HFENCE_SIZE) +#define SBI_NACL_SHMEM_DBITMAP_SIZE 0x0080 +#define SBI_NACL_SHMEM_CSR_OFFSET (SBI_NACL_SHMEM_DBITMAP_OFFSET + \ + SBI_NACL_SHMEM_DBITMAP_SIZE) +#define SBI_NACL_SHMEM_CSR_SIZE ((__riscv_xlen / 8) * 1024) +#define SBI_NACL_SHMEM_SIZE (SBI_NACL_SHMEM_CSR_OFFSET + \ + SBI_NACL_SHMEM_CSR_SIZE) + +#define SBI_NACL_SHMEM_CSR_INDEX(__csr_num) \ + ((((__csr_num) & 0xc00) >> 2) | ((__csr_num) & 0xff)) + +#define SBI_NACL_SHMEM_HFENCE_ENTRY_SZ ((__riscv_xlen / 8) * 4) +#define SBI_NACL_SHMEM_HFENCE_ENTRY_MAX \ + (SBI_NACL_SHMEM_HFENCE_SIZE / \ + SBI_NACL_SHMEM_HFENCE_ENTRY_SZ) +#define SBI_NACL_SHMEM_HFENCE_ENTRY(__num) \ + (SBI_NACL_SHMEM_HFENCE_OFFSET + \ + (__num) * SBI_NACL_SHMEM_HFENCE_ENTRY_SZ) +#define SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(__num) \ + SBI_NACL_SHMEM_HFENCE_ENTRY(__num) +#define SBI_NACL_SHMEM_HFENCE_ENTRY_PNUM(__num)\ + (SBI_NACL_SHMEM_HFENCE_ENTRY(__num) + (__riscv_xlen / 8)) +#define SBI_NACL_SHMEM_HFENCE_ENTRY_PCOUNT(__num)\ + (SBI_NACL_SHMEM_HFENCE_ENTRY(__num) + \ + ((__riscv_xlen / 8) * 3)) + +#define SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_BITS 1 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_SHIFT \ + (__riscv_xlen - SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_BITS) +#define SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_BITS) - 1) +#define SBI_NACL_SHMEM_HFENCE_CONFIG_PEND \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_MASK << \ + SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_SHIFT) + +#define 
SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD1_BITS 3 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD1_SHIFT \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_PEND_SHIFT - \ + SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD1_BITS) + +#define SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_BITS 4 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_SHIFT \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD1_SHIFT - \ + SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_BITS) +#define SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_BITS) - 1) + +#define SBI_NACL_SHMEM_HFENCE_TYPE_GVMA 0x0 +#define SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_ALL 0x1 +#define SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID 0x2 +#define SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID_ALL 0x3 +#define SBI_NACL_SHMEM_HFENCE_TYPE_VVMA 0x4 +#define SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ALL 0x5 +#define SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID 0x6 +#define SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID_ALL 0x7 + +#define SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD2_BITS 1 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD2_SHIFT \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_SHIFT - \ + SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD2_BITS) + +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_BITS 7 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_SHIFT \ + (SBI_NACL_SHMEM_HFENCE_CONFIG_RSVD2_SHIFT - \ + SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_BITS) +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_BITS) - 1) +#define SBI_NACL_SHMEM_HFENCE_ORDER_BASE 12 + +#if __riscv_xlen == 32 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_BITS 9 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_BITS 7 +#else +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_BITS 16 +#define SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_BITS 14 +#endif +#define SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_SHIFT \ + SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_BITS +#define SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_BITS) - 1) +#define SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_MASK \ + ((1UL << SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_BITS) - 1) + +#define SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS BIT(0) +#define SBI_NACL_SHMEM_AUTOSWAP_HSTATUS ((__riscv_xlen / 8) * 1) + +#define SBI_NACL_SHMEM_SRET_X(__i) ((__riscv_xlen / 8) * (__i)) +#define SBI_NACL_SHMEM_SRET_X_LAST 31 + /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h new file mode 100644 index 000000000000..e5121b89acea --- /dev/null +++ b/arch/riscv/include/asm/spinlock.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_RISCV_SPINLOCK_H +#define __ASM_RISCV_SPINLOCK_H + +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS +#define _Q_PENDING_LOOPS (1 << 9) + +#define __no_arch_spinlock_redefine +#include <asm/ticket_spinlock.h> +#include <asm/qspinlock.h> +#include <asm/jump_label.h> + +/* + * TODO: Use an alternative instead of a static key when we are able to parse + * the extensions string earlier in the boot process. 
+ */ +DECLARE_STATIC_KEY_TRUE(qspinlock_key); + +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \ +static __always_inline type arch_spin_##op(type_lock lock) \ +{ \ + if (static_branch_unlikely(&qspinlock_key)) \ + return queued_spin_##op(lock); \ + return ticket_spin_##op(lock); \ +} + +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t) + +#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS) + +#include <asm/qspinlock.h> + +#else + +#include <asm/ticket_spinlock.h> + +#endif + +#include <asm/qrwlock.h> + +#endif /* __ASM_RISCV_SPINLOCK_H */ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 7594df37cc9f..94e33216b2d9 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -70,6 +70,24 @@ static __always_inline bool has_fpu(void) { return false; } #define __switch_to_fpu(__prev, __next) do { } while (0) #endif +static inline void envcfg_update_bits(struct task_struct *task, + unsigned long mask, unsigned long val) +{ + unsigned long envcfg; + + envcfg = (task->thread.envcfg & ~mask) | val; + task->thread.envcfg = envcfg; + if (task == current) + csr_write(CSR_ENVCFG, envcfg); +} + +static inline void __switch_to_envcfg(struct task_struct *next) +{ + asm volatile (ALTERNATIVE("nop", "csrw " __stringify(CSR_ENVCFG) ", %0", + 0, RISCV_ISA_EXT_XLINUXENVCFG, 1) + :: "r" (next->thread.envcfg) : "memory"); +} + extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); @@ -103,6 +121,7 @@ do { \ __switch_to_vector(__prev, __next); \ if (switch_to_should_flush_icache(__next)) \ local_flush_icache_all(); \ + __switch_to_envcfg(__next); \ ((last) = __switch_to(__prev, __next)); \ } while (0) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 72ec1d9bd3f3..fee56b0c8058 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -9,8 +9,41 @@ #define _ASM_RISCV_UACCESS_H #include <asm/asm-extable.h> +#include <asm/cpufeature.h> #include <asm/pgtable.h> /* for TASK_SIZE */ +#ifdef CONFIG_RISCV_ISA_SUPM +static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigned long addr) +{ + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) { + u8 pmlen = mm->context.pmlen; + + /* Virtual addresses are sign-extended; physical addresses are zero-extended. 
*/ + if (IS_ENABLED(CONFIG_MMU)) + return (long)(addr << pmlen) >> pmlen; + else + return (addr << pmlen) >> pmlen; + } + + return addr; +} + +#define untagged_addr(addr) ({ \ + unsigned long __addr = (__force unsigned long)(addr); \ + (__force __typeof__(addr))__untagged_addr_remote(current->mm, __addr); \ +}) + +#define untagged_addr_remote(mm, addr) ({ \ + unsigned long __addr = (__force unsigned long)(addr); \ + mmap_assert_locked(mm); \ + (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \ +}) + +#define access_ok(addr, size) likely(__access_ok(untagged_addr(addr), size)) +#else +#define untagged_addr(addr) (addr) +#endif + /* * User space memory access functions */ @@ -130,7 +163,7 @@ do { \ */ #define __get_user(x, ptr) \ ({ \ - const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \ long __gu_err = 0; \ \ __chk_user_ptr(__gu_ptr); \ @@ -246,7 +279,7 @@ do { \ */ #define __put_user(x, ptr) \ ({ \ - __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \ __typeof__(*__gu_ptr) __val = (x); \ long __pu_err = 0; \ \ @@ -293,13 +326,13 @@ unsigned long __must_check __asm_copy_from_user(void *to, static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - return __asm_copy_from_user(to, from, n); + return __asm_copy_from_user(to, untagged_addr(from), n); } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - return __asm_copy_to_user(to, from, n); + return __asm_copy_to_user(untagged_addr(to), from, n); } extern long strncpy_from_user(char *dest, const char __user *src, long count); @@ -314,7 +347,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) { might_fault(); return access_ok(to, n) ? 
- __clear_user(to, n) : n; + __clear_user(untagged_addr(to), n) : n; } #define __get_kernel_nofault(dst, src, type, err_label) \ diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index be7d309cca8a..c7c023afbacd 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -21,6 +21,7 @@ extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); +bool insn_is_vector(u32 insn_buf); bool riscv_v_first_use_handler(struct pt_regs *regs); void kernel_vector_begin(void); void kernel_vector_end(void); @@ -268,6 +269,7 @@ struct pt_regs; static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } +static __always_inline bool insn_is_vector(u32 insn_buf) { return false; } static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 1e153cda57db..3af142b99f77 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -72,6 +72,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZCF (1ULL << 46) #define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47) #define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48) +#define RISCV_HWPROBE_EXT_SUPM (1ULL << 49) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -88,6 +89,11 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2 #define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3 #define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4 +#define RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF 10 +#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN 0 +#define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW 2 +#define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3 +#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. 
*/ /* Flags */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index e97db3296456..4f24201376b1 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -175,6 +175,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZCF, KVM_RISCV_ISA_EXT_ZCMOP, KVM_RISCV_ISA_EXT_ZAWRS, + KVM_RISCV_ISA_EXT_SMNPM, + KVM_RISCV_ISA_EXT_SSNPM, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 69dc8aaab3fb..063d1faf5a53 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -75,7 +75,8 @@ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o -obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o +obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o +obj-$(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS) += vec-copy-unaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_FPU) += kernel_mode_fpu.o diff --git a/arch/riscv/kernel/copy-unaligned.h b/arch/riscv/kernel/copy-unaligned.h index e3d70d35b708..85d4d11450cb 100644 --- a/arch/riscv/kernel/copy-unaligned.h +++ b/arch/riscv/kernel/copy-unaligned.h @@ -10,4 +10,9 @@ void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size); void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size); +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +void __riscv_copy_vec_words_unaligned(void *dst, const void *src, size_t size); +void __riscv_copy_vec_bytes_unaligned(void *dst, const void *src, size_t size); +#endif + #endif /* __RISCV_KERNEL_COPY_UNALIGNED_H */ diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 826f46b21f2e..467c5c735bf5 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -29,6 +29,8 @@ #define NUM_ALPHA_EXTS ('z' - 'a' + 1) +static bool any_cpu_has_zicboz; + unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ @@ -99,6 +101,7 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); return -EINVAL; } + any_cpu_has_zicboz = true; return 0; } @@ -315,6 +318,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { riscv_ext_zicbom_validate), __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate), + __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), @@ -323,6 +327,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP), + __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), @@ -375,9 +380,12 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts), __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT), __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), + __RISCV_ISA_EXT_DATA(smmpm, RISCV_ISA_EXT_SMMPM), + __RISCV_ISA_EXT_SUPERSET(smnpm, RISCV_ISA_EXT_SMNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(smstateen, RISCV_ISA_EXT_SMSTATEEN), 
__RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_SUPERSET(ssnpm, RISCV_ISA_EXT_SSNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), @@ -918,10 +926,12 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -void riscv_user_isa_enable(void) +void __init riscv_user_isa_enable(void) { - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) - csr_set(CSR_ENVCFG, ENVCFG_CBZE); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ)) + current->thread.envcfg |= ENVCFG_CBZE; + else if (any_cpu_has_zicboz) + pr_warn("Zicboz disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index 327cf527dd7e..f74f6b60e347 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -170,7 +170,7 @@ SYM_FUNC_END(__fstate_restore) __access_func(f31) -#ifdef CONFIG_RISCV_MISALIGNED +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED /* * Disable compressed instructions set to keep a constant offset between FP @@ -224,4 +224,4 @@ SYM_FUNC_START(get_f64_reg) fp_access_epilogue SYM_FUNC_END(get_f64_reg) -#endif /* CONFIG_RISCV_MISALIGNED */ +#endif /* CONFIG_RISCV_SCALAR_MISALIGNED */ diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index c7468af77c66..b465bc9eb870 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -28,11 +28,21 @@ static bool fill_callchain(void *entry, unsigned long pc) void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } + arch_stack_walk_user(fill_callchain, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } + walk_stackframe(NULL, regs, fill_callchain, entry); } diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index e3142d8a6e28..58b6482c2bf6 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/bitfield.h> #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -180,6 +181,10 @@ void flush_thread(void) memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); #endif +#ifdef CONFIG_RISCV_ISA_SUPM + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + envcfg_update_bits(current, ENVCFG_PMM, ENVCFG_PMM_PMLEN_0); +#endif } void arch_release_task_struct(struct task_struct *tsk) @@ -208,6 +213,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + /* Ensure all threads in this mm have the same pointer masking mode. 
*/ + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM) && p->mm && (clone_flags & CLONE_VM)) + set_bit(MM_CONTEXT_LOCK_PMLEN, &p->mm->context.flags); + memset(&p->thread.s, 0, sizeof(p->thread.s)); /* p->thread holds context to be restored by __switch_to() */ @@ -242,3 +251,148 @@ void __init arch_task_cache_init(void) { riscv_v_setup_ctx_cache(); } + +#ifdef CONFIG_RISCV_ISA_SUPM +enum { + PMLEN_0 = 0, + PMLEN_7 = 7, + PMLEN_16 = 16, +}; + +static bool have_user_pmlen_7; +static bool have_user_pmlen_16; + +/* + * Control the relaxed ABI allowing tagged user addresses into the kernel. + */ +static unsigned int tagged_addr_disabled; + +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) +{ + unsigned long valid_mask = PR_PMLEN_MASK | PR_TAGGED_ADDR_ENABLE; + struct thread_info *ti = task_thread_info(task); + struct mm_struct *mm = task->mm; + unsigned long pmm; + u8 pmlen; + + if (is_compat_thread(ti)) + return -EINVAL; + + if (arg & ~valid_mask) + return -EINVAL; + + /* + * Prefer the smallest PMLEN that satisfies the user's request, + * in case choosing a larger PMLEN has a performance impact. + */ + pmlen = FIELD_GET(PR_PMLEN_MASK, arg); + if (pmlen == PMLEN_0) { + pmm = ENVCFG_PMM_PMLEN_0; + } else if (pmlen <= PMLEN_7 && have_user_pmlen_7) { + pmlen = PMLEN_7; + pmm = ENVCFG_PMM_PMLEN_7; + } else if (pmlen <= PMLEN_16 && have_user_pmlen_16) { + pmlen = PMLEN_16; + pmm = ENVCFG_PMM_PMLEN_16; + } else { + return -EINVAL; + } + + /* + * Do not allow the enabling of the tagged address ABI if globally + * disabled via sysctl abi.tagged_addr_disabled, if pointer masking + * is disabled for userspace. + */ + if (arg & PR_TAGGED_ADDR_ENABLE && (tagged_addr_disabled || !pmlen)) + return -EINVAL; + + if (!(arg & PR_TAGGED_ADDR_ENABLE)) + pmlen = PMLEN_0; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + if (test_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags) && mm->context.pmlen != pmlen) { + mmap_write_unlock(mm); + return -EBUSY; + } + + envcfg_update_bits(task, ENVCFG_PMM, pmm); + mm->context.pmlen = pmlen; + + mmap_write_unlock(mm); + + return 0; +} + +long get_tagged_addr_ctrl(struct task_struct *task) +{ + struct thread_info *ti = task_thread_info(task); + long ret = 0; + + if (is_compat_thread(ti)) + return -EINVAL; + + /* + * The mm context's pmlen is set only when the tagged address ABI is + * enabled, so the effective PMLEN must be extracted from envcfg.PMM. + */ + switch (task->thread.envcfg & ENVCFG_PMM) { + case ENVCFG_PMM_PMLEN_7: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_7); + break; + case ENVCFG_PMM_PMLEN_16: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_16); + break; + } + + if (task->mm->context.pmlen) + ret |= PR_TAGGED_ADDR_ENABLE; + + return ret; +} + +static bool try_to_set_pmm(unsigned long value) +{ + csr_set(CSR_ENVCFG, value); + return (csr_read_clear(CSR_ENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; +} + +/* + * Global sysctl to disable the tagged user addresses support. This control + * only prevents the tagged address ABI enabling via prctl() and does not + * disable it for tasks that already opted in to the relaxed ABI. + */ + +static struct ctl_table tagged_addr_sysctl_table[] = { + { + .procname = "tagged_addr_disabled", + .mode = 0644, + .data = &tagged_addr_disabled, + .maxlen = sizeof(int), + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +}; + +static int __init tagged_addr_init(void) +{ + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return 0; + + /* + * envcfg.PMM is a WARL field. 
Detect which values are supported. + * Assume the supported PMLEN values are the same on all harts. + */ + csr_clear(CSR_ENVCFG, ENVCFG_PMM); + have_user_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); + have_user_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); + + if (!register_sysctl("abi", tagged_addr_sysctl_table)) + return -EINVAL; + + return 0; +} +core_initcall(tagged_addr_init); +#endif /* CONFIG_RISCV_ISA_SUPM */ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 92731ff8c79a..ea67e9fb7a58 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -28,6 +28,9 @@ enum riscv_regset { #ifdef CONFIG_RISCV_ISA_V REGSET_V, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + REGSET_TAGGED_ADDR_CTRL, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -152,6 +155,35 @@ static int riscv_vr_set(struct task_struct *target, } #endif +#ifdef CONFIG_RISCV_ISA_SUPM +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -182,6 +214,16 @@ static const struct user_regset riscv_user_regset[] = { .set = riscv_vr_set, }, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_RISCV_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view riscv_user_native_view = { diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 26c886db4fb3..016b48fcd6f2 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -244,6 +244,42 @@ static void __init parse_dtb(void) #endif } +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) +DEFINE_STATIC_KEY_TRUE(qspinlock_key); +EXPORT_SYMBOL(qspinlock_key); +#endif + +static void __init riscv_spinlock_init(void) +{ + char *using_ext = NULL; + + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) { + pr_info("Ticket spinlock: enabled\n"); + return; + } + + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && + riscv_isa_extension_available(NULL, ZABHA) && + riscv_isa_extension_available(NULL, ZACAS)) { + using_ext = "using Zabha"; + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) { + using_ext = "using Ziccrse"; + } +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) + else { + static_branch_disable(&qspinlock_key); + pr_info("Ticket spinlock: enabled\n"); + return; + } +#endif + + if (!using_ext) + pr_err("Queued spinlock without Zabha or Ziccrse"); + else + pr_info("Queued spinlock %s: enabled\n", using_ext); +} + extern void __init init_rt_signal_env(void); void __init setup_arch(char **cmdline_p) @@ -297,6 +333,7 @@ void __init setup_arch(char **cmdline_p) riscv_set_dma_cache_alignment(); riscv_user_isa_enable(); + riscv_spinlock_init(); } bool arch_cpu_is_hotpluggable(int cpu) diff --git a/arch/riscv/kernel/smpboot.c 
b/arch/riscv/kernel/smpboot.c index 0f8f1c95ac38..e36d20205bd7 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -233,8 +233,6 @@ asmlinkage __visible void smp_callin(void) numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, true); - riscv_user_isa_enable(); - /* * Remote cache and TLB flushes are ignored while the CPU is offline, * so flush them both right now just in case. diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index c8cec0cc5833..9a8a0dc035b2 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -14,7 +14,7 @@ void suspend_save_csrs(struct suspend_context *context) { - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) context->envcfg = csr_read(CSR_ENVCFG); context->tvec = csr_read(CSR_TVEC); context->ie = csr_read(CSR_IE); @@ -37,7 +37,7 @@ void suspend_save_csrs(struct suspend_context *context) void suspend_restore_csrs(struct suspend_context *context) { csr_write(CSR_SCRATCH, 0); - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) csr_write(CSR_ENVCFG, context->envcfg); csr_write(CSR_TVEC, context->tvec); csr_write(CSR_IE, context->ie); diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 711a31f27c3d..cb93adfffc48 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -150,6 +150,9 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZFH); EXT_KEY(ZFHMIN); } + + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM)) + EXT_KEY(SUPM); #undef EXT_KEY } @@ -201,6 +204,43 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) } #endif +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + int cpu; + u64 perf = -1ULL; + + /* Return if supported or not even if speed wasn't probed */ + for_each_cpu(cpu, cpus) { + int this_perf = per_cpu(vector_misaligned_access, cpu); + + if (perf == -1ULL) + perf = this_perf; + + if (perf != this_perf) { + perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + break; + } + } + + if (perf == -1ULL) + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + return perf; +} +#else +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + if (IS_ENABLED(CONFIG_RISCV_SLOW_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; +} +#endif + static void hwprobe_one_pair(struct riscv_hwprobe *pair, const struct cpumask *cpus) { @@ -229,6 +269,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = hwprobe_misaligned(cpus); break; + case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: + pair->value = hwprobe_vec_misaligned(cpus); + break; + case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE: pair->value = 0; if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 1b9867136b61..7cc108aed74e 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -16,6 +16,7 @@ #include <asm/entry-common.h> #include <asm/hwprobe.h> #include <asm/cpufeature.h> +#include <asm/vector.h> #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -320,12 +321,37 @@ union 
reg_data { u64 data_u64; }; -static bool unaligned_ctl __read_mostly; - /* sysctl hooks */ int unaligned_enabled __read_mostly = 1; /* Enabled by default */ -int handle_misaligned_load(struct pt_regs *regs) +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (get_insn(regs, epc, &insn)) + return -1; + + /* Only return 0 when in check_vector_unaligned_access_emulated */ + if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + regs->epc = epc + INSN_LEN(insn); + return 0; + } + + /* If vector instruction we don't emulate it yet */ + regs->epc = epc; + return -1; +} +#else +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + return -1; +} +#endif + +static int handle_scalar_misaligned_load(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; @@ -433,7 +459,7 @@ int handle_misaligned_load(struct pt_regs *regs) return 0; } -int handle_misaligned_store(struct pt_regs *regs) +static int handle_scalar_misaligned_store(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; @@ -524,11 +550,96 @@ int handle_misaligned_store(struct pt_regs *regs) return 0; } -static bool check_unaligned_access_emulated(int cpu) +int handle_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) { + if (get_insn(regs, epc, &insn)) + return -1; + + if (insn_is_vector(insn)) + return handle_vector_misaligned_load(regs); + } + + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_load(regs); + + return -1; +} + +int handle_misaligned_store(struct pt_regs *regs) { + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_store(regs); + + return -1; +} + +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused) +{ + long *mas_ptr = this_cpu_ptr(&vector_misaligned_access); + unsigned long tmp_var; + + *mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + kernel_vector_begin(); + /* + * In pre-13.0.0 versions of GCC, vector registers cannot appear in + * the clobber list. This inline asm clobbers v0, but since we do not + * currently build the kernel with V enabled, the v0 clobber arg is not + * needed (as the compiler will not emit vector code itself). If the kernel + * is changed to build with V enabled, the clobber arg will need to be + * added here. 
+ */ + __asm__ __volatile__ ( + ".balign 4\n\t" + ".option push\n\t" + ".option arch, +zve32x\n\t" + " vsetivli zero, 1, e16, m1, ta, ma\n\t" // Vectors of 16b + " vle16.v v0, (%[ptr])\n\t" // Load bytes + ".option pop\n\t" + : : [ptr] "r" ((u8 *)&tmp_var + 1)); + kernel_vector_end(); +} + +bool check_vector_unaligned_access_emulated_all_cpus(void) +{ + int cpu; + + if (!has_vector()) { + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + return false; + } + + schedule_on_each_cpu(check_vector_unaligned_access_emulated); + + for_each_online_cpu(cpu) + if (per_cpu(vector_misaligned_access, cpu) + == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return false; + + return true; +} +#else +bool check_vector_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +#endif + +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED + +static bool unaligned_ctl __read_mostly; + +void check_unaligned_access_emulated(struct work_struct *work __always_unused) +{ + int cpu = smp_processor_id(); long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); unsigned long tmp_var, tmp_val; - bool misaligned_emu_detected; *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; @@ -536,19 +647,16 @@ static bool check_unaligned_access_emulated(int cpu) " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUS uses emulated misaligned access at boot time. If that changed * when hotplugging the new cpu, this is something we don't handle. */ - if (unlikely(unaligned_ctl && !misaligned_emu_detected)) { + if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) { pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); while (true) cpu_relax(); } - - return misaligned_emu_detected; } bool check_unaligned_access_emulated_all_cpus(void) @@ -560,8 +668,11 @@ bool check_unaligned_access_emulated_all_cpus(void) * accesses emulated since tasks requesting such control can run on any * CPU. 
*/ + schedule_on_each_cpu(check_unaligned_access_emulated); + for_each_online_cpu(cpu) - if (!check_unaligned_access_emulated(cpu)) + if (per_cpu(misaligned_access_speed, cpu) + != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED) return false; unaligned_ctl = true; @@ -572,3 +683,9 @@ bool unaligned_ctl_available(void) { return unaligned_ctl; } +#else +bool check_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +#endif diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 160628a2116d..91f189cf1611 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -6,11 +6,13 @@ #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/jump_label.h> +#include <linux/kthread.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/types.h> #include <asm/cpufeature.h> #include <asm/hwprobe.h> +#include <asm/vector.h> #include "copy-unaligned.h" @@ -19,7 +21,8 @@ #define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) -DEFINE_PER_CPU(long, misaligned_access_speed); +DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS static cpumask_t fast_misaligned_access; @@ -191,6 +194,7 @@ static int riscv_online_cpu(unsigned int cpu) if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) goto exit; + check_unaligned_access_emulated(NULL); buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); if (!buf) { pr_warn("Allocation failure, not measuring misaligned performance\n"); @@ -259,23 +263,159 @@ out: kfree(bufs); return 0; } +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static int check_unaligned_access_speed_all_cpus(void) +{ + return 0; +} +#endif -static int check_unaligned_access_all_cpus(void) +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +static void check_vector_unaligned_access(struct work_struct *work __always_unused) { - bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; - if (!all_cpus_emulated) - return check_unaligned_access_speed_all_cpus(); + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return; + + page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!page) { + pr_warn("Allocation failure, not measuring vector misaligned performance\n"); + return; + } + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + + /* Do a warmup. */ + kernel_vector_begin(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. 
+ */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + kernel_vector_end(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n", + cpu); + + return; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow"); + + per_cpu(vector_misaligned_access, cpu) = speed; +} + +static int riscv_online_cpu_vec(unsigned int cpu) +{ + if (!has_vector()) + return 0; + + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) + return 0; + + check_vector_unaligned_access_emulated(NULL); + check_vector_unaligned_access(NULL); return 0; } -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static int check_unaligned_access_all_cpus(void) + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { - check_unaligned_access_emulated_all_cpus(); + schedule_on_each_cpu(check_vector_unaligned_access); + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. 
+ */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu_vec, NULL); return 0; } +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + return 0; +} #endif +static int check_unaligned_access_all_cpus(void) +{ + bool all_cpus_emulated, all_cpus_vec_unsupported; + + all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus(); + + if (!all_cpus_vec_unsupported && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"); + } + + if (!all_cpus_emulated) + return check_unaligned_access_speed_all_cpus(); + + return 0; +} + arch_initcall(check_unaligned_access_all_cpus); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 3f1c4b2d0b06..9a1b555e8733 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -45,7 +45,7 @@ $(obj)/vdso.o: $(obj)/vdso.so # link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,vdsold) + $(call if_changed,vdsold_and_check) LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \ --build-id=sha1 --hash-style=both --eh-frame-hdr @@ -65,7 +65,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE # actual build commands # The DSO images are built using a special linker script # Make sure only to export the intended __vdso_xxx symbol offsets. -quiet_cmd_vdsold = VDSOLD $@ - cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ +quiet_cmd_vdsold_and_check = VDSOLD $@ + cmd_vdsold_and_check = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ - rm $@.tmp + rm $@.tmp && \ + $(cmd_vdso_check) diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S new file mode 100644 index 000000000000..d16f19f1b3b6 --- /dev/null +++ b/arch/riscv/kernel/vec-copy-unaligned.S @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2024 Rivos Inc. */ + +#include <linux/args.h> +#include <linux/linkage.h> +#include <asm/asm.h> + + .text + +#define WORD_EEW 32 + +#define WORD_SEW CONCATENATE(e, WORD_EEW) +#define VEC_L CONCATENATE(vle, WORD_EEW).v +#define VEC_S CONCATENATE(vse, WORD_EEW).v + +/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using word loads and stores. */ +/* Note: The size is truncated to a multiple of WORD_EEW */ +SYM_FUNC_START(__riscv_copy_vec_words_unaligned) + andi a4, a2, ~(WORD_EEW-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, WORD_SEW, m8, ta, ma + VEC_L v0, (a1) + VEC_S v0, (a0) + addi a0, a0, WORD_EEW + addi a1, a1, WORD_EEW + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_words_unaligned) + +/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using only byte accesses.
*/ +/* Note: The size is truncated to a multiple of 8 */ +SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned) + andi a4, a2, ~(8-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, e8, m8, ta, ma + vle8.v v0, (a1) + vse8.v v0, (a0) + addi a0, a0, 8 + addi a1, a1, 8 + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned) diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 682b3feee451..821818886fab 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -66,7 +66,7 @@ void __init riscv_v_setup_ctx_cache(void) #endif } -static bool insn_is_vector(u32 insn_buf) +bool insn_is_vector(u32 insn_buf) { u32 opcode = insn_buf & __INSN_OPCODE_MASK; u32 width, csr; diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 26d1727f0550..0c3cbb0915ff 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -32,6 +32,7 @@ config KVM select KVM_XFER_TO_GUEST_WORK select KVM_GENERIC_MMU_NOTIFIER select SCHED_INFO + select GUEST_PERF_EVENTS if PERF_EVENTS help Support hosting virtualized guest machines. diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index c2cacfbc06a0..0fb1840c3e0a 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -9,27 +9,30 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o +# Ordered alphabetically +kvm-y += aia.o +kvm-y += aia_aplic.o +kvm-y += aia_device.o +kvm-y += aia_imsic.o kvm-y += main.o -kvm-y += vm.o -kvm-y += vmid.o -kvm-y += tlb.o kvm-y += mmu.o +kvm-y += nacl.o +kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vcpu_exit.o kvm-y += vcpu_fp.o -kvm-y += vcpu_vector.o kvm-y += vcpu_insn.o kvm-y += vcpu_onereg.o -kvm-y += vcpu_switch.o +kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o kvm-y += vcpu_sbi.o -kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o kvm-y += vcpu_sbi_base.o -kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_hsm.o +kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o +kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_sta.o +kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o +kvm-y += vcpu_switch.o kvm-y += vcpu_timer.o -kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o -kvm-y += aia.o -kvm-y += aia_device.o -kvm-y += aia_aplic.o -kvm-y += aia_imsic.o +kvm-y += vcpu_vector.o +kvm-y += vm.o +kvm-y += vmid.o diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 2967d305c442..dcced4db7fe8 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -16,6 +16,7 @@ #include <linux/percpu.h> #include <linux/spinlock.h> #include <asm/cpufeature.h> +#include <asm/kvm_nacl.h> struct aia_hgei_control { raw_spinlock_t lock; @@ -51,7 +52,7 @@ static int aia_find_hgei(struct kvm_vcpu *owner) return hgei; } -static void aia_set_hvictl(bool ext_irq_pending) +static inline unsigned long aia_hvictl_value(bool ext_irq_pending) { unsigned long hvictl; @@ -62,7 +63,7 @@ static void aia_set_hvictl(bool ext_irq_pending) hvictl = (IRQ_S_EXT << HVICTL_IID_SHIFT) & HVICTL_IID; hvictl |= ext_irq_pending; - csr_write(CSR_HVICTL, hvictl); + return hvictl; } #ifdef CONFIG_32BIT @@ -88,7 +89,7 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; if (kvm_riscv_aia_available()) - csr->vsieh = csr_read(CSR_VSIEH); + csr->vsieh = ncsr_read(CSR_VSIEH); } #endif @@ -115,7 +116,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) hgei = aia_find_hgei(vcpu); if (hgei > 0) - return !!(csr_read(CSR_HGEIP) & BIT(hgei)); + return 
!!(ncsr_read(CSR_HGEIP) & BIT(hgei)); return false; } @@ -128,45 +129,73 @@ void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) return; #ifdef CONFIG_32BIT - csr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph); + ncsr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph); #endif - aia_set_hvictl(!!(csr->hvip & BIT(IRQ_VS_EXT))); + ncsr_write(CSR_HVICTL, aia_hvictl_value(!!(csr->hvip & BIT(IRQ_VS_EXT)))); } void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + void *nsh; if (!kvm_riscv_aia_available()) return; - csr_write(CSR_VSISELECT, csr->vsiselect); - csr_write(CSR_HVIPRIO1, csr->hviprio1); - csr_write(CSR_HVIPRIO2, csr->hviprio2); + if (kvm_riscv_nacl_sync_csr_available()) { + nsh = nacl_shmem(); + nacl_csr_write(nsh, CSR_VSISELECT, csr->vsiselect); + nacl_csr_write(nsh, CSR_HVIPRIO1, csr->hviprio1); + nacl_csr_write(nsh, CSR_HVIPRIO2, csr->hviprio2); +#ifdef CONFIG_32BIT + nacl_csr_write(nsh, CSR_VSIEH, csr->vsieh); + nacl_csr_write(nsh, CSR_HVIPH, csr->hviph); + nacl_csr_write(nsh, CSR_HVIPRIO1H, csr->hviprio1h); + nacl_csr_write(nsh, CSR_HVIPRIO2H, csr->hviprio2h); +#endif + } else { + csr_write(CSR_VSISELECT, csr->vsiselect); + csr_write(CSR_HVIPRIO1, csr->hviprio1); + csr_write(CSR_HVIPRIO2, csr->hviprio2); #ifdef CONFIG_32BIT - csr_write(CSR_VSIEH, csr->vsieh); - csr_write(CSR_HVIPH, csr->hviph); - csr_write(CSR_HVIPRIO1H, csr->hviprio1h); - csr_write(CSR_HVIPRIO2H, csr->hviprio2h); + csr_write(CSR_VSIEH, csr->vsieh); + csr_write(CSR_HVIPH, csr->hviph); + csr_write(CSR_HVIPRIO1H, csr->hviprio1h); + csr_write(CSR_HVIPRIO2H, csr->hviprio2h); #endif + } } void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + void *nsh; if (!kvm_riscv_aia_available()) return; - csr->vsiselect = csr_read(CSR_VSISELECT); - csr->hviprio1 = csr_read(CSR_HVIPRIO1); - csr->hviprio2 = csr_read(CSR_HVIPRIO2); + if (kvm_riscv_nacl_available()) { + nsh = nacl_shmem(); + csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT); + csr->hviprio1 = nacl_csr_read(nsh, CSR_HVIPRIO1); + csr->hviprio2 = nacl_csr_read(nsh, CSR_HVIPRIO2); #ifdef CONFIG_32BIT - csr->vsieh = csr_read(CSR_VSIEH); - csr->hviph = csr_read(CSR_HVIPH); - csr->hviprio1h = csr_read(CSR_HVIPRIO1H); - csr->hviprio2h = csr_read(CSR_HVIPRIO2H); + csr->vsieh = nacl_csr_read(nsh, CSR_VSIEH); + csr->hviph = nacl_csr_read(nsh, CSR_HVIPH); + csr->hviprio1h = nacl_csr_read(nsh, CSR_HVIPRIO1H); + csr->hviprio2h = nacl_csr_read(nsh, CSR_HVIPRIO2H); #endif + } else { + csr->vsiselect = csr_read(CSR_VSISELECT); + csr->hviprio1 = csr_read(CSR_HVIPRIO1); + csr->hviprio2 = csr_read(CSR_HVIPRIO2); +#ifdef CONFIG_32BIT + csr->vsieh = csr_read(CSR_VSIEH); + csr->hviph = csr_read(CSR_HVIPH); + csr->hviprio1h = csr_read(CSR_HVIPRIO1H); + csr->hviprio2h = csr_read(CSR_HVIPRIO2H); +#endif + } } int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, @@ -250,20 +279,20 @@ static u8 aia_get_iprio8(struct kvm_vcpu *vcpu, unsigned int irq) switch (bitpos / BITS_PER_LONG) { case 0: - hviprio = csr_read(CSR_HVIPRIO1); + hviprio = ncsr_read(CSR_HVIPRIO1); break; case 1: #ifndef CONFIG_32BIT - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; #else - hviprio = csr_read(CSR_HVIPRIO1H); + hviprio = ncsr_read(CSR_HVIPRIO1H); break; case 2: - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; case 3: - hviprio = csr_read(CSR_HVIPRIO2H); + hviprio = ncsr_read(CSR_HVIPRIO2H); 
break; #endif default: @@ -283,20 +312,20 @@ static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio) switch (bitpos / BITS_PER_LONG) { case 0: - hviprio = csr_read(CSR_HVIPRIO1); + hviprio = ncsr_read(CSR_HVIPRIO1); break; case 1: #ifndef CONFIG_32BIT - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; #else - hviprio = csr_read(CSR_HVIPRIO1H); + hviprio = ncsr_read(CSR_HVIPRIO1H); break; case 2: - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; case 3: - hviprio = csr_read(CSR_HVIPRIO2H); + hviprio = ncsr_read(CSR_HVIPRIO2H); break; #endif default: @@ -308,20 +337,20 @@ static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio) switch (bitpos / BITS_PER_LONG) { case 0: - csr_write(CSR_HVIPRIO1, hviprio); + ncsr_write(CSR_HVIPRIO1, hviprio); break; case 1: #ifndef CONFIG_32BIT - csr_write(CSR_HVIPRIO2, hviprio); + ncsr_write(CSR_HVIPRIO2, hviprio); break; #else - csr_write(CSR_HVIPRIO1H, hviprio); + ncsr_write(CSR_HVIPRIO1H, hviprio); break; case 2: - csr_write(CSR_HVIPRIO2, hviprio); + ncsr_write(CSR_HVIPRIO2, hviprio); break; case 3: - csr_write(CSR_HVIPRIO2H, hviprio); + ncsr_write(CSR_HVIPRIO2H, hviprio); break; #endif default: @@ -377,7 +406,7 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, return KVM_INSN_ILLEGAL_TRAP; /* First try to emulate in kernel space */ - isel = csr_read(CSR_VSISELECT) & ISELECT_MASK; + isel = ncsr_read(CSR_VSISELECT) & ISELECT_MASK; if (isel >= ISELECT_IPRIO0 && isel <= ISELECT_IPRIO15) return aia_rmw_iprio(vcpu, isel, val, new_val, wr_mask); else if (isel >= IMSIC_FIRST && isel <= IMSIC_LAST && @@ -499,6 +528,10 @@ static int aia_hgei_init(void) hgctrl->free_bitmap = 0; } + /* Skip SGEI interrupt setup for zero guest external interrupts */ + if (!kvm_riscv_aia_nr_hgei) + goto skip_sgei_interrupt; + /* Find INTC irq domain */ domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY); @@ -522,11 +555,16 @@ static int aia_hgei_init(void) return rc; } +skip_sgei_interrupt: return 0; } static void aia_hgei_exit(void) { + /* Do nothing for zero guest external interrupts */ + if (!kvm_riscv_aia_nr_hgei) + return; + /* Free per-CPU SGEI interrupt */ free_percpu_irq(hgei_parent_irq, &aia_hgei); } @@ -536,7 +574,7 @@ void kvm_riscv_aia_enable(void) if (!kvm_riscv_aia_available()) return; - aia_set_hvictl(false); + csr_write(CSR_HVICTL, aia_hvictl_value(false)); csr_write(CSR_HVIPRIO1, 0x0); csr_write(CSR_HVIPRIO2, 0x0); #ifdef CONFIG_32BIT @@ -572,7 +610,7 @@ void kvm_riscv_aia_disable(void) csr_clear(CSR_HIE, BIT(IRQ_S_GEXT)); disable_percpu_irq(hgei_parent_irq); - aia_set_hvictl(false); + csr_write(CSR_HVICTL, aia_hvictl_value(false)); raw_spin_lock_irqsave(&hgctrl->lock, flags); diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c index da6ff1bade0d..f59d1c0c8c43 100644 --- a/arch/riscv/kvm/aia_aplic.c +++ b/arch/riscv/kvm/aia_aplic.c @@ -143,7 +143,7 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH || sm == APLIC_SOURCECFG_SM_LEVEL_LOW) { if (!pending) - goto skip_write_pending; + goto noskip_write_pending; if ((irqd->state & APLIC_IRQ_STATE_INPUT) && sm == APLIC_SOURCECFG_SM_LEVEL_LOW) goto skip_write_pending; @@ -152,6 +152,7 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) goto skip_write_pending; } +noskip_write_pending: if (pending) irqd->state |= APLIC_IRQ_STATE_PENDING; else diff --git 
a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index f3427f6de608..1fa8be5ee509 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -10,8 +10,8 @@ #include <linux/err.h> #include <linux/module.h> #include <linux/kvm_host.h> -#include <asm/csr.h> #include <asm/cpufeature.h> +#include <asm/kvm_nacl.h> #include <asm/sbi.h> long kvm_arch_dev_ioctl(struct file *filp, @@ -22,6 +22,12 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_arch_enable_virtualization_cpu(void) { + int rc; + + rc = kvm_riscv_nacl_enable(); + if (rc) + return rc; + csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); @@ -49,11 +55,21 @@ void kvm_arch_disable_virtualization_cpu(void) csr_write(CSR_HVIP, 0); csr_write(CSR_HEDELEG, 0); csr_write(CSR_HIDELEG, 0); + + kvm_riscv_nacl_disable(); +} + +static void kvm_riscv_teardown(void) +{ + kvm_riscv_aia_exit(); + kvm_riscv_nacl_exit(); + kvm_unregister_perf_callbacks(); } static int __init riscv_kvm_init(void) { int rc; + char slist[64]; const char *str; if (!riscv_isa_extension_available(NULL, h)) { @@ -71,16 +87,53 @@ static int __init riscv_kvm_init(void) return -ENODEV; } + rc = kvm_riscv_nacl_init(); + if (rc && rc != -ENODEV) + return rc; + kvm_riscv_gstage_mode_detect(); kvm_riscv_gstage_vmid_detect(); rc = kvm_riscv_aia_init(); - if (rc && rc != -ENODEV) + if (rc && rc != -ENODEV) { + kvm_riscv_nacl_exit(); return rc; + } kvm_info("hypervisor extension available\n"); + if (kvm_riscv_nacl_available()) { + rc = 0; + slist[0] = '\0'; + if (kvm_riscv_nacl_sync_csr_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "sync_csr"); + rc++; + } + if (kvm_riscv_nacl_sync_hfence_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "sync_hfence"); + rc++; + } + if (kvm_riscv_nacl_sync_sret_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "sync_sret"); + rc++; + } + if (kvm_riscv_nacl_autoswap_csr_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "autoswap_csr"); + rc++; + } + kvm_info("using SBI nested acceleration with %s\n", + (rc) ? slist : "no features"); + } + switch (kvm_riscv_gstage_mode()) { case HGATP_MODE_SV32X4: str = "Sv32x4"; @@ -105,9 +158,11 @@ static int __init riscv_kvm_init(void) kvm_info("AIA available with %d guest external interrupts\n", kvm_riscv_aia_nr_hgei); + kvm_register_perf_callbacks(NULL); + rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); if (rc) { - kvm_riscv_aia_exit(); + kvm_riscv_teardown(); return rc; } @@ -117,7 +172,7 @@ module_init(riscv_kvm_init); static void __exit riscv_kvm_exit(void) { - kvm_riscv_aia_exit(); + kvm_riscv_teardown(); kvm_exit(); } diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index b63650f9b966..1087ea74567b 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -15,7 +15,7 @@ #include <linux/vmalloc.h> #include <linux/kvm_host.h> #include <linux/sched/signal.h> -#include <asm/csr.h> +#include <asm/kvm_nacl.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -601,6 +601,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, bool logging = (memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY)) ? 
true : false; unsigned long vma_pagesize, mmu_seq; + struct page *page; /* We need minimum second+third level pages */ ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels); @@ -631,7 +632,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, /* * Read mmu_invalidate_seq so that KVM can detect if the results of - * vma_lookup() or gfn_to_pfn_prot() become stale priort to acquiring + * vma_lookup() or __kvm_faultin_pfn() become stale prior to acquiring * kvm->mmu_lock. * * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs @@ -647,7 +648,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, return -EFAULT; } - hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); + hfn = kvm_faultin_pfn(vcpu, gfn, is_write, &writable, &page); if (hfn == KVM_PFN_ERR_HWPOISON) { send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, vma_pageshift, current); @@ -669,7 +670,6 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, goto out_unlock; if (writable) { - kvm_set_pfn_dirty(hfn); mark_page_dirty(kvm, gfn); ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, vma_pagesize, false, true); @@ -682,9 +682,8 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, kvm_err("Failed to map in G-stage\n"); out_unlock: + kvm_release_faultin_page(kvm, page, ret && ret != -EEXIST, writable); spin_unlock(&kvm->mmu_lock); - kvm_set_pfn_accessed(hfn); - kvm_release_pfn_clean(hfn); return ret; } @@ -732,7 +731,7 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu) hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; - csr_write(CSR_HGATP, hgatp); + ncsr_write(CSR_HGATP, hgatp); if (!kvm_riscv_gstage_vmid_bits()) kvm_riscv_local_hfence_gvma_all(); diff --git a/arch/riscv/kvm/nacl.c b/arch/riscv/kvm/nacl.c new file mode 100644 index 000000000000..08a95ad9ada2 --- /dev/null +++ b/arch/riscv/kvm/nacl.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Ventana Micro Systems Inc. 
+ */ + +#include <linux/kvm_host.h> +#include <linux/vmalloc.h> +#include <asm/kvm_nacl.h> + +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available); +DEFINE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl); + +void __kvm_riscv_nacl_hfence(void *shmem, + unsigned long control, + unsigned long page_num, + unsigned long page_count) +{ + int i, ent = -1, try_count = 5; + unsigned long *entp; + +again: + for (i = 0; i < SBI_NACL_SHMEM_HFENCE_ENTRY_MAX; i++) { + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i); + if (lelong_to_cpu(*entp) & SBI_NACL_SHMEM_HFENCE_CONFIG_PEND) + continue; + + ent = i; + break; + } + + if (ent < 0) { + if (try_count) { + nacl_sync_hfence(-1UL); + goto again; + } else { + pr_warn("KVM: No free entry in NACL shared memory\n"); + return; + } + } + + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i); + *entp = cpu_to_lelong(control); + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PNUM(i); + *entp = cpu_to_lelong(page_num); + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PCOUNT(i); + *entp = cpu_to_lelong(page_count); +} + +int kvm_riscv_nacl_enable(void) +{ + int rc; + struct sbiret ret; + struct kvm_riscv_nacl *nacl; + + if (!kvm_riscv_nacl_available()) + return 0; + nacl = this_cpu_ptr(&kvm_riscv_nacl); + + ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM, + nacl->shmem_phys, 0, 0, 0, 0, 0); + rc = sbi_err_map_linux_errno(ret.error); + if (rc) + return rc; + + return 0; +} + +void kvm_riscv_nacl_disable(void) +{ + if (!kvm_riscv_nacl_available()) + return; + + sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM, + SBI_SHMEM_DISABLE, SBI_SHMEM_DISABLE, 0, 0, 0, 0); +} + +void kvm_riscv_nacl_exit(void) +{ + int cpu; + struct kvm_riscv_nacl *nacl; + + if (!kvm_riscv_nacl_available()) + return; + + /* Free per-CPU shared memory */ + for_each_possible_cpu(cpu) { + nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu); + if (!nacl->shmem) + continue; + + free_pages((unsigned long)nacl->shmem, + get_order(SBI_NACL_SHMEM_SIZE)); + nacl->shmem = NULL; + nacl->shmem_phys = 0; + } +} + +static long nacl_probe_feature(long feature_id) +{ + struct sbiret ret; + + if (!kvm_riscv_nacl_available()) + return 0; + + ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_PROBE_FEATURE, + feature_id, 0, 0, 0, 0, 0); + return ret.value; +} + +int kvm_riscv_nacl_init(void) +{ + int cpu; + struct page *shmem_page; + struct kvm_riscv_nacl *nacl; + + if (sbi_spec_version < sbi_mk_version(1, 0) || + sbi_probe_extension(SBI_EXT_NACL) <= 0) + return -ENODEV; + + /* Enable NACL support */ + static_branch_enable(&kvm_riscv_nacl_available); + + /* Probe NACL features */ + if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_CSR)) + static_branch_enable(&kvm_riscv_nacl_sync_csr_available); + if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_HFENCE)) + static_branch_enable(&kvm_riscv_nacl_sync_hfence_available); + if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_SRET)) + static_branch_enable(&kvm_riscv_nacl_sync_sret_available); + if (nacl_probe_feature(SBI_NACL_FEAT_AUTOSWAP_CSR)) + static_branch_enable(&kvm_riscv_nacl_autoswap_csr_available); + + /* Allocate per-CPU shared memory */ + for_each_possible_cpu(cpu) { + nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu); + + shmem_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, + get_order(SBI_NACL_SHMEM_SIZE)); + if (!shmem_page) { + kvm_riscv_nacl_exit(); + return
-ENOMEM; + } + nacl->shmem = page_to_virt(shmem_page); + nacl->shmem_phys = page_to_phys(shmem_page); + } + + return 0; +} diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 23c0e82b5103..2f91ea5f8493 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -14,6 +14,7 @@ #include <asm/csr.h> #include <asm/cpufeature.h> #include <asm/insn-def.h> +#include <asm/kvm_nacl.h> #define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) @@ -186,18 +187,24 @@ void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu) void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu) { - struct kvm_vmid *vmid; + struct kvm_vmid *v = &vcpu->kvm->arch.vmid; + unsigned long vmid = READ_ONCE(v->vmid); - vmid = &vcpu->kvm->arch.vmid; - kvm_riscv_local_hfence_gvma_vmid_all(READ_ONCE(vmid->vmid)); + if (kvm_riscv_nacl_available()) + nacl_hfence_gvma_vmid_all(nacl_shmem(), vmid); + else + kvm_riscv_local_hfence_gvma_vmid_all(vmid); } void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu) { - struct kvm_vmid *vmid; + struct kvm_vmid *v = &vcpu->kvm->arch.vmid; + unsigned long vmid = READ_ONCE(v->vmid); - vmid = &vcpu->kvm->arch.vmid; - kvm_riscv_local_hfence_vvma_all(READ_ONCE(vmid->vmid)); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_all(nacl_shmem(), vmid); + else + kvm_riscv_local_hfence_vvma_all(vmid); } static bool vcpu_hfence_dequeue(struct kvm_vcpu *vcpu, @@ -251,6 +258,7 @@ static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu, void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu) { + unsigned long vmid; struct kvm_riscv_hfence d = { 0 }; struct kvm_vmid *v = &vcpu->kvm->arch.vmid; @@ -259,26 +267,41 @@ void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu) case KVM_RISCV_HFENCE_UNKNOWN: break; case KVM_RISCV_HFENCE_GVMA_VMID_GPA: - kvm_riscv_local_hfence_gvma_vmid_gpa( - READ_ONCE(v->vmid), - d.addr, d.size, d.order); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_gvma_vmid(nacl_shmem(), vmid, + d.addr, d.size, d.order); + else + kvm_riscv_local_hfence_gvma_vmid_gpa(vmid, d.addr, + d.size, d.order); break; case KVM_RISCV_HFENCE_VVMA_ASID_GVA: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); - kvm_riscv_local_hfence_vvma_asid_gva( - READ_ONCE(v->vmid), d.asid, - d.addr, d.size, d.order); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_asid(nacl_shmem(), vmid, d.asid, + d.addr, d.size, d.order); + else + kvm_riscv_local_hfence_vvma_asid_gva(vmid, d.asid, d.addr, + d.size, d.order); break; case KVM_RISCV_HFENCE_VVMA_ASID_ALL: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); - kvm_riscv_local_hfence_vvma_asid_all( - READ_ONCE(v->vmid), d.asid); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_asid_all(nacl_shmem(), vmid, d.asid); + else + kvm_riscv_local_hfence_vvma_asid_all(vmid, d.asid); break; case KVM_RISCV_HFENCE_VVMA_GVA: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD); - kvm_riscv_local_hfence_vvma_gva( - READ_ONCE(v->vmid), - d.addr, d.size, d.order); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma(nacl_shmem(), vmid, + d.addr, d.size, d.order); + else + kvm_riscv_local_hfence_vvma_gva(vmid, d.addr, + d.size, d.order); break; default: break; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 8d7d381737ee..dc3f76f6e46c 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -17,8 +17,8 @@ #include <linux/sched/signal.h> #include <linux/fs.h> #include 
<linux/kvm_host.h> -#include <asm/csr.h> #include <asm/cacheflush.h> +#include <asm/kvm_nacl.h> #include <asm/kvm_vcpu_vector.h> #define CREATE_TRACE_POINTS @@ -226,6 +226,13 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false; } +#ifdef CONFIG_GUEST_PERF_EVENTS +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.guest_context.sepc; +} +#endif + vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) { return VM_FAULT_SIGBUS; @@ -361,10 +368,10 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; /* Read current HVIP and VSIE CSRs */ - csr->vsie = csr_read(CSR_VSIE); + csr->vsie = ncsr_read(CSR_VSIE); /* Sync-up HVIP.VSSIP bit changes does by Guest */ - hvip = csr_read(CSR_HVIP); + hvip = ncsr_read(CSR_HVIP); if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) { if (hvip & (1UL << IRQ_VS_SOFT)) { if (!test_and_set_bit(IRQ_VS_SOFT, @@ -561,26 +568,49 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + void *nsh; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; - csr_write(CSR_VSSTATUS, csr->vsstatus); - csr_write(CSR_VSIE, csr->vsie); - csr_write(CSR_VSTVEC, csr->vstvec); - csr_write(CSR_VSSCRATCH, csr->vsscratch); - csr_write(CSR_VSEPC, csr->vsepc); - csr_write(CSR_VSCAUSE, csr->vscause); - csr_write(CSR_VSTVAL, csr->vstval); - csr_write(CSR_HEDELEG, cfg->hedeleg); - csr_write(CSR_HVIP, csr->hvip); - csr_write(CSR_VSATP, csr->vsatp); - csr_write(CSR_HENVCFG, cfg->henvcfg); - if (IS_ENABLED(CONFIG_32BIT)) - csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { - csr_write(CSR_HSTATEEN0, cfg->hstateen0); + if (kvm_riscv_nacl_sync_csr_available()) { + nsh = nacl_shmem(); + nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus); + nacl_csr_write(nsh, CSR_VSIE, csr->vsie); + nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec); + nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch); + nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc); + nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause); + nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval); + nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg); + nacl_csr_write(nsh, CSR_HVIP, csr->hvip); + nacl_csr_write(nsh, CSR_VSATP, csr->vsatp); + nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg); if (IS_ENABLED(CONFIG_32BIT)) - csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { + nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0); + if (IS_ENABLED(CONFIG_32BIT)) + nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + } + } else { + csr_write(CSR_VSSTATUS, csr->vsstatus); + csr_write(CSR_VSIE, csr->vsie); + csr_write(CSR_VSTVEC, csr->vstvec); + csr_write(CSR_VSSCRATCH, csr->vsscratch); + csr_write(CSR_VSEPC, csr->vsepc); + csr_write(CSR_VSCAUSE, csr->vscause); + csr_write(CSR_VSTVAL, csr->vstval); + csr_write(CSR_HEDELEG, cfg->hedeleg); + csr_write(CSR_HVIP, csr->hvip); + csr_write(CSR_VSATP, csr->vsatp); + csr_write(CSR_HENVCFG, cfg->henvcfg); + if (IS_ENABLED(CONFIG_32BIT)) + csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { + csr_write(CSR_HSTATEEN0, cfg->hstateen0); + if (IS_ENABLED(CONFIG_32BIT)) + csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + } } 
kvm_riscv_gstage_update_hgatp(vcpu); @@ -603,6 +633,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + void *nsh; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; vcpu->cpu = -1; @@ -618,15 +649,28 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.isa); kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context); - csr->vsstatus = csr_read(CSR_VSSTATUS); - csr->vsie = csr_read(CSR_VSIE); - csr->vstvec = csr_read(CSR_VSTVEC); - csr->vsscratch = csr_read(CSR_VSSCRATCH); - csr->vsepc = csr_read(CSR_VSEPC); - csr->vscause = csr_read(CSR_VSCAUSE); - csr->vstval = csr_read(CSR_VSTVAL); - csr->hvip = csr_read(CSR_HVIP); - csr->vsatp = csr_read(CSR_VSATP); + if (kvm_riscv_nacl_available()) { + nsh = nacl_shmem(); + csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS); + csr->vsie = nacl_csr_read(nsh, CSR_VSIE); + csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC); + csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH); + csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC); + csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE); + csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL); + csr->hvip = nacl_csr_read(nsh, CSR_HVIP); + csr->vsatp = nacl_csr_read(nsh, CSR_VSATP); + } else { + csr->vsstatus = csr_read(CSR_VSSTATUS); + csr->vsie = csr_read(CSR_VSIE); + csr->vstvec = csr_read(CSR_VSTVEC); + csr->vsscratch = csr_read(CSR_VSSCRATCH); + csr->vsepc = csr_read(CSR_VSEPC); + csr->vscause = csr_read(CSR_VSCAUSE); + csr->vstval = csr_read(CSR_VSTVAL); + csr->hvip = csr_read(CSR_HVIP); + csr->vsatp = csr_read(CSR_VSATP); + } } static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) @@ -681,7 +725,7 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - csr_write(CSR_HVIP, csr->hvip); + ncsr_write(CSR_HVIP, csr->hvip); kvm_riscv_vcpu_aia_update_hvip(vcpu); } @@ -691,6 +735,7 @@ static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu * struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren); vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg); if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) @@ -704,6 +749,7 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren); csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg); if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) @@ -718,11 +764,81 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v * This must be noinstr as instrumentation may make use of RCU, and this is not * safe during the EQS. 
*/ -static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) +static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu, + struct kvm_cpu_trap *trap) { + void *nsh; + struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context; + struct kvm_cpu_context *hcntx = &vcpu->arch.host_context; + + /* + * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and + * HTINST) here because we do local_irq_enable() after this + * function in kvm_arch_vcpu_ioctl_run() which can result in + * an interrupt immediately after local_irq_enable() and can + * potentially change trap CSRs. + */ + kvm_riscv_vcpu_swap_in_guest_state(vcpu); guest_state_enter_irqoff(); - __kvm_riscv_switch_to(&vcpu->arch); + + if (kvm_riscv_nacl_sync_sret_available()) { + nsh = nacl_shmem(); + + if (kvm_riscv_nacl_autoswap_csr_available()) { + hcntx->hstatus = + nacl_csr_read(nsh, CSR_HSTATUS); + nacl_scratch_write_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET + + SBI_NACL_SHMEM_AUTOSWAP_HSTATUS, + gcntx->hstatus); + nacl_scratch_write_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET, + SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS); + } else if (kvm_riscv_nacl_sync_csr_available()) { + hcntx->hstatus = nacl_csr_swap(nsh, + CSR_HSTATUS, gcntx->hstatus); + } else { + hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); + } + + nacl_scratch_write_longs(nsh, + SBI_NACL_SHMEM_SRET_OFFSET + + SBI_NACL_SHMEM_SRET_X(1), + &gcntx->ra, + SBI_NACL_SHMEM_SRET_X_LAST); + + __kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL, + SBI_EXT_NACL_SYNC_SRET); + + if (kvm_riscv_nacl_autoswap_csr_available()) { + nacl_scratch_write_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET, + 0); + gcntx->hstatus = nacl_scratch_read_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET + + SBI_NACL_SHMEM_AUTOSWAP_HSTATUS); + } else { + gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); + } + + trap->htval = nacl_csr_read(nsh, CSR_HTVAL); + trap->htinst = nacl_csr_read(nsh, CSR_HTINST); + } else { + hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); + + __kvm_riscv_switch_to(&vcpu->arch); + + gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); + + trap->htval = csr_read(CSR_HTVAL); + trap->htinst = csr_read(CSR_HTINST); + } + + trap->sepc = gcntx->sepc; + trap->scause = csr_read(CSR_SCAUSE); + trap->stval = csr_read(CSR_STVAL); + vcpu->arch.last_exit_cpu = vcpu->cpu; guest_state_exit_irqoff(); kvm_riscv_vcpu_swap_in_host_state(vcpu); @@ -839,22 +955,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) guest_timing_enter_irqoff(); - kvm_riscv_vcpu_enter_exit(vcpu); + kvm_riscv_vcpu_enter_exit(vcpu, &trap); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; - /* - * Save SCAUSE, STVAL, HTVAL, and HTINST because we might - * get an interrupt between __kvm_riscv_switch_to() and - * local_irq_enable() which can potentially change CSRs. 
- */ - trap.sepc = vcpu->arch.guest_context.sepc; - trap.scause = csr_read(CSR_SCAUSE); - trap.stval = csr_read(CSR_STVAL); - trap.htval = csr_read(CSR_HTVAL); - trap.htinst = csr_read(CSR_HTINST); - /* Syncup interrupts state with HW */ kvm_riscv_vcpu_sync_interrupts(vcpu); diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index b319c4c13c54..5b68490ad9b7 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -34,9 +34,11 @@ static const unsigned long kvm_isa_ext_arr[] = { [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, /* Multi letter extensions (alphabetically sorted) */ + [KVM_RISCV_ISA_EXT_SMNPM] = RISCV_ISA_EXT_SSNPM, KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSCOFPMF), + KVM_ISA_EXT_ARR(SSNPM), KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), @@ -127,8 +129,10 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: + case KVM_RISCV_ISA_EXT_SMNPM: /* There is not architectural config bit to disable sscofpmf completely */ case KVM_RISCV_ISA_EXT_SSCOFPMF: + case KVM_RISCV_ISA_EXT_SSNPM: case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 7de128be8db9..6e704ed86a83 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -486,19 +486,22 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu) struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; const struct kvm_riscv_sbi_extension_entry *entry; const struct kvm_vcpu_sbi_extension *ext; - int i; + int idx, i; for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { entry = &sbi_ext[i]; ext = entry->ext_ptr; + idx = entry->ext_idx; + + if (idx < 0 || idx >= ARRAY_SIZE(scontext->ext_status)) + continue; if (ext->probe && !ext->probe(vcpu)) { - scontext->ext_status[entry->ext_idx] = - KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; + scontext->ext_status[idx] = KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; continue; } - scontext->ext_status[entry->ext_idx] = ext->default_disabled ? + scontext->ext_status[idx] = ext->default_disabled ? 
KVM_RISCV_SBI_EXT_STATUS_DISABLED : KVM_RISCV_SBI_EXT_STATUS_ENABLED; } diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S index 0c26189aa01c..47686bcb21e0 100644 --- a/arch/riscv/kvm/vcpu_switch.S +++ b/arch/riscv/kvm/vcpu_switch.S @@ -11,11 +11,7 @@ #include <asm/asm-offsets.h> #include <asm/csr.h> - .text - .altmacro - .option norelax - -SYM_FUNC_START(__kvm_riscv_switch_to) +.macro SAVE_HOST_GPRS /* Save Host GPRs (except A0 and T0-T6) */ REG_S ra, (KVM_ARCH_HOST_RA)(a0) REG_S sp, (KVM_ARCH_HOST_SP)(a0) @@ -40,39 +36,33 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_S s9, (KVM_ARCH_HOST_S9)(a0) REG_S s10, (KVM_ARCH_HOST_S10)(a0) REG_S s11, (KVM_ARCH_HOST_S11)(a0) +.endm +.macro SAVE_HOST_AND_RESTORE_GUEST_CSRS __resume_addr /* Load Guest CSR values */ REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0) - REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0) - REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - la t4, .Lkvm_switch_return - REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0) + la t1, \__resume_addr + REG_L t2, (KVM_ARCH_GUEST_SEPC)(a0) /* Save Host and Restore Guest SSTATUS */ csrrw t0, CSR_SSTATUS, t0 - /* Save Host and Restore Guest HSTATUS */ - csrrw t1, CSR_HSTATUS, t1 - - /* Save Host and Restore Guest SCOUNTEREN */ - csrrw t2, CSR_SCOUNTEREN, t2 - /* Save Host STVEC and change it to return path */ - csrrw t4, CSR_STVEC, t4 + csrrw t1, CSR_STVEC, t1 + + /* Restore Guest SEPC */ + csrw CSR_SEPC, t2 /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */ csrrw t3, CSR_SSCRATCH, a0 - /* Restore Guest SEPC */ - csrw CSR_SEPC, t5 - /* Store Host CSR values */ REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0) - REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0) - REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0) + REG_S t1, (KVM_ARCH_HOST_STVEC)(a0) REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0) - REG_S t4, (KVM_ARCH_HOST_STVEC)(a0) +.endm +.macro RESTORE_GUEST_GPRS /* Restore Guest GPRs (except A0) */ REG_L ra, (KVM_ARCH_GUEST_RA)(a0) REG_L sp, (KVM_ARCH_GUEST_SP)(a0) @@ -107,13 +97,9 @@ SYM_FUNC_START(__kvm_riscv_switch_to) /* Restore Guest A0 */ REG_L a0, (KVM_ARCH_GUEST_A0)(a0) +.endm - /* Resume Guest */ - sret - - /* Back to Host */ - .align 2 -.Lkvm_switch_return: +.macro SAVE_GUEST_GPRS /* Swap Guest A0 with SSCRATCH */ csrrw a0, CSR_SSCRATCH, a0 @@ -148,39 +134,33 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_S t4, (KVM_ARCH_GUEST_T4)(a0) REG_S t5, (KVM_ARCH_GUEST_T5)(a0) REG_S t6, (KVM_ARCH_GUEST_T6)(a0) +.endm +.macro SAVE_GUEST_AND_RESTORE_HOST_CSRS /* Load Host CSR values */ - REG_L t1, (KVM_ARCH_HOST_STVEC)(a0) - REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0) - REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0) - REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0) - REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0) - - /* Save Guest SEPC */ - csrr t0, CSR_SEPC + REG_L t0, (KVM_ARCH_HOST_STVEC)(a0) + REG_L t1, (KVM_ARCH_HOST_SSCRATCH)(a0) + REG_L t2, (KVM_ARCH_HOST_SSTATUS)(a0) /* Save Guest A0 and Restore Host SSCRATCH */ - csrrw t2, CSR_SSCRATCH, t2 + csrrw t1, CSR_SSCRATCH, t1 - /* Restore Host STVEC */ - csrw CSR_STVEC, t1 - - /* Save Guest and Restore Host SCOUNTEREN */ - csrrw t3, CSR_SCOUNTEREN, t3 + /* Save Guest SEPC */ + csrr t3, CSR_SEPC - /* Save Guest and Restore Host HSTATUS */ - csrrw t4, CSR_HSTATUS, t4 + /* Restore Host STVEC */ + csrw CSR_STVEC, t0 /* Save Guest and Restore Host SSTATUS */ - csrrw t5, CSR_SSTATUS, t5 + csrrw t2, CSR_SSTATUS, t2 /* Store Guest CSR values */ - REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0) - REG_S t2, (KVM_ARCH_GUEST_A0)(a0) - REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0) - REG_S t5, 
(KVM_ARCH_GUEST_SSTATUS)(a0) + REG_S t1, (KVM_ARCH_GUEST_A0)(a0) + REG_S t2, (KVM_ARCH_GUEST_SSTATUS)(a0) + REG_S t3, (KVM_ARCH_GUEST_SEPC)(a0) +.endm +.macro RESTORE_HOST_GPRS /* Restore Host GPRs (except A0 and T0-T6) */ REG_L ra, (KVM_ARCH_HOST_RA)(a0) REG_L sp, (KVM_ARCH_HOST_SP)(a0) @@ -205,11 +185,68 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_L s9, (KVM_ARCH_HOST_S9)(a0) REG_L s10, (KVM_ARCH_HOST_S10)(a0) REG_L s11, (KVM_ARCH_HOST_S11)(a0) +.endm + + .text + .altmacro + .option norelax + + /* + * Parameters: + * A0 <= Pointer to struct kvm_vcpu_arch + */ +SYM_FUNC_START(__kvm_riscv_switch_to) + SAVE_HOST_GPRS + + SAVE_HOST_AND_RESTORE_GUEST_CSRS .Lkvm_switch_return + + RESTORE_GUEST_GPRS + + /* Resume Guest using SRET */ + sret + + /* Back to Host */ + .align 2 +.Lkvm_switch_return: + SAVE_GUEST_GPRS + + SAVE_GUEST_AND_RESTORE_HOST_CSRS + + RESTORE_HOST_GPRS /* Return to C code */ ret SYM_FUNC_END(__kvm_riscv_switch_to) + /* + * Parameters: + * A0 <= Pointer to struct kvm_vcpu_arch + * A1 <= SBI extension ID + * A2 <= SBI function ID + */ +SYM_FUNC_START(__kvm_riscv_nacl_switch_to) + SAVE_HOST_GPRS + + SAVE_HOST_AND_RESTORE_GUEST_CSRS .Lkvm_nacl_switch_return + + /* Resume Guest using SBI nested acceleration */ + add a6, a2, zero + add a7, a1, zero + ecall + + /* Back to Host */ + .align 2 +.Lkvm_nacl_switch_return: + SAVE_GUEST_GPRS + + SAVE_GUEST_AND_RESTORE_HOST_CSRS + + RESTORE_HOST_GPRS + + /* Return to C code */ + ret +SYM_FUNC_END(__kvm_riscv_nacl_switch_to) + SYM_CODE_START(__kvm_riscv_unpriv_trap) /* * We assume that faulting unpriv load/store instruction is diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 75486b25ac45..96e7a4e463f7 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -11,8 +11,8 @@ #include <linux/kvm_host.h> #include <linux/uaccess.h> #include <clocksource/timer-riscv.h> -#include <asm/csr.h> #include <asm/delay.h> +#include <asm/kvm_nacl.h> #include <asm/kvm_vcpu_timer.h> static u64 kvm_riscv_current_cycles(struct kvm_guest_timer *gt) @@ -72,12 +72,12 @@ static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) static int kvm_riscv_vcpu_update_vstimecmp(struct kvm_vcpu *vcpu, u64 ncycles) { #if defined(CONFIG_32BIT) - csr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF); - csr_write(CSR_VSTIMECMPH, ncycles >> 32); + ncsr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF); + ncsr_write(CSR_VSTIMECMPH, ncycles >> 32); #else - csr_write(CSR_VSTIMECMP, ncycles); + ncsr_write(CSR_VSTIMECMP, ncycles); #endif - return 0; + return 0; } static int kvm_riscv_vcpu_update_hrtimer(struct kvm_vcpu *vcpu, u64 ncycles) @@ -289,10 +289,10 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu) struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; #if defined(CONFIG_32BIT) - csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta)); - csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32)); + ncsr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta)); + ncsr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32)); #else - csr_write(CSR_HTIMEDELTA, gt->time_delta); + ncsr_write(CSR_HTIMEDELTA, gt->time_delta); #endif } @@ -306,10 +306,10 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) return; #if defined(CONFIG_32BIT) - csr_write(CSR_VSTIMECMP, (u32)t->next_cycles); - csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); + ncsr_write(CSR_VSTIMECMP, (u32)t->next_cycles); + ncsr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); #else - csr_write(CSR_VSTIMECMP, t->next_cycles); + ncsr_write(CSR_VSTIMECMP, t->next_cycles); 
#endif /* timer should be enabled for the remaining operations */ @@ -327,10 +327,10 @@ void kvm_riscv_vcpu_timer_sync(struct kvm_vcpu *vcpu) return; #if defined(CONFIG_32BIT) - t->next_cycles = csr_read(CSR_VSTIMECMP); - t->next_cycles |= (u64)csr_read(CSR_VSTIMECMPH) << 32; + t->next_cycles = ncsr_read(CSR_VSTIMECMP); + t->next_cycles |= (u64)ncsr_read(CSR_VSTIMECMPH) << 32; #else - t->next_cycles = csr_read(CSR_VSTIMECMP); + t->next_cycles = ncsr_read(CSR_VSTIMECMP); #endif }
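The vcpu_switch.S rework above factors the GPR and CSR save/restore sequences into reusable macros so that the new __kvm_riscv_nacl_switch_to() entry point can share them with __kvm_riscv_switch_to(). Instead of resuming the guest with a local sret, the NACL variant pre-stages the guest GPRs and (optionally) HSTATUS in the shared-memory scratch area and then issues an SBI call, placing the function ID in a6 and the extension ID in a7 as the SBI calling convention requires. The sketch below is a hypothetical, self-contained illustration of that register convention only; it is not kernel code, and the NACL SYNC_SRET request in particular does not return to the following instruction the way an ordinary SBI call does; control comes back through the trap vector (.Lkvm_nacl_switch_return) once the guest exits.

/*
 * Hypothetical wrapper showing the SBI calling convention behind the
 * "ecall" in __kvm_riscv_nacl_switch_to(): extension ID in a7,
 * function ID in a6, arguments in a0..a5, error/value back in a0/a1.
 */
struct example_sbiret {
	long error;
	long value;
};

static struct example_sbiret example_sbi_call(unsigned long ext_id,
					      unsigned long func_id,
					      unsigned long arg0)
{
	register unsigned long a0 asm("a0") = arg0;
	register unsigned long a6 asm("a6") = func_id;
	register unsigned long a7 asm("a7") = ext_id;
	register unsigned long a1 asm("a1");

	asm volatile ("ecall"
		      : "+r" (a0), "=r" (a1)
		      : "r" (a6), "r" (a7)
		      : "memory");

	return (struct example_sbiret){ .error = (long)a0, .value = (long)a1 };
}

Two related consequences of the rework are visible in the earlier hunks. HSTATUS and SCOUNTEREN no longer appear in the assembly path at all: HSTATUS is swapped in C (or auto-swapped via the NACL scratch area) inside kvm_riscv_vcpu_enter_exit(), and SCOUNTEREN is swapped with csr_swap() in kvm_riscv_vcpu_swap_in_guest_state()/kvm_riscv_vcpu_swap_in_host_state(). In addition, the trap CSRs (SEPC, SCAUSE, STVAL, HTVAL, HTINST) are now captured inside kvm_riscv_vcpu_enter_exit() itself, before kvm_arch_vcpu_ioctl_run() re-enables interrupts, so a stray early interrupt cannot clobber them.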