diff options
author | Deepak Gupta <debug@rivosinc.com> | 2023-09-27 22:47:59 +0000 |
---|---|---|
committer | Palmer Dabbelt <palmer@rivosinc.com> | 2023-10-27 14:43:05 -0700 |
commit | be97d0db5f44c0674480cb79ac6f5b0529b84c76 (patch) | |
tree | 9ff33ec11e30d6d5e27a55a8b6830ab0b2345b04 /arch/riscv/kernel/entry.S | |
parent | 0bb80ecc33a8fb5a682236443c1e740d5c917d1d (diff) |
riscv: VMAP_STACK overflow detection thread-safe
commit 31da94c25aea ("riscv: add VMAP_STACK overflow detection") added
support for CONFIG_VMAP_STACK. If overflow is detected, CPU switches to
`shadow_stack` temporarily before switching finally to per-cpu
`overflow_stack`.
If two CPUs/harts are racing and end up in over flowing kernel stack, one
or both will end up corrupting each other state because `shadow_stack` is
not per-cpu. This patch optimizes per-cpu overflow stack switch by
directly picking per-cpu `overflow_stack` and gets rid of `shadow_stack`.
Following are the changes in this patch
- Defines an asm macro to obtain per-cpu symbols in destination
register.
- In entry.S, when overflow is detected, per-cpu overflow stack is
located using per-cpu asm macro. Computing per-cpu symbol requires
a temporary register. x31 is saved away into CSR_SCRATCH
(CSR_SCRATCH is anyways zero since we're in kernel).
Please see Links for additional relevant disccussion and alternative
solution.
Tested by `echo EXHAUST_STACK > /sys/kernel/debug/provoke-crash/DIRECT`
Kernel crash log below
Insufficient stack space to handle exception!/debug/provoke-crash/DIRECT
Task stack: [0xff20000010a98000..0xff20000010a9c000]
Overflow stack: [0xff600001f7d98370..0xff600001f7d99370]
CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34
Hardware name: riscv-virtio,qemu (DT)
epc : __memset+0x60/0xfc
ra : recursive_loop+0x48/0xc6 [lkdtm]
epc : ffffffff808de0e4 ra : ffffffff0163a752 sp : ff20000010a97e80
gp : ffffffff815c0330 tp : ff600000820ea280 t0 : ff20000010a97e88
t1 : 000000000000002e t2 : 3233206874706564 s0 : ff20000010a982b0
s1 : 0000000000000012 a0 : ff20000010a97e88 a1 : 0000000000000000
a2 : 0000000000000400 a3 : ff20000010a98288 a4 : 0000000000000000
a5 : 0000000000000000 a6 : fffffffffffe43f0 a7 : 00007fffffffffff
s2 : ff20000010a97e88 s3 : ffffffff01644680 s4 : ff20000010a9be90
s5 : ff600000842ba6c0 s6 : 00aaaaaac29e42b0 s7 : 00fffffff0aa3684
s8 : 00aaaaaac2978040 s9 : 0000000000000065 s10: 00ffffff8a7cad10
s11: 00ffffff8a76a4e0 t3 : ffffffff815dbaf4 t4 : ffffffff815dbaf4
t5 : ffffffff815dbab8 t6 : ff20000010a9bb48
status: 0000000200000120 badaddr: ff20000010a97e88 cause: 000000000000000f
Kernel panic - not syncing: Kernel stack overflow
CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34
Hardware name: riscv-virtio,qemu (DT)
Call Trace:
[<ffffffff80006754>] dump_backtrace+0x30/0x38
[<ffffffff808de798>] show_stack+0x40/0x4c
[<ffffffff808ea2a8>] dump_stack_lvl+0x44/0x5c
[<ffffffff808ea2d8>] dump_stack+0x18/0x20
[<ffffffff808dec06>] panic+0x126/0x2fe
[<ffffffff800065ea>] walk_stackframe+0x0/0xf0
[<ffffffff0163a752>] recursive_loop+0x48/0xc6 [lkdtm]
SMP: stopping secondary CPUs
---[ end Kernel panic - not syncing: Kernel stack overflow ]---
Cc: Guo Ren <guoren@kernel.org>
Cc: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/linux-riscv/Y347B0x4VUNOd6V7@xhacker/T/#t
Link: https://lore.kernel.org/lkml/20221124094845.1907443-1-debug@rivosinc.com/
Signed-off-by: Deepak Gupta <debug@rivosinc.com>
Co-developed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Acked-by: Guo Ren <guoren@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230927224757.1154247-9-samitolvanen@google.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Diffstat (limited to 'arch/riscv/kernel/entry.S')
-rw-r--r-- | arch/riscv/kernel/entry.S | 70 |
1 files changed, 10 insertions, 60 deletions
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 143a2bb3e697..3d11aa3af105 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -10,9 +10,11 @@ #include <asm/asm.h> #include <asm/csr.h> #include <asm/unistd.h> +#include <asm/page.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/errata_list.h> +#include <linux/sizes.h> SYM_CODE_START(handle_exception) /* @@ -170,67 +172,15 @@ SYM_CODE_END(ret_from_exception) #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) - /* - * Takes the psuedo-spinlock for the shadow stack, in case multiple - * harts are concurrently overflowing their kernel stacks. We could - * store any value here, but since we're overflowing the kernel stack - * already we only have SP to use as a scratch register. So we just - * swap in the address of the spinlock, as that's definately non-zero. - * - * Pairs with a store_release in handle_bad_stack(). - */ -1: la sp, spin_shadow_stack - REG_AMOSWAP_AQ sp, sp, (sp) - bnez sp, 1b - - la sp, shadow_stack - addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE - - //save caller register to shadow stack - addi sp, sp, -(PT_SIZE_ON_STACK) - REG_S x1, PT_RA(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) - - la ra, restore_caller_reg - tail get_overflow_stack - -restore_caller_reg: - //save per-cpu overflow stack - REG_S a0, -8(sp) - //restore caller register from shadow_stack - REG_L x1, PT_RA(sp) - REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) + /* we reach here from kernel context, sscratch must be 0 */ + csrrw x31, CSR_SCRATCH, x31 + asm_per_cpu sp, overflow_stack, x31 + li x31, OVERFLOW_STACK_SIZE + add sp, sp, x31 + /* zero out x31 again and restore x31 */ + xor x31, x31, x31 + csrrw x31, CSR_SCRATCH, x31 - //load per-cpu overflow stack - REG_L sp, -8(sp) addi sp, sp, -(PT_SIZE_ON_STACK) //save context to overflow stack |