summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel/syscall_64.c
diff options
context:
space:
mode:
authorMichael Ellerman <mpe@ellerman.id.au>2020-07-23 17:43:44 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2020-07-23 17:43:44 +1000
commit335aca5f65f1a39670944930131da5f2276f888f (patch)
treec8f41223681fc064f558083a2d46f712e43d8b66 /arch/powerpc/kernel/syscall_64.c
parent8ac9b9d61f0eceba6ce571e7527798465ae9a7c5 (diff)
parent7fa95f9adaee7e5cbb195d3359741120829e488b (diff)
Merge branch 'scv' support into next
From Nick's cover letter: Linux powerpc new system call instruction and ABI System Call Vectored (scv) ABI ============================== The scv instruction is introduced with POWER9 / ISA3, it comes with an rfscv counter-part. The benefit of these instructions is performance (trading slower SRR0/1 with faster LR/CTR registers, and entering the kernel with MSR[EE] and MSR[RI] left enabled, which can reduce MSR updates. The scv instruction has 128 levels (not enough to cover the Linux system call space). Assignment and advertisement ---------------------------- The proposal is to assign scv levels conservatively, and advertise them with HWCAP feature bits as we add support for more. Linux has not enabled FSCR[SCV] yet, so executing the scv instruction will cause the kernel to log a "SCV facility unavilable" message, and deliver a SIGILL with ILL_ILLOPC to the process. Linux has defined a HWCAP2 bit PPC_FEATURE2_SCV for SCV support, but does not set it. This change allocates the zero level ('scv 0'), advertised with PPC_FEATURE2_SCV, which will be used to provide normal Linux system calls (equivalent to 'sc'). Attempting to execute scv with other levels will cause a SIGILL to be delivered the same as before, but will not log a "SCV facility unavailable" message (because the processor facility is enabled). Calling convention ------------------ The proposal is for scv 0 to provide the standard Linux system call ABI with the following differences from sc convention[1]: - LR is to be volatile across scv calls. This is necessary because the scv instruction clobbers LR. From previous discussion, this should be possible to deal with in GCC clobbers and CFI. - cr1 and cr5-cr7 are volatile. This matches the C ABI and would allow the kernel system call exit to avoid restoring the volatile cr registers (although we probably still would anyway to avoid information leaks). - Error handling: The consensus among kernel, glibc, and musl is to move to using negative return values in r3 rather than CR0[SO]=1 to indicate error, which matches most other architectures, and is closer to a function call. Notes ----- - r0,r4-r8 are documented as volatile in the ABI, but the kernel patch as submitted currently preserves them. This is to leave room for deciding which way to go with these. Some small benefit was found by preserving them[1] but I'm not convinced it's worth deviating from the C function call ABI just for this. Release code should follow the ABI. Previous discussions: https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/208691.html https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/209268.html [1] https://github.com/torvalds/linux/blob/master/Documentation/powerpc/syscall64-abi.rst [2] https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/209263.html
Diffstat (limited to 'arch/powerpc/kernel/syscall_64.c')
-rw-r--r--arch/powerpc/kernel/syscall_64.c32
1 files changed, 24 insertions, 8 deletions
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 5126f1d3184a..8e50818aa50b 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -60,6 +60,11 @@ notrace long system_call_exception(long r3, long r4, long r5,
local_irq_enable();
if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
+ if (unlikely(regs->trap == 0x7ff0)) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
/*
* We use the return value of do_syscall_trace_enter() as the
* syscall number. If the syscall was rejected for any reason
@@ -78,6 +83,11 @@ notrace long system_call_exception(long r3, long r4, long r5,
r8 = regs->gpr[8];
} else if (unlikely(r0 >= NR_syscalls)) {
+ if (unlikely(regs->trap == 0x7ff0)) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
return -ENOSYS;
}
@@ -105,16 +115,20 @@ notrace long system_call_exception(long r3, long r4, long r5,
* local irqs must be disabled. Returns false if the caller must re-enable
* them, check for new work, and try again.
*/
-static notrace inline bool prep_irq_for_enabled_exit(void)
+static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
{
/* This must be done with RI=1 because tracing may touch vmaps */
trace_hardirqs_on();
/* This pattern matches prep_irq_for_idle */
- __hard_EE_RI_disable();
+ if (clear_ri)
+ __hard_EE_RI_disable();
+ else
+ __hard_irq_disable();
if (unlikely(lazy_irq_pending_nocheck())) {
/* Took an interrupt, may have more exit work to do. */
- __hard_RI_enable();
+ if (clear_ri)
+ __hard_RI_enable();
trace_hardirqs_off();
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
@@ -136,7 +150,8 @@ static notrace inline bool prep_irq_for_enabled_exit(void)
* because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
*/
notrace unsigned long syscall_exit_prepare(unsigned long r3,
- struct pt_regs *regs)
+ struct pt_regs *regs,
+ long scv)
{
unsigned long *ti_flagsp = &current_thread_info()->flags;
unsigned long ti_flags;
@@ -151,7 +166,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
ti_flags = *ti_flagsp;
- if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
r3 = -r3;
regs->ccr |= 0x10000000; /* Set SO bit in CR */
@@ -218,7 +233,8 @@ again:
}
}
- if (unlikely(!prep_irq_for_enabled_exit())) {
+ /* scv need not set RI=0 because SRRs are not used */
+ if (unlikely(!prep_irq_for_enabled_exit(!scv))) {
local_irq_enable();
goto again;
}
@@ -290,7 +306,7 @@ again:
}
}
- if (unlikely(!prep_irq_for_enabled_exit())) {
+ if (unlikely(!prep_irq_for_enabled_exit(true))) {
local_irq_enable();
local_irq_disable();
goto again;
@@ -353,7 +369,7 @@ again:
}
}
- if (unlikely(!prep_irq_for_enabled_exit())) {
+ if (unlikely(!prep_irq_for_enabled_exit(true))) {
/*
* Can't local_irq_restore to replay if we were in
* interrupt context. Must replay directly.