summaryrefslogtreecommitdiff
path: root/arch/x86/lib
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2021-03-26 16:12:02 +0100
committerIngo Molnar <mingo@kernel.org>2021-04-02 12:42:04 +0200
commit119251855f9adf9421cb5eb409933092141ab2c7 (patch)
tree00ba7bae553b629194bcd6fe615274026d4c5953 /arch/x86/lib
parent23c1ad538f4f371bdb67d8a112314842d5db7e5a (diff)
x86/retpoline: Simplify retpolines
Due to: c9c324dc22aa ("objtool: Support stack layout changes in alternatives") it is now possible to simplify the retpolines. Currently our retpolines consist of 2 symbols: - __x86_indirect_thunk_\reg: the compiler target - __x86_retpoline_\reg: the actual retpoline. Both are consecutive in code and aligned such that for any one register they both live in the same cacheline: 0000000000000000 <__x86_indirect_thunk_rax>: 0: ff e0 jmpq *%rax 2: 90 nop 3: 90 nop 4: 90 nop 0000000000000005 <__x86_retpoline_rax>: 5: e8 07 00 00 00 callq 11 <__x86_retpoline_rax+0xc> a: f3 90 pause c: 0f ae e8 lfence f: eb f9 jmp a <__x86_retpoline_rax+0x5> 11: 48 89 04 24 mov %rax,(%rsp) 15: c3 retq 16: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%rax,%rax,1) The thunk is an alternative_2, where one option is a JMP to the retpoline. This was done so that objtool didn't need to deal with alternatives with stack ops. But that problem has been solved, so now it is possible to fold the entire retpoline into the alternative to simplify and consolidate unused bytes: 0000000000000000 <__x86_indirect_thunk_rax>: 0: ff e0 jmpq *%rax 2: 90 nop 3: 90 nop 4: 90 nop 5: 90 nop 6: 90 nop 7: 90 nop 8: 90 nop 9: 90 nop a: 90 nop b: 90 nop c: 90 nop d: 90 nop e: 90 nop f: 90 nop 10: 90 nop 11: 66 66 2e 0f 1f 84 00 00 00 00 00 data16 nopw %cs:0x0(%rax,%rax,1) 1c: 0f 1f 40 00 nopl 0x0(%rax) Notice that since the longest alternative sequence is now: 0: e8 07 00 00 00 callq c <.altinstr_replacement+0xc> 5: f3 90 pause 7: 0f ae e8 lfence a: eb f9 jmp 5 <.altinstr_replacement+0x5> c: 48 89 04 24 mov %rax,(%rsp) 10: c3 retq 17 bytes, we have 15 bytes NOP at the end of our 32 byte slot. (IOW, if we can shrink the retpoline by 1 byte we can pack it more densely). [ bp: Massage commit message. ] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Borislav Petkov <bp@suse.de> Signed-off-by: Ingo Molnar <mingo@kernel.org> Link: https://lkml.kernel.org/r/20210326151259.506071949@infradead.org
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--arch/x86/lib/retpoline.S34
1 files changed, 17 insertions, 17 deletions
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 6bb74b5c238c..d2c0d14c1e22 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,27 +10,31 @@
#include <asm/unwind_hints.h>
#include <asm/frame.h>
-.macro THUNK reg
- .section .text.__x86.indirect_thunk
-
- .align 32
-SYM_FUNC_START(__x86_indirect_thunk_\reg)
- JMP_NOSPEC \reg
-SYM_FUNC_END(__x86_indirect_thunk_\reg)
-
-SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+.macro RETPOLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
- call .Ldo_rop_\@
+ call .Ldo_rop_\@
.Lspec_trap_\@:
UNWIND_HINT_EMPTY
pause
lfence
- jmp .Lspec_trap_\@
+ jmp .Lspec_trap_\@
.Ldo_rop_\@:
- mov %\reg, (%_ASM_SP)
+ mov %\reg, (%_ASM_SP)
UNWIND_HINT_FUNC
ret
-SYM_FUNC_END(__x86_retpoline_\reg)
+.endm
+
+.macro THUNK reg
+ .section .text.__x86.indirect_thunk
+
+ .align 32
+SYM_FUNC_START(__x86_indirect_thunk_\reg)
+
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
+
+SYM_FUNC_END(__x86_indirect_thunk_\reg)
.endm
@@ -48,7 +52,6 @@ SYM_FUNC_END(__x86_retpoline_\reg)
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg)
#undef GEN
#define GEN(reg) THUNK reg
@@ -58,6 +61,3 @@ SYM_FUNC_END(__x86_retpoline_\reg)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
-#undef GEN
-#define GEN(reg) EXPORT_RETPOLINE(reg)
-#include <asm/GEN-for-each-reg.h>