diff options
Diffstat (limited to 'arch/x86/lib/memcpy_64.S')
| -rw-r--r-- | arch/x86/lib/memcpy_64.S | 49 |
1 files changed, 18 insertions, 31 deletions
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 59cf2343f3d9..12a23fa7c44c 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -1,20 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright 2002 Andi Kleen */ +#include <linux/export.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/errno.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> -#include <asm/export.h> -.pushsection .noinstr.text, "ax" - -/* - * We build a jump to memcpy_orig by default which gets NOPped out on - * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which - * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs - * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. - */ +.section .noinstr.text, "ax" /* * memcpy - Copy a memory block. @@ -26,35 +20,29 @@ * * Output: * rax original destination + * + * The FSRM alternative should be done inline (avoiding the call and + * the disgusting return handling), but that would require some help + * from the compiler for better calling conventions. + * + * The 'rep movsb' itself is small enough to replace the call, but the + * two register moves blow up the code. And one of them is "needed" + * only for the return value that is the same as the source input, + * which the compiler could/should do much better anyway. */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) - ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memcpy_erms", X86_FEATURE_ERMS +SYM_TYPED_FUNC_START(__memcpy) + ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM movq %rdi, %rax movq %rdx, %rcx - shrq $3, %rcx - andl $7, %edx - rep movsq - movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) -/* - * memcpy_erms() - enhanced fast string memcpy. This is faster and - * simpler than memcpy. Use memcpy_erms when possible. - */ -SYM_FUNC_START_LOCAL(memcpy_erms) - movq %rdi, %rax - movq %rdx, %rcx - rep movsb - RET -SYM_FUNC_END(memcpy_erms) +SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) +SYM_PIC_ALIAS(memcpy) +EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) movq %rdi, %rax @@ -183,4 +171,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig) RET SYM_FUNC_END(memcpy_orig) -.popsection |
