Diffstat (limited to 'arch/x86/lib/copy_user_64.S')
-rw-r--r--  arch/x86/lib/copy_user_64.S | 372
1 files changed, 93 insertions, 279 deletions
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index a30ca15be21c..06296eb69fd4 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -1,298 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
* Copyright 2002 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License v2.
*
* Functions to copy from and to user space.
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
-#include <asm/current.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/cpufeature.h>
-#include <asm/alternative-asm.h>
+#include <linux/cfi_types.h>
+#include <linux/objtool.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative.h>
#include <asm/asm.h>
-#include <asm/smap.h>
/*
- * By placing feature2 after feature1 in altinstructions section, we logically
- * implement:
- * If CPU has feature2, jmp to alt2 is used
- * else if CPU has feature1, jmp to alt1 is used
- * else jmp to orig is used.
- */
- .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
-0:
- .byte 0xe9 /* 32bit jump */
- .long \orig-1f /* by default jump to orig */
-1:
- .section .altinstr_replacement,"ax"
-2: .byte 0xe9 /* near jump with 32bit immediate */
- .long \alt1-1b /* offset */ /* or alternatively to alt1 */
-3: .byte 0xe9 /* near jump with 32bit immediate */
- .long \alt2-1b /* offset */ /* or alternatively to alt2 */
- .previous
-
- .section .altinstructions,"a"
- altinstruction_entry 0b,2b,\feature1,5,5
- altinstruction_entry 0b,3b,\feature2,5,5
- .previous
- .endm
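
The priority scheme the removed comment describes falls out of patch ordering: the feature2 entry sits after the feature1 entry in .altinstructions, so when both features are present the feature2 replacement is applied last and wins. In C terms, the boot-time selection it implemented for the old user-copy entry points was roughly the following sketch (copy_user_dispatch is a made-up name for illustration; the three targets are the old symbols this diff deletes):

/*
 * Sketch of the selection ALTERNATIVE_JUMP implemented at patch time.
 * The feature2 entry is patched after the feature1 entry, so it wins
 * whenever both CPU features are present.
 */
unsigned long copy_user_dispatch(void *to, const void *from, unsigned len)
{
        if (boot_cpu_has(X86_FEATURE_ERMS))             /* feature2, wins */
                return copy_user_enhanced_fast_string(to, from, len);
        if (boot_cpu_has(X86_FEATURE_REP_GOOD))         /* feature1 */
                return copy_user_generic_string(to, from, len);
        return copy_user_generic_unrolled(to, from, len);       /* orig */
}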
-
- .macro ALIGN_DESTINATION
-#ifdef FIX_ALIGNMENT
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jz 102f /* already aligned */
- subl $8,%ecx
- negl %ecx
- subl %ecx,%edx
-100: movb (%rsi),%al
-101: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 100b
-102:
- .section .fixup,"ax"
-103: addl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE(100b,103b)
- _ASM_EXTABLE(101b,103b)
-#endif
- .endm
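
ALIGN_DESTINATION's arithmetic is easy to miss in the andl/subl/negl sequence: it byte-copies exactly 8 - (dst & 7) bytes so the store side of the main loop runs on an aligned destination. A C rendering, with the fault fixup omitted (align_destination is a hypothetical helper; callers have already guaranteed count >= 8, so the prologue never needs its own bounds check):

/* C sketch of the ALIGN_DESTINATION macro above, fixup path omitted. */
static void align_destination(unsigned char **dst, const unsigned char **src,
                              unsigned long *count)
{
        unsigned long boundary = (unsigned long)*dst & 7;

        if (!boundary)
                return;                 /* 'jz 102f': already aligned */
        boundary = 8 - boundary;        /* 'subl $8,%ecx; negl %ecx' */
        *count -= boundary;             /* 'subl %ecx,%edx' */
        while (boundary--)              /* the byte loop at 100:/101: */
                *(*dst)++ = *(*src)++;
}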
-
-/* Standard copy_to_user with segment limit checking */
-ENTRY(_copy_to_user)
- CFI_STARTPROC
- GET_THREAD_INFO(%rax)
- movq %rdi,%rcx
- addq %rdx,%rcx
- jc bad_to_user
- cmpq TI_addr_limit(%rax),%rcx
- ja bad_to_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
- copy_user_generic_unrolled,copy_user_generic_string, \
- copy_user_enhanced_fast_string
- CFI_ENDPROC
-ENDPROC(_copy_to_user)
-
-/* Standard copy_from_user with segment limit checking */
-ENTRY(_copy_from_user)
- CFI_STARTPROC
- GET_THREAD_INFO(%rax)
- movq %rsi,%rcx
- addq %rdx,%rcx
- jc bad_from_user
- cmpq TI_addr_limit(%rax),%rcx
- ja bad_from_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
- copy_user_generic_unrolled,copy_user_generic_string, \
- copy_user_enhanced_fast_string
- CFI_ENDPROC
-ENDPROC(_copy_from_user)
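
Both removed entry points open with the same two-step range check: catch address wrap-around from the addq (the jc), then compare the end of the range against the per-thread addr_limit. Roughly, in C (user_range_ok is a made-up name; the modern kernel performs the equivalent check in C via access_ok() before ever reaching the asm):

/* Sketch of the segment-limit check the old entry points performed. */
static bool user_range_ok(const void __user *ptr, unsigned long len,
                          unsigned long addr_limit)
{
        unsigned long end = (unsigned long)ptr + len;

        if (end < (unsigned long)ptr)   /* wrapped: 'jc bad_*_user' */
                return false;
        return end <= addr_limit;       /* 'cmpq TI_addr_limit(%rax),%rcx' */
}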
-
- .section .fixup,"ax"
- /* must zero dest */
-ENTRY(bad_from_user)
-bad_from_user:
- CFI_STARTPROC
- movl %edx,%ecx
- xorl %eax,%eax
- rep
- stosb
-bad_to_user:
- movl %edx,%eax
- ret
- CFI_ENDPROC
-ENDPROC(bad_from_user)
- .previous
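
Note the asymmetry in the removed fixup: bad_from_user zeroes the uncopied tail of the kernel buffer before returning, so a faulting copy_from_user can never leave stale kernel memory visible to the caller, while bad_to_user only reports the count. In C terms (bad_from_user_c is a hypothetical name; on entry %rdi points at the first uncopied destination byte and %edx holds the uncopied count):

/* Sketch of the zero-on-fault behavior of bad_from_user above. */
static unsigned long bad_from_user_c(void *dst, unsigned long uncopied)
{
        memset(dst, 0, uncopied);       /* 'rep stosb' with %eax == 0 */
        return uncopied;                /* non-zero => caller returns -EFAULT */
}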
-
-/*
- * copy_user_generic_unrolled - memory copy with exception handling.
- * This version is for CPUs like P4 that don't have efficient micro
- * code for rep movsq
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-ENTRY(copy_user_generic_unrolled)
- CFI_STARTPROC
- ASM_STAC
- cmpl $8,%edx
- jb 20f /* less then 8 bytes, go to byte copy loop */
- ALIGN_DESTINATION
- movl %edx,%ecx
- andl $63,%edx
- shrl $6,%ecx
- jz 17f
-1: movq (%rsi),%r8
-2: movq 1*8(%rsi),%r9
-3: movq 2*8(%rsi),%r10
-4: movq 3*8(%rsi),%r11
-5: movq %r8,(%rdi)
-6: movq %r9,1*8(%rdi)
-7: movq %r10,2*8(%rdi)
-8: movq %r11,3*8(%rdi)
-9: movq 4*8(%rsi),%r8
-10: movq 5*8(%rsi),%r9
-11: movq 6*8(%rsi),%r10
-12: movq 7*8(%rsi),%r11
-13: movq %r8,4*8(%rdi)
-14: movq %r9,5*8(%rdi)
-15: movq %r10,6*8(%rdi)
-16: movq %r11,7*8(%rdi)
- leaq 64(%rsi),%rsi
- leaq 64(%rdi),%rdi
- decl %ecx
- jnz 1b
-17: movl %edx,%ecx
- andl $7,%edx
- shrl $3,%ecx
- jz 20f
-18: movq (%rsi),%r8
-19: movq %r8,(%rdi)
- leaq 8(%rsi),%rsi
- leaq 8(%rdi),%rdi
- decl %ecx
- jnz 18b
-20: andl %edx,%edx
- jz 23f
- movl %edx,%ecx
-21: movb (%rsi),%al
-22: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 21b
-23: xor %eax,%eax
- ASM_CLAC
- ret
-
- .section .fixup,"ax"
-30: shll $6,%ecx
- addl %ecx,%edx
- jmp 60f
-40: lea (%rdx,%rcx,8),%rdx
- jmp 60f
-50: movl %ecx,%edx
-60: jmp copy_user_handle_tail /* ecx is zerorest also */
- .previous
-
- _ASM_EXTABLE(1b,30b)
- _ASM_EXTABLE(2b,30b)
- _ASM_EXTABLE(3b,30b)
- _ASM_EXTABLE(4b,30b)
- _ASM_EXTABLE(5b,30b)
- _ASM_EXTABLE(6b,30b)
- _ASM_EXTABLE(7b,30b)
- _ASM_EXTABLE(8b,30b)
- _ASM_EXTABLE(9b,30b)
- _ASM_EXTABLE(10b,30b)
- _ASM_EXTABLE(11b,30b)
- _ASM_EXTABLE(12b,30b)
- _ASM_EXTABLE(13b,30b)
- _ASM_EXTABLE(14b,30b)
- _ASM_EXTABLE(15b,30b)
- _ASM_EXTABLE(16b,30b)
- _ASM_EXTABLE(18b,40b)
- _ASM_EXTABLE(19b,40b)
- _ASM_EXTABLE(21b,50b)
- _ASM_EXTABLE(22b,50b)
- CFI_ENDPROC
-ENDPROC(copy_user_generic_unrolled)
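
The unrolled routine is three loops in descending granularity: 64-byte blocks staged through eight scratch registers, then single quadwords, then bytes, with one fixup per stage to recompute the uncopied count. Its structure as a C sketch (copy_unrolled is a made-up name; the exception handling that makes the real thing interesting is omitted, and fixed-size memcpy() stands in for the movq runs):

/* Loop structure of copy_user_generic_unrolled above, faults omitted. */
static void copy_unrolled(unsigned char *dst, const unsigned char *src,
                          unsigned long count)
{
        unsigned long blocks = count >> 6;      /* 'shrl $6,%ecx' */

        while (blocks--) {                      /* 16 movq per pass */
                memcpy(dst, src, 64);
                src += 64;
                dst += 64;
        }
        count &= 63;
        for (; count >= 8; count -= 8) {        /* quadword loop at 18:/19: */
                memcpy(dst, src, 8);
                src += 8;
                dst += 8;
        }
        while (count--)                         /* byte tail at 21:/22: */
                *dst++ = *src++;
}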
-
-/* Some CPUs run faster using the string copy instructions.
- * This is also a lot simpler. Use them when possible.
- *
- * Only 4GB of copy is supported. This shouldn't be a problem
- * because the kernel normally only writes from/to page sized chunks
- * even if user space passed a longer buffer.
- * And more would be dangerous because both Intel and AMD have
- * errata with rep movsq > 4GB. If someone feels the need to fix
- * this please consider this.
+ * rep_movs_alternative - memory copy with exception handling.
+ * This version is for CPUs that don't have FSRM (Fast Short Rep Movs)
*
* Input:
* rdi destination
* rsi source
- * rdx count
+ * rcx count
*
* Output:
- * eax uncopied bytes or 0 if successful.
- */
-ENTRY(copy_user_generic_string)
- CFI_STARTPROC
- ASM_STAC
- andl %edx,%edx
- jz 4f
- cmpl $8,%edx
- jb 2f /* less than 8 bytes, go to byte copy loop */
- ALIGN_DESTINATION
- movl %edx,%ecx
- shrl $3,%ecx
- andl $7,%edx
-1: rep
- movsq
-2: movl %edx,%ecx
-3: rep
- movsb
-4: xorl %eax,%eax
- ASM_CLAC
- ret
-
- .section .fixup,"ax"
-11: lea (%rdx,%rcx,8),%rcx
-12: movl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE(1b,11b)
- _ASM_EXTABLE(3b,12b)
- CFI_ENDPROC
-ENDPROC(copy_user_generic_string)
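
copy_user_generic_string's split is simply count / 8 quadword moves followed by count % 8 byte moves. The equivalent C, using inline asm for the two string instructions (copy_string is a hypothetical name; per the comment above, the real routine keeps the count 32-bit because of the >4GB rep movsq errata):

/* Sketch of the movsq/movsb split in copy_user_generic_string above. */
static void copy_string(void *dst, const void *src, unsigned int count)
{
        unsigned long words = count >> 3;       /* 'rep movsq' count */
        unsigned long tail  = count & 7;        /* 'rep movsb' count */

        asm volatile("rep movsq"
                     : "+D" (dst), "+S" (src), "+c" (words)
                     : : "memory");
        asm volatile("rep movsb"
                     : "+D" (dst), "+S" (src), "+c" (tail)
                     : : "memory");
}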
-
-/*
- * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
- * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
+ * rcx uncopied bytes or 0 if successful.
*
- * Output:
- * eax uncopied bytes or 0 if successful.
+ * NOTE! The calling convention is very intentionally the same as
+ * for 'rep movs', so that we can rewrite the function call with
+ * just a plain 'rep movs' on machines that have FSRM. But to make
+ * it simpler for us, we can clobber rsi/rdi and rax freely.
*/
-ENTRY(copy_user_enhanced_fast_string)
- CFI_STARTPROC
- ASM_STAC
- andl %edx,%edx
- jz 2f
- movl %edx,%ecx
-1: rep
- movsb
-2: xorl %eax,%eax
- ASM_CLAC
- ret
-
- .section .fixup,"ax"
-12: movl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE(1b,12b)
- CFI_ENDPROC
-ENDPROC(copy_user_enhanced_fast_string)
+SYM_FUNC_START(rep_movs_alternative)
+ ANNOTATE_NOENDBR
+ cmpq $64,%rcx
+ jae .Llarge
+
+ cmp $8,%ecx
+ jae .Lword
+
+ testl %ecx,%ecx
+ je .Lexit
+
+.Lcopy_user_tail:
+0: movb (%rsi),%al
+1: movb %al,(%rdi)
+ inc %rdi
+ inc %rsi
+ dec %rcx
+ jne .Lcopy_user_tail
+.Lexit:
+ RET
+
+ _ASM_EXTABLE_UA( 0b, .Lexit)
+ _ASM_EXTABLE_UA( 1b, .Lexit)
+
+ .p2align 4
+.Lword:
+2: movq (%rsi),%rax
+3: movq %rax,(%rdi)
+ addq $8,%rsi
+ addq $8,%rdi
+ sub $8,%ecx
+ je .Lexit
+ cmp $8,%ecx
+ jae .Lword
+ jmp .Lcopy_user_tail
+
+ _ASM_EXTABLE_UA( 2b, .Lcopy_user_tail)
+ _ASM_EXTABLE_UA( 3b, .Lcopy_user_tail)
+
+.Llarge:
+0: ALTERNATIVE "jmp .Llarge_movsq", "rep movsb", X86_FEATURE_ERMS
+1: RET
+
+ _ASM_EXTABLE_UA( 0b, 1b)
+
+.Llarge_movsq:
+ /* Do the first possibly unaligned word */
+0: movq (%rsi),%rax
+1: movq %rax,(%rdi)
+
+ _ASM_EXTABLE_UA( 0b, .Lcopy_user_tail)
+ _ASM_EXTABLE_UA( 1b, .Lcopy_user_tail)
+
+ /* What would be the offset to the aligned destination? */
+ leaq 8(%rdi),%rax
+ andq $-8,%rax
+ subq %rdi,%rax
+
+ /* .. and update pointers and count to match */
+ addq %rax,%rdi
+ addq %rax,%rsi
+ subq %rax,%rcx
+
+ /* make %rcx contain the number of words, %rax the remainder */
+ movq %rcx,%rax
+ shrq $3,%rcx
+ andl $7,%eax
+0: rep movsq
+ movl %eax,%ecx
+ testl %ecx,%ecx
+ jne .Lcopy_user_tail
+ RET
+
+1: leaq (%rax,%rcx,8),%rcx
+ jmp .Lcopy_user_tail
+
+ _ASM_EXTABLE_UA( 0b, 1b)
+SYM_FUNC_END(rep_movs_alternative)
+EXPORT_SYMBOL(rep_movs_alternative)
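
The NOTE about the calling convention is the heart of the rewrite: because rep_movs_alternative takes destination/source/count in %rdi/%rsi/%rcx and returns the uncopied count in %rcx, a caller can be patched between a function call and a bare rep movsb with no register shuffling at all. A sketch of that caller, modeled on the kernel's copy_user_generic() (treat the details as version-dependent; stac()/clac() are the SMAP open/close helpers, and %rax is clobbered per the NOTE):

/*
 * Sketch of the FSRM caller side: the ALTERNATIVE turns the call site
 * into a plain 'rep movsb' on FSRM hardware, otherwise it calls
 * rep_movs_alternative, which honors the identical register contract.
 */
static __always_inline unsigned long
copy_user_generic(void *to, const void *from, unsigned long len)
{
        stac();                         /* open user access (SMAP) */
        asm volatile(
                "1:\n\t"
                ALTERNATIVE("rep movsb",
                            "call rep_movs_alternative",
                            ALT_NOT(X86_FEATURE_FSRM))
                "2:\n"
                _ASM_EXTABLE_UA(1b, 2b)
                : "+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
                : : "memory", "rax");
        clac();                         /* close user access */
        return len;                     /* uncopied bytes, 0 on success */
}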