Diffstat (limited to 'arch/x86/lib/clear_page_64.S')
-rw-r--r--  arch/x86/lib/clear_page_64.S | 161
1 file changed, 116 insertions, 45 deletions
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index f2145cfa12a6..a508e4a8c66a 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,31 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/alternative-asm.h>
+#include <linux/cfi_types.h>
+#include <linux/objtool.h>
+#include <asm/asm.h>
 
 /*
- * Zero a page.
- * rdi	page
- */
-ENTRY(clear_page_c)
-	CFI_STARTPROC
+ * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+ * recommended to use this when possible and we do use them by default.
+ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+ * Otherwise, use original.
+ */
+
+/*
+ * Zero a page.
+ * %rdi - page
+ */
+SYM_TYPED_FUNC_START(clear_page_rep)
 	movl $4096/8,%ecx
 	xorl %eax,%eax
 	rep stosq
-	ret
-	CFI_ENDPROC
-ENDPROC(clear_page_c)
-
-ENTRY(clear_page_c_e)
-	CFI_STARTPROC
-	movl $4096,%ecx
-	xorl %eax,%eax
-	rep stosb
-	ret
-	CFI_ENDPROC
-ENDPROC(clear_page_c_e)
+	RET
+SYM_FUNC_END(clear_page_rep)
+EXPORT_SYMBOL_GPL(clear_page_rep)
 
-ENTRY(clear_page)
-	CFI_STARTPROC
+SYM_TYPED_FUNC_START(clear_page_orig)
 	xorl %eax,%eax
 	movl $4096/64,%ecx
 	.p2align 4
@@ -43,31 +42,103 @@ ENTRY(clear_page)
 	leaq 64(%rdi),%rdi
 	jnz .Lloop
 	nop
-	ret
-	CFI_ENDPROC
-.Lclear_page_end:
-ENDPROC(clear_page)
+	RET
+SYM_FUNC_END(clear_page_orig)
+EXPORT_SYMBOL_GPL(clear_page_orig)
+
+SYM_TYPED_FUNC_START(clear_page_erms)
+	movl $4096,%ecx
+	xorl %eax,%eax
+	rep stosb
+	RET
+SYM_FUNC_END(clear_page_erms)
+EXPORT_SYMBOL_GPL(clear_page_erms)
+
+/*
+ * Default clear user-space.
+ * Input:
+ * rdi destination
+ * rcx count
+ * rax is zero
+ *
+ * Output:
+ * rcx: uncleared bytes or 0 if successful.
+ */
+SYM_FUNC_START(rep_stos_alternative)
+	ANNOTATE_NOENDBR
+	cmpq $64,%rcx
+	jae .Lunrolled
+
+	cmp $8,%ecx
+	jae .Lword
+
+	testl %ecx,%ecx
+	je .Lexit
+
+.Lclear_user_tail:
+0:	movb %al,(%rdi)
+	inc %rdi
+	dec %rcx
+	jnz .Lclear_user_tail
+.Lexit:
+	RET
+
+	_ASM_EXTABLE_UA( 0b, .Lexit)
+
+.Lword:
+1:	movq %rax,(%rdi)
+	addq $8,%rdi
+	sub $8,%ecx
+	je .Lexit
+	cmp $8,%ecx
+	jae .Lword
+	jmp .Lclear_user_tail
+
+	.p2align 4
+.Lunrolled:
+10:	movq %rax,(%rdi)
+11:	movq %rax,8(%rdi)
+12:	movq %rax,16(%rdi)
+13:	movq %rax,24(%rdi)
+14:	movq %rax,32(%rdi)
+15:	movq %rax,40(%rdi)
+16:	movq %rax,48(%rdi)
+17:	movq %rax,56(%rdi)
+	addq $64,%rdi
+	subq $64,%rcx
+	cmpq $64,%rcx
+	jae .Lunrolled
+	cmpl $8,%ecx
+	jae .Lword
+	testl %ecx,%ecx
+	jne .Lclear_user_tail
+	RET
 
 /*
- * Some CPUs support enhanced REP MOVSB/STOSB instructions.
- * It is recommended to use this when possible.
- * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
- * Otherwise, use original function.
+ * If we take an exception on any of the
+ * word stores, we know that %rcx isn't zero,
+ * so we can just go to the tail clearing to
+ * get the exact count.
+ *
+ * The unrolled case might end up clearing
+ * some bytes twice. Don't care.
+ *
+ * We could use the value in %rdi to avoid
+ * a second fault on the exact count case,
+ * but do we really care? No.
  *
+ * Finally, we could try to align %rdi at the
+ * top of the unrolling. But unaligned stores
+ * just aren't that common or expensive.
  */
-
-#include <asm/cpufeature.h>
-
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
-2:	.byte 0xeb					/* jmp <disp8> */
-	.byte (clear_page_c_e - clear_page) - (3f - 2b)	/* offset */
-3:
-	.previous
-	.section .altinstructions,"a"
-	altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
-			     .Lclear_page_end-clear_page, 2b-1b
-	altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,	\
-			     .Lclear_page_end-clear_page,3b-2b
-	.previous
+	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
+SYM_FUNC_END(rep_stos_alternative)
+EXPORT_SYMBOL(rep_stos_alternative)
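
The header comment in the new file spells out the selection policy among the three page-clearing variants: prefer enhanced REP STOSB (clear_page_erms), fall back to the REP STOSQ fast-string version (clear_page_rep), and use the unrolled loop (clear_page_orig) otherwise. The following is only a rough C sketch of that policy: in the real tree the call site is patched once at boot through the alternatives mechanism, keyed on X86_FEATURE_ERMS and X86_FEATURE_REP_GOOD, so the run-time branches and the cpu_has_* flags below are hypothetical stand-ins, not kernel APIs.

#include <stdbool.h>

/* The three variants defined in the assembly file above; each zeroes
 * one 4096-byte page at the address passed in. */
void clear_page_rep(void *page);   /* REP STOSQ, wants fast strings      */
void clear_page_orig(void *page);  /* unrolled 64-byte loop, no features */
void clear_page_erms(void *page);  /* REP STOSB, needs ERMS              */

/* Hypothetical feature flags standing in for X86_FEATURE_ERMS and
 * X86_FEATURE_REP_GOOD; assume they were filled in during CPU setup. */
static bool cpu_has_erms;
static bool cpu_has_rep_good;

static void clear_page(void *page)
{
	if (cpu_has_erms)
		clear_page_erms(page);   /* default when available */
	else if (cpu_has_rep_good)
		clear_page_rep(page);    /* fast-string fallback */
	else
		clear_page_orig(page);   /* plain unrolled stores */
}

Patching the call site instead of branching, as the kernel does, keeps the common path free of a per-page feature test; the sketch only mirrors the order of preference described in the comment.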

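rep_stos_alternative is documented as a register contract rather than a C prototype: %rdi holds the destination, %rcx the byte count, %rax must be zero on entry, and %rcx comes back holding the number of bytes left unzeroed (0 on success), with the _ASM_EXTABLE_UA entries redirecting a faulting store to the byte-at-a-time tail so the count stays exact. Below is a minimal sketch of a caller that turns that into a "bytes cleared" result; the symbol name comes from the diff, but the wrapper function, its name, and the bare call in inline asm are illustrative assumptions rather than the kernel's clear_user() path, which selects between rep stosb and a call to this routine with an alternative at the call site.

#include <stddef.h>

/* Hypothetical wrapper; in the kernel the destination would be a
 * __user pointer and the call site is patched by an alternative. */
static size_t zero_user_range(void *dst, size_t count)
{
	size_t remaining = count;	/* %rcx on entry and on return */

	/*
	 * The routine reports back through %rcx: 0 on full success,
	 * otherwise how many bytes it could not clear. We assume the
	 * hand-written asm leaves every other register intact, so a
	 * bare call with these constraints is enough for the sketch.
	 */
	asm volatile("call rep_stos_alternative"
		     : "+c" (remaining), "+D" (dst)
		     : "a" (0UL)		/* %rax must be zero on entry */
		     : "memory", "cc");

	return count - remaining;	/* bytes actually zeroed */
}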