diff options
Diffstat (limited to 'arch/loongarch/lib/clear_user.S')
| -rw-r--r-- | arch/loongarch/lib/clear_user.S | 171 |
1 files changed, 141 insertions, 30 deletions
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 2dc48e61a2c8..7a0db643b286 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -3,19 +3,14 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
-
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-	addi.d	a0, a1, (\to) * (-8)
-	jr	ra
-.endr
+#include <asm/unwind_hints.h>
 
 SYM_FUNC_START(__clear_user)
 	/*
@@ -44,7 +39,7 @@ SYM_FUNC_START(__clear_user_generic)
 2:	move	a0, a1
 	jr	ra
 
-	_asm_extable 1b, .L_fixup_handle_0
+	_asm_extable 1b, 2b	/* pairs the store at 1: with label 2:, which returns a1 */
 SYM_FUNC_END(__clear_user_generic)
 
 /*
@@ -54,12 +49,21 @@ SYM_FUNC_END(__clear_user_generic)
  * a1: size
  */
 SYM_FUNC_START(__clear_user_fast)
-	beqz	a1, 10f
+	sltui	t0, a1, 9		/* t0 = (size < 9) */
+	bnez	t0, .Lsmall		/* sizes 0..8 use the jump table */
+
+	add.d	a2, a0, a1		/* a2 = end address (addr + size) */
+0:	st.d	zero, a0, 0		/* first 8 bytes, before a0 is aligned */
 
-	ori	a2, zero, 64
-	blt	a1, a2, 9f
+	/* align up address */
+	addi.d	a0, a0, 8
+	bstrins.d	a0, zero, 2, 0	/* clear bits 2..0 of a0 */
+
+	addi.d	a3, a2, -64		/* a3 = end - 64 */
+	bgeu	a0, a3, .Llt64		/* skip the loop unless a0 < end - 64 */
 
 	/* set 64 bytes at a time */
+.Lloop64:
 1:	st.d	zero, a0, 0
 2:	st.d	zero, a0, 8
 3:	st.d	zero, a0, 16
@@ -68,31 +72,138 @@ SYM_FUNC_START(__clear_user_fast)
 6:	st.d	zero, a0, 40
 7:	st.d	zero, a0, 48
 8:	st.d	zero, a0, 56
-
 	addi.d	a0, a0, 64
-	addi.d	a1, a1, -64
-	bge	a1, a2, 1b
-
-	beqz	a1, 10f
+	bltu	a0, a3, .Lloop64	/* repeat while a0 < end - 64 */
 
 	/* set the remaining bytes */
-9:	st.b	zero, a0, 0
-	addi.d	a0, a0, 1
-	addi.d	a1, a1, -1
-	bgt	a1, zero, 9b
+.Llt64:
+	addi.d	a3, a2, -32		/* a3 = end - 32 */
+	bgeu	a0, a3, .Llt32		/* skip unless a0 < end - 32 */
+9:	st.d	zero, a0, 0
+10:	st.d	zero, a0, 8
+11:	st.d	zero, a0, 16
+12:	st.d	zero, a0, 24
+	addi.d	a0, a0, 32
+
+.Llt32:
+	addi.d	a3, a2, -16		/* a3 = end - 16 */
+	bgeu	a0, a3, .Llt16		/* skip unless a0 < end - 16 */
+13:	st.d	zero, a0, 0
+14:	st.d	zero, a0, 8
+	addi.d	a0, a0, 16
+
+.Llt16:
+	addi.d	a3, a2, -8		/* a3 = end - 8 */
+	bgeu	a0, a3, .Llt8		/* skip unless a0 < end - 8 */
+15:	st.d	zero, a0, 0
+	addi.d	a0, a0, 8
+
+.Llt8:
+16:	st.d	zero, a2, -8		/* last 8 bytes, addressed from the end (a2) */
 
 	/* return */
-10:	move	a0, a1
+	move	a0, zero		/* return 0 */
+	jr	ra
+
+	.align 4
+.Lsmall:
+	pcaddi	t0, 4			/* t0 = pc + 16, i.e. 4 instructions ahead */
+	slli.d	a2, a1, 4		/* a2 = size * 16: one 16-byte slot per size */
+	add.d	t0, t0, a2
+	jr	t0			/* jump to the slot selected by size */
+
+	.align 4			/* slot for size 0 */
+	move	a0, zero
+	jr	ra
+
+	.align 4			/* slot for size 1 */
+17:	st.b	zero, a0, 0
+	move	a0, zero
+	jr	ra
+
+	.align 4			/* slot for size 2 */
+18:	st.h	zero, a0, 0
+	move	a0, zero
+	jr	ra
+
+	.align 4			/* slot for size 3 */
+19:	st.h	zero, a0, 0
+20:	st.b	zero, a0, 2
+	move	a0, zero
+	jr	ra
+
+	.align 4			/* slot for size 4 */
+21:	st.w	zero, a0, 0
+	move	a0, zero
+	jr	ra
+
+	.align 4			/* slot for size 5 */
+22:	st.w	zero, a0, 0
+23:	st.b	zero, a0, 4
+	move	a0, zero
+	jr	ra
+
+	.align 4			/* slot for size 6 */
+24:	st.w	zero, a0, 0
+25:	st.h	zero, a0, 4
+	move	a0, zero
+	jr	ra
+
+	.align 4			/* slot for size 7 */
+26:	st.w	zero, a0, 0
+27:	st.w	zero, a0, 3		/* bytes 3..6; overlaps the store at 26: by one byte */
+	move	a0, zero
+	jr	ra
+
+	.align 4			/* slot for size 8 */
+28:	st.d	zero, a0, 0
+	move	a0, zero
 	jr	ra
 
 	/* fixup and ex_table */
-	_asm_extable 1b, .L_fixup_handle_0
-	_asm_extable 2b, .L_fixup_handle_1
-	_asm_extable 3b, .L_fixup_handle_2
-	_asm_extable 4b, .L_fixup_handle_3
-	_asm_extable 5b, .L_fixup_handle_4
-	_asm_extable 6b, .L_fixup_handle_5
-	_asm_extable 7b, .L_fixup_handle_6
-	_asm_extable 8b, .L_fixup_handle_7
-	_asm_extable 9b, .L_fixup_handle_0
+.Llarge_fixup:
+	sub.d	a1, a2, a0		/* a1 = end - a0: bytes from a0 up to the end */
+
+.Lsmall_fixup:
+29:	st.b	zero, a0, 0		/* clear one byte at a time from a0 */
+	addi.d	a0, a0, 1
+	addi.d	a1, a1, -1
+	bgt	a1, zero, 29b		/* loop while a1 > 0 */
+
+.Lexit:
+	move	a0, a1			/* return the count held in a1 */
+	jr	ra
+
+	_asm_extable 0b, .Lsmall_fixup	/* a0/a1 are still addr/size at 0: */
+	_asm_extable 1b, .Llarge_fixup
+	_asm_extable 2b, .Llarge_fixup
+	_asm_extable 3b, .Llarge_fixup
+	_asm_extable 4b, .Llarge_fixup
+	_asm_extable 5b, .Llarge_fixup
+	_asm_extable 6b, .Llarge_fixup
+	_asm_extable 7b, .Llarge_fixup
+	_asm_extable 8b, .Llarge_fixup
+	_asm_extable 9b, .Llarge_fixup
+	_asm_extable 10b, .Llarge_fixup
+	_asm_extable 11b, .Llarge_fixup
+	_asm_extable 12b, .Llarge_fixup
+	_asm_extable 13b, .Llarge_fixup
+	_asm_extable 14b, .Llarge_fixup
+	_asm_extable 15b, .Llarge_fixup
+	_asm_extable 16b, .Llarge_fixup
+	_asm_extable 17b, .Lexit	/* 1-byte slot: goes straight to .Lexit, no byte loop */
+	_asm_extable 18b, .Lsmall_fixup
+	_asm_extable 19b, .Lsmall_fixup
+	_asm_extable 20b, .Lsmall_fixup
+	_asm_extable 21b, .Lsmall_fixup
+	_asm_extable 22b, .Lsmall_fixup
+	_asm_extable 23b, .Lsmall_fixup
+	_asm_extable 24b, .Lsmall_fixup
+	_asm_extable 25b, .Lsmall_fixup
+	_asm_extable 26b, .Lsmall_fixup
+	_asm_extable 27b, .Lsmall_fixup
+	_asm_extable 28b, .Lsmall_fixup
+	_asm_extable 29b, .Lexit	/* byte-loop store: goes to .Lexit, returning a1 */
 SYM_FUNC_END(__clear_user_fast)
+
+STACK_FRAME_NON_STANDARD __clear_user_fast
|
