58 files changed, 1296 insertions, 1641 deletions
diff --git a/arch/arm/lib/.gitignore b/arch/arm/lib/.gitignore
new file mode 100644
index 000000000000..647d7a922e68
--- /dev/null
+++ b/arch/arm/lib/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+# This now-removed directory used to contain generated files.
+/crypto/
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index af72969820b4..0ca5aae1bcc3 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -1,32 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # linux/arch/arm/lib/Makefile
 #
 # Copyright (C) 1995-2000 Russell King
 #
 
-lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
+lib-y		:= changebit.o csumipv6.o csumpartial.o               \
 		   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
 		   delay.o delay-loop.o findbit.o memchr.o memcpy.o   \
-		   memmove.o memset.o memzero.o setbit.o              \
+		   memmove.o memset.o setbit.o                        \
 		   strchr.o strrchr.o                                 \
 		   testchangebit.o testclearbit.o testsetbit.o        \
 		   ashldi3.o ashrdi3.o lshrdi3.o muldi3.o             \
 		   ucmpdi2.o lib1funcs.o div64.o                      \
 		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
-		   call_with_stack.o
+		   call_with_stack.o bswapsdi2.o
 
-mmu-y	:= clear_user.o copy_page.o getuser.o putuser.o
+mmu-y		:= clear_user.o copy_page.o getuser.o putuser.o       \
+		   copy_from_user.o copy_to_user.o
 
-# the code in uaccess.S is not preemption safe and
-# probably faster on ARMv3 only
-ifeq ($(CONFIG_PREEMPT),y)
-  mmu-y	+= copy_from_user.o copy_to_user.o
+ifdef CONFIG_CC_IS_CLANG
+  lib-y	+= backtrace-clang.o
 else
-ifneq ($(CONFIG_CPU_32v3),y)
-  mmu-y	+= copy_from_user.o copy_to_user.o
-else
-  mmu-y	+= uaccess.o
-endif
+  lib-y	+= backtrace.o
 endif
 
 # using lib_ here won't override already available weak symbols
@@ -40,8 +36,12 @@ else
   lib-y	+= io-readsw-armv4.o io-writesw-armv4.o
 endif
 
-lib-$(CONFIG_ARCH_RPC)		+= ecard.o io-acorn.o floppydma.o
-lib-$(CONFIG_ARCH_SHARK)	+= io-shark.o
-
 $(obj)/csumpartialcopy.o:	$(obj)/csumpartialcopygeneric.S
 $(obj)/csumpartialcopyuser.o:	$(obj)/csumpartialcopygeneric.S
+
+ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
+  CFLAGS_xor-neon.o		+= $(CC_FLAGS_FPU)
+  obj-$(CONFIG_XOR_BLOCKS)	+= xor-neon.o
+endif
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 638deb13da1c..b05e95840651 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl)
  THUMB(	lsrmi	r3, al, ip		)
  THUMB(	orrmi	ah, ah, r3		)
 	mov	al, al, lsl r2
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__ashldi3)
 ENDPROC(__aeabi_llsl)
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 015e8aa5a1d1..275d7d2341a4 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr)
  THUMB(	lslmi	r3, ah, ip		)
  THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, asr r2
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__ashrdi3)
 ENDPROC(__aeabi_lasr)
diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S
new file mode 100644
index 000000000000..290c52a60fc6
--- /dev/null
+++ b/arch/arm/lib/backtrace-clang.S
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  linux/arch/arm/lib/backtrace-clang.S
+ *
+ *  Copyright (C) 2019 Nathan Huckleberry
+ *
+ */
+#include <linux/kern_levels.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+		.text
+
+/* fp is 0 or stack frame */
+
+#define frame	r4
+#define sv_fp	r5
+#define sv_pc	r6
+#define mask	r7
+#define sv_lr	r8
+#define loglvl	r9
+
+ENTRY(c_backtrace)
+
+#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
+		ret	lr
+ENDPROC(c_backtrace)
+#else
+
+
+/*
+ * Clang does not store pc or sp in function prologues so we don't know exactly
+ * where the function starts.
+ *
+ * We can treat the current frame's lr as the saved pc and the preceding
+ * frame's lr as the current frame's lr, but we can't trace the most recent
+ * call.  Inserting a false stack frame allows us to reference the function
+ * called last in the stacktrace.
+ *
+ * If the call instruction was a bl we can look at the caller's branch
+ * instruction to calculate the saved pc.  We can recover the pc in most cases,
+ * but in cases such as calling function pointers we cannot. In this case,
+ * default to using the lr. This will be some address in the function, but will
+ * not be the function start.
+ *
+ * Unfortunately due to the stack frame layout we can't dump r0 - r3, but these
+ * are less frequently saved.
+ *
+ * Stack frame layout:
+ * 		<larger addresses>
+ * 		saved lr
+ * 	frame=> saved fp
+ * 		optionally saved caller registers (r4 - r10)
+ * 		optionally saved arguments (r0 - r3)
+ * 		<top of stack frame>
+ * 		<smaller addresses>
+ *
+ * Functions start with the following code sequence:
+ * corrected pc =>  stmfd sp!, {..., fp, lr}
+ *		add fp, sp, #x
+ *		stmfd sp!, {r0 - r3} (optional)
+ *
+ *
+ *
+ *
+ *
+ *
+ * The diagram below shows an example stack setup for dump_stack.
+ *
+ * The frame for c_backtrace has pointers to the code of dump_stack. This is
+ * why the frame of c_backtrace is used for the pc calculation of
+ * dump_stack. This is why we must move back a frame to print dump_stack.
+ *
+ * The stored locals for dump_stack are in dump_stack's frame. This means that
+ * to fully print dump_stack's frame we need both the frame for dump_stack (for
+ * locals) and the frame that was called by dump_stack (for pc).
+ *
+ * To print locals we must know where the function start is. If we read the
+ * function prologue opcodes we can determine which variables are stored in the
+ * stack frame.
+ *
+ * To find the function start of dump_stack we can look at the stored LR of
+ * show_stack. It points at the instruction directly after the bl dump_stack.
+ * We can then read the offset from the bl opcode to determine where the branch
+ * takes us.  The address calculated must be the start of dump_stack.
+ *
+ * c_backtrace frame           dump_stack:
+ * {[LR]    }  ============|   ...
+ * {[FP]    }  =======|    |   bl c_backtrace
+ *                    |    |=> ...
+ * {[R4-R10]}         |
+ * {[R0-R3] }         |        show_stack:
+ * dump_stack frame   |        ...
+ * {[LR]    } =============|   bl dump_stack
+ * {[FP]    } <=======|    |=> ...
+ * {[R4-R10]}
+ * {[R0-R3] }
+ */
+
+		stmfd	sp!, {r4 - r9, fp, lr}	@ Save an extra register
+						@ to ensure 8 byte alignment
+		movs	frame, r0		@ if frame pointer is zero
+		beq	no_frame		@ we have no stack frames
+		mov	loglvl, r2		@ keep r2 (log level) for later calls
+		tst	r1, #0x10		@ 26 or 32-bit mode?
+		moveq	mask, #0xfc000003	@ mask for 26-bit
+		movne	mask, #0		@ mask for 32-bit
+
+/*
+ * Switches the current frame to be the frame for dump_stack.
+ */
+		add	frame, sp, #24		@ switch to false frame
+for_each_frame:	tst	frame, mask		@ Check for address exceptions
+		bne	no_frame
+
+/*
+ * sv_fp is the stack frame with the locals for the currently considered
+ * function.
+ *
+ * sv_pc is the saved lr from the frame above. This is a pointer to a code
+ * address within the currently considered function, but it is not the function
+ * start. This value gets updated to be the function start later if it is
+ * possible.
+ */
+1001:		ldr	sv_pc, [frame, #4]	@ get saved 'pc'
+1002:		ldr	sv_fp, [frame, #0]	@ get saved fp
+
+		teq	sv_fp, mask		@ make sure next frame exists
+		beq	no_frame
+
+/*
+ * sv_lr is the lr from the function that called the current function. This is
+ * a pointer to a code address in the current function's caller.  sv_lr-4 is
+ * the instruction used to call the current function.
+ *
+ * This sv_lr can be used to calculate the function start if the function was
+ * called using a bl instruction. If the function start can be recovered sv_pc
+ * is overwritten with the function start.
+ *
+ * If the current function was called using a function pointer we cannot
+ * recover the function start and instead continue with sv_pc as an arbitrary
+ * value within the current function. If this is the case we cannot print
+ * registers for the current function, but the stacktrace is still printed
+ * properly.
+ */
+1003:		ldr	sv_lr, [sv_fp, #4]	@ get saved lr from next frame
+
+1004:		ldr	r0, [sv_lr, #-4]	@ get call instruction
+		ldr	r3, .Lopcode+4		@ r3 = bl opcode bits
+		and	r2, r3, r0		@ is this a bl call
+		teq	r2, r3
+		bne	finished_setup		@ give up if it's not
+		and	r0, #0xffffff		@ get call offset 24-bit int
+		lsl	r0, r0, #8		@ sign extend offset
+		asr	r0, r0, #8
+		ldr	sv_pc, [sv_fp, #4]	@ get lr address
+		add	sv_pc, sv_pc, #-4	@ get call instruction address
+		add	sv_pc, sv_pc, #8	@ take care of prefetch
+		add	sv_pc, sv_pc, r0, lsl #2@ find function start
+
+finished_setup:
+
+		bic	sv_pc, sv_pc, mask	@ mask PC/LR for the mode
+
+/*
+ * Print the function (sv_pc) and where it was called from (sv_lr).
+ */
+		mov	r0, sv_pc		@ r0 = function address
+
+		mov	r1, sv_lr		@ r1 = address it was called from
+		mov	r2, frame		@ r2 = current frame
+		bic	r1, r1, mask		@ mask PC/LR for the mode
+		mov	r3, loglvl		@ r3 = log level
+		bl	dump_backtrace_entry
+
+/*
+ * Test if the function start is a stmfd instruction to determine which
+ * registers were stored in the function prologue.
+ *
+ * If we could not recover the sv_pc because we were called through a function
+ * pointer the comparison will fail and no registers will print. Unwinding will
+ * continue as if there had been no registers stored in this frame.
+ */
+1005:		ldr	r1, [sv_pc, #0]		@ if stmfd sp!, {..., fp, lr}
+		ldr	r3, .Lopcode		@ instruction exists,
+		teq	r3, r1, lsr #11		@ compare bits 31:11 with the pattern
+		ldr	r0, [frame]		@ locals are stored in
+						@ the preceding frame
+		subeq	r0, r0, #4
+		mov	r2, loglvl		@ mov keeps the teq flags for bleq
+		bleq	dump_backtrace_stm	@ dump saved registers
+
+/*
+ * Stop if we are out of frames or if the next frame is invalid.
+ */
+		teq	sv_fp, #0		@ zero saved fp means
+		beq	no_frame		@ no further frames
+
+		cmp	sv_fp, frame		@ next frame must be
+		mov	frame, sv_fp		@ above the current frame
+#ifdef CONFIG_IRQSTACKS
+		@
+		@ Kernel stacks may be discontiguous in memory. If the next
+		@ frame is below the previous frame, accept it as long as it
+		@ lives in kernel memory.
+		@
+		cmpls	sv_fp, #PAGE_OFFSET
+#endif
+		bhi	for_each_frame
+
+1006:		adr	r0, .Lbad		@ r0 = format string
+		mov	r1, loglvl		@ r1 = log level for the leading %s
+		mov	r2, frame		@ r2 = bad frame pointer for %p
+		bl	_printk
+no_frame:	ldmfd	sp!, {r4 - r9, fp, pc}	@ restore registers and return
+ENDPROC(c_backtrace)
+		.pushsection __ex_table,"a"
+		.align	3
+		.long	1001b, 1006b
+		.long	1002b, 1006b
+		.long	1003b, 1006b
+		.long	1004b, finished_setup
+		.long   1005b, 1006b
+		.popsection
+
+.Lbad:		.asciz	"%sBacktrace aborted due to bad frame pointer <%p>\n"
+		.align
+.Lopcode:	.word	0xe92d4800 >> 11	@ stmfd sp!, {... fp, lr}
+		.word	0x0b000000		@ bl if these bits are set
+
+#endif
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index cd07b5814c23..293a2716bd20 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -1,15 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/backtrace.S
  *
  *  Copyright (C) 1995, 1996 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * 27/03/03 Ian Molton Clean up CONFIG_CPU
- *
  */
+#include <linux/kern_levels.h>
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 		.text
@@ -21,16 +18,18 @@
 #define sv_pc	r6
 #define mask	r7
 #define offset	r8
+#define loglvl	r9
 
 ENTRY(c_backtrace)
 
 #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
-		mov	pc, lr
+		ret	lr
 ENDPROC(c_backtrace)
 #else
-		stmfd	sp!, {r4 - r8, lr}	@ Save an extra register so we have a location...
+		stmfd	sp!, {r4 - r9, lr}	@ Save an extra register so we have a location...
 		movs	frame, r0		@ if frame pointer is zero
 		beq	no_frame		@ we have no stack frames
+		mov	loglvl, r2
 
 		tst	r1, #0x10		@ 26 or 32-bit mode?
  ARM(		moveq	mask, #0xfc000003	)
@@ -69,39 +68,51 @@ for_each_frame:	tst	frame, mask		@ Check for address exceptions
 
 1003:		ldr	r2, [sv_pc, #-4]	@ if stmfd sp!, {args} exists,
 		ldr	r3, .Ldsi+4		@ adjust saved 'pc' back one
-		teq	r3, r2, lsr #10		@ instruction
+		teq	r3, r2, lsr #11		@ instruction
 		subne	r0, sv_pc, #4		@ allow for mov
 		subeq	r0, sv_pc, #8		@ allow for mov + stmia
 
 		ldr	r1, [frame, #-4]	@ get saved lr
 		mov	r2, frame
 		bic	r1, r1, mask		@ mask PC/LR for the mode
+		mov	r3, loglvl
 		bl	dump_backtrace_entry
 
 		ldr	r1, [sv_pc, #-4]	@ if stmfd sp!, {args} exists,
 		ldr	r3, .Ldsi+4
-		teq	r3, r1, lsr #10
+		teq	r3, r1, lsr #11
 		ldreq	r0, [frame, #-8]	@ get sp
 		subeq	r0, r0, #4		@ point at the last arg
-		bleq	.Ldumpstm		@ dump saved registers
+		mov	r2, loglvl
+		bleq	dump_backtrace_stm	@ dump saved registers
 
 1004:		ldr	r1, [sv_pc, #0]		@ if stmfd sp!, {..., fp, ip, lr, pc}
 		ldr	r3, .Ldsi		@ instruction exists,
-		teq	r3, r1, lsr #10
+		teq	r3, r1, lsr #11
 		subeq	r0, frame, #16
-		bleq	.Ldumpstm		@ dump saved registers
+		mov	r2, loglvl
+		bleq	dump_backtrace_stm	@ dump saved registers
 
 		teq	sv_fp, #0		@ zero saved fp means
 		beq	no_frame		@ no further frames
 
 		cmp	sv_fp, frame		@ next frame must be
 		mov	frame, sv_fp		@ above the current frame
+#ifdef CONFIG_IRQSTACKS
+		@
+		@ Kernel stacks may be discontiguous in memory. If the next
+		@ frame is below the previous frame, accept it as long as it
+		@ lives in kernel memory.
+		@
+		cmpls	sv_fp, #PAGE_OFFSET
+#endif
 		bhi	for_each_frame
 
 1006:		adr	r0, .Lbad
-		mov	r1, frame
-		bl	printk
-no_frame:	ldmfd	sp!, {r4 - r8, pc}
+		mov	r1, loglvl
+		mov	r2, frame
+		bl	_printk
+no_frame:	ldmfd	sp!, {r4 - r9, pc}
 ENDPROC(c_backtrace)
 		
 		.pushsection __ex_table,"a"
@@ -112,41 +123,9 @@ ENDPROC(c_backtrace)
 		.long	1004b, 1006b
 		.popsection
 
-#define instr r4
-#define reg   r5
-#define stack r6
-
-.Ldumpstm:	stmfd	sp!, {instr, reg, stack, r7, lr}
-		mov	stack, r0
-		mov	instr, r1
-		mov	reg, #10
-		mov	r7, #0
-1:		mov	r3, #1
- ARM(		tst	instr, r3, lsl reg	)
- THUMB(		lsl	r3, reg			)
- THUMB(		tst	instr, r3		)
-		beq	2f
-		add	r7, r7, #1
-		teq	r7, #6
-		moveq	r7, #1
-		moveq	r1, #'\n'
-		movne	r1, #' '
-		ldr	r3, [stack], #-4
-		mov	r2, reg
-		adr	r0, .Lfp
-		bl	printk
-2:		subs	reg, reg, #1
-		bpl	1b
-		teq	r7, #0
-		adrne	r0, .Lcr
-		blne	printk
-		ldmfd	sp!, {instr, reg, stack, r7, pc}
-
-.Lfp:		.asciz	"%cr%d:%08x"
-.Lcr:		.asciz	"\n"
-.Lbad:		.asciz	"Backtrace aborted due to bad frame pointer <%p>\n"
+.Lbad:		.asciz	"%sBacktrace aborted due to bad frame pointer <%p>\n"
 		.align
-.Ldsi:		.word	0xe92dd800 >> 10	@ stmfd sp!, {... fp, ip, lr, pc}
-		.word	0xe92d0000 >> 10	@ stmfd sp!, {}
+.Ldsi:		.word	0xe92dd800 >> 11	@ stmfd sp!, {... fp, ip, lr, pc}
+		.word	0xe92d0000 >> 11	@ stmfd sp!, {}
 
 #endif
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index d6408d1ee543..f069d1b2318e 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/assembler.h>
 #include <asm/unwind.h>
 
 #if __LINUX_ARM_ARCH__ >= 6
@@ -5,11 +7,16 @@
 ENTRY(	\name		)
 UNWIND(	.fnstart	)
 	ands	ip, r1, #3
-	strneb	r1, [ip]		@ assert word-aligned
+	strbne	r1, [ip]		@ assert word-aligned
 	mov	r2, #1
 	and	r3, r0, #31		@ Get bit offset
 	mov	r0, r0, lsr #5
 	add	r1, r1, r0, lsl #2	@ Get word offset
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+	.arch_extension	mp
+	ALT_SMP(W(pldw)	[r1])
+	ALT_UP(W(nop))
+#endif
 	mov	r3, r2, lsl r3
 1:	ldrex	r2, [r1]
 	\instr	r2, r2, r3
@@ -21,36 +28,49 @@ UNWIND(	.fnend		)
 ENDPROC(\name		)
 	.endm
 
-	.macro	testop, name, instr, store
+	.macro	__testop, name, instr, store, barrier
 ENTRY(	\name		)
 UNWIND(	.fnstart	)
 	ands	ip, r1, #3
-	strneb	r1, [ip]		@ assert word-aligned
+	strbne	r1, [ip]		@ assert word-aligned
 	mov	r2, #1
 	and	r3, r0, #31		@ Get bit offset
 	mov	r0, r0, lsr #5
 	add	r1, r1, r0, lsl #2	@ Get word offset
 	mov	r3, r2, lsl r3		@ create mask
-	smp_dmb
+	\barrier
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+	.arch_extension	mp
+	ALT_SMP(W(pldw)	[r1])
+	ALT_UP(W(nop))
+#endif
 1:	ldrex	r2, [r1]
 	ands	r0, r2, r3		@ save old value of bit
 	\instr	r2, r2, r3		@ toggle bit
 	strex	ip, r2, [r1]
 	cmp	ip, #0
 	bne	1b
-	smp_dmb
+	\barrier
 	cmp	r0, #0
 	movne	r0, #1
 2:	bx	lr
 UNWIND(	.fnend		)
 ENDPROC(\name		)
 	.endm
+
+	.macro	testop, name, instr, store
+	__testop \name, \instr, \store, smp_dmb
+	.endm
+
+	.macro	sync_testop, name, instr, store
+	__testop \name, \instr, \store, __smp_dmb
+	.endm
 #else
 	.macro	bitop, name, instr
 ENTRY(	\name		)
 UNWIND(	.fnstart	)
 	ands	ip, r1, #3
-	strneb	r1, [ip]		@ assert word-aligned
+	strbne	r1, [ip]		@ assert word-aligned
 	and	r2, r0, #31
 	mov	r0, r0, lsr #5
 	mov	r3, #1
@@ -60,7 +80,7 @@ UNWIND(	.fnstart	)
 	\instr	r2, r2, r3
 	str	r2, [r1, r0, lsl #2]
 	restore_irqs ip
-	mov	pc, lr
+	ret	lr
 UNWIND(	.fnend		)
 ENDPROC(\name		)
 	.endm
@@ -77,7 +97,7 @@ ENDPROC(\name		)
 ENTRY(	\name		)
 UNWIND(	.fnstart	)
 	ands	ip, r1, #3
-	strneb	r1, [ip]		@ assert word-aligned
+	strbne	r1, [ip]		@ assert word-aligned
 	and	r3, r0, #31
 	mov	r0, r0, lsr #5
 	save_and_disable_irqs ip
@@ -88,7 +108,7 @@ UNWIND(	.fnstart	)
 	\store	r2, [r1]
 	moveq	r0, #0
 	restore_irqs ip
-	mov	pc, lr
+	ret	lr
 UNWIND(	.fnend		)
 ENDPROC(\name		)
 	.endm
diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S
new file mode 100644
index 000000000000..591ba077e874
--- /dev/null
+++ b/arch/arm/lib/bswapsdi2.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#if __LINUX_ARM_ARCH__ >= 6
+ENTRY(__bswapsi2)
+	rev r0, r0			@ r0 = byte-reversed r0
+	bx lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+	rev r3, r0			@ r3 = byte-reversed r0
+	rev r0, r1			@ r0 = byte-reversed r1
+	mov r1, r3			@ r1 = byte-reversed original r0
+	bx lr
+ENDPROC(__bswapdi2)
+#else
+ENTRY(__bswapsi2)
+	eor r3, r0, r0, ror #16		@ r3 = r0 ^ (r0 rotated right by 16)
+	mov r3, r3, lsr #8
+	bic r3, r3, #0xff00		@ clear bits 15:8
+	eor r0, r3, r0, ror #8		@ r0 = byte-reversed original r0
+	ret lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+	mov ip, r1			@ ip = original r1, r1 is reused below
+	eor r3, ip, ip, ror #16		@ r3 works on the original r1
+	eor r1, r0, r0, ror #16		@ r1 works on the original r0
+	mov r1, r1, lsr #8
+	mov r3, r3, lsr #8
+	bic r3, r3, #0xff00		@ clear bits 15:8
+	bic r1, r1, #0xff00		@ clear bits 15:8
+	eor r1, r1, r0, ror #8		@ r1 = byte-reversed original r0
+	eor r0, r3, ip, ror #8		@ r0 = byte-reversed original r1
+	ret lr
+ENDPROC(__bswapdi2)
+#endif
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
index 916c80f13ae7..5030d4e8d126 100644
--- a/arch/arm/lib/call_with_stack.S
+++ b/arch/arm/lib/call_with_stack.S
@@ -1,44 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * arch/arm/lib/call_with_stack.S
  *
  * Copyright (C) 2011 ARM Ltd.
  * Written by Will Deacon <will.deacon@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 /*
  * void call_with_stack(void (*fn)(void *), void *arg, void *sp)
  *
  * Change the stack to that pointed at by sp, then invoke fn(arg) with
  * the new stack.
+ *
+ * The sequence below follows the APCS frame convention for frame pointer
+ * unwinding, and implements the unwinder annotations needed by the EABI
+ * unwinder.
  */
-ENTRY(call_with_stack)
-	str	sp, [r2, #-4]!
-	str	lr, [r2, #-4]!
 
+ENTRY(call_with_stack)
+#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
+	mov	ip, sp
+	push	{fp, ip, lr, pc}
+	sub	fp, ip, #4
+#else
+UNWIND( .fnstart		)
+UNWIND( .save	{fpreg, lr}	)
+	push	{fpreg, lr}
+UNWIND( .setfp	fpreg, sp	)
+	mov	fpreg, sp
+#endif
 	mov	sp, r2
 	mov	r2, r0
 	mov	r0, r1
 
-	adr	lr, BSYM(1f)
-	mov	pc, r2
+	bl_r	r2
 
-1:	ldr	lr, [sp]
-	ldr	sp, [sp, #4]
-	mov	pc, lr
+#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
+	ldmdb	fp, {fp, sp, pc}
+#else
+	mov	sp, fpreg
+	pop	{fpreg, pc}
+UNWIND( .fnend			)
+#endif
+	.globl call_with_stack_end
+call_with_stack_end:
 ENDPROC(call_with_stack)
diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S
index f4027862172f..02424765e9e1 100644
--- a/arch/arm/lib/changebit.S
+++ b/arch/arm/lib/changebit.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/changebit.S
  *
  *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index 14a0d988c82c..8f2c4dbfc5f2 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -1,25 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/clear_user.S
  *
  *  Copyright (C) 1995, 1996,1997,1998 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 		.text
 
-/* Prototype: int __clear_user(void *addr, size_t sz)
+/* Prototype: unsigned long arm_clear_user(void *addr, size_t sz)
  * Purpose  : clear some user memory
  * Params   : addr - user memory address to clear
  *          : sz   - number of bytes to clear
  * Returns  : number of bytes NOT cleared
  */
 ENTRY(__clear_user_std)
-WEAK(__clear_user)
+WEAK(arm_clear_user)
+UNWIND(.fnstart)
+UNWIND(.save {r1, lr})
 		stmfd	sp!, {r1, lr}
 		mov	r2, #0
 		cmp	r1, #4
@@ -41,13 +41,14 @@ WEAK(__clear_user)
 		strusr	r2, r0, 1, ne, rept=2
 		tst	r1, #1			@ x1 x0 x1 x0 x1 x0 x1
 		it	ne			@ explicit IT needed for the label
-USER(		strnebt	r2, [r0])
+USER(		strbtne	r2, [r0])
 		mov	r0, #0
 		ldmfd	sp!, {r1, pc}
-ENDPROC(__clear_user)
+UNWIND(.fnend)
+ENDPROC(arm_clear_user)
 ENDPROC(__clear_user_std)
 
-		.pushsection .fixup,"ax"
+		.pushsection .text.fixup,"ax"
 		.align	0
 9001:		ldmfd	sp!, {r0, pc}
 		.popsection
diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S
index f6b75fb64d30..4646dee8a339 100644
--- a/arch/arm/lib/clearbit.S
+++ b/arch/arm/lib/clearbit.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/clearbit.S
  *
  *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 66a477a3e3cc..270de7debd0f 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -1,22 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/copy_from_user.S
  *
  *  Author:	Nicolas Pitre
  *  Created:	Sep 29, 2005
  *  Copyright:	MontaVista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 /*
  * Prototype:
  *
- *	size_t __copy_from_user(void *to, const void *from, size_t n)
+ *	size_t arm_copy_from_user(void *to, const void *from, size_t n)
  *
  * Purpose:
  *
@@ -33,12 +31,13 @@
  *	Number of bytes NOT copied.
  */
 
+#ifdef CONFIG_CPU_USE_DOMAINS
+
 #ifndef CONFIG_THUMB2_KERNEL
 #define LDR1W_SHIFT	0
 #else
 #define LDR1W_SHIFT	1
 #endif
-#define STR1W_SHIFT	0
 
 	.macro ldr1w ptr reg abort
 	ldrusr	\reg, \ptr, 4, abort=\abort
@@ -56,10 +55,30 @@
 	ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort
 	.endm
 
+#else
+
+#define LDR1W_SHIFT	0
+
+	.macro ldr1w ptr reg abort
+	USERL(\abort, W(ldr) \reg, [\ptr], #4)
+	.endm
+
+	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+	USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4})
+	.endm
+
+	.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+	USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
+	.endm
+
+#endif /* CONFIG_CPU_USE_DOMAINS */
+
 	.macro ldr1b ptr reg cond=al abort
 	ldrusr	\reg, \ptr, 1, \cond, abort=\abort
 	.endm
 
+#define STR1W_SHIFT	0
+
 	.macro str1w ptr reg abort
 	W(str) \reg, [\ptr], #4
 	.endm
@@ -69,36 +88,38 @@
 	.endm
 
 	.macro str1b ptr reg cond=al abort
-	str\cond\()b \reg, [\ptr], #1
+	strb\cond \reg, [\ptr], #1
 	.endm
 
-	.macro enter reg1 reg2
+	.macro enter regs:vararg
 	mov	r3, #0
-	stmdb	sp!, {r0, r2, r3, \reg1, \reg2}
+UNWIND( .save	{r0, r2, r3, \regs}		)
+	stmdb	sp!, {r0, r2, r3, \regs}
 	.endm
 
-	.macro exit reg1 reg2
+	.macro exit regs:vararg
 	add	sp, sp, #8
-	ldmfd	sp!, {r0, \reg1, \reg2}
+	ldmfd	sp!, {r0, \regs}
 	.endm
 
 	.text
 
-ENTRY(__copy_from_user)
+ENTRY(arm_copy_from_user)
+#ifdef CONFIG_CPU_SPECTRE
+	ldr	r3, =TASK_SIZE
+	uaccess_mask_range_ptr r1, r2, r3, ip
+#endif
 
 #include "copy_template.S"
 
-ENDPROC(__copy_from_user)
+ENDPROC(arm_copy_from_user)
 
-	.pushsection .fixup,"ax"
+	.pushsection .text.fixup,"ax"
 	.align 0
 	copy_abort_preamble
-	ldmfd	sp!, {r1, r2}
-	sub	r3, r0, r1
-	rsb	r1, r3, r2
-	str	r1, [sp]
-	bl	__memzero
-	ldr	r0, [sp], #4
+	ldmfd	sp!, {r1, r2, r3}
+	sub	r0, r0, r1
+	rsb	r0, r0, r2
 	copy_abort_end
 	.popsection
 
diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
index 6ee2f6706f86..5db1a8ee3d9f 100644
--- a/arch/arm/lib/copy_page.S
+++ b/arch/arm/lib/copy_page.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/copypage.S
  *
  *  Copyright (C) 1995-1999 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  ASM optimised string functions
  */
 #include <linux/linkage.h>
@@ -39,9 +36,9 @@ ENTRY(copy_page)
 	.endr
 		subs	r2, r2, #1			@	1
 		stmia	r0!, {r3, r4, ip, lr}		@	4
-		ldmgtia	r1!, {r3, r4, ip, lr}		@	4
+		ldmiagt	r1!, {r3, r4, ip, lr}		@	4
 		bgt	1b				@	1
-	PLD(	ldmeqia r1!, {r3, r4, ip, lr}	)
+	PLD(	ldmiaeq r1!, {r3, r4, ip, lr}	)
 	PLD(	beq	2b			)
 		ldmfd	sp!, {r4, pc}			@	3
 ENDPROC(copy_page)
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..8fbafb074fe9 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/copy_template.s
  *
@@ -6,10 +7,6 @@
  *  Author:	Nicolas Pitre
  *  Created:	Sep 28, 2005
  *  Copyright:	MontaVista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 
 /*
@@ -53,6 +50,12 @@
  *	data as needed by the implementation including this code. Called
  *	upon code entry.
  *
+ * usave reg1 reg2
+ *
+ *	Unwind annotation macro corresponding to the 'enter' macro.
+ *	It tells the unwinder that the provided registers, plus additional
+ *	data, were preserved on the stack by a prior 'enter' macro.
+ *
  * exit reg1 reg2
  *
  *	Restore registers with the values previously saved with the
@@ -66,8 +69,10 @@
  *	than one 32bit instruction in Thumb-2)
  */
 
-
-		enter	r4, lr
+	UNWIND(	.fnstart			)
+		enter	r4, UNWIND(fpreg,) lr
+	UNWIND(	.setfp	fpreg, sp		)
+	UNWIND(	mov	fpreg, sp		)
 
 		subs	r2, r2, #4
 		blt	8f
@@ -78,12 +83,12 @@
 		bne	10f
 
 1:		subs	r2, r2, #(28)
-		stmfd	sp!, {r5 - r8}
+		stmfd	sp!, {r5, r6, r8, r9}
 		blt	5f
 
 	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	r3, ip, #32		)
-	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
+	CALGN(	sbcsne	r4, r3, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
@@ -97,9 +102,9 @@
 	PLD(	pld	[r1, #92]		)
 
 3:	PLD(	pld	[r1, #124]		)
-4:		ldr8w	r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+4:		ldr8w	r1, r3, r4, r5, r6, r8, r9, ip, lr, abort=20f
 		subs	r2, r2, #32
-		str8w	r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+		str8w	r0, r3, r4, r5, r6, r8, r9, ip, lr, abort=20f
 		bge	3b
 	PLD(	cmn	r2, #96			)
 	PLD(	bge	4b			)
@@ -119,8 +124,8 @@
 		ldr1w	r1, r4, abort=20f
 		ldr1w	r1, r5, abort=20f
 		ldr1w	r1, r6, abort=20f
-		ldr1w	r1, r7, abort=20f
 		ldr1w	r1, r8, abort=20f
+		ldr1w	r1, r9, abort=20f
 		ldr1w	r1, lr, abort=20f
 
 #if LDR1W_SHIFT < STR1W_SHIFT
@@ -137,13 +142,13 @@
 		str1w	r0, r4, abort=20f
 		str1w	r0, r5, abort=20f
 		str1w	r0, r6, abort=20f
-		str1w	r0, r7, abort=20f
 		str1w	r0, r8, abort=20f
+		str1w	r0, r9, abort=20f
 		str1w	r0, lr, abort=20f
 
 	CALGN(	bcs	2b			)
 
-7:		ldmfd	sp!, {r5 - r8}
+7:		ldmfd	sp!, {r5, r6, r8, r9}
 
 8:		movs	r2, r2, lsl #31
 		ldr1b	r1, r3, ne, abort=21f
@@ -153,7 +158,7 @@
 		str1b	r0, r4, cs, abort=21f
 		str1b	r0, ip, cs, abort=21f
 
-		exit	r4, pc
+		exit	r4, UNWIND(fpreg,) pc
 
 9:		rsb	ip, ip, #4
 		cmp	ip, #2
@@ -182,11 +187,11 @@
 
 	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	ip, ip, #32		)
-	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
+	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)
 
-11:		stmfd	sp!, {r5 - r9}
+11:		stmfd	sp!, {r5, r6, r8 - r10}
 
 	PLD(	pld	[r1, #0]		)
 	PLD(	subs	r2, r2, #96		)
@@ -196,39 +201,39 @@
 	PLD(	pld	[r1, #92]		)
 
 12:	PLD(	pld	[r1, #124]		)
-13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
-		mov	r3, lr, pull #\pull
+13:		ldr4w	r1, r4, r5, r6, r8, abort=19f
+		mov	r3, lr, lspull #\pull
 		subs	r2, r2, #32
-		ldr4w	r1, r8, r9, ip, lr, abort=19f
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
-		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
+		ldr4w	r1, r9, r10, ip, lr, abort=19f
+		orr	r3, r3, r4, lspush #\push
+		mov	r4, r4, lspull #\pull
+		orr	r4, r4, r5, lspush #\push
+		mov	r5, r5, lspull #\pull
+		orr	r5, r5, r6, lspush #\push
+		mov	r6, r6, lspull #\pull
+		orr	r6, r6, r8, lspush #\push
+		mov	r8, r8, lspull #\pull
+		orr	r8, r8, r9, lspush #\push
+		mov	r9, r9, lspull #\pull
+		orr	r9, r9, r10, lspush #\push
+		mov	r10, r10, lspull #\pull
+		orr	r10, r10, ip, lspush #\push
+		mov	ip, ip, lspull #\pull
+		orr	ip, ip, lr, lspush #\push
+		str8w	r0, r3, r4, r5, r6, r8, r9, r10, ip, abort=19f
 		bge	12b
 	PLD(	cmn	r2, #96			)
 	PLD(	bge	13b			)
 
-		ldmfd	sp!, {r5 - r9}
+		ldmfd	sp!, {r5, r6, r8 - r10}
 
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov	r3, lr, pull #\pull
+15:		mov	r3, lr, lspull #\pull
 		ldr1w	r1, lr, abort=21f
 		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
+		orr	r3, r3, lr, lspush #\push
 		str1w	r0, r3, abort=21f
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
@@ -246,6 +251,7 @@
 
 18:		forward_copy_shift	pull=24	push=8
 
+	UNWIND(	.fnend				)
 
 /*
  * Abort preamble and completion macros.
@@ -255,13 +261,13 @@
  */
 
 	.macro	copy_abort_preamble
-19:	ldmfd	sp!, {r5 - r9}
+19:	ldmfd	sp!, {r5, r6, r8 - r10}
 	b	21f
-20:	ldmfd	sp!, {r5 - r8}
+20:	ldmfd	sp!, {r5, r6, r8, r9}
 21:
 	.endm
 
 	.macro	copy_abort_end
-	ldmfd	sp!, {r4, pc}
+	ldmfd	sp!, {r4, UNWIND(fpreg,) pc}
 	.endm
 
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index d066df686e17..fac49e57cc0b 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -1,22 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/copy_to_user.S
  *
  *  Author:	Nicolas Pitre
  *  Created:	Sep 29, 2005
  *  Copyright:	MontaVista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 /*
  * Prototype:
  *
- *	size_t __copy_to_user(void *to, const void *from, size_t n)
+ *	size_t arm_copy_to_user(void *to, const void *from, size_t n)
  *
  * Purpose:
  *
@@ -34,11 +32,6 @@
  */
 
 #define LDR1W_SHIFT	0
-#ifndef CONFIG_THUMB2_KERNEL
-#define STR1W_SHIFT	0
-#else
-#define STR1W_SHIFT	1
-#endif
 
 	.macro ldr1w ptr reg abort
 	W(ldr) \reg, [\ptr], #4
@@ -53,9 +46,17 @@
 	.endm
 
 	.macro ldr1b ptr reg cond=al abort
-	ldr\cond\()b \reg, [\ptr], #1
+	ldrb\cond \reg, [\ptr], #1
 	.endm
 
+#ifdef CONFIG_CPU_USE_DOMAINS
+
+#ifndef CONFIG_THUMB2_KERNEL
+#define STR1W_SHIFT	0
+#else
+#define STR1W_SHIFT	1
+#endif
+
 	.macro str1w ptr reg abort
 	strusr	\reg, \ptr, 4, abort=\abort
 	.endm
@@ -71,31 +72,50 @@
 	str1w \ptr, \reg8, \abort
 	.endm
 
+#else
+
+#define STR1W_SHIFT	0
+
+	.macro str1w ptr reg abort
+	USERL(\abort, W(str) \reg, [\ptr], #4)
+	.endm
+
+	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+	USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
+	.endm
+
+#endif /* CONFIG_CPU_USE_DOMAINS */
+
 	.macro str1b ptr reg cond=al abort
 	strusr	\reg, \ptr, 1, \cond, abort=\abort
 	.endm
 
-	.macro enter reg1 reg2
+	.macro enter regs:vararg
 	mov	r3, #0
-	stmdb	sp!, {r0, r2, r3, \reg1, \reg2}
+UNWIND( .save	{r0, r2, r3, \regs}		)
+	stmdb	sp!, {r0, r2, r3, \regs}
 	.endm
 
-	.macro exit reg1 reg2
+	.macro exit regs:vararg
 	add	sp, sp, #8
-	ldmfd	sp!, {r0, \reg1, \reg2}
+	ldmfd	sp!, {r0, \regs}
 	.endm
 
 	.text
 
 ENTRY(__copy_to_user_std)
-WEAK(__copy_to_user)
+WEAK(arm_copy_to_user)
+#ifdef CONFIG_CPU_SPECTRE
+	ldr	r3, =TASK_SIZE
+	uaccess_mask_range_ptr r0, r2, r3, ip
+#endif
 
 #include "copy_template.S"
 
-ENDPROC(__copy_to_user)
+ENDPROC(arm_copy_to_user)
 ENDPROC(__copy_to_user_std)
 
-	.pushsection .fixup,"ax"
+	.pushsection .text.fixup,"ax"
 	.align 0
 	copy_abort_preamble
 	ldmfd	sp!, {r1, r2, r3}
@@ -103,4 +123,3 @@ ENDPROC(__copy_to_user_std)
 	rsb	r0, r0, r2
 	copy_abort_end
 	.popsection
-
diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S
index 3ac6ef01bc43..3559d515144c 100644
--- a/arch/arm/lib/csumipv6.S
+++ b/arch/arm/lib/csumipv6.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/csumipv6.S
  *
  *  Copyright (C) 1995-1998 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index 31d3cb34740d..87c9471be8b6 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/csumpartial.S
  *
  *  Copyright (C) 1995-1998 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -40,9 +37,9 @@ td3	.req	lr
 		/* we must have at least one byte. */
 		tst	buf, #1			@ odd address?
 		movne	sum, sum, ror #8
-		ldrneb	td0, [buf], #1
+		ldrbne	td0, [buf], #1
 		subne	len, len, #1
-		adcnes	sum, sum, td0, put_byte_1
+		adcsne	sum, sum, td0, put_byte_1
 
 .Lless4:		tst	len, #6
 		beq	.Lless8_byte
@@ -68,8 +65,8 @@ td3	.req	lr
 		bne	.Lless8_wordlp
 
 .Lless8_byte:	tst	len, #1			@ odd number of bytes
-		ldrneb	td0, [buf], #1		@ include last byte
-		adcnes	sum, sum, td0, put_byte_0	@ update checksum
+		ldrbne	td0, [buf], #1		@ include last byte
+		adcsne	sum, sum, td0, put_byte_0	@ update checksum
 
 .Ldone:		adc	r0, sum, #0		@ collect up the last carry
 		ldr	td0, [sp], #4
@@ -78,17 +75,17 @@ td3	.req	lr
 		ldr	pc, [sp], #4		@ return
 
 .Lnot_aligned:	tst	buf, #1			@ odd address
-		ldrneb	td0, [buf], #1		@ make even
+		ldrbne	td0, [buf], #1		@ make even
 		subne	len, len, #1
-		adcnes	sum, sum, td0, put_byte_1	@ update checksum
+		adcsne	sum, sum, td0, put_byte_1	@ update checksum
 
 		tst	buf, #2			@ 32-bit aligned?
 #if __LINUX_ARM_ARCH__ >= 4
-		ldrneh	td0, [buf], #2		@ make 32-bit aligned
+		ldrhne	td0, [buf], #2		@ make 32-bit aligned
 		subne	len, len, #2
 #else
-		ldrneb	td0, [buf], #1
-		ldrneb	ip, [buf], #1
+		ldrbne	td0, [buf], #1
+		ldrbne	ip, [buf], #1
 		subne	len, len, #2
 #ifndef __ARMEB__
 		orrne	td0, td0, ip, lsl #8
@@ -96,8 +93,8 @@ td3	.req	lr
 		orrne	td0, ip, td0, lsl #8
 #endif
 #endif
-		adcnes	sum, sum, td0		@ update checksum
-		mov	pc, lr
+		adcsne	sum, sum, td0		@ update checksum
+		ret	lr
 
 ENTRY(csum_partial)
 		stmfd	sp!, {buf, lr}
diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S
index d03fc71fc88c..1ca6aadd649c 100644
--- a/arch/arm/lib/csumpartialcopy.S
+++ b/arch/arm/lib/csumpartialcopy.S
@@ -1,19 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/csumpartialcopy.S
  *
  *  Copyright (C) 1995-1998 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
 		.text
 
-/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum)
- * Params  : r0 = src, r1 = dst, r2 = len, r3 = checksum
+/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len)
+ * Params  : r0 = src, r1 = dst, r2 = len
  * Returns : r0 = new checksum
  */
 
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..0fd5c10e90a7 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -1,12 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/csumpartialcopygeneric.S
  *
  *  Copyright (C) 1995-2001 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
+#include <asm/assembler.h>
 
 /*
  * unsigned int
@@ -40,7 +38,7 @@ sum	.req	r3
 		adcs	sum, sum, ip, put_byte_1	@ update checksum
 		strb	ip, [dst], #1
 		tst	dst, #2
-		moveq	pc, lr			@ dst is now 32bit aligned
+		reteq	lr			@ dst is now 32bit aligned
 
 .Ldst_16bit:	load2b	r8, ip
 		sub	len, len, #2
@@ -48,7 +46,7 @@ sum	.req	r3
 		strb	r8, [dst], #1
 		adcs	sum, sum, ip, put_byte_1
 		strb	ip, [dst], #1
-		mov	pc, lr			@ dst is now 32bit aligned
+		ret	lr			@ dst is now 32bit aligned
 
 		/*
 		 * Handle 0 to 7 bytes, with any alignment of source and
@@ -88,6 +86,7 @@ sum	.req	r3
 
 FN_ENTRY
 		save_regs
+		mov	sum, #-1
 
 		cmp	len, #8			@ Ensure that we have at least
 		blo	.Lless8			@ 8 bytes to copy.
@@ -141,15 +140,15 @@ FN_ENTRY
 		tst	len, #2
 		mov	r5, r4, get_byte_0
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_2
 .Lexit:		tst	len, #1
-		strneb	r5, [dst], #1
+		strbne	r5, [dst], #1
 		andne	r5, r5, #255
-		adcnes	sum, sum, r5, put_byte_0
+		adcsne	sum, sum, r5, put_byte_0
 
 		/*
 		 * If the dst pointer was not 16-bit aligned, we
@@ -171,23 +170,23 @@ FN_ENTRY
 		cmp	ip, #2
 		beq	.Lsrc2_aligned
 		bhi	.Lsrc3_aligned
-		mov	r4, r5, pull #8		@ C = 0
+		mov	r4, r5, lspull #8		@ C = 0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
-		mov	r7, r7, pull #8
-		orr	r7, r7, r8, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
+		mov	r7, r7, lspull #8
+		orr	r7, r7, r8, lspush #24
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #8
+		mov	r4, r8, lspull #8
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -196,50 +195,50 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #8
+		mov	r4, r6, lspull #8
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #24
+		orr	r4, r4, r5, lspush #24
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #8
+		mov	r4, r5, lspull #8
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
 		tst	len, #2
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_2
 		b	.Lexit
 
-.Lsrc2_aligned:	mov	r4, r5, pull #16
+.Lsrc2_aligned:	mov	r4, r5, lspull #16
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
-		mov	r7, r7, pull #16
-		orr	r7, r7, r8, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
+		mov	r7, r7, lspull #16
+		orr	r7, r7, r8, lspush #16
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #16
+		mov	r4, r8, lspull #16
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -248,20 +247,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #16
+		mov	r4, r6, lspull #16
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #16
+		orr	r4, r4, r5, lspush #16
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #16
+		mov	r4, r5, lspull #16
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -276,24 +275,24 @@ FN_ENTRY
 		load1b	r5
 		b	.Lexit
 
-.Lsrc3_aligned:	mov	r4, r5, pull #24
+.Lsrc3_aligned:	mov	r4, r5, lspull #24
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
-		mov	r7, r7, pull #24
-		orr	r7, r7, r8, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
+		mov	r7, r7, lspull #24
+		orr	r7, r7, r8, lspush #8
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #24
+		mov	r4, r8, lspull #24
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -302,20 +301,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #24
+		mov	r4, r6, lspull #24
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #8
+		orr	r4, r4, r5, lspush #8
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #24
+		mov	r4, r5, lspull #24
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -326,7 +325,7 @@ FN_ENTRY
 		load1l	r4
 		mov	r5, r4, get_byte_0
 		strb	r5, [dst], #1
-		adcs	sum, sum, r4, push #24
+		adcs	sum, sum, r4, lspush #24
 		mov	r5, r4, get_byte_1
 		b	.Lexit
 FN_EXIT
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 7d08b43d2c0e..c289bde04743 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/csumpartialcopyuser.S
  *
  *  Copyright (C) 1995-1998 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * 27/03/03 Ian Molton Clean up CONFIG_CPU
- *
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -17,6 +13,36 @@
 
 		.text
 
+#if defined(CONFIG_CPU_SW_DOMAIN_PAN)
+
+		.macro	save_regs
+		mrc	p15, 0, ip, c3, c0, 0
+		stmfd	sp!, {r1, r2, r4 - r8, ip, lr}
+		uaccess_enable ip
+		.endm
+
+		.macro	load_regs
+		ldmfd	sp!, {r1, r2, r4 - r8, ip, lr}
+		mcr	p15, 0, ip, c3, c0, 0
+		ret	lr
+		.endm
+
+#elif defined(CONFIG_CPU_TTBR0_PAN)
+
+		.macro	save_regs
+		mrc	p15, 0, ip, c2, c0, 2		@ read TTBCR
+		stmfd	sp!, {r1, r2, r4 - r8, ip, lr}
+		uaccess_enable ip
+		.endm
+
+		.macro	load_regs
+		ldmfd	sp!, {r1, r2, r4 - r8, ip, lr}
+		mcr	p15, 0, ip, c2, c0, 2		@ restore TTBCR
+		ret	lr
+		.endm
+
+#else
+
 		.macro	save_regs
 		stmfd	sp!, {r1, r2, r4 - r8, lr}
 		.endm
@@ -25,6 +51,8 @@
 		ldmfd	sp!, {r1, r2, r4 - r8, pc}
 		.endm
 
+#endif
+
 		.macro	load1b,	reg1
 		ldrusr	\reg1, r0, 1
 		.endm
@@ -52,9 +80,9 @@
 
 /*
  * unsigned int
- * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr)
- *  r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr
- *  Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT
+ * csum_partial_copy_from_user(const char *src, char *dst, int len)
+ *  r0 = src, r1 = dst, r2 = len
+ *  Returns : r0 = checksum, or 0 if a fault occurred
  */
 
 #define FN_ENTRY	ENTRY(csum_partial_copy_from_user)
@@ -63,21 +91,11 @@
 #include "csumpartialcopygeneric.S"
 
 /*
- * FIXME: minor buglet here
- * We don't return the checksum for the data present in the buffer.  To do
- * so properly, we would have to add in whatever registers were loaded before
- * the fault, which, with the current asm above is not predictable.
+ * We report a fault by returning a checksum of 0 - impossible in the normal
+ * case, since we start with 0xffffffff as the initial sum.
  */
-		.pushsection .fixup,"ax"
+		.pushsection .text.fixup,"ax"
 		.align	4
-9001:		mov	r4, #-EFAULT
-		ldr	r5, [sp, #8*4]		@ *err_ptr
-		str	r4, [r5]
-		ldmia	sp, {r1, r2}		@ retrieve dst, len
-		add	r2, r2, r1
-		mov	r0, #0			@ zero the buffer
-9002:		teq	r2, r1
-		strneb	r0, [r1], #1
-		bne	9002b
+9001:		mov	r0, #0
 		load_regs
 		.popsection
diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S
index 36b668d8e121..33b08ca1c242 100644
--- a/arch/arm/lib/delay-loop.S
+++ b/arch/arm/lib/delay-loop.S
@@ -1,67 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/delay.S
  *
  *  Copyright (C) 1995, 1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 #include <asm/assembler.h>
 #include <asm/delay.h>
+
+#ifdef CONFIG_ARCH_RPC
+		.arch	armv4
+#endif
+
 		.text
 
 .LC0:		.word	loops_per_jiffy
 .LC1:		.word	UDELAY_MULT
 
 /*
+ * loops = r0 * HZ * loops_per_jiffy / 1000000
+ *
  * r0  <= 2000
- * lpj <= 0x01ffffff (max. 3355 bogomips)
  * HZ  <= 1000
  */
 
-ENTRY(__loop_udelay)
+SYM_TYPED_FUNC_START(__loop_udelay)
 		ldr	r2, .LC1
-		mul	r0, r2, r0
-ENTRY(__loop_const_udelay)			@ 0 <= r0 <= 0x7fffff06
-		mov	r1, #-1
+		mul	r0, r2, r0		@ r0 = delay_us * UDELAY_MULT
+		b	__loop_const_udelay
+SYM_FUNC_END(__loop_udelay)
+
+SYM_TYPED_FUNC_START(__loop_const_udelay)	@ 0 <= r0 <= 0xfffffaf0
 		ldr	r2, .LC0
-		ldr	r2, [r2]		@ max = 0x01ffffff
-		add	r0, r0, r1, lsr #32-14
-		mov	r0, r0, lsr #14		@ max = 0x0001ffff
-		add	r2, r2, r1, lsr #32-10
-		mov	r2, r2, lsr #10		@ max = 0x00007fff
-		mul	r0, r2, r0		@ max = 2^32-1
-		add	r0, r0, r1, lsr #32-6
-		movs	r0, r0, lsr #6
-		moveq	pc, lr
+		ldr	r2, [r2]
+		umull	r1, r0, r2, r0		@ r0-r1 = r0 * loops_per_jiffy
+		adds	r1, r1, #0xffffffff	@ rounding up ...
+		adcs	r0, r0, r0		@ and right shift by 31
+		reteq	lr
+		b	__loop_delay
+SYM_FUNC_END(__loop_const_udelay)
 
-/*
- * loops = r0 * HZ * loops_per_jiffy / 1000000
- */
+		.align 3
 
 @ Delay routine
-ENTRY(__loop_delay)
+SYM_TYPED_FUNC_START(__loop_delay)
 		subs	r0, r0, #1
 #if 0
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
 #endif
 		bhi	__loop_delay
-		mov	pc, lr
-ENDPROC(__loop_udelay)
-ENDPROC(__loop_const_udelay)
-ENDPROC(__loop_delay)
+		ret	lr
+SYM_FUNC_END(__loop_delay)
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
index 64dbfa57204a..b7fe84f68bf1 100644
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -1,24 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Delay loops based on the OpenRISC implementation.
  *
  * Copyright (C) 2012 ARM Limited
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
  * Author: Will Deacon <will.deacon@arm.com>
  */
 
+#include <linux/clocksource.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -28,7 +17,7 @@
 /*
  * Default to the loop-based delay implementation.
  */
-struct arm_delay_ops arm_delay_ops = {
+struct arm_delay_ops arm_delay_ops __ro_after_init = {
 	.delay		= __loop_delay,
 	.const_udelay	= __loop_const_udelay,
 	.udelay		= __loop_udelay,
@@ -36,6 +25,7 @@ struct arm_delay_ops arm_delay_ops = {
 
 static const struct delay_timer *delay_timer;
 static bool delay_calibrated;
+static u64 delay_res;
 
 int read_current_timer(unsigned long *timer_val)
 {
@@ -47,6 +37,11 @@ int read_current_timer(unsigned long *timer_val)
 }
 EXPORT_SYMBOL_GPL(read_current_timer);
 
+static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
+{
+	return (cyc * mult) >> shift;
+}
+
 static void __timer_delay(unsigned long cycles)
 {
 	cycles_t start = get_cycles();
@@ -69,25 +64,42 @@ static void __timer_udelay(unsigned long usecs)
 
 void __init register_current_timer_delay(const struct delay_timer *timer)
 {
-	if (!delay_calibrated) {
-		pr_info("Switching to timer-based delay loop\n");
+	u32 new_mult, new_shift;
+	u64 res;
+
+	clocks_calc_mult_shift(&new_mult, &new_shift, timer->freq,
+			       NSEC_PER_SEC, 3600);
+	res = cyc_to_ns(1ULL, new_mult, new_shift);
+
+	if (res > 1000) {
+		pr_err("Ignoring delay timer %ps, which has insufficient resolution of %lluns\n",
+			timer, res);
+		return;
+	}
+
+	if (!delay_calibrated && (!delay_res || (res < delay_res))) {
+		pr_info("Switching to timer-based delay loop, resolution %lluns\n", res);
 		delay_timer			= timer;
 		lpj_fine			= timer->freq / HZ;
+		delay_res			= res;
 
 		/* cpufreq may scale loops_per_jiffy, so keep a private copy */
 		arm_delay_ops.ticks_per_jiffy	= lpj_fine;
 		arm_delay_ops.delay		= __timer_delay;
 		arm_delay_ops.const_udelay	= __timer_const_udelay;
 		arm_delay_ops.udelay		= __timer_udelay;
-
-		delay_calibrated		= true;
 	} else {
 		pr_info("Ignoring duplicate/late registration of read_current_timer delay\n");
 	}
 }
 
-unsigned long __cpuinit calibrate_delay_is_known(void)
+unsigned long calibrate_delay_is_known(void)
 {
 	delay_calibrated = true;
 	return lpj_fine;
 }
+
+void calibration_delay_done(void)
+{
+	delay_calibrated = true;
+}
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index e55c4842c290..a87c02925ffa 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/div64.S
  *
@@ -6,13 +7,10 @@
  *  Author:	Nicolas Pitre
  *  Created:	Oct 5, 2003
  *  Copyright:	Monta Vista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/unwind.h>
 
 #ifdef __ARMEB__
@@ -87,8 +85,8 @@ UNWIND(.fnstart)
  	@ Break out early if dividend reaches 0.
 2:	cmp	xh, yl
 	orrcs	yh, yh, ip
-	subcss	xh, xh, yl
-	movnes	ip, ip, lsr #1
+	subscs	xh, xh, yl
+	movsne	ip, ip, lsr #1
 	mov	yl, yl, lsr #1
 	bne	2b
 
@@ -97,7 +95,7 @@ UNWIND(.fnstart)
 	mov	yl, #0
 	cmpeq	xl, r4
 	movlo	xh, xl
-	movlo	pc, lr
+	retlo	lr
 
 	@ The division loop for lower bit positions.
 	@ Here we shift remainer bits leftwards rather than moving the
@@ -111,14 +109,14 @@ UNWIND(.fnstart)
 	subcs	xh, xh, r4
 	movs	ip, ip, lsr #1
 	bne	4b
-	mov	pc, lr
+	ret	lr
 
 	@ The top part of remainder became zero.  If carry is set
 	@ (the 33th bit) this is a false positive so resume the loop.
 	@ Otherwise, if lower part is also null then we are done.
 6:	bcs	5b
 	cmp	xl, #0
-	moveq	pc, lr
+	reteq	lr
 
 	@ We still have remainer bits in the low part.  Bring them up.
 
@@ -144,7 +142,7 @@ UNWIND(.fnstart)
 	movs	ip, ip, lsr #1
 	mov	xh, #1
 	bne	4b
-	mov	pc, lr
+	ret	lr
 
 8:	@ Division by a power of 2: determine what that divisor order is
 	@ then simply shift values around
@@ -184,13 +182,13 @@ UNWIND(.fnstart)
  THUMB(	orr	yl, yl, xh		)
 	mov	xh, xl, lsl ip
 	mov	xh, xh, lsr ip
-	mov	pc, lr
+	ret	lr
 
 	@ eq -> division by 1: obvious enough...
 9:	moveq	yl, xl
 	moveq	yh, xh
 	moveq	xh, #0
-	moveq	pc, lr
+	reteq	lr
 UNWIND(.fnend)
 
 UNWIND(.fnstart)
diff --git a/arch/arm/lib/ecard.S b/arch/arm/lib/ecard.S
deleted file mode 100644
index e6057fa851bb..000000000000
--- a/arch/arm/lib/ecard.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- *  linux/arch/arm/lib/ecard.S
- *
- *  Copyright (C) 1995, 1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 27/03/03 Ian Molton Clean up CONFIG_CPU
- *
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-#define CPSR2SPSR(rt) \
-		mrs	rt, cpsr; \
-		msr	spsr_cxsf, rt
-
-@ Purpose: call an expansion card loader to read bytes.
-@ Proto  : char read_loader(int offset, char *card_base, char *loader);
-@ Returns: byte read
-
-ENTRY(ecard_loader_read)
-		stmfd	sp!, {r4 - r12, lr}
-		mov	r11, r1
-		mov	r1, r0
-		CPSR2SPSR(r0)
-		mov	lr, pc
-		mov	pc, r2
-		ldmfd	sp!, {r4 - r12, pc}
-
-@ Purpose: call an expansion card loader to reset the card
-@ Proto  : void read_loader(int card_base, char *loader);
-@ Returns: byte read
-
-ENTRY(ecard_loader_reset)
-		stmfd	sp!, {r4 - r12, lr}
-		mov	r11, r0
-		CPSR2SPSR(r0)
-		mov	lr, pc
-		add	pc, r1, #8
-		ldmfd	sp!, {r4 - r12, pc}
-
diff --git a/arch/arm/lib/error-inject.c b/arch/arm/lib/error-inject.c
new file mode 100644
index 000000000000..5a5b405792ba
--- /dev/null
+++ b/arch/arm/lib/error-inject.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+	instruction_pointer_set(regs, regs->ARM_lr);
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 64f6bc1a9132..b7ac2d3c0748 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/findbit.S
  *
  *  Copyright (C) 1995-2000 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * 16th March 2001 - John Ripley <jripley@sonicblue.com>
  *   Fixed so that "size" is an exclusive not an inclusive quantity.
  *   All users of these functions expect exclusive sizes, and may
@@ -15,182 +12,128 @@
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
                 .text
 
-/*
- * Purpose  : Find a 'zero' bit
- * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
- */
-ENTRY(_find_first_zero_bit_le)
-		teq	r1, #0	
-		beq	3f
-		mov	r2, #0
-1:
- ARM(		ldrb	r3, [r0, r2, lsr #3]	)
- THUMB(		lsr	r3, r2, #3		)
- THUMB(		ldrb	r3, [r0, r3]		)
-		eors	r3, r3, #0xff		@ invert bits
-		bne	.L_found		@ any now set - found zero bit
-		add	r2, r2, #8		@ next bit pointer
-2:		cmp	r2, r1			@ any more?
-		blo	1b
-3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
-ENDPROC(_find_first_zero_bit_le)
+#ifdef __ARMEB__
+#define SWAB_ENDIAN le
+#else
+#define SWAB_ENDIAN be
+#endif
 
-/*
- * Purpose  : Find next 'zero' bit
- * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
- */
-ENTRY(_find_next_zero_bit_le)
+		.macro	find_first, endian, set, name
+ENTRY(_find_first_\name\()bit_\endian)
+	UNWIND(	.fnstart)
 		teq	r1, #0
-		beq	3b
-		ands	ip, r2, #7
-		beq	1b			@ If new byte, goto old routine
- ARM(		ldrb	r3, [r0, r2, lsr #3]	)
- THUMB(		lsr	r3, r2, #3		)
- THUMB(		ldrb	r3, [r0, r3]		)
-		eor	r3, r3, #0xff		@ now looking for a 1 bit
-		movs	r3, r3, lsr ip		@ shift off unused bits
-		bne	.L_found
-		orr	r2, r2, #7		@ if zero, then no bits here
-		add	r2, r2, #1		@ align bit pointer
-		b	2b			@ loop for next bit
-ENDPROC(_find_next_zero_bit_le)
-
-/*
- * Purpose  : Find a 'one' bit
- * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit);
- */
-ENTRY(_find_first_bit_le)
-		teq	r1, #0	
 		beq	3f
 		mov	r2, #0
-1:
- ARM(		ldrb	r3, [r0, r2, lsr #3]	)
- THUMB(		lsr	r3, r2, #3		)
- THUMB(		ldrb	r3, [r0, r3]		)
-		movs	r3, r3
-		bne	.L_found		@ any now set - found zero bit
-		add	r2, r2, #8		@ next bit pointer
+1:		ldr	r3, [r0], #4
+		.ifeq \set
+		mvns	r3, r3			@ invert/test bits
+		.else
+		movs	r3, r3			@ test bits
+		.endif
+		.ifc \endian, SWAB_ENDIAN
+		bne	.L_found_swab
+		.else
+		bne	.L_found		@ found the bit?
+		.endif
+		add	r2, r2, #32		@ next index
 2:		cmp	r2, r1			@ any more?
 		blo	1b
-3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
-ENDPROC(_find_first_bit_le)
+3:		mov	r0, r1			@ no more bits
+		ret	lr
+	UNWIND(	.fnend)
+ENDPROC(_find_first_\name\()bit_\endian)
+		.endm
 
-/*
- * Purpose  : Find next 'one' bit
- * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
- */
-ENTRY(_find_next_bit_le)
-		teq	r1, #0
-		beq	3b
-		ands	ip, r2, #7
-		beq	1b			@ If new byte, goto old routine
- ARM(		ldrb	r3, [r0, r2, lsr #3]	)
- THUMB(		lsr	r3, r2, #3		)
- THUMB(		ldrb	r3, [r0, r3]		)
+		.macro	find_next, endian, set, name
+ENTRY(_find_next_\name\()bit_\endian)
+	UNWIND(	.fnstart)
+		cmp	r2, r1
+		bhs	3b
+		mov	ip, r2, lsr #5		@ word index
+		add	r0, r0, ip, lsl #2
+		ands	ip, r2, #31		@ bit position
+		beq	1b
+		ldr	r3, [r0], #4
+		.ifeq \set
+		mvn	r3, r3			@ invert bits
+		.endif
+		.ifc \endian, SWAB_ENDIAN
+		rev_l	r3, ip
+		.if	.Lrev_l_uses_tmp
+		@ we need to recompute ip because rev_l will have overwritten
+		@ it.
+		and	ip, r2, #31		@ bit position
+		.endif
+		.endif
 		movs	r3, r3, lsr ip		@ shift off unused bits
 		bne	.L_found
-		orr	r2, r2, #7		@ if zero, then no bits here
+		orr	r2, r2, #31		@ none found in this word
 		add	r2, r2, #1		@ align bit pointer
 		b	2b			@ loop for next bit
-ENDPROC(_find_next_bit_le)
+	UNWIND(	.fnend)
+ENDPROC(_find_next_\name\()bit_\endian)
+		.endm
 
-#ifdef __ARMEB__
+		.macro	find_bit, endian, set, name
+		find_first \endian, \set, \name
+		find_next  \endian, \set, \name
+		.endm
 
-ENTRY(_find_first_zero_bit_be)
-		teq	r1, #0
-		beq	3f
-		mov	r2, #0
-1:		eor	r3, r2, #0x18		@ big endian byte ordering
- ARM(		ldrb	r3, [r0, r3, lsr #3]	)
- THUMB(		lsr	r3, #3			)
- THUMB(		ldrb	r3, [r0, r3]		)
-		eors	r3, r3, #0xff		@ invert bits
-		bne	.L_found		@ any now set - found zero bit
-		add	r2, r2, #8		@ next bit pointer
-2:		cmp	r2, r1			@ any more?
-		blo	1b
-3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
-ENDPROC(_find_first_zero_bit_be)
+/* _find_first_zero_bit_le and _find_next_zero_bit_le */
+		find_bit le, 0, zero_
 
-ENTRY(_find_next_zero_bit_be)
-		teq	r1, #0
-		beq	3b
-		ands	ip, r2, #7
-		beq	1b			@ If new byte, goto old routine
-		eor	r3, r2, #0x18		@ big endian byte ordering
- ARM(		ldrb	r3, [r0, r3, lsr #3]	)
- THUMB(		lsr	r3, #3			)
- THUMB(		ldrb	r3, [r0, r3]		)
-		eor	r3, r3, #0xff		@ now looking for a 1 bit
-		movs	r3, r3, lsr ip		@ shift off unused bits
-		bne	.L_found
-		orr	r2, r2, #7		@ if zero, then no bits here
-		add	r2, r2, #1		@ align bit pointer
-		b	2b			@ loop for next bit
-ENDPROC(_find_next_zero_bit_be)
+/* _find_first_bit_le and _find_next_bit_le */
+		find_bit le, 1
 
-ENTRY(_find_first_bit_be)
-		teq	r1, #0
-		beq	3f
-		mov	r2, #0
-1:		eor	r3, r2, #0x18		@ big endian byte ordering
- ARM(		ldrb	r3, [r0, r3, lsr #3]	)
- THUMB(		lsr	r3, #3			)
- THUMB(		ldrb	r3, [r0, r3]		)
-		movs	r3, r3
-		bne	.L_found		@ any now set - found zero bit
-		add	r2, r2, #8		@ next bit pointer
-2:		cmp	r2, r1			@ any more?
-		blo	1b
-3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
-ENDPROC(_find_first_bit_be)
+#ifdef __ARMEB__
 
-ENTRY(_find_next_bit_be)
-		teq	r1, #0
-		beq	3b
-		ands	ip, r2, #7
-		beq	1b			@ If new byte, goto old routine
-		eor	r3, r2, #0x18		@ big endian byte ordering
- ARM(		ldrb	r3, [r0, r3, lsr #3]	)
- THUMB(		lsr	r3, #3			)
- THUMB(		ldrb	r3, [r0, r3]		)
-		movs	r3, r3, lsr ip		@ shift off unused bits
-		bne	.L_found
-		orr	r2, r2, #7		@ if zero, then no bits here
-		add	r2, r2, #1		@ align bit pointer
-		b	2b			@ loop for next bit
-ENDPROC(_find_next_bit_be)
+/* _find_first_zero_bit_be and _find_next_zero_bit_be */
+		find_bit be, 0, zero_
+
+/* _find_first_bit_be and _find_next_bit_be */
+		find_bit be, 1
 
 #endif
 
 /*
  * One or more bits in the LSB of r3 are assumed to be set.
  */
+.L_found_swab:
+	UNWIND(	.fnstart)
+		rev_l	r3, ip
 .L_found:
-#if __LINUX_ARM_ARCH__ >= 5
+#if __LINUX_ARM_ARCH__ >= 7
+		rbit	r3, r3			@ reverse bits
+		clz	r3, r3			@ count high zero bits
+		add	r0, r2, r3		@ add offset of first set bit
+#elif __LINUX_ARM_ARCH__ >= 5
 		rsb	r0, r3, #0
-		and	r3, r3, r0
-		clz	r3, r3
-		rsb	r3, r3, #31
-		add	r0, r2, r3
+		and	r3, r3, r0		@ mask out lowest bit set
+		clz	r3, r3			@ count high zero bits
+		rsb	r3, r3, #31		@ offset of first set bit
+		add	r0, r2, r3		@ add offset of first set bit
 #else
-		tst	r3, #0x0f
+		mov	ip, #~0
+		tst	r3, ip, lsr #16		@ test bits 0-15
+		addeq	r2, r2, #16
+		moveq	r3, r3, lsr #16
+		tst	r3, #0x00ff
+		addeq	r2, r2, #8
+		moveq	r3, r3, lsr #8
+		tst	r3, #0x000f
 		addeq	r2, r2, #4
-		movne	r3, r3, lsl #4
-		tst	r3, #0x30
+		moveq	r3, r3, lsr #4
+		tst	r3, #0x0003
 		addeq	r2, r2, #2
-		movne	r3, r3, lsl #2
-		tst	r3, #0x40
+		moveq	r3, r3, lsr #2
+		tst	r3, #0x0001
 		addeq	r2, r2, #1
 		mov	r0, r2
 #endif
 		cmp	r1, r0			@ Clamp to maxbit
 		movlo	r0, r1
-		mov	pc, lr
-
+		ret	lr
+	UNWIND(	.fnend)
diff --git a/arch/arm/lib/floppydma.S b/arch/arm/lib/floppydma.S
deleted file mode 100644
index 617150b1baef..000000000000
--- a/arch/arm/lib/floppydma.S
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  linux/arch/arm/lib/floppydma.S
- *
- *  Copyright (C) 1995, 1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-		.text
-
-		.global	floppy_fiqin_end
-ENTRY(floppy_fiqin_start)
-		subs	r9, r9, #1
-		ldrgtb	r12, [r11, #-4]
-		ldrleb	r12, [r11], #0
-		strb	r12, [r10], #1
-		subs	pc, lr, #4
-floppy_fiqin_end:
-
-		.global	floppy_fiqout_end
-ENTRY(floppy_fiqout_start)
-		subs	r9, r9, #1
-		ldrgeb	r12, [r10], #1
-		movlt	r12, #0
-		strleb	r12, [r11], #0
-		subles	pc, lr, #4
-		strb	r12, [r11, #-4]
-		subs	pc, lr, #4
-floppy_fiqout_end:
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 9b06bb41fca6..c5e420750c48 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/getuser.S
  *
  *  Copyright (C) 2001 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  Idea from x86 version, (C) Copyright 1998 Linus Torvalds
  *
  * These functions have a non-standard call interface to make them more
@@ -18,7 +15,7 @@
  * Inputs:	r0 contains the address
  *		r1 contains the address limit, which must be preserved
  * Outputs:	r0 is the error code
- *		r2 contains the zero-extended value
+ *		r2, r3 contain the zero-extended value
  *		lr corrupted
  *
  * No other registers must be altered.  (see <asm/uaccess.h>
@@ -36,11 +33,18 @@ ENTRY(__get_user_1)
 	check_uaccess r0, 1, r1, r2, __get_user_bad
 1: TUSER(ldrb)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_1)
+_ASM_NOKPROBE(__get_user_1)
 
 ENTRY(__get_user_2)
 	check_uaccess r0, 2, r1, r2, __get_user_bad
+#if __LINUX_ARM_ARCH__ >= 6
+
+2: TUSER(ldrh)	r2, [r0]
+
+#else
+
 #ifdef CONFIG_CPU_USE_DOMAINS
 rb	.req	ip
 2:	ldrbt	r2, [r0], #1
@@ -55,26 +59,109 @@ rb	.req	r0
 #else
 	orr	r2, rb, r2, lsl #8
 #endif
+
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_2)
+_ASM_NOKPROBE(__get_user_2)
 
 ENTRY(__get_user_4)
 	check_uaccess r0, 4, r1, r2, __get_user_bad
 4: TUSER(ldr)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_4)
+_ASM_NOKPROBE(__get_user_4)
+
+ENTRY(__get_user_8)	/* load two consecutive words from the address in r0: first into r2, second into r3 */
+	check_uaccess r0, 8, r1, r2, __get_user_bad8	@ macro defined elsewhere; presumably validates the 8-byte range at r0 against the limit in r1 -- TODO confirm
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr)	r2, [r0]		@ r2 = word at [r0]; label 5 has an __ex_table entry -> __get_user_bad8
+6: TUSER(ldr)	r3, [r0, #4]		@ r3 = word at [r0 + 4]; label 6 has an __ex_table entry -> __get_user_bad8
+#else
+5: TUSER(ldr)	r2, [r0], #4		@ r2 = word at [r0], then r0 += 4 (post-indexed)
+6: TUSER(ldr)	r3, [r0]		@ r3 = word at the advanced r0, i.e. original r0 + 4
+#endif
+	mov	r0, #0			@ r0 is the error code: 0 on the non-faulting path
+	ret	lr
+ENDPROC(__get_user_8)
+_ASM_NOKPROBE(__get_user_8)
+
+#ifdef __ARMEB__
+ENTRY(__get_user_32t_8)	/* big-endian only (inside #ifdef __ARMEB__): load just the word at r0 + 4 into r2 */
+	check_uaccess r0, 8, r1, r2, __get_user_bad	@ checked as a full 8-byte access even though only one word is loaded
+#ifdef CONFIG_CPU_USE_DOMAINS
+	add	r0, r0, #4		@ step r0 to the second word before the ldrt below
+7:	ldrt	r2, [r0]		@ r2 = word at original r0 + 4
+#else
+7:	ldr	r2, [r0, #4]		@ r2 = word at r0 + 4; presumably the low half of a big-endian 64-bit value -- TODO confirm
+#endif
+	mov	r0, #0			@ r0 is the error code: 0 on the non-faulting path
+	ret	lr
+ENDPROC(__get_user_32t_8)
+_ASM_NOKPROBE(__get_user_32t_8)
+
+ENTRY(__get_user_64t_1)	/* big-endian only: load one byte from the address in r0, delivering it in r3 instead of r2 */
+	check_uaccess r0, 1, r1, r2, __get_user_bad8	@ failure target is __get_user_bad8, which clears r3 as well as r2
+8: TUSER(ldrb)	r3, [r0]		@ r3 = zero-extended byte at [r0]; label 8 has an __ex_table entry -> __get_user_bad8
+	mov	r0, #0			@ r0 is the error code: 0 on the non-faulting path
+	ret	lr
+ENDPROC(__get_user_64t_1)
+_ASM_NOKPROBE(__get_user_64t_1)
 
+ENTRY(__get_user_64t_2)	/* big-endian only: assemble a halfword from two byte loads, delivering it in r3 */
+	check_uaccess r0, 2, r1, r2, __get_user_bad8	@ failure target is __get_user_bad8, which clears r3 as well as r2
+#ifdef CONFIG_CPU_USE_DOMAINS
+rb	.req	ip			@ second byte goes in ip on this path
+9:	ldrbt	r3, [r0], #1		@ r3 = byte at [r0], then r0 += 1
+10:	ldrbt	rb, [r0], #0		@ rb = byte at the advanced r0 (original r0 + 1)
+#else
+rb	.req	r0			@ second byte overwrites r0, which is reset to 0 before returning
+9:	ldrb	r3, [r0]		@ r3 = byte at [r0]
+10:	ldrb	rb, [r0, #1]		@ rb = byte at [r0 + 1]; r0 no longer holds the address after this
+#endif
+	orr	r3, rb, r3, lsl #8	@ r3 = (first byte << 8) | second byte
+	mov	r0, #0			@ r0 is the error code: 0 on the non-faulting path
+	ret	lr
+ENDPROC(__get_user_64t_2)
+_ASM_NOKPROBE(__get_user_64t_2)
+
+ENTRY(__get_user_64t_4)	/* big-endian only: load one word from the address in r0, delivering it in r3 instead of r2 */
+	check_uaccess r0, 4, r1, r2, __get_user_bad8	@ failure target is __get_user_bad8, which clears r3 as well as r2
+11: TUSER(ldr)	r3, [r0]		@ r3 = word at [r0]; label 11 has an __ex_table entry -> __get_user_bad8
+	mov	r0, #0			@ r0 is the error code: 0 on the non-faulting path
+	ret	lr
+ENDPROC(__get_user_64t_4)
+_ASM_NOKPROBE(__get_user_64t_4)
+#endif
+
+__get_user_bad8:
+	mov	r3, #0
 __get_user_bad:
 	mov	r2, #0
 	mov	r0, #-EFAULT
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
+_ASM_NOKPROBE(__get_user_bad)
+_ASM_NOKPROBE(__get_user_bad8)
 
 .pushsection __ex_table, "a"
 	.long	1b, __get_user_bad
 	.long	2b, __get_user_bad
+#if __LINUX_ARM_ARCH__ < 6
 	.long	3b, __get_user_bad
+#endif
 	.long	4b, __get_user_bad
+	.long	5b, __get_user_bad8
+	.long	6b, __get_user_bad8
+#ifdef __ARMEB__
+	.long   7b, __get_user_bad
+	.long	8b, __get_user_bad8
+	.long	9b, __get_user_bad8
+	.long	10b, __get_user_bad8
+	.long	11b, __get_user_bad8
+#endif
 .popsection
diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S
deleted file mode 100644
index 69719bad674d..000000000000
--- a/arch/arm/lib/io-acorn.S
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  linux/arch/arm/lib/io-acorn.S
- *
- *  Copyright (C) 1995, 1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 27/03/03 Ian Molton Clean up CONFIG_CPU
- *
- */
-#include <linux/linkage.h>
-#include <linux/kern_levels.h>
-#include <asm/assembler.h>
-
-		.text
-		.align
-
-.Liosl_warning:
-		.ascii	KERN_WARNING "insl/outsl not implemented, called from %08lX\0"
-		.align
-
-/*
- * These make no sense on Acorn machines.
- * Print a warning message.
- */
-ENTRY(insl)
-ENTRY(outsl)
-		adr	r0, .Liosl_warning
-		mov	r1, lr
-		b	printk
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
index 9f4238987fe9..0def9388fb15 100644
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/io-readsb.S
  *
  *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -16,16 +13,16 @@
 		cmp	ip, #2
 		ldrb	r3, [r0]
 		strb	r3, [r1], #1
-		ldrgeb	r3, [r0]
-		strgeb	r3, [r1], #1
-		ldrgtb	r3, [r0]
-		strgtb	r3, [r1], #1
+		ldrbge	r3, [r0]
+		strbge	r3, [r1], #1
+		ldrbgt	r3, [r0]
+		strbgt	r3, [r1], #1
 		subs	r2, r2, ip
 		bne	.Linsb_aligned
 
 ENTRY(__raw_readsb)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	.Linsb_align
 
@@ -72,7 +69,7 @@ ENTRY(__raw_readsb)
 		bpl	.Linsb_16_lp
 
 		tst	r2, #15
-		ldmeqfd	sp!, {r4 - r6, pc}
+		ldmfdeq	sp!, {r4 - r6, pc}
 
 .Linsb_no_16:	tst	r2, #8
 		beq	.Linsb_no_8
@@ -109,15 +106,15 @@ ENTRY(__raw_readsb)
 		str	r3, [r1], #4
 
 .Linsb_no_4:	ands	r2, r2, #3
-		ldmeqfd	sp!, {r4 - r6, pc}
+		ldmfdeq	sp!, {r4 - r6, pc}
 
 		cmp	r2, #2
 		ldrb	r3, [r0]
 		strb	r3, [r1], #1
-		ldrgeb	r3, [r0]
-		strgeb	r3, [r1], #1
-		ldrgtb	r3, [r0]
-		strgtb	r3, [r1]
+		ldrbge	r3, [r0]
+		strbge	r3, [r1], #1
+		ldrbgt	r3, [r0]
+		strbgt	r3, [r1]
 
 		ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(__raw_readsb)
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..d9f6b372b058 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -1,18 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/io-readsl.S
  *
  *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
 ENTRY(__raw_readsl)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	3f
 
@@ -30,10 +27,10 @@ ENTRY(__raw_readsl)
 2:		movs	r2, r2, lsl #31
 		ldrcs	r3, [r0, #0]
 		ldrcs	ip, [r0, #0]
-		stmcsia	r1!, {r3, ip}
+		stmiacs	r1!, {r3, ip}
 		ldrne	r3, [r0, #0]
 		strne	r3, [r1, #0]
-		mov	pc, lr
+		ret	lr
 
 3:		ldr	r3, [r0]
 		cmp	ip, #2
@@ -47,25 +44,25 @@ ENTRY(__raw_readsl)
 		strb	ip, [r1], #1
 
 4:		subs	r2, r2, #1
-		mov	ip, r3, pull #24
+		mov	ip, r3, lspull #24
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #8
+		orrne	ip, ip, r3, lspush #8
 		strne	ip, [r1], #4
 		bne	4b
 		b	8f
 
 5:		subs	r2, r2, #1
-		mov	ip, r3, pull #16
+		mov	ip, r3, lspull #16
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #16
+		orrne	ip, ip, r3, lspush #16
 		strne	ip, [r1], #4
 		bne	5b
 		b	7f
 
 6:		subs	r2, r2, #1
-		mov	ip, r3, pull #8
+		mov	ip, r3, lspull #8
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #24
+		orrne	ip, ip, r3, lspush #24
 		strne	ip, [r1], #4
 		bne	6b
 
@@ -75,5 +72,5 @@ ENTRY(__raw_readsl)
 		strb	r3, [r1, #1]
 8:		mov	r3, ip, get_byte_0
 		strb	r3, [r1, #0]
-		mov	pc, lr
+		ret	lr
 ENDPROC(__raw_readsl)
diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S
index 88487c8c4f23..266043610c0c 100644
--- a/arch/arm/lib/io-readsw-armv3.S
+++ b/arch/arm/lib/io-readsw-armv3.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/io-readsw-armv3.S
  *
  *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -27,11 +24,11 @@
 		strb	r3, [r1], #1
 
 		subs	r2, r2, #1
-		moveq	pc, lr
+		reteq	lr
 
 ENTRY(__raw_readsw)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		tst	r1, #3
 		bne	.Linsw_align
 
@@ -68,7 +65,7 @@ ENTRY(__raw_readsw)
 		bpl	.Linsw_8_lp
 
 		tst	r2, #7
-		ldmeqfd	sp!, {r4, r5, r6, pc}
+		ldmfdeq	sp!, {r4, r5, r6, pc}
 
 .Lno_insw_8:	tst	r2, #4
 		beq	.Lno_insw_4
@@ -97,9 +94,9 @@ ENTRY(__raw_readsw)
 
 .Lno_insw_2:	tst	r2, #1
 		ldrne	r3, [r0]
-		strneb	r3, [r1], #1
+		strbne	r3, [r1], #1
 		movne	r3, r3, lsr #8
-		strneb	r3, [r1]
+		strbne	r3, [r1]
 
 		ldmfd	sp!, {r4, r5, r6, pc}
 
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
index 1f393d42593d..228c176a94d1 100644
--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/io-readsw-armv4.S
  *
  *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -26,7 +23,7 @@
 
 ENTRY(__raw_readsw)
 		teq	r2, #0
-		moveq	pc, lr
+		reteq	lr
 		tst	r1, #3
 		bne	.Linsw_align
 
@@ -76,8 +73,8 @@ ENTRY(__raw_readsw)
 		pack	r3, r3, ip
 		str	r3, [r1], #4
 
-.Lno_insw_2:	ldrneh	r3, [r0]
-		strneh	r3, [r1]
+.Lno_insw_2:	ldrhne	r3, [r0]
+		strhne	r3, [r1]
 
 		ldmfd	sp!, {r4, r5, pc}
 
@@ -94,7 +91,7 @@ ENTRY(__raw_readsw)
 #endif
 
 .Linsw_noalign:	stmfd	sp!, {r4, lr}
-		ldrccb	ip, [r1, #-1]!
+		ldrbcc	ip, [r1, #-1]!
 		bcc	1f
 
 		ldrh	ip, [r0]
@@ -121,11 +118,11 @@ ENTRY(__raw_readsw)
 
 3:		tst	r2, #1
 		strb	ip, [r1], #1
-		ldrneh	ip, [r0]
+		ldrhne	ip, [r0]
    _BE_ONLY_(	movne	ip, ip, ror #8		)
-		strneb	ip, [r1], #1
+		strbne	ip, [r1], #1
    _LE_ONLY_(	movne	ip, ip, lsr #8		)
    _BE_ONLY_(	movne	ip, ip, lsr #24		)
-		strneb	ip, [r1]
+		strbne	ip, [r1]
 		ldmfd	sp!, {r4, pc}
 ENDPROC(__raw_readsw)
diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c
deleted file mode 100644
index 824253948f51..000000000000
--- a/arch/arm/lib/io-shark.c
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- *  linux/arch/arm/lib/io-shark.c
- *
- *  by Alexander Schulz
- *
- * derived from:
- * linux/arch/arm/lib/io-ebsa.S
- * Copyright (C) 1995, 1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
index 68b92f4acaeb..e2ae312f0b69 100644
--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/io-writesb.S
  *
  *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -36,16 +33,16 @@
 		cmp	ip, #2
 		ldrb	r3, [r1], #1
 		strb	r3, [r0]
-		ldrgeb	r3, [r1], #1
-		strgeb	r3, [r0]
-		ldrgtb	r3, [r1], #1
-		strgtb	r3, [r0]
+		ldrbge	r3, [r1], #1
+		strbge	r3, [r0]
+		ldrbgt	r3, [r1], #1
+		strbgt	r3, [r0]
 		subs	r2, r2, ip
 		bne	.Loutsb_aligned
 
 ENTRY(__raw_writesb)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	.Loutsb_align
 
@@ -64,7 +61,7 @@ ENTRY(__raw_writesb)
 		bpl	.Loutsb_16_lp
 
 		tst	r2, #15
-		ldmeqfd	sp!, {r4, r5, pc}
+		ldmfdeq	sp!, {r4, r5, pc}
 
 .Loutsb_no_16:	tst	r2, #8
 		beq	.Loutsb_no_8
@@ -80,15 +77,15 @@ ENTRY(__raw_writesb)
 		outword	r3
 
 .Loutsb_no_4:	ands	r2, r2, #3
-		ldmeqfd	sp!, {r4, r5, pc}
+		ldmfdeq	sp!, {r4, r5, pc}
 
 		cmp	r2, #2
 		ldrb	r3, [r1], #1
 		strb	r3, [r0]
-		ldrgeb	r3, [r1], #1
-		strgeb	r3, [r0]
-		ldrgtb	r3, [r1]
-		strgtb	r3, [r0]
+		ldrbge	r3, [r1], #1
+		strbge	r3, [r0]
+		ldrbgt	r3, [r1]
+		strbgt	r3, [r0]
 
 		ldmfd	sp!, {r4, r5, pc}
 ENDPROC(__raw_writesb)
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..89ef7be61421 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -1,18 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/io-writesl.S
  *
  *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
 ENTRY(__raw_writesl)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	3f
 
@@ -28,12 +25,12 @@ ENTRY(__raw_writesl)
 		bpl	1b
 		ldmfd	sp!, {r4, lr}
 2:		movs	r2, r2, lsl #31
-		ldmcsia	r1!, {r3, ip}
+		ldmiacs	r1!, {r3, ip}
 		strcs	r3, [r0, #0]
 		ldrne	r3, [r1, #0]
 		strcs	ip, [r0, #0]
 		strne	r3, [r0, #0]
-		mov	pc, lr
+		ret	lr
 
 3:		bic	r1, r1, #3
 		ldr	r3, [r1], #4
@@ -41,27 +38,27 @@ ENTRY(__raw_writesl)
 		blt	5f
 		bgt	6f
 
-4:		mov	ip, r3, pull #16
+4:		mov	ip, r3, lspull #16
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #16
+		orr	ip, ip, r3, lspush #16
 		str	ip, [r0]
 		bne	4b
-		mov	pc, lr
+		ret	lr
 
-5:		mov	ip, r3, pull #8
+5:		mov	ip, r3, lspull #8
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #24
+		orr	ip, ip, r3, lspush #24
 		str	ip, [r0]
 		bne	5b
-		mov	pc, lr
+		ret	lr
 
-6:		mov	ip, r3, pull #24
+6:		mov	ip, r3, lspull #24
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #8
+		orr	ip, ip, r3, lspush #8
 		str	ip, [r0]
 		bne	6b
-		mov	pc, lr
+		ret	lr
 ENDPROC(__raw_writesl)
diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S
index 49b800419e32..4cabbee7f3b8 100644
--- a/arch/arm/lib/io-writesw-armv3.S
+++ b/arch/arm/lib/io-writesw-armv3.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/io-writesw-armv3.S
  *
  *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -28,11 +25,11 @@
 		orr	r3, r3, r3, lsl #16
 		str	r3, [r0]
 		subs	r2, r2, #1
-		moveq	pc, lr
+		reteq	lr
 
 ENTRY(__raw_writesw)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		tst	r1, #3
 		bne	.Loutsw_align
 
@@ -79,7 +76,7 @@ ENTRY(__raw_writesw)
 		bpl	.Loutsw_8_lp
 
 		tst	r2, #7
-		ldmeqfd	sp!, {r4, r5, r6, pc}
+		ldmfdeq	sp!, {r4, r5, r6, pc}
 
 .Lno_outsw_8:	tst	r2, #4
 		beq	.Lno_outsw_4
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index ff4f71b579ee..12eec53266c7 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/io-writesw-armv4.S
  *
  *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -31,7 +28,7 @@
 
 ENTRY(__raw_writesw)
 		teq	r2, #0
-		moveq	pc, lr
+		reteq	lr
 		ands	r3, r1, #3
 		bne	.Loutsw_align
 
@@ -61,8 +58,8 @@ ENTRY(__raw_writesw)
 		ldr	r3, [r1], #4
 		outword	r3
 
-.Lno_outsw_2:	ldrneh	r3, [r1]
-		strneh	r3, [r0]
+.Lno_outsw_2:	ldrhne	r3, [r1]
+		strhne	r3, [r0]
 
 		ldmfd	sp!, {r4, r5, pc}
 
@@ -95,6 +92,6 @@ ENTRY(__raw_writesw)
 
 		tst	r2, #1
 3:		movne	ip, r3, lsr #8
-		strneh	ip, [r0]
-		mov	pc, lr
+		strhne	ip, [r0]
+		ret	lr
 ENDPROC(__raw_writesw)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index c562f649734c..c23f9d9e2970 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -96,7 +96,7 @@ Boston, MA 02111-1307, USA.  */
 	subhs	\dividend, \dividend, \divisor, lsr #3
 	orrhs	\result,   \result,   \curbit,  lsr #3
 	cmp	\dividend, #0			@ Early termination?
-	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
+	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
 	movne	\divisor,  \divisor, lsr #4
 	bne	1b
 
@@ -167,7 +167,7 @@ Boston, MA 02111-1307, USA.  */
 
 #endif
 
-	@ Perform all needed substractions to keep only the reminder.
+	@ Perform all needed subtractions to keep only the remainder.
 	@ Do comparisons in batch of 4 first.
 	subs	\order, \order, #3		@ yes, 3 is intended here
 	blt	2f
@@ -182,14 +182,14 @@ Boston, MA 02111-1307, USA.  */
 	subhs	\dividend, \dividend, \divisor, lsr #3
 	cmp	\dividend, #1
 	mov	\divisor, \divisor, lsr #4
-	subges	\order, \order, #4
+	subsge	\order, \order, #4
 	bge	1b
 
 	tst	\order, #3
 	teqne	\dividend, #0
 	beq	5f
 
-	@ Either 1, 2 or 3 comparison/substractions are left.
+	@ Either 1, 2 or 3 comparison/subtractions are left.
 2:	cmn	\order, #2
 	blt	4f
 	beq	3f
@@ -205,12 +205,16 @@ Boston, MA 02111-1307, USA.  */
 .endm
 
 
+#ifdef CONFIG_ARM_PATCH_IDIV
+	.align	3
+#endif
+
 ENTRY(__udivsi3)
 ENTRY(__aeabi_uidiv)
 UNWIND(.fnstart)
 
 	subs	r2, r1, #1
-	moveq	pc, lr
+	reteq	lr
 	bcc	Ldiv0
 	cmp	r0, r1
 	bls	11f
@@ -220,16 +224,16 @@ UNWIND(.fnstart)
 	ARM_DIV_BODY r0, r1, r2, r3
 
 	mov	r0, r2
-	mov	pc, lr
+	ret	lr
 
 11:	moveq	r0, #1
 	movne	r0, #0
-	mov	pc, lr
+	ret	lr
 
 12:	ARM_DIV2_ORDER r1, r2
 
 	mov	r0, r0, lsr r2
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__udivsi3)
@@ -244,15 +248,19 @@ UNWIND(.fnstart)
 	moveq   r0, #0
 	tsthi	r1, r2				@ see if divisor is power of 2
 	andeq	r0, r0, r2
-	movls	pc, lr
+	retls	lr
 
 	ARM_MOD_BODY r0, r1, r2, r3
 
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__umodsi3)
 
+#ifdef CONFIG_ARM_PATCH_IDIV
+	.align 3
+#endif
+
 ENTRY(__divsi3)
 ENTRY(__aeabi_idiv)
 UNWIND(.fnstart)
@@ -274,23 +282,23 @@ UNWIND(.fnstart)
 
 	cmp	ip, #0
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr
 
 10:	teq	ip, r0				@ same sign ?
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr
 
 11:	movlo	r0, #0
 	moveq	r0, ip, asr #31
 	orreq	r0, r0, #1
-	mov	pc, lr
+	ret	lr
 
 12:	ARM_DIV2_ORDER r1, r2
 
 	cmp	ip, #0
 	mov	r0, r3, lsr r2
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__divsi3)
@@ -315,7 +323,7 @@ UNWIND(.fnstart)
 
 10:	cmp	ip, #0
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__modsi3)
@@ -331,7 +339,7 @@ UNWIND(.save {r0, r1, ip, lr}	)
 	ldmfd	sp!, {r1, r2, ip, lr}
 	mul	r3, r0, r2
 	sub	r1, r1, r3
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__aeabi_uidivmod)
@@ -344,7 +352,7 @@ UNWIND(.save {r0, r1, ip, lr}	)
 	ldmfd	sp!, {r1, r2, ip, lr}
 	mul	r3, r0, r2
 	sub	r1, r1, r3
-	mov	pc, lr
+	ret	lr
 
 UNWIND(.fnend)
 ENDPROC(__aeabi_idivmod)
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index f83d449141f7..922dcd88b02b 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */
 
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr)
  THUMB(	lslmi	r3, ah, ip		)
  THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, lsr r2
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__lshrdi3)
 ENDPROC(__aeabi_llsr)
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
index 1da86991d700..95bedafd0330 100644
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/memchr.S
  *
  *  Copyright (C) 1995-2000 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  ASM optimised string functions
  */
 #include <linux/linkage.h>
@@ -22,5 +19,5 @@ ENTRY(memchr)
 	bne	1b
 	sub	r0, r0, #1
 2:	movne	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(memchr)
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index a9b9e2287a09..90f2b645aa0d 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -1,17 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/memcpy.S
  *
  *  Author:	Nicolas Pitre
  *  Created:	Sep 28, 2005
  *  Copyright:	MontaVista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 #define LDR1W_SHIFT	0
 #define STR1W_SHIFT	0
@@ -29,7 +27,7 @@
 	.endm
 
 	.macro ldr1b ptr reg cond=al abort
-	ldr\cond\()b \reg, [\ptr], #1
+	ldrb\cond \reg, [\ptr], #1
 	.endm
 
 	.macro str1w ptr reg abort
@@ -41,23 +39,28 @@
 	.endm
 
 	.macro str1b ptr reg cond=al abort
-	str\cond\()b \reg, [\ptr], #1
+	strb\cond \reg, [\ptr], #1
 	.endm
 
-	.macro enter reg1 reg2
-	stmdb sp!, {r0, \reg1, \reg2}
+	.macro enter regs:vararg
+UNWIND( .save	{r0, \regs}		)
+	stmdb sp!, {r0, \regs}
 	.endm
 
-	.macro exit reg1 reg2
-	ldmfd sp!, {r0, \reg1, \reg2}
+	.macro exit regs:vararg
+	ldmfd sp!, {r0, \regs}
 	.endm
 
 	.text
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 
-ENTRY(memcpy)
+ENTRY(__memcpy)
+ENTRY(mmiocpy)
+WEAK(memcpy)
 
 #include "copy_template.S"
 
 ENDPROC(memcpy)
+ENDPROC(mmiocpy)
+ENDPROC(__memcpy)
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 938fc14f962d..6410554039fd 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -1,17 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/memmove.S
  *
  *  Author:	Nicolas Pitre
  *  Created:	Sep 28, 2005
  *  Copyright:	(C) MontaVista Software Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 		.text
 
@@ -26,13 +24,20 @@
  * occurring in the opposite direction.
  */
 
-ENTRY(memmove)
+ENTRY(__memmove)
+WEAK(memmove)
+	UNWIND(	.fnstart			)
 
 		subs	ip, r0, r1
 		cmphi	r2, ip
-		bls	memcpy
-
-		stmfd	sp!, {r0, r4, lr}
+		bls	__memcpy
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, fpreg, lr}	)
+		stmfd	sp!, {r0, r4, UNWIND(fpreg,) lr}
+	UNWIND(	.setfp	fpreg, sp		)
+	UNWIND(	mov	fpreg, sp		)
 		add	r1, r1, r2
 		add	r0, r0, r2
 		subs	r2, r2, #4
@@ -44,11 +49,11 @@ ENTRY(memmove)
 		bne	10f
 
 1:		subs	r2, r2, #(28)
-		stmfd	sp!, {r5 - r8}
+		stmfd	sp!, {r5, r6, r8, r9}
 		blt	5f
 
 	CALGN(	ands	ip, r0, #31		)
-	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
+	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
@@ -63,9 +68,9 @@ ENTRY(memmove)
 	PLD(	pld	[r1, #-96]		)
 
 3:	PLD(	pld	[r1, #-128]		)
-4:		ldmdb	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+4:		ldmdb	r1!, {r3, r4, r5, r6, r8, r9, ip, lr}
 		subs	r2, r2, #32
-		stmdb	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+		stmdb	r0!, {r3, r4, r5, r6, r8, r9, ip, lr}
 		bge	3b
 	PLD(	cmn	r2, #96			)
 	PLD(	bge	4b			)
@@ -79,8 +84,8 @@ ENTRY(memmove)
 		W(ldr)	r4, [r1, #-4]!
 		W(ldr)	r5, [r1, #-4]!
 		W(ldr)	r6, [r1, #-4]!
-		W(ldr)	r7, [r1, #-4]!
 		W(ldr)	r8, [r1, #-4]!
+		W(ldr)	r9, [r1, #-4]!
 		W(ldr)	lr, [r1, #-4]!
 
 		add	pc, pc, ip
@@ -90,29 +95,29 @@ ENTRY(memmove)
 		W(str)	r4, [r0, #-4]!
 		W(str)	r5, [r0, #-4]!
 		W(str)	r6, [r0, #-4]!
-		W(str)	r7, [r0, #-4]!
 		W(str)	r8, [r0, #-4]!
+		W(str)	r9, [r0, #-4]!
 		W(str)	lr, [r0, #-4]!
 
 	CALGN(	bcs	2b			)
 
-7:		ldmfd	sp!, {r5 - r8}
+7:		ldmfd	sp!, {r5, r6, r8, r9}
 
 8:		movs	r2, r2, lsl #31
-		ldrneb	r3, [r1, #-1]!
-		ldrcsb	r4, [r1, #-1]!
-		ldrcsb	ip, [r1, #-1]
-		strneb	r3, [r0, #-1]!
-		strcsb	r4, [r0, #-1]!
-		strcsb	ip, [r0, #-1]
-		ldmfd	sp!, {r0, r4, pc}
+		ldrbne	r3, [r1, #-1]!
+		ldrbcs	r4, [r1, #-1]!
+		ldrbcs	ip, [r1, #-1]
+		strbne	r3, [r0, #-1]!
+		strbcs	r4, [r0, #-1]!
+		strbcs	ip, [r0, #-1]
+		ldmfd	sp!, {r0, r4, UNWIND(fpreg,) pc}
 
 9:		cmp	ip, #2
-		ldrgtb	r3, [r1, #-1]!
-		ldrgeb	r4, [r1, #-1]!
+		ldrbgt	r3, [r1, #-1]!
+		ldrbge	r4, [r1, #-1]!
 		ldrb	lr, [r1, #-1]!
-		strgtb	r3, [r0, #-1]!
-		strgeb	r4, [r0, #-1]!
+		strbgt	r3, [r0, #-1]!
+		strbge	r4, [r0, #-1]!
 		subs	r2, r2, ip
 		strb	lr, [r0, #-1]!
 		blt	8b
@@ -132,11 +137,11 @@ ENTRY(memmove)
 		blt	14f
 
 	CALGN(	ands	ip, r0, #31		)
-	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
+	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)
 
-11:		stmfd	sp!, {r5 - r9}
+11:		stmfd	sp!, {r5, r6, r8 - r10}
 
 	PLD(	pld	[r1, #-4]		)
 	PLD(	subs	r2, r2, #96		)
@@ -146,39 +151,39 @@ ENTRY(memmove)
 	PLD(	pld	[r1, #-96]		)
 
 12:	PLD(	pld	[r1, #-128]		)
-13:		ldmdb   r1!, {r7, r8, r9, ip}
-		mov     lr, r3, push #\push
+13:		ldmdb   r1!, {r8, r9, r10, ip}
+		mov     lr, r3, lspush #\push
 		subs    r2, r2, #32
 		ldmdb   r1!, {r3, r4, r5, r6}
-		orr     lr, lr, ip, pull #\pull
-		mov     ip, ip, push #\push
-		orr     ip, ip, r9, pull #\pull
-		mov     r9, r9, push #\push
-		orr     r9, r9, r8, pull #\pull
-		mov     r8, r8, push #\push
-		orr     r8, r8, r7, pull #\pull
-		mov     r7, r7, push #\push
-		orr     r7, r7, r6, pull #\pull
-		mov     r6, r6, push #\push
-		orr     r6, r6, r5, pull #\pull
-		mov     r5, r5, push #\push
-		orr     r5, r5, r4, pull #\pull
-		mov     r4, r4, push #\push
-		orr     r4, r4, r3, pull #\pull
-		stmdb   r0!, {r4 - r9, ip, lr}
+		orr     lr, lr, ip, lspull #\pull
+		mov     ip, ip, lspush #\push
+		orr     ip, ip, r10, lspull #\pull
+		mov     r10, r10, lspush #\push
+		orr     r10, r10, r9, lspull #\pull
+		mov     r9, r9, lspush #\push
+		orr     r9, r9, r8, lspull #\pull
+		mov     r8, r8, lspush #\push
+		orr     r8, r8, r6, lspull #\pull
+		mov     r6, r6, lspush #\push
+		orr     r6, r6, r5, lspull #\pull
+		mov     r5, r5, lspush #\push
+		orr     r5, r5, r4, lspull #\pull
+		mov     r4, r4, lspush #\push
+		orr     r4, r4, r3, lspull #\pull
+		stmdb   r0!, {r4 - r6, r8 - r10, ip, lr}
 		bge	12b
 	PLD(	cmn	r2, #96			)
 	PLD(	bge	13b			)
 
-		ldmfd	sp!, {r5 - r9}
+		ldmfd	sp!, {r5, r6, r8 - r10}
 
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov     lr, r3, push #\push
+15:		mov     lr, r3, lspush #\push
 		ldr	r3, [r1, #-4]!
 		subs	ip, ip, #4
-		orr	lr, lr, r3, pull #\pull
+		orr	lr, lr, r3, lspull #\pull
 		str	lr, [r0, #-4]!
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
@@ -196,4 +201,6 @@ ENTRY(memmove)
 
 18:		backward_copy_shift	push=24	pull=8
 
+	UNWIND(	.fnend				)
 ENDPROC(memmove)
+ENDPROC(__memmove)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 94b0650ea98f..de75ae4d5ab4 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -1,21 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/memset.S
  *
  *  Copyright (C) 1995-2000 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  ASM optimised string functions
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 	.text
 	.align	5
 
-ENTRY(memset)
+ENTRY(__memset)
+ENTRY(mmioset)
+WEAK(memset)
+UNWIND( .fnstart         )
+	and	r1, r1, #255		@ cast to unsigned char
 	ands	r3, r0, #3		@ 1 unaligned?
 	mov	ip, r0			@ preserve r0 as return value
 	bne	6f			@ 1
@@ -25,34 +27,38 @@ ENTRY(memset)
 1:	orr	r1, r1, r1, lsl #8
 	orr	r1, r1, r1, lsl #16
 	mov	r3, r1
-	cmp	r2, #16
+7:	cmp	r2, #16
 	blt	4f
+UNWIND( .fnend              )
 
 #if ! CALGN(1)+0
 
 /*
  * We need 2 extra registers for this loop - use r8 and the LR
  */
+UNWIND( .fnstart            )
+UNWIND( .save {r8, lr}      )
 	stmfd	sp!, {r8, lr}
 	mov	r8, r1
-	mov	lr, r1
+	mov	lr, r3
 
 2:	subs	r2, r2, #64
-	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
-	stmgeia	ip!, {r1, r3, r8, lr}
-	stmgeia	ip!, {r1, r3, r8, lr}
-	stmgeia	ip!, {r1, r3, r8, lr}
+	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
+	stmiage	ip!, {r1, r3, r8, lr}
+	stmiage	ip!, {r1, r3, r8, lr}
+	stmiage	ip!, {r1, r3, r8, lr}
 	bgt	2b
-	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
+	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
 /*
  * No need to correct the count; we're only testing bits from now on
  */
 	tst	r2, #32
-	stmneia	ip!, {r1, r3, r8, lr}
-	stmneia	ip!, {r1, r3, r8, lr}
+	stmiane	ip!, {r1, r3, r8, lr}
+	stmiane	ip!, {r1, r3, r8, lr}
 	tst	r2, #16
-	stmneia	ip!, {r1, r3, r8, lr}
+	stmiane	ip!, {r1, r3, r8, lr}
 	ldmfd	sp!, {r8, lr}
+UNWIND( .fnend              )
 
 #else
 
@@ -61,13 +67,15 @@ ENTRY(memset)
  * whole cache lines at once.
  */
 
+UNWIND( .fnstart               )
+UNWIND( .save {r4-r8, lr}      )
 	stmfd	sp!, {r4-r8, lr}
 	mov	r4, r1
-	mov	r5, r1
+	mov	r5, r3
 	mov	r6, r1
-	mov	r7, r1
+	mov	r7, r3
 	mov	r8, r1
-	mov	lr, r1
+	mov	lr, r3
 
 	cmp	r2, #96
 	tstgt	ip, #31
@@ -77,47 +85,64 @@ ENTRY(memset)
 	rsb	r8, r8, #32
 	sub	r2, r2, r8
 	movs	r8, r8, lsl #(32 - 4)
-	stmcsia	ip!, {r4, r5, r6, r7}
-	stmmiia	ip!, {r4, r5}
+	stmiacs	ip!, {r4, r5, r6, r7}
+	stmiami	ip!, {r4, r5}
 	tst	r8, #(1 << 30)
 	mov	r8, r1
 	strne	r1, [ip], #4
 
 3:	subs	r2, r2, #64
-	stmgeia	ip!, {r1, r3-r8, lr}
-	stmgeia	ip!, {r1, r3-r8, lr}
+	stmiage	ip!, {r1, r3-r8, lr}
+	stmiage	ip!, {r1, r3-r8, lr}
 	bgt	3b
-	ldmeqfd	sp!, {r4-r8, pc}
+	ldmfdeq	sp!, {r4-r8, pc}
 
 	tst	r2, #32
-	stmneia	ip!, {r1, r3-r8, lr}
+	stmiane	ip!, {r1, r3-r8, lr}
 	tst	r2, #16
-	stmneia	ip!, {r4-r7}
+	stmiane	ip!, {r4-r7}
 	ldmfd	sp!, {r4-r8, lr}
+UNWIND( .fnend                 )
 
 #endif
 
+UNWIND( .fnstart            )
 4:	tst	r2, #8
-	stmneia	ip!, {r1, r3}
+	stmiane	ip!, {r1, r3}
 	tst	r2, #4
 	strne	r1, [ip], #4
 /*
- * When we get here, we've got less than 4 bytes to zero.  We
+ * When we get here, we've got less than 4 bytes to set.  We
  * may have an unaligned pointer as well.
  */
 5:	tst	r2, #2
-	strneb	r1, [ip], #1
-	strneb	r1, [ip], #1
+	strbne	r1, [ip], #1
+	strbne	r1, [ip], #1
 	tst	r2, #1
-	strneb	r1, [ip], #1
-	mov	pc, lr
+	strbne	r1, [ip], #1
+	ret	lr
 
 6:	subs	r2, r2, #4		@ 1 do we have enough
 	blt	5b			@ 1 bytes to align with?
 	cmp	r3, #2			@ 1
-	strltb	r1, [ip], #1		@ 1
-	strleb	r1, [ip], #1		@ 1
+	strblt	r1, [ip], #1		@ 1
+	strble	r1, [ip], #1		@ 1
 	strb	r1, [ip], #1		@ 1
 	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
 	b	1b
+UNWIND( .fnend   )
 ENDPROC(memset)
+ENDPROC(mmioset)
+ENDPROC(__memset)
+
+ENTRY(__memset32)	/* r1 = 32-bit pattern; duplicates it into r3 and continues as __memset64 */
+UNWIND( .fnstart         )
+	mov	r3, r1			@ r3 = r1 so both pattern registers match, then fall through into __memset64
+UNWIND( .fnend   )
+ENDPROC(__memset32)
+ENTRY(__memset64)	/* r1 and r3 = the two words stored alternately by memset, r2 = count tested at label 7 */
+UNWIND( .fnstart         )
+	mov	ip, r0			@ memset stores through ip so r0 survives as the return value
+	b	7b			@ join memset at its 7: label (cmp r2, #16), bypassing the unaligned-pointer handling and the byte replication of r1
+UNWIND( .fnend   )
+ENDPROC(__memset64)
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
deleted file mode 100644
index 3fbdef5f802a..000000000000
--- a/arch/arm/lib/memzero.S
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- *  linux/arch/arm/lib/memzero.S
- *
- *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-	.text
-	.align	5
-	.word	0
-/*
- * Align the pointer in r0.  r3 contains the number of bytes that we are
- * mis-aligned by, and r1 is the number of bytes.  If r1 < 4, then we
- * don't bother; we use byte stores instead.
- */
-1:	subs	r1, r1, #4		@ 1 do we have enough
-	blt	5f			@ 1 bytes to align with?
-	cmp	r3, #2			@ 1
-	strltb	r2, [r0], #1		@ 1
-	strleb	r2, [r0], #1		@ 1
-	strb	r2, [r0], #1		@ 1
-	add	r1, r1, r3		@ 1 (r1 = r1 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted.  Try doing the
- * memzero again.
- */
-
-ENTRY(__memzero)
-	mov	r2, #0			@ 1
-	ands	r3, r0, #3		@ 1 unaligned?
-	bne	1b			@ 1
-/*
- * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
- */
-	cmp	r1, #16			@ 1 we can skip this chunk if we
-	blt	4f			@ 1 have < 16 bytes
-
-#if ! CALGN(1)+0
-
-/*
- * We need an extra register for this loop - save the return address and
- * use the LR
- */
-	str	lr, [sp, #-4]!		@ 1
-	mov	ip, r2			@ 1
-	mov	lr, r2			@ 1
-
-3:	subs	r1, r1, #64		@ 1 write 32 bytes out per loop
-	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
-	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
-	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
-	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
-	bgt	3b			@ 1
-	ldmeqfd	sp!, {pc}		@ 1/2 quick exit
-/*
- * No need to correct the count; we're only testing bits from now on
- */
-	tst	r1, #32			@ 1
-	stmneia	r0!, {r2, r3, ip, lr}	@ 4
-	stmneia	r0!, {r2, r3, ip, lr}	@ 4
-	tst	r1, #16			@ 1 16 bytes or more?
-	stmneia	r0!, {r2, r3, ip, lr}	@ 4
-	ldr	lr, [sp], #4		@ 1
-
-#else
-
-/*
- * This version aligns the destination pointer in order to write
- * whole cache lines at once.
- */
-
-	stmfd	sp!, {r4-r7, lr}
-	mov	r4, r2
-	mov	r5, r2
-	mov	r6, r2
-	mov	r7, r2
-	mov	ip, r2
-	mov	lr, r2
-
-	cmp	r1, #96
-	andgts	ip, r0, #31
-	ble	3f
-
-	rsb	ip, ip, #32
-	sub	r1, r1, ip
-	movs	ip, ip, lsl #(32 - 4)
-	stmcsia	r0!, {r4, r5, r6, r7}
-	stmmiia	r0!, {r4, r5}
-	movs	ip, ip, lsl #2
-	strcs	r2, [r0], #4
-
-3:	subs	r1, r1, #64
-	stmgeia	r0!, {r2-r7, ip, lr}
-	stmgeia	r0!, {r2-r7, ip, lr}
-	bgt	3b
-	ldmeqfd	sp!, {r4-r7, pc}
-
-	tst	r1, #32
-	stmneia	r0!, {r2-r7, ip, lr}
-	tst	r1, #16
-	stmneia	r0!, {r4-r7}
-	ldmfd	sp!, {r4-r7, lr}
-
-#endif
-
-4:	tst	r1, #8			@ 1 8 bytes or more?
-	stmneia	r0!, {r2, r3}		@ 2
-	tst	r1, #4			@ 1 4 bytes or more?
-	strne	r2, [r0], #4		@ 1
-/*
- * When we get here, we've got less than 4 bytes to zero.  We
- * may have an unaligned pointer as well.
- */
-5:	tst	r1, #2			@ 1 2 bytes or more?
-	strneb	r2, [r0], #1		@ 1
-	strneb	r2, [r0], #1		@ 1
-	tst	r1, #1			@ 1 a byte left over
-	strneb	r2, [r0], #1		@ 1
-	mov	pc, lr			@ 1
-ENDPROC(__memzero)
diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S
index 36c91b4957e2..8362fe6c0de9 100644
--- a/arch/arm/lib/muldi3.S
+++ b/arch/arm/lib/muldi3.S
@@ -1,16 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/muldi3.S
  *
  *  Author:     Nicolas Pitre
  *  Created:    Oct 19, 2005
  *  Copyright:  Monta Vista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define xh r0
@@ -41,7 +39,7 @@ ENTRY(__aeabi_lmul)
 	adc	xh, xh, yh, lsr #16
 	adds	xl, xl, ip, lsl #16
 	adc	xh, xh, ip, lsr #16
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__muldi3)
 ENDPROC(__aeabi_lmul)
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 3d73dcb959b0..bdd8836dc5c2 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/putuser.S
  *
  *  Copyright (C) 2001 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  Idea from x86 version, (C) Copyright 1998 Linus Torvalds
  *
  * These functions have a non-standard call interface to make
@@ -36,21 +33,18 @@ ENTRY(__put_user_1)
 	check_uaccess r0, 1, r1, ip, __put_user_bad
 1: TUSER(strb)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_1)
 
 ENTRY(__put_user_2)
 	check_uaccess r0, 2, r1, ip, __put_user_bad
-	mov	ip, r2, lsr #8
-#ifdef CONFIG_THUMB2_KERNEL
-#ifndef __ARMEB__
-2: TUSER(strb)	r2, [r0]
-3: TUSER(strb)	ip, [r0, #1]
+#if __LINUX_ARM_ARCH__ >= 6
+
+2: TUSER(strh)	r2, [r0]
+
 #else
-2: TUSER(strb)	ip, [r0]
-3: TUSER(strb)	r2, [r0, #1]
-#endif
-#else	/* !CONFIG_THUMB2_KERNEL */
+
+	mov	ip, r2, lsr #8
 #ifndef __ARMEB__
 2: TUSER(strb)	r2, [r0], #1
 3: TUSER(strb)	ip, [r0]
@@ -58,16 +52,17 @@ ENTRY(__put_user_2)
 2: TUSER(strb)	ip, [r0], #1
 3: TUSER(strb)	r2, [r0]
 #endif
-#endif	/* CONFIG_THUMB2_KERNEL */
+
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
 	check_uaccess r0, 4, r1, ip, __put_user_bad
 4: TUSER(str)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
@@ -80,18 +75,20 @@ ENTRY(__put_user_8)
 6: TUSER(str)	r3, [r0]
 #endif
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_8)
 
 __put_user_bad:
 	mov	r0, #-EFAULT
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_bad)
 
 .pushsection __ex_table, "a"
 	.long	1b, __put_user_bad
 	.long	2b, __put_user_bad
+#if __LINUX_ARM_ARCH__ < 6
 	.long	3b, __put_user_bad
+#endif
 	.long	4b, __put_user_bad
 	.long	5b, __put_user_bad
 	.long	6b, __put_user_bad
diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S
index 618fedae4b37..19a96f43f4bb 100644
--- a/arch/arm/lib/setbit.S
+++ b/arch/arm/lib/setbit.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/setbit.S
  *
  *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
index d8f2a1c1aea4..09e2cc8a8950 100644
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/strchr.S
  *
  *  Copyright (C) 1995-2000 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  ASM optimised string functions
  */
 #include <linux/linkage.h>
@@ -23,5 +20,5 @@ ENTRY(strchr)
 		teq	r2, r1
 		movne	r0, #0
 		subeq	r0, r0, #1
-		mov	pc, lr
+		ret	lr
 ENDPROC(strchr)
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
index 302f20cd2423..5e87247d1e8b 100644
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/strrchr.S
  *
  *  Copyright (C) 1995-2000 Russell King
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  *  ASM optimised string functions
  */
 #include <linux/linkage.h>
@@ -22,5 +19,5 @@ ENTRY(strrchr)
 		teq	r2, #0
 		bne	1b
 		mov	r0, r3
-		mov	pc, lr
+		ret	lr
 ENDPROC(strrchr)
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
index 4becdc3a59cb..f13fe9bc2399 100644
--- a/arch/arm/lib/testchangebit.S
+++ b/arch/arm/lib/testchangebit.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/testchangebit.S
  *
  *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -13,3 +10,7 @@
                 .text
 
 testop	_test_and_change_bit, eor, str
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop	_sync_test_and_change_bit, eor, str
+#endif
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 918841dcce7a..4d2c5ca620eb 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/testclearbit.S
  *
  *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -13,3 +10,7 @@
                 .text
 
 testop	_test_and_clear_bit, bicne, strne
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop	_sync_test_and_clear_bit, bicne, strne
+#endif
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index 8d1b2fe9e487..649dbab65d8d 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/testsetbit.S
  *
  *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -13,3 +10,7 @@
                 .text
 
 testop	_test_and_set_bit, orreq, streq
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop	_sync_test_and_set_bit, orreq, streq
+#endif
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
deleted file mode 100644
index 5c908b1cb8ed..000000000000
--- a/arch/arm/lib/uaccess.S
+++ /dev/null
@@ -1,564 +0,0 @@
-/*
- *  linux/arch/arm/lib/uaccess.S
- *
- *  Copyright (C) 1995, 1996,1997,1998 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Routines to block copy data to/from user memory
- *   These are highly optimised both for the 4k page size
- *   and for various alignments.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-#include <asm/domain.h>
-
-		.text
-
-#define PAGE_SHIFT 12
-
-/* Prototype: int __copy_to_user(void *to, const char *from, size_t n)
- * Purpose  : copy a block to user memory from kernel memory
- * Params   : to   - user memory
- *          : from - kernel memory
- *          : n    - number of bytes to copy
- * Returns  : Number of bytes NOT copied.
- */
-
-.Lc2u_dest_not_aligned:
-		rsb	ip, ip, #4
-		cmp	ip, #2
-		ldrb	r3, [r1], #1
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		ldrgeb	r3, [r1], #1
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #1
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		sub	r2, r2, ip
-		b	.Lc2u_dest_aligned
-
-ENTRY(__copy_to_user)
-		stmfd	sp!, {r2, r4 - r7, lr}
-		cmp	r2, #4
-		blt	.Lc2u_not_enough
-		ands	ip, r0, #3
-		bne	.Lc2u_dest_not_aligned
-.Lc2u_dest_aligned:
-
-		ands	ip, r1, #3
-		bne	.Lc2u_src_not_aligned
-/*
- * Seeing as there has to be at least 8 bytes to copy, we can
- * copy one word, and force a user-mode page fault...
- */
-
-.Lc2u_0fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_0nowords
-		ldr	r3, [r1], #4
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_0fupi
-/*
- * ip = max no. of bytes to copy before needing another "strt" insn
- */
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #32
-		blt	.Lc2u_0rem8lp
-
-.Lc2u_0cpy8lp:	ldmia	r1!, {r3 - r6}
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		ldmia	r1!, {r3 - r6}
-		subs	ip, ip, #32
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_0cpy8lp
-
-.Lc2u_0rem8lp:	cmn	ip, #16
-		ldmgeia	r1!, {r3 - r6}
-		stmgeia	r0!, {r3 - r6}			@ Shouldnt fault
-		tst	ip, #8
-		ldmneia	r1!, {r3 - r4}
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		ldrne	r3, [r1], #4
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_0fupi
-.Lc2u_0nowords:	teq	ip, #0
-		beq	.Lc2u_finished
-.Lc2u_nowords:	cmp	ip, #2
-		ldrb	r3, [r1], #1
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		ldrgeb	r3, [r1], #1
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #1
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-
-.Lc2u_not_enough:
-		movs	ip, r2
-		bne	.Lc2u_nowords
-.Lc2u_finished:	mov	r0, #0
-		ldmfd	sp!, {r2, r4 - r7, pc}
-
-.Lc2u_src_not_aligned:
-		bic	r1, r1, #3
-		ldr	r7, [r1], #4
-		cmp	ip, #2
-		bgt	.Lc2u_3fupi
-		beq	.Lc2u_2fupi
-.Lc2u_1fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_1nowords
-		mov	r3, r7, pull #8
-		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #24
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_1fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lc2u_1rem8lp
-
-.Lc2u_1cpy8lp:	mov	r3, r7, pull #8
-		ldmia	r1!, {r4 - r7}
-		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_1cpy8lp
-
-.Lc2u_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
-		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		movne	r3, r7, pull #8
-		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #24
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_1fupi
-.Lc2u_1nowords:	mov	r3, r7, get_byte_1
-		teq	ip, #0
-		beq	.Lc2u_finished
-		cmp	ip, #2
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		movge	r3, r7, get_byte_2
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		movgt	r3, r7, get_byte_3
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-
-.Lc2u_2fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_2nowords
-		mov	r3, r7, pull #16
-		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #16
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_2fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lc2u_2rem8lp
-
-.Lc2u_2cpy8lp:	mov	r3, r7, pull #16
-		ldmia	r1!, {r4 - r7}
-		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_2cpy8lp
-
-.Lc2u_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
-		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		movne	r3, r7, pull #16
-		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #16
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_2fupi
-.Lc2u_2nowords:	mov	r3, r7, get_byte_2
-		teq	ip, #0
-		beq	.Lc2u_finished
-		cmp	ip, #2
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		movge	r3, r7, get_byte_3
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #0
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-
-.Lc2u_3fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_3nowords
-		mov	r3, r7, pull #24
-		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #8
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_3fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lc2u_3rem8lp
-
-.Lc2u_3cpy8lp:	mov	r3, r7, pull #24
-		ldmia	r1!, {r4 - r7}
-		subs	ip, ip, #16
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_3cpy8lp
-
-.Lc2u_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
-		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		movne	r3, r7, pull #24
-		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #8
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_3fupi
-.Lc2u_3nowords:	mov	r3, r7, get_byte_3
-		teq	ip, #0
-		beq	.Lc2u_finished
-		cmp	ip, #2
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		ldrgeb	r3, [r1], #1
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #0
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-ENDPROC(__copy_to_user)
-
-		.pushsection .fixup,"ax"
-		.align	0
-9001:		ldmfd	sp!, {r0, r4 - r7, pc}
-		.popsection
-
-/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n);
- * Purpose  : copy a block from user memory to kernel memory
- * Params   : to   - kernel memory
- *          : from - user memory
- *          : n    - number of bytes to copy
- * Returns  : Number of bytes NOT copied.
- */
-.Lcfu_dest_not_aligned:
-		rsb	ip, ip, #4
-		cmp	ip, #2
-USER(	TUSER(	ldrb)	r3, [r1], #1)			@ May fault
-		strb	r3, [r0], #1
-USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault
-		strgtb	r3, [r0], #1
-		sub	r2, r2, ip
-		b	.Lcfu_dest_aligned
-
-ENTRY(__copy_from_user)
-		stmfd	sp!, {r0, r2, r4 - r7, lr}
-		cmp	r2, #4
-		blt	.Lcfu_not_enough
-		ands	ip, r0, #3
-		bne	.Lcfu_dest_not_aligned
-.Lcfu_dest_aligned:
-		ands	ip, r1, #3
-		bne	.Lcfu_src_not_aligned
-
-/*
- * Seeing as there has to be at least 8 bytes to copy, we can
- * copy one word, and force a user-mode page fault...
- */
-
-.Lcfu_0fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_0nowords
-USER(	TUSER(	ldr)	r3, [r1], #4)
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_0fupi
-/*
- * ip = max no. of bytes to copy before needing another "strt" insn
- */
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #32
-		blt	.Lcfu_0rem8lp
-
-.Lcfu_0cpy8lp:	ldmia	r1!, {r3 - r6}			@ Shouldnt fault
-		stmia	r0!, {r3 - r6}
-		ldmia	r1!, {r3 - r6}			@ Shouldnt fault
-		subs	ip, ip, #32
-		stmia	r0!, {r3 - r6}
-		bpl	.Lcfu_0cpy8lp
-
-.Lcfu_0rem8lp:	cmn	ip, #16
-		ldmgeia	r1!, {r3 - r6}			@ Shouldnt fault
-		stmgeia	r0!, {r3 - r6}
-		tst	ip, #8
-		ldmneia	r1!, {r3 - r4}			@ Shouldnt fault
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-	TUSER(	ldrne) r3, [r1], #4			@ Shouldnt fault
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_0fupi
-.Lcfu_0nowords:	teq	ip, #0
-		beq	.Lcfu_finished
-.Lcfu_nowords:	cmp	ip, #2
-USER(	TUSER(	ldrb)	r3, [r1], #1)			@ May fault
-		strb	r3, [r0], #1
-USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-
-.Lcfu_not_enough:
-		movs	ip, r2
-		bne	.Lcfu_nowords
-.Lcfu_finished:	mov	r0, #0
-		add	sp, sp, #8
-		ldmfd	sp!, {r4 - r7, pc}
-
-.Lcfu_src_not_aligned:
-		bic	r1, r1, #3
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		cmp	ip, #2
-		bgt	.Lcfu_3fupi
-		beq	.Lcfu_2fupi
-.Lcfu_1fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_1nowords
-		mov	r3, r7, pull #8
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #24
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_1fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lcfu_1rem8lp
-
-.Lcfu_1cpy8lp:	mov	r3, r7, pull #8
-		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
-		stmia	r0!, {r3 - r6}
-		bpl	.Lcfu_1cpy8lp
-
-.Lcfu_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
-		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-		movne	r3, r7, pull #8
-USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #24
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_1fupi
-.Lcfu_1nowords:	mov	r3, r7, get_byte_1
-		teq	ip, #0
-		beq	.Lcfu_finished
-		cmp	ip, #2
-		strb	r3, [r0], #1
-		movge	r3, r7, get_byte_2
-		strgeb	r3, [r0], #1
-		movgt	r3, r7, get_byte_3
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-
-.Lcfu_2fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_2nowords
-		mov	r3, r7, pull #16
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #16
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_2fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lcfu_2rem8lp
-
-
-.Lcfu_2cpy8lp:	mov	r3, r7, pull #16
-		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
-		stmia	r0!, {r3 - r6}
-		bpl	.Lcfu_2cpy8lp
-
-.Lcfu_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
-		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-		movne	r3, r7, pull #16
-USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #16
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_2fupi
-.Lcfu_2nowords:	mov	r3, r7, get_byte_2
-		teq	ip, #0
-		beq	.Lcfu_finished
-		cmp	ip, #2
-		strb	r3, [r0], #1
-		movge	r3, r7, get_byte_3
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #0)			@ May fault
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-
-.Lcfu_3fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_3nowords
-		mov	r3, r7, pull #24
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #8
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_3fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lcfu_3rem8lp
-
-.Lcfu_3cpy8lp:	mov	r3, r7, pull #24
-		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
-		stmia	r0!, {r3 - r6}
-		subs	ip, ip, #16
-		bpl	.Lcfu_3cpy8lp
-
-.Lcfu_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
-		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-		movne	r3, r7, pull #24
-USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #8
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_3fupi
-.Lcfu_3nowords:	mov	r3, r7, get_byte_3
-		teq	ip, #0
-		beq	.Lcfu_finished
-		cmp	ip, #2
-		strb	r3, [r0], #1
-USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-ENDPROC(__copy_from_user)
-
-		.pushsection .fixup,"ax"
-		.align	0
-		/*
-		 * We took an exception.  r0 contains a pointer to
-		 * the byte not copied.
-		 */
-9001:		ldr	r2, [sp], #4			@ void *to
-		sub	r2, r0, r2			@ bytes copied
-		ldr	r1, [sp], #4			@ unsigned long count
-		subs	r4, r1, r2			@ bytes left to copy
-		movne	r1, r4
-		blne	__memzero
-		mov	r0, r4
-		ldmfd	sp!, {r4 - r7, pc}
-		.popsection
-
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index 025f742dd4df..c0ac7796d775 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  linux/arch/arm/lib/uaccess_with_memcpy.c
  *
  *  Written by: Lennert Buytenhek and Nicolas Pitre
  *  Copyright (C) 2009 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/kernel.h>
@@ -18,6 +15,7 @@
 #include <linux/hardirq.h> /* for in_atomic() */
 #include <linux/gfp.h>
 #include <linux/highmem.h>
+#include <linux/hugetlb.h>
 #include <asm/current.h>
 #include <asm/page.h>
 
@@ -26,6 +24,7 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
 {
 	unsigned long addr = (unsigned long)_addr;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pmd_t *pmd;
 	pte_t *pte;
 	pud_t *pud;
@@ -35,15 +34,49 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
 	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
 		return 0;
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (unlikely(p4d_none(*p4d) || p4d_bad(*p4d)))
+		return 0;
+
+	pud = pud_offset(p4d, addr);
 	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
 		return 0;
 
 	pmd = pmd_offset(pud, addr);
-	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
+	if (unlikely(pmd_none(*pmd)))
+		return 0;
+
+	/*
+	 * A pmd can be bad if it refers to a HugeTLB or THP page.
+	 *
+	 * Both THP and HugeTLB pages have the same pmd layout
+	 * and should not be manipulated by the pte functions.
+	 *
+	 * Lock the page table for the destination and check
+	 * to see that it's still huge and whether or not we will
+	 * need to fault on write.
+	 */
+	if (unlikely(pmd_leaf(*pmd))) {
+		ptl = &current->mm->page_table_lock;
+		spin_lock(ptl);
+		if (unlikely(!pmd_leaf(*pmd)
+			|| pmd_hugewillfault(*pmd))) {
+			spin_unlock(ptl);
+			return 0;
+		}
+
+		*ptep = NULL;
+		*ptlp = ptl;
+		return 1;
+	}
+
+	if (unlikely(pmd_bad(*pmd)))
 		return 0;
 
 	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
+	if (unlikely(!pte))
+		return 0;
+
 	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
 	    !pte_write(*pte) || !pte_dirty(*pte))) {
 		pte_unmap_unlock(pte, ptl);
@@ -59,18 +92,14 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
 static unsigned long noinline
 __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
 {
+	unsigned long ua_flags;
 	int atomic;
 
-	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
-		memcpy((void *)to, from, n);
-		return 0;
-	}
-
-	/* the mmap semaphore is taken only if not in an atomic context */
+	/* the mmap lock is taken only if not in an atomic context */
-	atomic = in_atomic();
+	atomic = faulthandler_disabled();
 
 	if (!atomic)
-		down_read(&current->mm->mmap_sem);
+		mmap_read_lock(current->mm);
 	while (n) {
 		pte_t *pte;
 		spinlock_t *ptl;
@@ -78,33 +107,38 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
 
 		while (!pin_page_for_write(to, &pte, &ptl)) {
 			if (!atomic)
-				up_read(&current->mm->mmap_sem);
+				mmap_read_unlock(current->mm);
 			if (__put_user(0, (char __user *)to))
 				goto out;
 			if (!atomic)
-				down_read(&current->mm->mmap_sem);
+				mmap_read_lock(current->mm);
 		}
 
 		tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1;
 		if (tocopy > n)
 			tocopy = n;
 
-		memcpy((void *)to, from, tocopy);
+		ua_flags = uaccess_save_and_enable();
+		__memcpy((void *)to, from, tocopy);
+		uaccess_restore(ua_flags);
 		to += tocopy;
 		from += tocopy;
 		n -= tocopy;
 
-		pte_unmap_unlock(pte, ptl);
+		if (pte)
+			pte_unmap_unlock(pte, ptl);
+		else
+			spin_unlock(ptl);
 	}
 	if (!atomic)
-		up_read(&current->mm->mmap_sem);
+		mmap_read_unlock(current->mm);
 
 out:
 	return n;
 }
 
 unsigned long
-__copy_to_user(void __user *to, const void *from, unsigned long n)
+arm_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	/*
 	 * This test is stubbed out of the main function above to keep
@@ -113,54 +147,67 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
 	 * With frame pointer disabled, tail call optimization kicks in
 	 * as well making this test almost invisible.
 	 */
-	if (n < 64)
-		return __copy_to_user_std(to, from, n);
-	return __copy_to_user_memcpy(to, from, n);
+	if (n < 64) {
+		unsigned long ua_flags = uaccess_save_and_enable();
+		n = __copy_to_user_std(to, from, n);
+		uaccess_restore(ua_flags);
+	} else {
+		n = __copy_to_user_memcpy(uaccess_mask_range_ptr(to, n),
+					  from, n);
+	}
+	return n;
 }
 	
 static unsigned long noinline
 __clear_user_memset(void __user *addr, unsigned long n)
 {
-	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
-		memset((void *)addr, 0, n);
-		return 0;
-	}
+	unsigned long ua_flags;
 
-	down_read(&current->mm->mmap_sem);
+	mmap_read_lock(current->mm);
 	while (n) {
 		pte_t *pte;
 		spinlock_t *ptl;
 		int tocopy;
 
 		while (!pin_page_for_write(addr, &pte, &ptl)) {
-			up_read(&current->mm->mmap_sem);
+			mmap_read_unlock(current->mm);
 			if (__put_user(0, (char __user *)addr))
 				goto out;
-			down_read(&current->mm->mmap_sem);
+			mmap_read_lock(current->mm);
 		}
 
 		tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1;
 		if (tocopy > n)
 			tocopy = n;
 
-		memset((void *)addr, 0, tocopy);
+		ua_flags = uaccess_save_and_enable();
+		__memset((void *)addr, 0, tocopy);
+		uaccess_restore(ua_flags);
 		addr += tocopy;
 		n -= tocopy;
 
-		pte_unmap_unlock(pte, ptl);
+		if (pte)
+			pte_unmap_unlock(pte, ptl);
+		else
+			spin_unlock(ptl);
 	}
-	up_read(&current->mm->mmap_sem);
+	mmap_read_unlock(current->mm);
 
 out:
 	return n;
 }
 
-unsigned long __clear_user(void __user *addr, unsigned long n)
+unsigned long arm_clear_user(void __user *addr, unsigned long n)
 {
 	/* See rational for this in __copy_to_user() above. */
-	if (n < 64)
-		return __clear_user_std(addr, n);
-	return __clear_user_memset(addr, n);
+	if (n < 64) {
+		unsigned long ua_flags = uaccess_save_and_enable();
+		n = __clear_user_std(addr, n);
+		uaccess_restore(ua_flags);
+	} else {
+		n = __clear_user_memset(addr, n);
+	}
+	return n;
 }
 
 #if 0
@@ -193,7 +240,7 @@ static int __init test_size_treshold(void)
 	if (!dst_page)
 		goto no_dst;
 	kernel_ptr = page_address(src_page);
-	user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010));
+	user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__PAGE_COPY));
 	if (!user_ptr)
 		goto no_vmap;
 
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
index f0df6a91db04..679e16a210ae 100644
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -1,16 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  linux/arch/arm/lib/ucmpdi2.S
  *
  *  Author:	Nicolas Pitre
  *  Created:	Oct 19, 2005
  *  Copyright:	Monta Vista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define xh r0
@@ -31,7 +29,7 @@ ENTRY(__ucmpdi2)
 	movlo	r0, #0
 	moveq	r0, #1
 	movhi	r0, #2
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__ucmpdi2)
 
@@ -44,7 +42,7 @@ ENTRY(__aeabi_ulcmp)
 	movlo	r0, #-1
 	moveq	r0, #0
 	movhi	r0, #1
-	mov	pc, lr
+	ret	lr
 
 ENDPROC(__aeabi_ulcmp)
 
diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
new file mode 100644
index 000000000000..cf57fca97908
--- /dev/null
+++ b/arch/arm/lib/xor-neon.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/arch/arm/lib/xor-neon.c
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/raid/xor.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("NEON accelerated XOR implementation");
+MODULE_LICENSE("GPL");
+
+#ifndef __ARM_NEON__
+#error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon'
+#endif
+
+/*
+ * Pull in the reference implementations while instructing GCC (through
+ * -ftree-vectorize) to attempt to exploit implicit parallelism and emit
+ * NEON instructions. Clang does this by default at O2 so no pragma is
+ * needed.
+ */
+#ifdef CONFIG_CC_IS_GCC
+#pragma GCC optimize "tree-vectorize"
+#endif
+
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#include <asm-generic/xor.h>
+
+struct xor_block_template const xor_block_neon_inner = {
+	.name	= "__inner_neon__",
+	.do_2	= xor_8regs_2,
+	.do_3	= xor_8regs_3,
+	.do_4	= xor_8regs_4,
+	.do_5	= xor_8regs_5,
+};
+EXPORT_SYMBOL(xor_block_neon_inner);