From 0777e3e1723f69276136140209c11deeecb7c6dc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 5 Aug 2014 21:15:19 +0100 Subject: ARM: 8125/1: crypto: enable NEON SHA-1 for big endian This tweaks the SHA-1 NEON code slightly so it works correctly under big endian, and removes the Kconfig condition preventing it from being selected if CONFIG_CPU_BIG_ENDIAN is set. Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/crypto/sha1-armv7-neon.S | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'arch/arm/crypto') diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S index 50013c0e2864..dcd01f3f0bb0 100644 --- a/arch/arm/crypto/sha1-armv7-neon.S +++ b/arch/arm/crypto/sha1-armv7-neon.S @@ -9,7 +9,7 @@ */ #include - +#include .syntax unified .code 32 @@ -61,13 +61,13 @@ #define RT3 r12 #define W0 q0 -#define W1 q1 +#define W1 q7 #define W2 q2 #define W3 q3 #define W4 q4 -#define W5 q5 -#define W6 q6 -#define W7 q7 +#define W5 q6 +#define W6 q5 +#define W7 q1 #define tmp0 q8 #define tmp1 q9 @@ -79,6 +79,11 @@ #define qK3 q14 #define qK4 q15 +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ARM_LE(code...) +#else +#define ARM_LE(code...) code +#endif /* Round function macros. */ @@ -150,45 +155,45 @@ #define W_PRECALC_00_15() \ add RWK, sp, #(WK_offs(0)); \ \ - vld1.32 {tmp0, tmp1}, [RDATA]!; \ - vrev32.8 W0, tmp0; /* big => little */ \ - vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vld1.32 {W0, W7}, [RDATA]!; \ + ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ + vld1.32 {W6, W5}, [RDATA]!; \ vadd.u32 tmp0, W0, curK; \ - vrev32.8 W7, tmp1; /* big => little */ \ - vrev32.8 W6, tmp2; /* big => little */ \ + ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ + ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ vadd.u32 tmp1, W7, curK; \ - vrev32.8 W5, tmp3; /* big => little */ \ + ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ vadd.u32 tmp2, W6, curK; \ vst1.32 {tmp0, tmp1}, [RWK]!; \ vadd.u32 tmp3, W5, curK; \ vst1.32 {tmp2, tmp3}, [RWK]; \ #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {tmp0, tmp1}, [RDATA]!; \ + vld1.32 {W0, W7}, [RDATA]!; \ #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ add RWK, sp, #(WK_offs(0)); \ #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W0, tmp0; /* big => little */ \ + ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vld1.32 {W6, W5}, [RDATA]!; \ #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp0, W0, curK; \ #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W7, tmp1; /* big => little */ \ + ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W6, tmp2; /* big => little */ \ + ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp1, W7, curK; \ #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W5, tmp3; /* big => little */ \ + ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp2, W6, curK; \ -- cgit