about summary refs log tree commit diff stats
path: root/arch/arm/crypto
diff options
context:
space:
mode:
authorArd Biesheuvel <ard.biesheuvel@linaro.org>2014-08-05 16:15:19 -0400
committerRussell King <rmk+kernel@arm.linux.org.uk>2014-08-27 10:44:11 -0400
commit0777e3e1723f69276136140209c11deeecb7c6dc (patch)
tree59e04134940a024d8ad96c0274f856824a153f5b /arch/arm/crypto
parent52addcf9d6669fa439387610bc65c92fa0980cef (diff)
ARM: 8125/1: crypto: enable NEON SHA-1 for big endian
This tweaks the SHA-1 NEON code slightly so it works correctly under big endian, and removes the Kconfig condition preventing it from being selected if CONFIG_CPU_BIG_ENDIAN is set. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/crypto')
-rw-r--r--arch/arm/crypto/sha1-armv7-neon.S39
1 file changed, 22 insertions, 17 deletions
diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S
index 50013c0e2864..dcd01f3f0bb0 100644
--- a/arch/arm/crypto/sha1-armv7-neon.S
+++ b/arch/arm/crypto/sha1-armv7-neon.S
@@ -9,7 +9,7 @@
9 */ 9 */
10 10
11#include <linux/linkage.h> 11#include <linux/linkage.h>
12 12#include <asm/assembler.h>
13 13
14.syntax unified 14.syntax unified
15.code 32 15.code 32
@@ -61,13 +61,13 @@
61#define RT3 r12 61#define RT3 r12
62 62
63#define W0 q0 63#define W0 q0
64#define W1 q1 64#define W1 q7
65#define W2 q2 65#define W2 q2
66#define W3 q3 66#define W3 q3
67#define W4 q4 67#define W4 q4
68#define W5 q5 68#define W5 q6
69#define W6 q6 69#define W6 q5
70#define W7 q7 70#define W7 q1
71 71
72#define tmp0 q8 72#define tmp0 q8
73#define tmp1 q9 73#define tmp1 q9
@@ -79,6 +79,11 @@
79#define qK3 q14 79#define qK3 q14
80#define qK4 q15 80#define qK4 q15
81 81
82#ifdef CONFIG_CPU_BIG_ENDIAN
83#define ARM_LE(code...)
84#else
85#define ARM_LE(code...) code
86#endif
82 87
83/* Round function macros. */ 88/* Round function macros. */
84 89
@@ -150,45 +155,45 @@
150#define W_PRECALC_00_15() \ 155#define W_PRECALC_00_15() \
151 add RWK, sp, #(WK_offs(0)); \ 156 add RWK, sp, #(WK_offs(0)); \
152 \ 157 \
153 vld1.32 {tmp0, tmp1}, [RDATA]!; \ 158 vld1.32 {W0, W7}, [RDATA]!; \
154 vrev32.8 W0, tmp0; /* big => little */ \ 159 ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \
155 vld1.32 {tmp2, tmp3}, [RDATA]!; \ 160 vld1.32 {W6, W5}, [RDATA]!; \
156 vadd.u32 tmp0, W0, curK; \ 161 vadd.u32 tmp0, W0, curK; \
157 vrev32.8 W7, tmp1; /* big => little */ \ 162 ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \
158 vrev32.8 W6, tmp2; /* big => little */ \ 163 ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \
159 vadd.u32 tmp1, W7, curK; \ 164 vadd.u32 tmp1, W7, curK; \
160 vrev32.8 W5, tmp3; /* big => little */ \ 165 ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \
161 vadd.u32 tmp2, W6, curK; \ 166 vadd.u32 tmp2, W6, curK; \
162 vst1.32 {tmp0, tmp1}, [RWK]!; \ 167 vst1.32 {tmp0, tmp1}, [RWK]!; \
163 vadd.u32 tmp3, W5, curK; \ 168 vadd.u32 tmp3, W5, curK; \
164 vst1.32 {tmp2, tmp3}, [RWK]; \ 169 vst1.32 {tmp2, tmp3}, [RWK]; \
165 170
166#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 171#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
167 vld1.32 {tmp0, tmp1}, [RDATA]!; \ 172 vld1.32 {W0, W7}, [RDATA]!; \
168 173
169#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 174#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
170 add RWK, sp, #(WK_offs(0)); \ 175 add RWK, sp, #(WK_offs(0)); \
171 176
172#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 177#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
173 vrev32.8 W0, tmp0; /* big => little */ \ 178 ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \
174 179
175#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 180#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
176 vld1.32 {tmp2, tmp3}, [RDATA]!; \ 181 vld1.32 {W6, W5}, [RDATA]!; \
177 182
178#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 183#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
179 vadd.u32 tmp0, W0, curK; \ 184 vadd.u32 tmp0, W0, curK; \
180 185
181#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 186#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
182 vrev32.8 W7, tmp1; /* big => little */ \ 187 ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \
183 188
184#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 189#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
185 vrev32.8 W6, tmp2; /* big => little */ \ 190 ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \
186 191
187#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 192#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
188 vadd.u32 tmp1, W7, curK; \ 193 vadd.u32 tmp1, W7, curK; \
189 194
190#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 195#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
191 vrev32.8 W5, tmp3; /* big => little */ \ 196 ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \
192 197
193#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ 198#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
194 vadd.u32 tmp2, W6, curK; \ 199 vadd.u32 tmp2, W6, curK; \