diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2014-08-05 16:15:19 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2014-08-27 10:44:11 -0400 |
commit | 0777e3e1723f69276136140209c11deeecb7c6dc (patch) | |
tree | 59e04134940a024d8ad96c0274f856824a153f5b /arch/arm/crypto | |
parent | 52addcf9d6669fa439387610bc65c92fa0980cef (diff) |
ARM: 8125/1: crypto: enable NEON SHA-1 for big endian
This tweaks the SHA-1 NEON code slightly so it works correctly under big
endian, and removes the Kconfig condition preventing it from being
selected if CONFIG_CPU_BIG_ENDIAN is set.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/crypto')
-rw-r--r-- | arch/arm/crypto/sha1-armv7-neon.S | 39 |
1 file changed, 22 insertions, 17 deletions
diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S
index 50013c0e2864..dcd01f3f0bb0 100644
--- a/arch/arm/crypto/sha1-armv7-neon.S
+++ b/arch/arm/crypto/sha1-armv7-neon.S
@@ -9,7 +9,7 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/linkage.h> | 11 | #include <linux/linkage.h> |
12 | 12 | #include <asm/assembler.h> | |
13 | 13 | ||
14 | .syntax unified | 14 | .syntax unified |
15 | .code 32 | 15 | .code 32 |
@@ -61,13 +61,13 @@ | |||
61 | #define RT3 r12 | 61 | #define RT3 r12 |
62 | 62 | ||
63 | #define W0 q0 | 63 | #define W0 q0 |
64 | #define W1 q1 | 64 | #define W1 q7 |
65 | #define W2 q2 | 65 | #define W2 q2 |
66 | #define W3 q3 | 66 | #define W3 q3 |
67 | #define W4 q4 | 67 | #define W4 q4 |
68 | #define W5 q5 | 68 | #define W5 q6 |
69 | #define W6 q6 | 69 | #define W6 q5 |
70 | #define W7 q7 | 70 | #define W7 q1 |
71 | 71 | ||
72 | #define tmp0 q8 | 72 | #define tmp0 q8 |
73 | #define tmp1 q9 | 73 | #define tmp1 q9 |
@@ -79,6 +79,11 @@ | |||
79 | #define qK3 q14 | 79 | #define qK3 q14 |
80 | #define qK4 q15 | 80 | #define qK4 q15 |
81 | 81 | ||
82 | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
83 | #define ARM_LE(code...) | ||
84 | #else | ||
85 | #define ARM_LE(code...) code | ||
86 | #endif | ||
82 | 87 | ||
83 | /* Round function macros. */ | 88 | /* Round function macros. */ |
84 | 89 | ||
@@ -150,45 +155,45 @@ | |||
150 | #define W_PRECALC_00_15() \ | 155 | #define W_PRECALC_00_15() \ |
151 | add RWK, sp, #(WK_offs(0)); \ | 156 | add RWK, sp, #(WK_offs(0)); \ |
152 | \ | 157 | \ |
153 | vld1.32 {tmp0, tmp1}, [RDATA]!; \ | 158 | vld1.32 {W0, W7}, [RDATA]!; \ |
154 | vrev32.8 W0, tmp0; /* big => little */ \ | 159 | ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ |
155 | vld1.32 {tmp2, tmp3}, [RDATA]!; \ | 160 | vld1.32 {W6, W5}, [RDATA]!; \ |
156 | vadd.u32 tmp0, W0, curK; \ | 161 | vadd.u32 tmp0, W0, curK; \ |
157 | vrev32.8 W7, tmp1; /* big => little */ \ | 162 | ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ |
158 | vrev32.8 W6, tmp2; /* big => little */ \ | 163 | ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ |
159 | vadd.u32 tmp1, W7, curK; \ | 164 | vadd.u32 tmp1, W7, curK; \ |
160 | vrev32.8 W5, tmp3; /* big => little */ \ | 165 | ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ |
161 | vadd.u32 tmp2, W6, curK; \ | 166 | vadd.u32 tmp2, W6, curK; \ |
162 | vst1.32 {tmp0, tmp1}, [RWK]!; \ | 167 | vst1.32 {tmp0, tmp1}, [RWK]!; \ |
163 | vadd.u32 tmp3, W5, curK; \ | 168 | vadd.u32 tmp3, W5, curK; \ |
164 | vst1.32 {tmp2, tmp3}, [RWK]; \ | 169 | vst1.32 {tmp2, tmp3}, [RWK]; \ |
165 | 170 | ||
166 | #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 171 | #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
167 | vld1.32 {tmp0, tmp1}, [RDATA]!; \ | 172 | vld1.32 {W0, W7}, [RDATA]!; \ |
168 | 173 | ||
169 | #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 174 | #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
170 | add RWK, sp, #(WK_offs(0)); \ | 175 | add RWK, sp, #(WK_offs(0)); \ |
171 | 176 | ||
172 | #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 177 | #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
173 | vrev32.8 W0, tmp0; /* big => little */ \ | 178 | ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ |
174 | 179 | ||
175 | #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 180 | #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
176 | vld1.32 {tmp2, tmp3}, [RDATA]!; \ | 181 | vld1.32 {W6, W5}, [RDATA]!; \ |
177 | 182 | ||
178 | #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 183 | #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
179 | vadd.u32 tmp0, W0, curK; \ | 184 | vadd.u32 tmp0, W0, curK; \ |
180 | 185 | ||
181 | #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 186 | #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
182 | vrev32.8 W7, tmp1; /* big => little */ \ | 187 | ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ |
183 | 188 | ||
184 | #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 189 | #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
185 | vrev32.8 W6, tmp2; /* big => little */ \ | 190 | ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ |
186 | 191 | ||
187 | #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 192 | #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
188 | vadd.u32 tmp1, W7, curK; \ | 193 | vadd.u32 tmp1, W7, curK; \ |
189 | 194 | ||
190 | #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 195 | #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
191 | vrev32.8 W5, tmp3; /* big => little */ \ | 196 | ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ |
192 | 197 | ||
193 | #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | 198 | #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ |
194 | vadd.u32 tmp2, W6, curK; \ | 199 | vadd.u32 tmp2, W6, curK; \ |