diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2014-07-29 12:15:24 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2014-08-02 03:51:50 -0400 |
commit | c8611d712ad01289a0b6a83cc93bba3a1ef4e990 (patch) | |
tree | f3988ae6af0e0719fe501cbe3f040c41a917f345 /arch/arm/crypto/sha512_neon_glue.c | |
parent | 604682551aa511e00e57706ad5d9fcf955ee0323 (diff) |
ARM: 8120/1: crypto: sha512: add ARM NEON implementation
This patch adds an ARM NEON assembly implementation of the SHA-512 and SHA-384
algorithms.
tcrypt benchmark results on Cortex-A8, sha512-generic vs sha512-neon-asm:
block-size bytes/update old-vs-new
16 16 2.99x
64 16 2.67x
64 64 3.00x
256 16 2.64x
256 64 3.06x
256 256 3.33x
1024 16 2.53x
1024 256 3.39x
1024 1024 3.52x
2048 16 2.50x
2048 256 3.41x
2048 1024 3.54x
2048 2048 3.57x
4096 16 2.49x
4096 256 3.42x
4096 1024 3.56x
4096 4096 3.59x
8192 16 2.48x
8192 256 3.42x
8192 1024 3.56x
8192 4096 3.60x
8192 8192 3.60x
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/crypto/sha512_neon_glue.c')
-rw-r--r-- | arch/arm/crypto/sha512_neon_glue.c | 305 |
1 files changed, 305 insertions, 0 deletions
diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c new file mode 100644 index 000000000000..0d2758ff5e12 --- /dev/null +++ b/arch/arm/crypto/sha512_neon_glue.c | |||
@@ -0,0 +1,305 @@ | |||
1 | /* | ||
2 | * Glue code for the SHA512 Secure Hash Algorithm assembly implementation | ||
3 | * using NEON instructions. | ||
4 | * | ||
5 | * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
6 | * | ||
7 | * This file is based on sha512_ssse3_glue.c: | ||
8 | * Copyright (C) 2013 Intel Corporation | ||
9 | * Author: Tim Chen <tim.c.chen@linux.intel.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <crypto/internal/hash.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/mm.h> | ||
22 | #include <linux/cryptohash.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <linux/string.h> | ||
25 | #include <crypto/sha.h> | ||
26 | #include <asm/byteorder.h> | ||
27 | #include <asm/simd.h> | ||
28 | #include <asm/neon.h> | ||
29 | |||
30 | |||
/*
 * SHA-512 round constants K[0..79] (shared by SHA-384).
 *
 * The table is handed unchanged to sha512_transform_neon() as its 'k'
 * argument on every call made from this file.
 */
static const u64 sha512_k[] = {
	0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
	0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
	0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
	0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
	0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
	0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
	0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
	0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
	0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
	0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
	0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
	0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
	0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
	0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
	0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
	0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
	0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
	0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
	0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
	0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
	0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
	0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
	0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
	0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
	0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
	0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
	0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
	0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
	0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
	0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
	0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
	0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
	0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
	0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
	0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
	0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
	0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
	0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
	0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
	0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};
73 | |||
74 | |||
/*
 * Block transform defined outside this file (declared asmlinkage).
 *
 * Callers in this file pass the hash state words as @digest, a pointer to
 * the input bytes as @data, the sha512_k table as @k and the number of
 * SHA512_BLOCK_SIZE-byte blocks to consume as @num_blks.  Every call made
 * from this file happens between kernel_neon_begin() and kernel_neon_end().
 */
asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
				      const u64 k[], unsigned int num_blks);
77 | |||
78 | |||
79 | static int sha512_neon_init(struct shash_desc *desc) | ||
80 | { | ||
81 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
82 | |||
83 | sctx->state[0] = SHA512_H0; | ||
84 | sctx->state[1] = SHA512_H1; | ||
85 | sctx->state[2] = SHA512_H2; | ||
86 | sctx->state[3] = SHA512_H3; | ||
87 | sctx->state[4] = SHA512_H4; | ||
88 | sctx->state[5] = SHA512_H5; | ||
89 | sctx->state[6] = SHA512_H6; | ||
90 | sctx->state[7] = SHA512_H7; | ||
91 | sctx->count[0] = sctx->count[1] = 0; | ||
92 | |||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | static int __sha512_neon_update(struct shash_desc *desc, const u8 *data, | ||
97 | unsigned int len, unsigned int partial) | ||
98 | { | ||
99 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
100 | unsigned int done = 0; | ||
101 | |||
102 | sctx->count[0] += len; | ||
103 | if (sctx->count[0] < len) | ||
104 | sctx->count[1]++; | ||
105 | |||
106 | if (partial) { | ||
107 | done = SHA512_BLOCK_SIZE - partial; | ||
108 | memcpy(sctx->buf + partial, data, done); | ||
109 | sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1); | ||
110 | } | ||
111 | |||
112 | if (len - done >= SHA512_BLOCK_SIZE) { | ||
113 | const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; | ||
114 | |||
115 | sha512_transform_neon(sctx->state, data + done, sha512_k, | ||
116 | rounds); | ||
117 | |||
118 | done += rounds * SHA512_BLOCK_SIZE; | ||
119 | } | ||
120 | |||
121 | memcpy(sctx->buf, data + done, len - done); | ||
122 | |||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | static int sha512_neon_update(struct shash_desc *desc, const u8 *data, | ||
127 | unsigned int len) | ||
128 | { | ||
129 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
130 | unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
131 | int res; | ||
132 | |||
133 | /* Handle the fast case right here */ | ||
134 | if (partial + len < SHA512_BLOCK_SIZE) { | ||
135 | sctx->count[0] += len; | ||
136 | if (sctx->count[0] < len) | ||
137 | sctx->count[1]++; | ||
138 | memcpy(sctx->buf + partial, data, len); | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | if (!may_use_simd()) { | ||
144 | res = crypto_sha512_update(desc, data, len); | ||
145 | } else { | ||
146 | kernel_neon_begin(); | ||
147 | res = __sha512_neon_update(desc, data, len, partial); | ||
148 | kernel_neon_end(); | ||
149 | } | ||
150 | |||
151 | return res; | ||
152 | } | ||
153 | |||
154 | |||
155 | /* Add padding and return the message digest. */ | ||
156 | static int sha512_neon_final(struct shash_desc *desc, u8 *out) | ||
157 | { | ||
158 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
159 | unsigned int i, index, padlen; | ||
160 | __be64 *dst = (__be64 *)out; | ||
161 | __be64 bits[2]; | ||
162 | static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; | ||
163 | |||
164 | /* save number of bits */ | ||
165 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
166 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
167 | |||
168 | /* Pad out to 112 mod 128 and append length */ | ||
169 | index = sctx->count[0] & 0x7f; | ||
170 | padlen = (index < 112) ? (112 - index) : ((128+112) - index); | ||
171 | |||
172 | if (!may_use_simd()) { | ||
173 | crypto_sha512_update(desc, padding, padlen); | ||
174 | crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
175 | } else { | ||
176 | kernel_neon_begin(); | ||
177 | /* We need to fill a whole block for __sha512_neon_update() */ | ||
178 | if (padlen <= 112) { | ||
179 | sctx->count[0] += padlen; | ||
180 | if (sctx->count[0] < padlen) | ||
181 | sctx->count[1]++; | ||
182 | memcpy(sctx->buf + index, padding, padlen); | ||
183 | } else { | ||
184 | __sha512_neon_update(desc, padding, padlen, index); | ||
185 | } | ||
186 | __sha512_neon_update(desc, (const u8 *)&bits, | ||
187 | sizeof(bits), 112); | ||
188 | kernel_neon_end(); | ||
189 | } | ||
190 | |||
191 | /* Store state in digest */ | ||
192 | for (i = 0; i < 8; i++) | ||
193 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
194 | |||
195 | /* Wipe context */ | ||
196 | memset(sctx, 0, sizeof(*sctx)); | ||
197 | |||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | static int sha512_neon_export(struct shash_desc *desc, void *out) | ||
202 | { | ||
203 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
204 | |||
205 | memcpy(out, sctx, sizeof(*sctx)); | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static int sha512_neon_import(struct shash_desc *desc, const void *in) | ||
211 | { | ||
212 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
213 | |||
214 | memcpy(sctx, in, sizeof(*sctx)); | ||
215 | |||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | static int sha384_neon_init(struct shash_desc *desc) | ||
220 | { | ||
221 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
222 | |||
223 | sctx->state[0] = SHA384_H0; | ||
224 | sctx->state[1] = SHA384_H1; | ||
225 | sctx->state[2] = SHA384_H2; | ||
226 | sctx->state[3] = SHA384_H3; | ||
227 | sctx->state[4] = SHA384_H4; | ||
228 | sctx->state[5] = SHA384_H5; | ||
229 | sctx->state[6] = SHA384_H6; | ||
230 | sctx->state[7] = SHA384_H7; | ||
231 | |||
232 | sctx->count[0] = sctx->count[1] = 0; | ||
233 | |||
234 | return 0; | ||
235 | } | ||
236 | |||
237 | static int sha384_neon_final(struct shash_desc *desc, u8 *hash) | ||
238 | { | ||
239 | u8 D[SHA512_DIGEST_SIZE]; | ||
240 | |||
241 | sha512_neon_final(desc, D); | ||
242 | |||
243 | memcpy(hash, D, SHA384_DIGEST_SIZE); | ||
244 | memset(D, 0, SHA512_DIGEST_SIZE); | ||
245 | |||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | static struct shash_alg algs[] = { { | ||
250 | .digestsize = SHA512_DIGEST_SIZE, | ||
251 | .init = sha512_neon_init, | ||
252 | .update = sha512_neon_update, | ||
253 | .final = sha512_neon_final, | ||
254 | .export = sha512_neon_export, | ||
255 | .import = sha512_neon_import, | ||
256 | .descsize = sizeof(struct sha512_state), | ||
257 | .statesize = sizeof(struct sha512_state), | ||
258 | .base = { | ||
259 | .cra_name = "sha512", | ||
260 | .cra_driver_name = "sha512-neon", | ||
261 | .cra_priority = 250, | ||
262 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
263 | .cra_blocksize = SHA512_BLOCK_SIZE, | ||
264 | .cra_module = THIS_MODULE, | ||
265 | } | ||
266 | }, { | ||
267 | .digestsize = SHA384_DIGEST_SIZE, | ||
268 | .init = sha384_neon_init, | ||
269 | .update = sha512_neon_update, | ||
270 | .final = sha384_neon_final, | ||
271 | .export = sha512_neon_export, | ||
272 | .import = sha512_neon_import, | ||
273 | .descsize = sizeof(struct sha512_state), | ||
274 | .statesize = sizeof(struct sha512_state), | ||
275 | .base = { | ||
276 | .cra_name = "sha384", | ||
277 | .cra_driver_name = "sha384-neon", | ||
278 | .cra_priority = 250, | ||
279 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
280 | .cra_blocksize = SHA384_BLOCK_SIZE, | ||
281 | .cra_module = THIS_MODULE, | ||
282 | } | ||
283 | } }; | ||
284 | |||
285 | static int __init sha512_neon_mod_init(void) | ||
286 | { | ||
287 | if (!cpu_has_neon()) | ||
288 | return -ENODEV; | ||
289 | |||
290 | return crypto_register_shashes(algs, ARRAY_SIZE(algs)); | ||
291 | } | ||
292 | |||
/* Module exit: unregister every descriptor in algs[]. */
static void __exit sha512_neon_mod_fini(void)
{
	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
297 | |||
/* Hook the init/exit routines above into module load and unload. */
module_init(sha512_neon_mod_init);
module_exit(sha512_neon_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");

/*
 * Aliases matching the cra_name values in algs[]; presumably these let a
 * request for "sha512" or "sha384" load this module - TODO confirm.
 */
MODULE_ALIAS("sha512");
MODULE_ALIAS("sha384");