aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/crypto/sha512_neon_glue.c
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2014-07-29 12:15:24 -0400
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2014-08-02 03:51:50 -0400
commit: c8611d712ad01289a0b6a83cc93bba3a1ef4e990 (patch)
tree: f3988ae6af0e0719fe501cbe3f040c41a917f345 /arch/arm/crypto/sha512_neon_glue.c
parent: 604682551aa511e00e57706ad5d9fcf955ee0323 (diff)
ARM: 8120/1: crypto: sha512: add ARM NEON implementation
This patch adds an ARM NEON assembly implementation of the SHA-512 and SHA-384 algorithms.

tcrypt benchmark results on Cortex-A8, sha512-generic vs sha512-neon-asm:

block-size  bytes/update  old-vs-new
16          16            2.99x
64          16            2.67x
64          64            3.00x
256         16            2.64x
256         64            3.06x
256         256           3.33x
1024        16            2.53x
1024        256           3.39x
1024        1024          3.52x
2048        16            2.50x
2048        256           3.41x
2048        1024          3.54x
2048        2048          3.57x
4096        16            2.49x
4096        256           3.42x
4096        1024          3.56x
4096        4096          3.59x
8192        16            2.48x
8192        256           3.42x
8192        1024          3.56x
8192        4096          3.60x
8192        8192          3.60x

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/crypto/sha512_neon_glue.c')
-rw-r--r--arch/arm/crypto/sha512_neon_glue.c305
1 files changed, 305 insertions, 0 deletions
diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c
new file mode 100644
index 000000000000..0d2758ff5e12
--- /dev/null
+++ b/arch/arm/crypto/sha512_neon_glue.c
@@ -0,0 +1,305 @@
1/*
2 * Glue code for the SHA512 Secure Hash Algorithm assembly implementation
3 * using NEON instructions.
4 *
5 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
6 *
7 * This file is based on sha512_ssse3_glue.c:
8 * Copyright (C) 2013 Intel Corporation
9 * Author: Tim Chen <tim.c.chen@linux.intel.com>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 */
17
18#include <crypto/internal/hash.h>
19#include <linux/init.h>
20#include <linux/module.h>
21#include <linux/mm.h>
22#include <linux/cryptohash.h>
23#include <linux/types.h>
24#include <linux/string.h>
25#include <crypto/sha.h>
26#include <asm/byteorder.h>
27#include <asm/simd.h>
28#include <asm/neon.h>
29
30
/*
 * SHA-512 round constants (the "K" table): 80 64-bit words, listed two per
 * line.  The table is not used directly by the C code in this file; it is
 * only handed to sha512_transform_neon() as its k argument.
 */
31static const u64 sha512_k[] = {
32 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
33 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
34 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
35 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
36 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
37 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
38 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
39 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
40 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
41 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
42 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
43 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
44 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
45 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
46 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
47 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
48 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
49 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
50 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
51 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
52 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
53 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
54 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
55 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
56 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
57 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
58 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
59 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
60 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
61 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
62 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
63 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
64 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
65 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
66 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
67 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
68 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
69 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
70 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
71 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
72};
73
74
/*
 * Block transform implemented outside this file (the file header describes
 * it as a NEON assembly implementation).
 *
 * As used in this file: digest is the context's state words, data points at
 * the input to hash, k is the sha512_k table, and num_blks is the number of
 * SHA512_BLOCK_SIZE-byte blocks to process starting at data.  Every NEON-path
 * call in this file is made between kernel_neon_begin() and
 * kernel_neon_end().
 */
75asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
76 const u64 k[], unsigned int num_blks);
77
78
79static int sha512_neon_init(struct shash_desc *desc)
80{
81 struct sha512_state *sctx = shash_desc_ctx(desc);
82
83 sctx->state[0] = SHA512_H0;
84 sctx->state[1] = SHA512_H1;
85 sctx->state[2] = SHA512_H2;
86 sctx->state[3] = SHA512_H3;
87 sctx->state[4] = SHA512_H4;
88 sctx->state[5] = SHA512_H5;
89 sctx->state[6] = SHA512_H6;
90 sctx->state[7] = SHA512_H7;
91 sctx->count[0] = sctx->count[1] = 0;
92
93 return 0;
94}
95
96static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
97 unsigned int len, unsigned int partial)
98{
99 struct sha512_state *sctx = shash_desc_ctx(desc);
100 unsigned int done = 0;
101
102 sctx->count[0] += len;
103 if (sctx->count[0] < len)
104 sctx->count[1]++;
105
106 if (partial) {
107 done = SHA512_BLOCK_SIZE - partial;
108 memcpy(sctx->buf + partial, data, done);
109 sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
110 }
111
112 if (len - done >= SHA512_BLOCK_SIZE) {
113 const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
114
115 sha512_transform_neon(sctx->state, data + done, sha512_k,
116 rounds);
117
118 done += rounds * SHA512_BLOCK_SIZE;
119 }
120
121 memcpy(sctx->buf, data + done, len - done);
122
123 return 0;
124}
125
126static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
127 unsigned int len)
128{
129 struct sha512_state *sctx = shash_desc_ctx(desc);
130 unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
131 int res;
132
133 /* Handle the fast case right here */
134 if (partial + len < SHA512_BLOCK_SIZE) {
135 sctx->count[0] += len;
136 if (sctx->count[0] < len)
137 sctx->count[1]++;
138 memcpy(sctx->buf + partial, data, len);
139
140 return 0;
141 }
142
143 if (!may_use_simd()) {
144 res = crypto_sha512_update(desc, data, len);
145 } else {
146 kernel_neon_begin();
147 res = __sha512_neon_update(desc, data, len, partial);
148 kernel_neon_end();
149 }
150
151 return res;
152}
153
154
155/* Add padding and return the message digest. */
156static int sha512_neon_final(struct shash_desc *desc, u8 *out)
157{
158 struct sha512_state *sctx = shash_desc_ctx(desc);
159 unsigned int i, index, padlen;
160 __be64 *dst = (__be64 *)out;
161 __be64 bits[2];
162 static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
163
164 /* save number of bits */
165 bits[1] = cpu_to_be64(sctx->count[0] << 3);
166 bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
167
168 /* Pad out to 112 mod 128 and append length */
169 index = sctx->count[0] & 0x7f;
170 padlen = (index < 112) ? (112 - index) : ((128+112) - index);
171
172 if (!may_use_simd()) {
173 crypto_sha512_update(desc, padding, padlen);
174 crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
175 } else {
176 kernel_neon_begin();
177 /* We need to fill a whole block for __sha512_neon_update() */
178 if (padlen <= 112) {
179 sctx->count[0] += padlen;
180 if (sctx->count[0] < padlen)
181 sctx->count[1]++;
182 memcpy(sctx->buf + index, padding, padlen);
183 } else {
184 __sha512_neon_update(desc, padding, padlen, index);
185 }
186 __sha512_neon_update(desc, (const u8 *)&bits,
187 sizeof(bits), 112);
188 kernel_neon_end();
189 }
190
191 /* Store state in digest */
192 for (i = 0; i < 8; i++)
193 dst[i] = cpu_to_be64(sctx->state[i]);
194
195 /* Wipe context */
196 memset(sctx, 0, sizeof(*sctx));
197
198 return 0;
199}
200
201static int sha512_neon_export(struct shash_desc *desc, void *out)
202{
203 struct sha512_state *sctx = shash_desc_ctx(desc);
204
205 memcpy(out, sctx, sizeof(*sctx));
206
207 return 0;
208}
209
210static int sha512_neon_import(struct shash_desc *desc, const void *in)
211{
212 struct sha512_state *sctx = shash_desc_ctx(desc);
213
214 memcpy(sctx, in, sizeof(*sctx));
215
216 return 0;
217}
218
219static int sha384_neon_init(struct shash_desc *desc)
220{
221 struct sha512_state *sctx = shash_desc_ctx(desc);
222
223 sctx->state[0] = SHA384_H0;
224 sctx->state[1] = SHA384_H1;
225 sctx->state[2] = SHA384_H2;
226 sctx->state[3] = SHA384_H3;
227 sctx->state[4] = SHA384_H4;
228 sctx->state[5] = SHA384_H5;
229 sctx->state[6] = SHA384_H6;
230 sctx->state[7] = SHA384_H7;
231
232 sctx->count[0] = sctx->count[1] = 0;
233
234 return 0;
235}
236
237static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
238{
239 u8 D[SHA512_DIGEST_SIZE];
240
241 sha512_neon_final(desc, D);
242
243 memcpy(hash, D, SHA384_DIGEST_SIZE);
244 memset(D, 0, SHA512_DIGEST_SIZE);
245
246 return 0;
247}
248
249static struct shash_alg algs[] = { {
250 .digestsize = SHA512_DIGEST_SIZE,
251 .init = sha512_neon_init,
252 .update = sha512_neon_update,
253 .final = sha512_neon_final,
254 .export = sha512_neon_export,
255 .import = sha512_neon_import,
256 .descsize = sizeof(struct sha512_state),
257 .statesize = sizeof(struct sha512_state),
258 .base = {
259 .cra_name = "sha512",
260 .cra_driver_name = "sha512-neon",
261 .cra_priority = 250,
262 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
263 .cra_blocksize = SHA512_BLOCK_SIZE,
264 .cra_module = THIS_MODULE,
265 }
266}, {
267 .digestsize = SHA384_DIGEST_SIZE,
268 .init = sha384_neon_init,
269 .update = sha512_neon_update,
270 .final = sha384_neon_final,
271 .export = sha512_neon_export,
272 .import = sha512_neon_import,
273 .descsize = sizeof(struct sha512_state),
274 .statesize = sizeof(struct sha512_state),
275 .base = {
276 .cra_name = "sha384",
277 .cra_driver_name = "sha384-neon",
278 .cra_priority = 250,
279 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
280 .cra_blocksize = SHA384_BLOCK_SIZE,
281 .cra_module = THIS_MODULE,
282 }
283} };
284
285static int __init sha512_neon_mod_init(void)
286{
287 if (!cpu_has_neon())
288 return -ENODEV;
289
290 return crypto_register_shashes(algs, ARRAY_SIZE(algs));
291}
292
293static void __exit sha512_neon_mod_fini(void)
294{
295 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
296}
297
/* Hook the init/exit functions defined in this file into module load/unload. */
298module_init(sha512_neon_mod_init);
299module_exit(sha512_neon_mod_fini);
300
301MODULE_LICENSE("GPL");
302MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
303
/* Aliases equal to the cra_name strings of the two algs[] entries. */
304MODULE_ALIAS("sha512");
305MODULE_ALIAS("sha384");