author		Linus Torvalds <torvalds@linux-foundation.org>	2015-04-15 13:42:15 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-15 13:42:15 -0400
commit		cb906953d2c3fd450655d9fa833f03690ad50c23 (patch)
tree		06c5665afb24baee3ac49f62db61ca97918079b4 /arch/arm64
parent		6c373ca89399c5a3f7ef210ad8f63dc3437da345 (diff)
parent		3abafaf2192b1712079edfd4232b19877d6f41a5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
 "Here is the crypto update for 4.1:

  New interfaces:
   - user-space interface for AEAD
   - user-space interface for RNG (i.e., pseudo RNG)

  New hashes:
   - ARMv8 SHA1/256
   - ARMv8 AES
   - ARMv8 GHASH
   - ARM assembler and NEON SHA256
   - MIPS OCTEON SHA1/256/512
   - MIPS img-hash SHA1/256 and MD5
   - Power 8 VMX AES/CBC/CTR/GHASH
   - PPC assembler AES, SHA1/256 and MD5
   - Broadcom IPROC RNG driver

  Cleanups/fixes:
   - prevent internal helper algos from being exposed to user-space
   - merge common code from assembly/C SHA implementations
   - misc fixes"

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (169 commits)
  crypto: arm - workaround for building with old binutils
  crypto: arm/sha256 - avoid sha256 code on ARMv7-M
  crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer
  crypto: x86/sha256_ssse3 - move SHA-224/256 SSSE3 implementation to base layer
  crypto: x86/sha1_ssse3 - move SHA-1 SSSE3 implementation to base layer
  crypto: arm64/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
  crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer
  crypto: arm/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
  crypto: arm/sha256 - move SHA-224/256 ASM/NEON implementation to base layer
  crypto: arm/sha1-ce - move SHA-1 ARMv8 implementation to base layer
  crypto: arm/sha1_neon - move SHA-1 NEON implementation to base layer
  crypto: arm/sha1 - move SHA-1 ARM asm implementation to base layer
  crypto: sha512-generic - move to generic glue implementation
  crypto: sha256-generic - move to generic glue implementation
  crypto: sha1-generic - move to generic glue implementation
  crypto: sha512 - implement base layer for SHA-512
  crypto: sha256 - implement base layer for SHA-256
  crypto: sha1 - implement base layer for SHA-1
  crypto: api - remove instance when test failed
  crypto: api - Move alg ref count init to crypto_check_alg
  ...
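One of the new interfaces called out above is the user-space RNG interface exposed through AF_ALG. As a rough, hedged illustration (user-space code, not part of this merge), the sketch below requests random bytes from the kernel over an "rng" algorithm socket; the algorithm name "stdrng", the fallback AF_ALG definition, and the note about seeding are assumptions about a typical setup rather than anything this pull request specifies.

/*
 * Hedged sketch: pull random bytes from the kernel crypto API through the
 * AF_ALG "rng" socket type. Algorithm name and error handling are illustrative.
 */
#include <linux/if_alg.h>
#include <sys/socket.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef AF_ALG
#define AF_ALG 38		/* assumed value on headers that lack it */
#endif

int main(void)
{
	struct sockaddr_alg sa;
	unsigned char buf[16];
	int tfmfd, opfd;
	ssize_t i, n;

	memset(&sa, 0, sizeof(sa));
	sa.salg_family = AF_ALG;
	strcpy((char *)sa.salg_type, "rng");	/* the new user-space RNG type */
	strcpy((char *)sa.salg_name, "stdrng");	/* assumed default DRBG name */

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (tfmfd < 0 || bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;

	/* Some RNG implementations may require an explicit seed first,
	 * supplied via setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, ...). */
	opfd = accept(tfmfd, NULL, 0);
	if (opfd < 0)
		return 1;

	n = read(opfd, buf, sizeof(buf));	/* each read returns random bytes */
	if (n != (ssize_t)sizeof(buf))
		return 1;

	for (i = 0; i < n; i++)
		printf("%02x", buf[i]);
	printf("\n");

	close(opfd);
	close(tfmfd);
	return 0;
}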
Diffstat (limited to 'arch/arm64')
-rw-r--r--	arch/arm64/crypto/aes-glue.c	12
-rw-r--r--	arch/arm64/crypto/sha1-ce-core.S	33
-rw-r--r--	arch/arm64/crypto/sha1-ce-glue.c	151
-rw-r--r--	arch/arm64/crypto/sha2-ce-core.S	29
-rw-r--r--	arch/arm64/crypto/sha2-ce-glue.c	227
5 files changed, 130 insertions(+), 322 deletions(-)
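For orientation before the diff: the arm64 SHA glue code below is converted to the new common sha*_base layer, but in-kernel callers keep using the generic hash API and simply get the Crypto Extensions driver selected by priority. A minimal, hypothetical caller (not part of this merge, using the shash API as it existed around this release) might look like this:

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>

/* Hypothetical one-shot SHA-1 helper. crypto_alloc_shash("sha1", 0, 0)
 * resolves to the "sha1-ce" driver on CPUs with the ARMv8 Crypto
 * Extensions, because that driver registers a higher cra_priority. */
static int demo_sha1_digest(const u8 *data, unsigned int len,
			    u8 out[SHA1_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("sha1", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		/* stack descriptor sized for the selected implementation */
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		err = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return err;
}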
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index b1b5b893eb20..05d9e16c0dfd 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { {
 	.cra_name		= "__ecb-aes-" MODE,
 	.cra_driver_name	= "__driver-ecb-aes-" MODE,
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 	.cra_alignmask		= 7,
@@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { {
 	.cra_name		= "__cbc-aes-" MODE,
 	.cra_driver_name	= "__driver-cbc-aes-" MODE,
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 	.cra_alignmask		= 7,
@@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { {
 	.cra_name		= "__ctr-aes-" MODE,
 	.cra_driver_name	= "__driver-ctr-aes-" MODE,
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 	.cra_alignmask		= 7,
@@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { {
 	.cra_name		= "__xts-aes-" MODE,
 	.cra_driver_name	= "__driver-xts-aes-" MODE,
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
 	.cra_alignmask		= 7,
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..033aae6d732a 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -66,8 +66,8 @@
 	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6

 	/*
-	 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-	 *			  u8 *head, long bytes)
+	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+	 *			  int blocks)
 	 */
 ENTRY(sha1_ce_transform)
 	/* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
 	ld1r		{k3.4s}, [x6]

 	/* load state */
-	ldr		dga, [x2]
-	ldr		dgb, [x2, #16]
+	ldr		dga, [x0]
+	ldr		dgb, [x0, #16]

-	/* load partial state (if supplied) */
-	cbz		x3, 0f
-	ld1		{v8.4s-v11.4s}, [x3]
-	b		1f
+	/* load sha1_ce_state::finalize */
+	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]

 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
-	sub		w0, w0, #1
+	sub		w2, w2, #1

-1:
 CPU_LE(	rev32		v8.16b, v8.16b		)
 CPU_LE(	rev32		v9.16b, v9.16b		)
 CPU_LE(	rev32		v10.16b, v10.16b	)
 CPU_LE(	rev32		v11.16b, v11.16b	)

-2:	add		t0.4s, v8.4s, k0.4s
+1:	add		t0.4s, v8.4s, k0.4s
 	mov		dg0v.16b, dgav.16b

 	add_update	c, ev, k0, 8, 9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	add		dgbv.2s, dgbv.2s, dg1v.2s
 	add		dgav.4s, dgav.4s, dg0v.4s

-	cbnz		w0, 0b
+	cbnz		w2, 0b

 	/*
 	 * Final block: add padding and total bit count.
-	 * Skip if we have no total byte count in x4. In that case, the input
-	 * size was not a round multiple of the block size, and the padding is
-	 * handled by the C code.
+	 * Skip if the input size was not a round multiple of the block size,
+	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
+	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
 	movi		v9.2d, #0
 	mov		x8, #0x80000000
 	movi		v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	mov		x4, #0
 	mov		v11.d[0], xzr
 	mov		v11.d[1], x7
-	b		2b
+	b		1b

 	/* store new state */
-3:	str		dga, [x2]
-	str		dgb, [x2, #16]
+3:	str		dga, [x0]
+	str		dgb, [x0, #16]
 	ret
 ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..114e7cc5de8c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,144 +12,81 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
+#include <crypto/sha1_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>

+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");

-asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-				  u8 *head, long bytes);
+struct sha1_ce_state {
+	struct sha1_state	sst;
+	u32			finalize;
+};

-static int sha1_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
+asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+				  int blocks);

-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-	return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
-		       unsigned int len)
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+			  unsigned int len)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
-	sctx->count += len;
-
-	if ((partial + len) >= SHA1_BLOCK_SIZE) {
-		int blocks;
-
-		if (partial) {
-			int p = SHA1_BLOCK_SIZE - partial;
-
-			memcpy(sctx->buffer + partial, data, p);
-			data += p;
-			len -= p;
-		}
-
-		blocks = len / SHA1_BLOCK_SIZE;
-		len %= SHA1_BLOCK_SIZE;
-
-		kernel_neon_begin_partial(16);
-		sha1_ce_transform(blocks, data, sctx->state,
-				  partial ? sctx->buffer : NULL, 0);
-		kernel_neon_end();
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);

-		data += blocks * SHA1_BLOCK_SIZE;
-		partial = 0;
-	}
-	if (len)
-		memcpy(sctx->buffer + partial, data, len);
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
+
 	return 0;
 }

-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, u8 *out)
 {
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	__be64 bits = cpu_to_be64(sctx->count << 3);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	u32 padlen = SHA1_BLOCK_SIZE
-		- ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
-
-	sha1_update(desc, padding, padlen);
-	sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
-	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha1_state){};
-	return 0;
-}
-
-static int sha1_finup(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int blocks;
-	int i;
-
-	if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
-		sha1_update(desc, data, len);
-		return sha1_final(desc, out);
-	}
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
+
+	ASM_EXPORT(sha1_ce_offsetof_count,
+		   offsetof(struct sha1_ce_state, sst.count));
+	ASM_EXPORT(sha1_ce_offsetof_finalize,
+		   offsetof(struct sha1_ce_state, finalize));

 	/*
-	 * Use a fast path if the input is a multiple of 64 bytes. In
-	 * this case, there is no need to copy data around, and we can
-	 * perform the entire digest calculation in a single invocation
-	 * of sha1_ce_transform()
+	 * Allow the asm code to perform the finalization if there is no
+	 * partial data and the input is a round multiple of the block size.
 	 */
-	blocks = len / SHA1_BLOCK_SIZE;
+	sctx->finalize = finalize;

 	kernel_neon_begin_partial(16);
-	sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	if (!finalize)
+		sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
-
-	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha1_state){};
-	return 0;
+	return sha1_base_finish(desc, out);
 }

-static int sha1_export(struct shash_desc *desc, void *out)
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	struct sha1_state *dst = out;
-
-	*dst = *sctx;
-	return 0;
-}
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	struct sha1_state const *src = in;
-
-	*sctx = *src;
-	return 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
+	return sha1_base_finish(desc, out);
 }

 static struct shash_alg alg = {
-	.init			= sha1_init,
-	.update			= sha1_update,
-	.final			= sha1_final,
-	.finup			= sha1_finup,
-	.export			= sha1_export,
-	.import			= sha1_import,
-	.descsize		= sizeof(struct sha1_state),
+	.init			= sha1_base_init,
+	.update			= sha1_ce_update,
+	.final			= sha1_ce_final,
+	.finup			= sha1_ce_finup,
+	.descsize		= sizeof(struct sha1_ce_state),
 	.digestsize		= SHA1_DIGEST_SIZE,
-	.statesize		= sizeof(struct sha1_state),
 	.base			= {
 		.cra_name		= "sha1",
 		.cra_driver_name	= "sha1-ce",
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 7f29fc031ea8..5df9d9d470ad 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -73,8 +73,8 @@
 	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2

 	/*
-	 * void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
-	 *			  u8 *head, long bytes)
+	 * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+	 *			  int blocks)
 	 */
 ENTRY(sha2_ce_transform)
 	/* load round constants */
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform)
 	ld1		{v12.4s-v15.4s}, [x8]

 	/* load state */
-	ldp		dga, dgb, [x2]
+	ldp		dga, dgb, [x0]

-	/* load partial input (if supplied) */
-	cbz		x3, 0f
-	ld1		{v16.4s-v19.4s}, [x3]
-	b		1f
+	/* load sha256_ce_state::finalize */
+	ldr		w4, [x0, #:lo12:sha256_ce_offsetof_finalize]

 	/* load input */
 0:	ld1		{v16.4s-v19.4s}, [x1], #64
-	sub		w0, w0, #1
+	sub		w2, w2, #1

-1:
 CPU_LE(	rev32		v16.16b, v16.16b	)
 CPU_LE(	rev32		v17.16b, v17.16b	)
 CPU_LE(	rev32		v18.16b, v18.16b	)
 CPU_LE(	rev32		v19.16b, v19.16b	)

-2:	add		t0.4s, v16.4s, v0.4s
+1:	add		t0.4s, v16.4s, v0.4s
 	mov		dg0v.16b, dgav.16b
 	mov		dg1v.16b, dgbv.16b

@@ -131,15 +128,15 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 	add		dgbv.4s, dgbv.4s, dg1v.4s

 	/* handled all input blocks? */
-	cbnz		w0, 0b
+	cbnz		w2, 0b

 	/*
 	 * Final block: add padding and total bit count.
-	 * Skip if we have no total byte count in x4. In that case, the input
-	 * size was not a round multiple of the block size, and the padding is
-	 * handled by the C code.
+	 * Skip if the input size was not a round multiple of the block size,
+	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
+	ldr		x4, [x0, #:lo12:sha256_ce_offsetof_count]
 	movi		v17.2d, #0
 	mov		x8, #0x80000000
 	movi		v18.2d, #0
@@ -148,9 +145,9 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 	mov		x4, #0
 	mov		v19.d[0], xzr
 	mov		v19.d[1], x7
-	b		2b
+	b		1b

 	/* store new state */
-3:	stp		dga, dgb, [x2]
+3:	stp		dga, dgb, [x0]
 	ret
 ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index ae67e88c28b9..1340e44c048b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -12,206 +12,82 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
+#include <crypto/sha256_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>

+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");

-asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
-				 u8 *head, long bytes);
-
-static int sha224_init(struct shash_desc *desc)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha256_state){
-		.state = {
-			SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
-			SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
-		}
-	};
-	return 0;
-}
-
-static int sha256_init(struct shash_desc *desc)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha256_state){
-		.state = {
-			SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
-			SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
-		}
-	};
-	return 0;
-}
-
-static int sha2_update(struct shash_desc *desc, const u8 *data,
-		       unsigned int len)
+struct sha256_ce_state {
+	struct sha256_state	sst;
+	u32			finalize;
+};
+
+asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+				  int blocks);
+
+static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int len)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
-	sctx->count += len;
-
-	if ((partial + len) >= SHA256_BLOCK_SIZE) {
-		int blocks;
-
-		if (partial) {
-			int p = SHA256_BLOCK_SIZE - partial;
-
-			memcpy(sctx->buf + partial, data, p);
-			data += p;
-			len -= p;
-		}
+	struct sha256_ce_state *sctx = shash_desc_ctx(desc);

-		blocks = len / SHA256_BLOCK_SIZE;
-		len %= SHA256_BLOCK_SIZE;
-
-		kernel_neon_begin_partial(28);
-		sha2_ce_transform(blocks, data, sctx->state,
-				  partial ? sctx->buf : NULL, 0);
-		kernel_neon_end();
-
-		data += blocks * SHA256_BLOCK_SIZE;
-		partial = 0;
-	}
-	if (len)
-		memcpy(sctx->buf + partial, data, len);
-	return 0;
-}
-
-static void sha2_final(struct shash_desc *desc)
-{
-	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be64 bits = cpu_to_be64(sctx->count << 3);
-	u32 padlen = SHA256_BLOCK_SIZE
-		- ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
-
-	sha2_update(desc, padding, padlen);
-	sha2_update(desc, (const u8 *)&bits, sizeof(bits));
-}
-
-static int sha224_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	sha2_final(desc);
-
-	for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha256_state){};
-	return 0;
-}
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(28);
+	sha256_base_do_update(desc, data, len,
+			      (sha256_block_fn *)sha2_ce_transform);
+	kernel_neon_end();

-static int sha256_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	sha2_final(desc);
-
-	for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha256_state){};
 	return 0;
 }

-static void sha2_finup(struct shash_desc *desc, const u8 *data,
-		       unsigned int len)
+static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
+			   unsigned int len, u8 *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	int blocks;
+	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);

-	if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
-		sha2_update(desc, data, len);
-		sha2_final(desc);
-		return;
-	}
+	ASM_EXPORT(sha256_ce_offsetof_count,
+		   offsetof(struct sha256_ce_state, sst.count));
+	ASM_EXPORT(sha256_ce_offsetof_finalize,
+		   offsetof(struct sha256_ce_state, finalize));

 	/*
-	 * Use a fast path if the input is a multiple of 64 bytes. In
-	 * this case, there is no need to copy data around, and we can
-	 * perform the entire digest calculation in a single invocation
-	 * of sha2_ce_transform()
+	 * Allow the asm code to perform the finalization if there is no
+	 * partial data and the input is a round multiple of the block size.
 	 */
-	blocks = len / SHA256_BLOCK_SIZE;
+	sctx->finalize = finalize;

 	kernel_neon_begin_partial(28);
-	sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+	sha256_base_do_update(desc, data, len,
+			      (sha256_block_fn *)sha2_ce_transform);
+	if (!finalize)
+		sha256_base_do_finalize(desc,
+					(sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
+	return sha256_base_finish(desc, out);
 }

-static int sha224_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
+static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	sha2_finup(desc, data, len);
-
-	for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha256_state){};
-	return 0;
-}
-
-static int sha256_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	sha2_finup(desc, data, len);
-
-	for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha256_state){};
-	return 0;
-}
-
-static int sha2_export(struct shash_desc *desc, void *out)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	struct sha256_state *dst = out;
-
-	*dst = *sctx;
-	return 0;
-}
-
-static int sha2_import(struct shash_desc *desc, const void *in)
-{
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-	struct sha256_state const *src = in;
-
-	*sctx = *src;
-	return 0;
+	kernel_neon_begin_partial(28);
+	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
+	kernel_neon_end();
+	return sha256_base_finish(desc, out);
 }

 static struct shash_alg algs[] = { {
-	.init			= sha224_init,
-	.update			= sha2_update,
-	.final			= sha224_final,
-	.finup			= sha224_finup,
-	.export			= sha2_export,
-	.import			= sha2_import,
-	.descsize		= sizeof(struct sha256_state),
+	.init			= sha224_base_init,
+	.update			= sha256_ce_update,
+	.final			= sha256_ce_final,
+	.finup			= sha256_ce_finup,
+	.descsize		= sizeof(struct sha256_ce_state),
 	.digestsize		= SHA224_DIGEST_SIZE,
-	.statesize		= sizeof(struct sha256_state),
 	.base			= {
 		.cra_name		= "sha224",
 		.cra_driver_name	= "sha224-ce",
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { {
 		.cra_module		= THIS_MODULE,
 	}
 }, {
-	.init			= sha256_init,
-	.update			= sha2_update,
-	.final			= sha256_final,
-	.finup			= sha256_finup,
-	.export			= sha2_export,
-	.import			= sha2_import,
-	.descsize		= sizeof(struct sha256_state),
+	.init			= sha256_base_init,
+	.update			= sha256_ce_update,
+	.final			= sha256_ce_final,
+	.finup			= sha256_ce_finup,
+	.descsize		= sizeof(struct sha256_ce_state),
 	.digestsize		= SHA256_DIGEST_SIZE,
-	.statesize		= sizeof(struct sha256_state),
 	.base			= {
 		.cra_name		= "sha256",
 		.cra_driver_name	= "sha256-ce",