diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-15 13:42:15 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-15 13:42:15 -0400 |
commit | cb906953d2c3fd450655d9fa833f03690ad50c23 (patch) | |
tree | 06c5665afb24baee3ac49f62db61ca97918079b4 /arch/arm64 | |
parent | 6c373ca89399c5a3f7ef210ad8f63dc3437da345 (diff) | |
parent | 3abafaf2192b1712079edfd4232b19877d6f41a5 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
"Here is the crypto update for 4.1:
New interfaces:
- user-space interface for AEAD
- user-space interface for RNG (i.e., pseudo RNG)
New hashes:
- ARMv8 SHA1/256
- ARMv8 AES
- ARMv8 GHASH
- ARM assembler and NEON SHA256
- MIPS OCTEON SHA1/256/512
- MIPS img-hash SHA1/256 and MD5
- Power 8 VMX AES/CBC/CTR/GHASH
- PPC assembler AES, SHA1/256 and MD5
- Broadcom IPROC RNG driver
Cleanups/fixes:
- prevent internal helper algos from being exposed to user-space
- merge common code from assembly/C SHA implementations
- misc fixes"
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (169 commits)
crypto: arm - workaround for building with old binutils
crypto: arm/sha256 - avoid sha256 code on ARMv7-M
crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer
crypto: x86/sha256_ssse3 - move SHA-224/256 SSSE3 implementation to base layer
crypto: x86/sha1_ssse3 - move SHA-1 SSSE3 implementation to base layer
crypto: arm64/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer
crypto: arm/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
crypto: arm/sha256 - move SHA-224/256 ASM/NEON implementation to base layer
crypto: arm/sha1-ce - move SHA-1 ARMv8 implementation to base layer
crypto: arm/sha1_neon - move SHA-1 NEON implementation to base layer
crypto: arm/sha1 - move SHA-1 ARM asm implementation to base layer
crypto: sha512-generic - move to generic glue implementation
crypto: sha256-generic - move to generic glue implementation
crypto: sha1-generic - move to generic glue implementation
crypto: sha512 - implement base layer for SHA-512
crypto: sha256 - implement base layer for SHA-256
crypto: sha1 - implement base layer for SHA-1
crypto: api - remove instance when test failed
crypto: api - Move alg ref count init to crypto_check_alg
...
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/crypto/aes-glue.c | 12 | ||||
-rw-r--r-- | arch/arm64/crypto/sha1-ce-core.S | 33 | ||||
-rw-r--r-- | arch/arm64/crypto/sha1-ce-glue.c | 151 | ||||
-rw-r--r-- | arch/arm64/crypto/sha2-ce-core.S | 29 | ||||
-rw-r--r-- | arch/arm64/crypto/sha2-ce-glue.c | 227 |
5 files changed, 130 insertions, 322 deletions
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index b1b5b893eb20..05d9e16c0dfd 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { { | |||
284 | .cra_name = "__ecb-aes-" MODE, | 284 | .cra_name = "__ecb-aes-" MODE, |
285 | .cra_driver_name = "__driver-ecb-aes-" MODE, | 285 | .cra_driver_name = "__driver-ecb-aes-" MODE, |
286 | .cra_priority = 0, | 286 | .cra_priority = 0, |
287 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 287 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
288 | CRYPTO_ALG_INTERNAL, | ||
288 | .cra_blocksize = AES_BLOCK_SIZE, | 289 | .cra_blocksize = AES_BLOCK_SIZE, |
289 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 290 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
290 | .cra_alignmask = 7, | 291 | .cra_alignmask = 7, |
@@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { { | |||
302 | .cra_name = "__cbc-aes-" MODE, | 303 | .cra_name = "__cbc-aes-" MODE, |
303 | .cra_driver_name = "__driver-cbc-aes-" MODE, | 304 | .cra_driver_name = "__driver-cbc-aes-" MODE, |
304 | .cra_priority = 0, | 305 | .cra_priority = 0, |
305 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 306 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
307 | CRYPTO_ALG_INTERNAL, | ||
306 | .cra_blocksize = AES_BLOCK_SIZE, | 308 | .cra_blocksize = AES_BLOCK_SIZE, |
307 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 309 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
308 | .cra_alignmask = 7, | 310 | .cra_alignmask = 7, |
@@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { { | |||
320 | .cra_name = "__ctr-aes-" MODE, | 322 | .cra_name = "__ctr-aes-" MODE, |
321 | .cra_driver_name = "__driver-ctr-aes-" MODE, | 323 | .cra_driver_name = "__driver-ctr-aes-" MODE, |
322 | .cra_priority = 0, | 324 | .cra_priority = 0, |
323 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 325 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
326 | CRYPTO_ALG_INTERNAL, | ||
324 | .cra_blocksize = 1, | 327 | .cra_blocksize = 1, |
325 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 328 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
326 | .cra_alignmask = 7, | 329 | .cra_alignmask = 7, |
@@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { { | |||
338 | .cra_name = "__xts-aes-" MODE, | 341 | .cra_name = "__xts-aes-" MODE, |
339 | .cra_driver_name = "__driver-xts-aes-" MODE, | 342 | .cra_driver_name = "__driver-xts-aes-" MODE, |
340 | .cra_priority = 0, | 343 | .cra_priority = 0, |
341 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
345 | CRYPTO_ALG_INTERNAL, | ||
342 | .cra_blocksize = AES_BLOCK_SIZE, | 346 | .cra_blocksize = AES_BLOCK_SIZE, |
343 | .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), | 347 | .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), |
344 | .cra_alignmask = 7, | 348 | .cra_alignmask = 7, |
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..033aae6d732a 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -66,8 +66,8 @@ | |||
66 | .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 | 66 | .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, | 69 | * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, |
70 | * u8 *head, long bytes) | 70 | * int blocks) |
71 | */ | 71 | */ |
72 | ENTRY(sha1_ce_transform) | 72 | ENTRY(sha1_ce_transform) |
73 | /* load round constants */ | 73 | /* load round constants */ |
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform) | |||
78 | ld1r {k3.4s}, [x6] | 78 | ld1r {k3.4s}, [x6] |
79 | 79 | ||
80 | /* load state */ | 80 | /* load state */ |
81 | ldr dga, [x2] | 81 | ldr dga, [x0] |
82 | ldr dgb, [x2, #16] | 82 | ldr dgb, [x0, #16] |
83 | 83 | ||
84 | /* load partial state (if supplied) */ | 84 | /* load sha1_ce_state::finalize */ |
85 | cbz x3, 0f | 85 | ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize] |
86 | ld1 {v8.4s-v11.4s}, [x3] | ||
87 | b 1f | ||
88 | 86 | ||
89 | /* load input */ | 87 | /* load input */ |
90 | 0: ld1 {v8.4s-v11.4s}, [x1], #64 | 88 | 0: ld1 {v8.4s-v11.4s}, [x1], #64 |
91 | sub w0, w0, #1 | 89 | sub w2, w2, #1 |
92 | 90 | ||
93 | 1: | ||
94 | CPU_LE( rev32 v8.16b, v8.16b ) | 91 | CPU_LE( rev32 v8.16b, v8.16b ) |
95 | CPU_LE( rev32 v9.16b, v9.16b ) | 92 | CPU_LE( rev32 v9.16b, v9.16b ) |
96 | CPU_LE( rev32 v10.16b, v10.16b ) | 93 | CPU_LE( rev32 v10.16b, v10.16b ) |
97 | CPU_LE( rev32 v11.16b, v11.16b ) | 94 | CPU_LE( rev32 v11.16b, v11.16b ) |
98 | 95 | ||
99 | 2: add t0.4s, v8.4s, k0.4s | 96 | 1: add t0.4s, v8.4s, k0.4s |
100 | mov dg0v.16b, dgav.16b | 97 | mov dg0v.16b, dgav.16b |
101 | 98 | ||
102 | add_update c, ev, k0, 8, 9, 10, 11, dgb | 99 | add_update c, ev, k0, 8, 9, 10, 11, dgb |
@@ -127,15 +124,15 @@ CPU_LE( rev32 v11.16b, v11.16b ) | |||
127 | add dgbv.2s, dgbv.2s, dg1v.2s | 124 | add dgbv.2s, dgbv.2s, dg1v.2s |
128 | add dgav.4s, dgav.4s, dg0v.4s | 125 | add dgav.4s, dgav.4s, dg0v.4s |
129 | 126 | ||
130 | cbnz w0, 0b | 127 | cbnz w2, 0b |
131 | 128 | ||
132 | /* | 129 | /* |
133 | * Final block: add padding and total bit count. | 130 | * Final block: add padding and total bit count. |
134 | * Skip if we have no total byte count in x4. In that case, the input | 131 | * Skip if the input size was not a round multiple of the block size, |
135 | * size was not a round multiple of the block size, and the padding is | 132 | * the padding is handled by the C code in that case. |
136 | * handled by the C code. | ||
137 | */ | 133 | */ |
138 | cbz x4, 3f | 134 | cbz x4, 3f |
135 | ldr x4, [x0, #:lo12:sha1_ce_offsetof_count] | ||
139 | movi v9.2d, #0 | 136 | movi v9.2d, #0 |
140 | mov x8, #0x80000000 | 137 | mov x8, #0x80000000 |
141 | movi v10.2d, #0 | 138 | movi v10.2d, #0 |
@@ -144,10 +141,10 @@ CPU_LE( rev32 v11.16b, v11.16b ) | |||
144 | mov x4, #0 | 141 | mov x4, #0 |
145 | mov v11.d[0], xzr | 142 | mov v11.d[0], xzr |
146 | mov v11.d[1], x7 | 143 | mov v11.d[1], x7 |
147 | b 2b | 144 | b 1b |
148 | 145 | ||
149 | /* store new state */ | 146 | /* store new state */ |
150 | 3: str dga, [x2] | 147 | 3: str dga, [x0] |
151 | str dgb, [x2, #16] | 148 | str dgb, [x0, #16] |
152 | ret | 149 | ret |
153 | ENDPROC(sha1_ce_transform) | 150 | ENDPROC(sha1_ce_transform) |
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..114e7cc5de8c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,144 +12,81 @@ | |||
12 | #include <asm/unaligned.h> | 12 | #include <asm/unaligned.h> |
13 | #include <crypto/internal/hash.h> | 13 | #include <crypto/internal/hash.h> |
14 | #include <crypto/sha.h> | 14 | #include <crypto/sha.h> |
15 | #include <crypto/sha1_base.h> | ||
15 | #include <linux/cpufeature.h> | 16 | #include <linux/cpufeature.h> |
16 | #include <linux/crypto.h> | 17 | #include <linux/crypto.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | 19 | ||
20 | #define ASM_EXPORT(sym, val) \ | ||
21 | asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); | ||
22 | |||
19 | MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); | 23 | MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); |
20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
21 | MODULE_LICENSE("GPL v2"); | 25 | MODULE_LICENSE("GPL v2"); |
22 | 26 | ||
23 | asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, | 27 | struct sha1_ce_state { |
24 | u8 *head, long bytes); | 28 | struct sha1_state sst; |
29 | u32 finalize; | ||
30 | }; | ||
25 | 31 | ||
26 | static int sha1_init(struct shash_desc *desc) | 32 | asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, |
27 | { | 33 | int blocks); |
28 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
29 | 34 | ||
30 | *sctx = (struct sha1_state){ | 35 | static int sha1_ce_update(struct shash_desc *desc, const u8 *data, |
31 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | 36 | unsigned int len) |
32 | }; | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | static int sha1_update(struct shash_desc *desc, const u8 *data, | ||
37 | unsigned int len) | ||
38 | { | 37 | { |
39 | struct sha1_state *sctx = shash_desc_ctx(desc); | 38 | struct sha1_ce_state *sctx = shash_desc_ctx(desc); |
40 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
41 | |||
42 | sctx->count += len; | ||
43 | |||
44 | if ((partial + len) >= SHA1_BLOCK_SIZE) { | ||
45 | int blocks; | ||
46 | |||
47 | if (partial) { | ||
48 | int p = SHA1_BLOCK_SIZE - partial; | ||
49 | 39 | ||
50 | memcpy(sctx->buffer + partial, data, p); | 40 | sctx->finalize = 0; |
51 | data += p; | 41 | kernel_neon_begin_partial(16); |
52 | len -= p; | 42 | sha1_base_do_update(desc, data, len, |
53 | } | 43 | (sha1_block_fn *)sha1_ce_transform); |
54 | 44 | kernel_neon_end(); | |
55 | blocks = len / SHA1_BLOCK_SIZE; | ||
56 | len %= SHA1_BLOCK_SIZE; | ||
57 | |||
58 | kernel_neon_begin_partial(16); | ||
59 | sha1_ce_transform(blocks, data, sctx->state, | ||
60 | partial ? sctx->buffer : NULL, 0); | ||
61 | kernel_neon_end(); | ||
62 | 45 | ||
63 | data += blocks * SHA1_BLOCK_SIZE; | ||
64 | partial = 0; | ||
65 | } | ||
66 | if (len) | ||
67 | memcpy(sctx->buffer + partial, data, len); | ||
68 | return 0; | 46 | return 0; |
69 | } | 47 | } |
70 | 48 | ||
71 | static int sha1_final(struct shash_desc *desc, u8 *out) | 49 | static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, |
50 | unsigned int len, u8 *out) | ||
72 | { | 51 | { |
73 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | 52 | struct sha1_ce_state *sctx = shash_desc_ctx(desc); |
53 | bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); | ||
74 | 54 | ||
75 | struct sha1_state *sctx = shash_desc_ctx(desc); | 55 | ASM_EXPORT(sha1_ce_offsetof_count, |
76 | __be64 bits = cpu_to_be64(sctx->count << 3); | 56 | offsetof(struct sha1_ce_state, sst.count)); |
77 | __be32 *dst = (__be32 *)out; | 57 | ASM_EXPORT(sha1_ce_offsetof_finalize, |
78 | int i; | 58 | offsetof(struct sha1_ce_state, finalize)); |
79 | |||
80 | u32 padlen = SHA1_BLOCK_SIZE | ||
81 | - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE); | ||
82 | |||
83 | sha1_update(desc, padding, padlen); | ||
84 | sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
85 | |||
86 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) | ||
87 | put_unaligned_be32(sctx->state[i], dst++); | ||
88 | |||
89 | *sctx = (struct sha1_state){}; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | static int sha1_finup(struct shash_desc *desc, const u8 *data, | ||
94 | unsigned int len, u8 *out) | ||
95 | { | ||
96 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
97 | __be32 *dst = (__be32 *)out; | ||
98 | int blocks; | ||
99 | int i; | ||
100 | |||
101 | if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) { | ||
102 | sha1_update(desc, data, len); | ||
103 | return sha1_final(desc, out); | ||
104 | } | ||
105 | 59 | ||
106 | /* | 60 | /* |
107 | * Use a fast path if the input is a multiple of 64 bytes. In | 61 | * Allow the asm code to perform the finalization if there is no |
108 | * this case, there is no need to copy data around, and we can | 62 | * partial data and the input is a round multiple of the block size. |
109 | * perform the entire digest calculation in a single invocation | ||
110 | * of sha1_ce_transform() | ||
111 | */ | 63 | */ |
112 | blocks = len / SHA1_BLOCK_SIZE; | 64 | sctx->finalize = finalize; |
113 | 65 | ||
114 | kernel_neon_begin_partial(16); | 66 | kernel_neon_begin_partial(16); |
115 | sha1_ce_transform(blocks, data, sctx->state, NULL, len); | 67 | sha1_base_do_update(desc, data, len, |
68 | (sha1_block_fn *)sha1_ce_transform); | ||
69 | if (!finalize) | ||
70 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); | ||
116 | kernel_neon_end(); | 71 | kernel_neon_end(); |
117 | 72 | return sha1_base_finish(desc, out); | |
118 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) | ||
119 | put_unaligned_be32(sctx->state[i], dst++); | ||
120 | |||
121 | *sctx = (struct sha1_state){}; | ||
122 | return 0; | ||
123 | } | 73 | } |
124 | 74 | ||
125 | static int sha1_export(struct shash_desc *desc, void *out) | 75 | static int sha1_ce_final(struct shash_desc *desc, u8 *out) |
126 | { | 76 | { |
127 | struct sha1_state *sctx = shash_desc_ctx(desc); | 77 | kernel_neon_begin_partial(16); |
128 | struct sha1_state *dst = out; | 78 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); |
129 | 79 | kernel_neon_end(); | |
130 | *dst = *sctx; | 80 | return sha1_base_finish(desc, out); |
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static int sha1_import(struct shash_desc *desc, const void *in) | ||
135 | { | ||
136 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
137 | struct sha1_state const *src = in; | ||
138 | |||
139 | *sctx = *src; | ||
140 | return 0; | ||
141 | } | 81 | } |
142 | 82 | ||
143 | static struct shash_alg alg = { | 83 | static struct shash_alg alg = { |
144 | .init = sha1_init, | 84 | .init = sha1_base_init, |
145 | .update = sha1_update, | 85 | .update = sha1_ce_update, |
146 | .final = sha1_final, | 86 | .final = sha1_ce_final, |
147 | .finup = sha1_finup, | 87 | .finup = sha1_ce_finup, |
148 | .export = sha1_export, | 88 | .descsize = sizeof(struct sha1_ce_state), |
149 | .import = sha1_import, | ||
150 | .descsize = sizeof(struct sha1_state), | ||
151 | .digestsize = SHA1_DIGEST_SIZE, | 89 | .digestsize = SHA1_DIGEST_SIZE, |
152 | .statesize = sizeof(struct sha1_state), | ||
153 | .base = { | 90 | .base = { |
154 | .cra_name = "sha1", | 91 | .cra_name = "sha1", |
155 | .cra_driver_name = "sha1-ce", | 92 | .cra_driver_name = "sha1-ce", |
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 7f29fc031ea8..5df9d9d470ad 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -73,8 +73,8 @@ | |||
73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 | 73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 76 | * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
77 | * u8 *head, long bytes) | 77 | * int blocks) |
78 | */ | 78 | */ |
79 | ENTRY(sha2_ce_transform) | 79 | ENTRY(sha2_ce_transform) |
80 | /* load round constants */ | 80 | /* load round constants */ |
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform) | |||
85 | ld1 {v12.4s-v15.4s}, [x8] | 85 | ld1 {v12.4s-v15.4s}, [x8] |
86 | 86 | ||
87 | /* load state */ | 87 | /* load state */ |
88 | ldp dga, dgb, [x2] | 88 | ldp dga, dgb, [x0] |
89 | 89 | ||
90 | /* load partial input (if supplied) */ | 90 | /* load sha256_ce_state::finalize */ |
91 | cbz x3, 0f | 91 | ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] |
92 | ld1 {v16.4s-v19.4s}, [x3] | ||
93 | b 1f | ||
94 | 92 | ||
95 | /* load input */ | 93 | /* load input */ |
96 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 | 94 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 |
97 | sub w0, w0, #1 | 95 | sub w2, w2, #1 |
98 | 96 | ||
99 | 1: | ||
100 | CPU_LE( rev32 v16.16b, v16.16b ) | 97 | CPU_LE( rev32 v16.16b, v16.16b ) |
101 | CPU_LE( rev32 v17.16b, v17.16b ) | 98 | CPU_LE( rev32 v17.16b, v17.16b ) |
102 | CPU_LE( rev32 v18.16b, v18.16b ) | 99 | CPU_LE( rev32 v18.16b, v18.16b ) |
103 | CPU_LE( rev32 v19.16b, v19.16b ) | 100 | CPU_LE( rev32 v19.16b, v19.16b ) |
104 | 101 | ||
105 | 2: add t0.4s, v16.4s, v0.4s | 102 | 1: add t0.4s, v16.4s, v0.4s |
106 | mov dg0v.16b, dgav.16b | 103 | mov dg0v.16b, dgav.16b |
107 | mov dg1v.16b, dgbv.16b | 104 | mov dg1v.16b, dgbv.16b |
108 | 105 | ||
@@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
131 | add dgbv.4s, dgbv.4s, dg1v.4s | 128 | add dgbv.4s, dgbv.4s, dg1v.4s |
132 | 129 | ||
133 | /* handled all input blocks? */ | 130 | /* handled all input blocks? */ |
134 | cbnz w0, 0b | 131 | cbnz w2, 0b |
135 | 132 | ||
136 | /* | 133 | /* |
137 | * Final block: add padding and total bit count. | 134 | * Final block: add padding and total bit count. |
138 | * Skip if we have no total byte count in x4. In that case, the input | 135 | * Skip if the input size was not a round multiple of the block size, |
139 | * size was not a round multiple of the block size, and the padding is | 136 | * the padding is handled by the C code in that case. |
140 | * handled by the C code. | ||
141 | */ | 137 | */ |
142 | cbz x4, 3f | 138 | cbz x4, 3f |
139 | ldr x4, [x0, #:lo12:sha256_ce_offsetof_count] | ||
143 | movi v17.2d, #0 | 140 | movi v17.2d, #0 |
144 | mov x8, #0x80000000 | 141 | mov x8, #0x80000000 |
145 | movi v18.2d, #0 | 142 | movi v18.2d, #0 |
@@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
148 | mov x4, #0 | 145 | mov x4, #0 |
149 | mov v19.d[0], xzr | 146 | mov v19.d[0], xzr |
150 | mov v19.d[1], x7 | 147 | mov v19.d[1], x7 |
151 | b 2b | 148 | b 1b |
152 | 149 | ||
153 | /* store new state */ | 150 | /* store new state */ |
154 | 3: stp dga, dgb, [x2] | 151 | 3: stp dga, dgb, [x0] |
155 | ret | 152 | ret |
156 | ENDPROC(sha2_ce_transform) | 153 | ENDPROC(sha2_ce_transform) |
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index ae67e88c28b9..1340e44c048b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -12,206 +12,82 @@ | |||
12 | #include <asm/unaligned.h> | 12 | #include <asm/unaligned.h> |
13 | #include <crypto/internal/hash.h> | 13 | #include <crypto/internal/hash.h> |
14 | #include <crypto/sha.h> | 14 | #include <crypto/sha.h> |
15 | #include <crypto/sha256_base.h> | ||
15 | #include <linux/cpufeature.h> | 16 | #include <linux/cpufeature.h> |
16 | #include <linux/crypto.h> | 17 | #include <linux/crypto.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | 19 | ||
20 | #define ASM_EXPORT(sym, val) \ | ||
21 | asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); | ||
22 | |||
19 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); | 23 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); |
20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
21 | MODULE_LICENSE("GPL v2"); | 25 | MODULE_LICENSE("GPL v2"); |
22 | 26 | ||
23 | asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 27 | struct sha256_ce_state { |
24 | u8 *head, long bytes); | 28 | struct sha256_state sst; |
25 | 29 | u32 finalize; | |
26 | static int sha224_init(struct shash_desc *desc) | 30 | }; |
27 | { | ||
28 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
29 | |||
30 | *sctx = (struct sha256_state){ | ||
31 | .state = { | ||
32 | SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, | ||
33 | SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, | ||
34 | } | ||
35 | }; | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | static int sha256_init(struct shash_desc *desc) | ||
40 | { | ||
41 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
42 | |||
43 | *sctx = (struct sha256_state){ | ||
44 | .state = { | ||
45 | SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, | ||
46 | SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, | ||
47 | } | ||
48 | }; | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static int sha2_update(struct shash_desc *desc, const u8 *data, | ||
53 | unsigned int len) | ||
54 | { | ||
55 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
56 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
57 | |||
58 | sctx->count += len; | ||
59 | |||
60 | if ((partial + len) >= SHA256_BLOCK_SIZE) { | ||
61 | int blocks; | ||
62 | |||
63 | if (partial) { | ||
64 | int p = SHA256_BLOCK_SIZE - partial; | ||
65 | |||
66 | memcpy(sctx->buf + partial, data, p); | ||
67 | data += p; | ||
68 | len -= p; | ||
69 | } | ||
70 | 31 | ||
71 | blocks = len / SHA256_BLOCK_SIZE; | 32 | asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
72 | len %= SHA256_BLOCK_SIZE; | 33 | int blocks); |
73 | 34 | ||
74 | kernel_neon_begin_partial(28); | 35 | static int sha256_ce_update(struct shash_desc *desc, const u8 *data, |
75 | sha2_ce_transform(blocks, data, sctx->state, | 36 | unsigned int len) |
76 | partial ? sctx->buf : NULL, 0); | ||
77 | kernel_neon_end(); | ||
78 | |||
79 | data += blocks * SHA256_BLOCK_SIZE; | ||
80 | partial = 0; | ||
81 | } | ||
82 | if (len) | ||
83 | memcpy(sctx->buf + partial, data, len); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | static void sha2_final(struct shash_desc *desc) | ||
88 | { | 37 | { |
89 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | 38 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
90 | |||
91 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
92 | __be64 bits = cpu_to_be64(sctx->count << 3); | ||
93 | u32 padlen = SHA256_BLOCK_SIZE | ||
94 | - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE); | ||
95 | |||
96 | sha2_update(desc, padding, padlen); | ||
97 | sha2_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
98 | } | ||
99 | |||
100 | static int sha224_final(struct shash_desc *desc, u8 *out) | ||
101 | { | ||
102 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
103 | __be32 *dst = (__be32 *)out; | ||
104 | int i; | ||
105 | |||
106 | sha2_final(desc); | ||
107 | |||
108 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
109 | put_unaligned_be32(sctx->state[i], dst++); | ||
110 | |||
111 | *sctx = (struct sha256_state){}; | ||
112 | return 0; | ||
113 | } | ||
114 | 39 | ||
115 | static int sha256_final(struct shash_desc *desc, u8 *out) | 40 | sctx->finalize = 0; |
116 | { | 41 | kernel_neon_begin_partial(28); |
117 | struct sha256_state *sctx = shash_desc_ctx(desc); | 42 | sha256_base_do_update(desc, data, len, |
118 | __be32 *dst = (__be32 *)out; | 43 | (sha256_block_fn *)sha2_ce_transform); |
119 | int i; | 44 | kernel_neon_end(); |
120 | |||
121 | sha2_final(desc); | ||
122 | |||
123 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
124 | put_unaligned_be32(sctx->state[i], dst++); | ||
125 | 45 | ||
126 | *sctx = (struct sha256_state){}; | ||
127 | return 0; | 46 | return 0; |
128 | } | 47 | } |
129 | 48 | ||
130 | static void sha2_finup(struct shash_desc *desc, const u8 *data, | 49 | static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, |
131 | unsigned int len) | 50 | unsigned int len, u8 *out) |
132 | { | 51 | { |
133 | struct sha256_state *sctx = shash_desc_ctx(desc); | 52 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
134 | int blocks; | 53 | bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); |
135 | 54 | ||
136 | if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { | 55 | ASM_EXPORT(sha256_ce_offsetof_count, |
137 | sha2_update(desc, data, len); | 56 | offsetof(struct sha256_ce_state, sst.count)); |
138 | sha2_final(desc); | 57 | ASM_EXPORT(sha256_ce_offsetof_finalize, |
139 | return; | 58 | offsetof(struct sha256_ce_state, finalize)); |
140 | } | ||
141 | 59 | ||
142 | /* | 60 | /* |
143 | * Use a fast path if the input is a multiple of 64 bytes. In | 61 | * Allow the asm code to perform the finalization if there is no |
144 | * this case, there is no need to copy data around, and we can | 62 | * partial data and the input is a round multiple of the block size. |
145 | * perform the entire digest calculation in a single invocation | ||
146 | * of sha2_ce_transform() | ||
147 | */ | 63 | */ |
148 | blocks = len / SHA256_BLOCK_SIZE; | 64 | sctx->finalize = finalize; |
149 | 65 | ||
150 | kernel_neon_begin_partial(28); | 66 | kernel_neon_begin_partial(28); |
151 | sha2_ce_transform(blocks, data, sctx->state, NULL, len); | 67 | sha256_base_do_update(desc, data, len, |
68 | (sha256_block_fn *)sha2_ce_transform); | ||
69 | if (!finalize) | ||
70 | sha256_base_do_finalize(desc, | ||
71 | (sha256_block_fn *)sha2_ce_transform); | ||
152 | kernel_neon_end(); | 72 | kernel_neon_end(); |
73 | return sha256_base_finish(desc, out); | ||
153 | } | 74 | } |
154 | 75 | ||
155 | static int sha224_finup(struct shash_desc *desc, const u8 *data, | 76 | static int sha256_ce_final(struct shash_desc *desc, u8 *out) |
156 | unsigned int len, u8 *out) | ||
157 | { | 77 | { |
158 | struct sha256_state *sctx = shash_desc_ctx(desc); | 78 | kernel_neon_begin_partial(28); |
159 | __be32 *dst = (__be32 *)out; | 79 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); |
160 | int i; | 80 | kernel_neon_end(); |
161 | 81 | return sha256_base_finish(desc, out); | |
162 | sha2_finup(desc, data, len); | ||
163 | |||
164 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
165 | put_unaligned_be32(sctx->state[i], dst++); | ||
166 | |||
167 | *sctx = (struct sha256_state){}; | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | static int sha256_finup(struct shash_desc *desc, const u8 *data, | ||
172 | unsigned int len, u8 *out) | ||
173 | { | ||
174 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
175 | __be32 *dst = (__be32 *)out; | ||
176 | int i; | ||
177 | |||
178 | sha2_finup(desc, data, len); | ||
179 | |||
180 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
181 | put_unaligned_be32(sctx->state[i], dst++); | ||
182 | |||
183 | *sctx = (struct sha256_state){}; | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static int sha2_export(struct shash_desc *desc, void *out) | ||
188 | { | ||
189 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
190 | struct sha256_state *dst = out; | ||
191 | |||
192 | *dst = *sctx; | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | static int sha2_import(struct shash_desc *desc, const void *in) | ||
197 | { | ||
198 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
199 | struct sha256_state const *src = in; | ||
200 | |||
201 | *sctx = *src; | ||
202 | return 0; | ||
203 | } | 82 | } |
204 | 83 | ||
205 | static struct shash_alg algs[] = { { | 84 | static struct shash_alg algs[] = { { |
206 | .init = sha224_init, | 85 | .init = sha224_base_init, |
207 | .update = sha2_update, | 86 | .update = sha256_ce_update, |
208 | .final = sha224_final, | 87 | .final = sha256_ce_final, |
209 | .finup = sha224_finup, | 88 | .finup = sha256_ce_finup, |
210 | .export = sha2_export, | 89 | .descsize = sizeof(struct sha256_ce_state), |
211 | .import = sha2_import, | ||
212 | .descsize = sizeof(struct sha256_state), | ||
213 | .digestsize = SHA224_DIGEST_SIZE, | 90 | .digestsize = SHA224_DIGEST_SIZE, |
214 | .statesize = sizeof(struct sha256_state), | ||
215 | .base = { | 91 | .base = { |
216 | .cra_name = "sha224", | 92 | .cra_name = "sha224", |
217 | .cra_driver_name = "sha224-ce", | 93 | .cra_driver_name = "sha224-ce", |
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { { | |||
221 | .cra_module = THIS_MODULE, | 97 | .cra_module = THIS_MODULE, |
222 | } | 98 | } |
223 | }, { | 99 | }, { |
224 | .init = sha256_init, | 100 | .init = sha256_base_init, |
225 | .update = sha2_update, | 101 | .update = sha256_ce_update, |
226 | .final = sha256_final, | 102 | .final = sha256_ce_final, |
227 | .finup = sha256_finup, | 103 | .finup = sha256_ce_finup, |
228 | .export = sha2_export, | 104 | .descsize = sizeof(struct sha256_ce_state), |
229 | .import = sha2_import, | ||
230 | .descsize = sizeof(struct sha256_state), | ||
231 | .digestsize = SHA256_DIGEST_SIZE, | 105 | .digestsize = SHA256_DIGEST_SIZE, |
232 | .statesize = sizeof(struct sha256_state), | ||
233 | .base = { | 106 | .base = { |
234 | .cra_name = "sha256", | 107 | .cra_name = "sha256", |
235 | .cra_driver_name = "sha256-ce", | 108 | .cra_driver_name = "sha256-ce", |