diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2015-04-09 06:55:45 -0400 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2015-04-10 09:39:46 -0400 |
commit | 03802f6a80b3a993067af97b0dc094f60d6fbc8b (patch) | |
tree | f2978d2f3c13b154756f3f4b99dc49d1455ba0cc /arch/arm64 | |
parent | 07eb54d306f4f0efabe0a0d5dd6739d079d90e0e (diff) |
crypto: arm64/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
This removes all the boilerplate from the existing implementation,
and replaces it with calls into the base layer.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/crypto/sha2-ce-core.S | 29 | ||||
-rw-r--r-- | arch/arm64/crypto/sha2-ce-glue.c | 227 |
2 files changed, 63 insertions, 193 deletions
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 7f29fc031ea8..5df9d9d470ad 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S | |||
@@ -73,8 +73,8 @@ | |||
73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 | 73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 76 | * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
77 | * u8 *head, long bytes) | 77 | * int blocks) |
78 | */ | 78 | */ |
79 | ENTRY(sha2_ce_transform) | 79 | ENTRY(sha2_ce_transform) |
80 | /* load round constants */ | 80 | /* load round constants */ |
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform) | |||
85 | ld1 {v12.4s-v15.4s}, [x8] | 85 | ld1 {v12.4s-v15.4s}, [x8] |
86 | 86 | ||
87 | /* load state */ | 87 | /* load state */ |
88 | ldp dga, dgb, [x2] | 88 | ldp dga, dgb, [x0] |
89 | 89 | ||
90 | /* load partial input (if supplied) */ | 90 | /* load sha256_ce_state::finalize */ |
91 | cbz x3, 0f | 91 | ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] |
92 | ld1 {v16.4s-v19.4s}, [x3] | ||
93 | b 1f | ||
94 | 92 | ||
95 | /* load input */ | 93 | /* load input */ |
96 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 | 94 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 |
97 | sub w0, w0, #1 | 95 | sub w2, w2, #1 |
98 | 96 | ||
99 | 1: | ||
100 | CPU_LE( rev32 v16.16b, v16.16b ) | 97 | CPU_LE( rev32 v16.16b, v16.16b ) |
101 | CPU_LE( rev32 v17.16b, v17.16b ) | 98 | CPU_LE( rev32 v17.16b, v17.16b ) |
102 | CPU_LE( rev32 v18.16b, v18.16b ) | 99 | CPU_LE( rev32 v18.16b, v18.16b ) |
103 | CPU_LE( rev32 v19.16b, v19.16b ) | 100 | CPU_LE( rev32 v19.16b, v19.16b ) |
104 | 101 | ||
105 | 2: add t0.4s, v16.4s, v0.4s | 102 | 1: add t0.4s, v16.4s, v0.4s |
106 | mov dg0v.16b, dgav.16b | 103 | mov dg0v.16b, dgav.16b |
107 | mov dg1v.16b, dgbv.16b | 104 | mov dg1v.16b, dgbv.16b |
108 | 105 | ||
@@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
131 | add dgbv.4s, dgbv.4s, dg1v.4s | 128 | add dgbv.4s, dgbv.4s, dg1v.4s |
132 | 129 | ||
133 | /* handled all input blocks? */ | 130 | /* handled all input blocks? */ |
134 | cbnz w0, 0b | 131 | cbnz w2, 0b |
135 | 132 | ||
136 | /* | 133 | /* |
137 | * Final block: add padding and total bit count. | 134 | * Final block: add padding and total bit count. |
138 | * Skip if we have no total byte count in x4. In that case, the input | 135 | * Skip if the input size was not a round multiple of the block size; |
139 | * size was not a round multiple of the block size, and the padding is | 136 | * the padding is handled by the C code in that case. |
140 | * handled by the C code. | ||
141 | */ | 137 | */ |
142 | cbz x4, 3f | 138 | cbz x4, 3f |
139 | ldr x4, [x0, #:lo12:sha256_ce_offsetof_count] | ||
143 | movi v17.2d, #0 | 140 | movi v17.2d, #0 |
144 | mov x8, #0x80000000 | 141 | mov x8, #0x80000000 |
145 | movi v18.2d, #0 | 142 | movi v18.2d, #0 |
@@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
148 | mov x4, #0 | 145 | mov x4, #0 |
149 | mov v19.d[0], xzr | 146 | mov v19.d[0], xzr |
150 | mov v19.d[1], x7 | 147 | mov v19.d[1], x7 |
151 | b 2b | 148 | b 1b |
152 | 149 | ||
153 | /* store new state */ | 150 | /* store new state */ |
154 | 3: stp dga, dgb, [x2] | 151 | 3: stp dga, dgb, [x0] |
155 | ret | 152 | ret |
156 | ENDPROC(sha2_ce_transform) | 153 | ENDPROC(sha2_ce_transform) |
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index ae67e88c28b9..1340e44c048b 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c | |||
@@ -12,206 +12,82 @@ | |||
12 | #include <asm/unaligned.h> | 12 | #include <asm/unaligned.h> |
13 | #include <crypto/internal/hash.h> | 13 | #include <crypto/internal/hash.h> |
14 | #include <crypto/sha.h> | 14 | #include <crypto/sha.h> |
15 | #include <crypto/sha256_base.h> | ||
15 | #include <linux/cpufeature.h> | 16 | #include <linux/cpufeature.h> |
16 | #include <linux/crypto.h> | 17 | #include <linux/crypto.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | 19 | ||
20 | #define ASM_EXPORT(sym, val) \ | ||
21 | asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); | ||
22 | |||
19 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); | 23 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); |
20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
21 | MODULE_LICENSE("GPL v2"); | 25 | MODULE_LICENSE("GPL v2"); |
22 | 26 | ||
23 | asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 27 | struct sha256_ce_state { |
24 | u8 *head, long bytes); | 28 | struct sha256_state sst; |
25 | 29 | u32 finalize; | |
26 | static int sha224_init(struct shash_desc *desc) | 30 | }; |
27 | { | ||
28 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
29 | |||
30 | *sctx = (struct sha256_state){ | ||
31 | .state = { | ||
32 | SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, | ||
33 | SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, | ||
34 | } | ||
35 | }; | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | static int sha256_init(struct shash_desc *desc) | ||
40 | { | ||
41 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
42 | |||
43 | *sctx = (struct sha256_state){ | ||
44 | .state = { | ||
45 | SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, | ||
46 | SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, | ||
47 | } | ||
48 | }; | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static int sha2_update(struct shash_desc *desc, const u8 *data, | ||
53 | unsigned int len) | ||
54 | { | ||
55 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
56 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
57 | |||
58 | sctx->count += len; | ||
59 | |||
60 | if ((partial + len) >= SHA256_BLOCK_SIZE) { | ||
61 | int blocks; | ||
62 | |||
63 | if (partial) { | ||
64 | int p = SHA256_BLOCK_SIZE - partial; | ||
65 | |||
66 | memcpy(sctx->buf + partial, data, p); | ||
67 | data += p; | ||
68 | len -= p; | ||
69 | } | ||
70 | 31 | ||
71 | blocks = len / SHA256_BLOCK_SIZE; | 32 | asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
72 | len %= SHA256_BLOCK_SIZE; | 33 | int blocks); |
73 | 34 | ||
74 | kernel_neon_begin_partial(28); | 35 | static int sha256_ce_update(struct shash_desc *desc, const u8 *data, |
75 | sha2_ce_transform(blocks, data, sctx->state, | 36 | unsigned int len) |
76 | partial ? sctx->buf : NULL, 0); | ||
77 | kernel_neon_end(); | ||
78 | |||
79 | data += blocks * SHA256_BLOCK_SIZE; | ||
80 | partial = 0; | ||
81 | } | ||
82 | if (len) | ||
83 | memcpy(sctx->buf + partial, data, len); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | static void sha2_final(struct shash_desc *desc) | ||
88 | { | 37 | { |
89 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | 38 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
90 | |||
91 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
92 | __be64 bits = cpu_to_be64(sctx->count << 3); | ||
93 | u32 padlen = SHA256_BLOCK_SIZE | ||
94 | - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE); | ||
95 | |||
96 | sha2_update(desc, padding, padlen); | ||
97 | sha2_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
98 | } | ||
99 | |||
100 | static int sha224_final(struct shash_desc *desc, u8 *out) | ||
101 | { | ||
102 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
103 | __be32 *dst = (__be32 *)out; | ||
104 | int i; | ||
105 | |||
106 | sha2_final(desc); | ||
107 | |||
108 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
109 | put_unaligned_be32(sctx->state[i], dst++); | ||
110 | |||
111 | *sctx = (struct sha256_state){}; | ||
112 | return 0; | ||
113 | } | ||
114 | 39 | ||
115 | static int sha256_final(struct shash_desc *desc, u8 *out) | 40 | sctx->finalize = 0; |
116 | { | 41 | kernel_neon_begin_partial(28); |
117 | struct sha256_state *sctx = shash_desc_ctx(desc); | 42 | sha256_base_do_update(desc, data, len, |
118 | __be32 *dst = (__be32 *)out; | 43 | (sha256_block_fn *)sha2_ce_transform); |
119 | int i; | 44 | kernel_neon_end(); |
120 | |||
121 | sha2_final(desc); | ||
122 | |||
123 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
124 | put_unaligned_be32(sctx->state[i], dst++); | ||
125 | 45 | ||
126 | *sctx = (struct sha256_state){}; | ||
127 | return 0; | 46 | return 0; |
128 | } | 47 | } |
129 | 48 | ||
130 | static void sha2_finup(struct shash_desc *desc, const u8 *data, | 49 | static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, |
131 | unsigned int len) | 50 | unsigned int len, u8 *out) |
132 | { | 51 | { |
133 | struct sha256_state *sctx = shash_desc_ctx(desc); | 52 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
134 | int blocks; | 53 | bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); |
135 | 54 | ||
136 | if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { | 55 | ASM_EXPORT(sha256_ce_offsetof_count, |
137 | sha2_update(desc, data, len); | 56 | offsetof(struct sha256_ce_state, sst.count)); |
138 | sha2_final(desc); | 57 | ASM_EXPORT(sha256_ce_offsetof_finalize, |
139 | return; | 58 | offsetof(struct sha256_ce_state, finalize)); |
140 | } | ||
141 | 59 | ||
142 | /* | 60 | /* |
143 | * Use a fast path if the input is a multiple of 64 bytes. In | 61 | * Allow the asm code to perform the finalization if there is no |
144 | * this case, there is no need to copy data around, and we can | 62 | * partial data and the input is a round multiple of the block size. |
145 | * perform the entire digest calculation in a single invocation | ||
146 | * of sha2_ce_transform() | ||
147 | */ | 63 | */ |
148 | blocks = len / SHA256_BLOCK_SIZE; | 64 | sctx->finalize = finalize; |
149 | 65 | ||
150 | kernel_neon_begin_partial(28); | 66 | kernel_neon_begin_partial(28); |
151 | sha2_ce_transform(blocks, data, sctx->state, NULL, len); | 67 | sha256_base_do_update(desc, data, len, |
68 | (sha256_block_fn *)sha2_ce_transform); | ||
69 | if (!finalize) | ||
70 | sha256_base_do_finalize(desc, | ||
71 | (sha256_block_fn *)sha2_ce_transform); | ||
152 | kernel_neon_end(); | 72 | kernel_neon_end(); |
73 | return sha256_base_finish(desc, out); | ||
153 | } | 74 | } |
154 | 75 | ||
155 | static int sha224_finup(struct shash_desc *desc, const u8 *data, | 76 | static int sha256_ce_final(struct shash_desc *desc, u8 *out) |
156 | unsigned int len, u8 *out) | ||
157 | { | 77 | { |
158 | struct sha256_state *sctx = shash_desc_ctx(desc); | 78 | kernel_neon_begin_partial(28); |
159 | __be32 *dst = (__be32 *)out; | 79 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); |
160 | int i; | 80 | kernel_neon_end(); |
161 | 81 | return sha256_base_finish(desc, out); | |
162 | sha2_finup(desc, data, len); | ||
163 | |||
164 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
165 | put_unaligned_be32(sctx->state[i], dst++); | ||
166 | |||
167 | *sctx = (struct sha256_state){}; | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | static int sha256_finup(struct shash_desc *desc, const u8 *data, | ||
172 | unsigned int len, u8 *out) | ||
173 | { | ||
174 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
175 | __be32 *dst = (__be32 *)out; | ||
176 | int i; | ||
177 | |||
178 | sha2_finup(desc, data, len); | ||
179 | |||
180 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
181 | put_unaligned_be32(sctx->state[i], dst++); | ||
182 | |||
183 | *sctx = (struct sha256_state){}; | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static int sha2_export(struct shash_desc *desc, void *out) | ||
188 | { | ||
189 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
190 | struct sha256_state *dst = out; | ||
191 | |||
192 | *dst = *sctx; | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | static int sha2_import(struct shash_desc *desc, const void *in) | ||
197 | { | ||
198 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
199 | struct sha256_state const *src = in; | ||
200 | |||
201 | *sctx = *src; | ||
202 | return 0; | ||
203 | } | 82 | } |
204 | 83 | ||
205 | static struct shash_alg algs[] = { { | 84 | static struct shash_alg algs[] = { { |
206 | .init = sha224_init, | 85 | .init = sha224_base_init, |
207 | .update = sha2_update, | 86 | .update = sha256_ce_update, |
208 | .final = sha224_final, | 87 | .final = sha256_ce_final, |
209 | .finup = sha224_finup, | 88 | .finup = sha256_ce_finup, |
210 | .export = sha2_export, | 89 | .descsize = sizeof(struct sha256_ce_state), |
211 | .import = sha2_import, | ||
212 | .descsize = sizeof(struct sha256_state), | ||
213 | .digestsize = SHA224_DIGEST_SIZE, | 90 | .digestsize = SHA224_DIGEST_SIZE, |
214 | .statesize = sizeof(struct sha256_state), | ||
215 | .base = { | 91 | .base = { |
216 | .cra_name = "sha224", | 92 | .cra_name = "sha224", |
217 | .cra_driver_name = "sha224-ce", | 93 | .cra_driver_name = "sha224-ce", |
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { { | |||
221 | .cra_module = THIS_MODULE, | 97 | .cra_module = THIS_MODULE, |
222 | } | 98 | } |
223 | }, { | 99 | }, { |
224 | .init = sha256_init, | 100 | .init = sha256_base_init, |
225 | .update = sha2_update, | 101 | .update = sha256_ce_update, |
226 | .final = sha256_final, | 102 | .final = sha256_ce_final, |
227 | .finup = sha256_finup, | 103 | .finup = sha256_ce_finup, |
228 | .export = sha2_export, | 104 | .descsize = sizeof(struct sha256_ce_state), |
229 | .import = sha2_import, | ||
230 | .descsize = sizeof(struct sha256_state), | ||
231 | .digestsize = SHA256_DIGEST_SIZE, | 105 | .digestsize = SHA256_DIGEST_SIZE, |
232 | .statesize = sizeof(struct sha256_state), | ||
233 | .base = { | 106 | .base = { |
234 | .cra_name = "sha256", | 107 | .cra_name = "sha256", |
235 | .cra_driver_name = "sha256-ce", | 108 | .cra_driver_name = "sha256-ce", |