author		Ard Biesheuvel <ard.biesheuvel@linaro.org>	2015-04-09 06:55:44 -0400
committer	Herbert Xu <herbert@gondor.apana.org.au>	2015-04-10 09:39:46 -0400
commit		07eb54d306f4f0efabe0a0d5dd6739d079d90e0e (patch)
tree		9aeaec23ebc92feaf488c57bd27a2bbda0fc3648 /arch/arm64
parent		9205b94923213ee164d7398fdc90826e463c281a (diff)
crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer
This removes all the boilerplate from the existing implementation,
and replaces it with calls into the base layer.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
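For context, <crypto/sha1_base.h> is where the removed boilerplate now lives: the base layer owns the partial-block buffering and length accounting, and calls back into an arch-specific block function. The glue code's cast to sha1_block_fn below works because struct sha1_ce_state embeds struct sha1_state as its first member. A paraphrased sketch of the update helper, close to but not verbatim the kernel source:

	#include <crypto/internal/hash.h>
	#include <crypto/sha.h>
	#include <linux/string.h>

	typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src,
				     int blocks);

	static inline int sha1_base_do_update(struct shash_desc *desc,
					      const u8 *data, unsigned int len,
					      sha1_block_fn *block_fn)
	{
		struct sha1_state *sctx = shash_desc_ctx(desc);
		unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;

		sctx->count += len;

		if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) {
			int blocks;

			/* top up a previously buffered partial block first */
			if (partial) {
				int p = SHA1_BLOCK_SIZE - partial;

				memcpy(sctx->buffer + partial, data, p);
				data += p;
				len -= p;
				block_fn(sctx, sctx->buffer, 1);
			}

			/* then run all full blocks straight from the input */
			blocks = len / SHA1_BLOCK_SIZE;
			len %= SHA1_BLOCK_SIZE;
			if (blocks)
				block_fn(sctx, data, blocks);
			data += blocks * SHA1_BLOCK_SIZE;
			partial = 0;
		}
		/* stash any tail for the next update or finalization */
		if (len)
			memcpy(sctx->buffer + partial, data, len);
		return 0;
	}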
Diffstat (limited to 'arch/arm64')
 arch/arm64/crypto/sha1-ce-core.S |  33
 arch/arm64/crypto/sha1-ce-glue.c | 151
 2 files changed, 59 insertions(+), 125 deletions(-)
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..033aae6d732a 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -66,8 +66,8 @@
 	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
 
 	/*
-	 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-	 *			  u8 *head, long bytes)
+	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+	 *			  int blocks)
 	 */
 ENTRY(sha1_ce_transform)
 	/* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
 	ld1r		{k3.4s}, [x6]
 
 	/* load state */
-	ldr		dga, [x2]
-	ldr		dgb, [x2, #16]
+	ldr		dga, [x0]
+	ldr		dgb, [x0, #16]
 
-	/* load partial state (if supplied) */
-	cbz		x3, 0f
-	ld1		{v8.4s-v11.4s}, [x3]
-	b		1f
+	/* load sha1_ce_state::finalize */
+	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
 
 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
-	sub		w0, w0, #1
+	sub		w2, w2, #1
 
-1:
 CPU_LE(	rev32		v8.16b, v8.16b		)
 CPU_LE(	rev32		v9.16b, v9.16b		)
 CPU_LE(	rev32		v10.16b, v10.16b	)
 CPU_LE(	rev32		v11.16b, v11.16b	)
 
-2:	add		t0.4s, v8.4s, k0.4s
+1:	add		t0.4s, v8.4s, k0.4s
 	mov		dg0v.16b, dgav.16b
 
 	add_update	c, ev, k0, 8, 9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	add		dgbv.2s, dgbv.2s, dg1v.2s
 	add		dgav.4s, dgav.4s, dg0v.4s
 
-	cbnz		w0, 0b
+	cbnz		w2, 0b
 
 	/*
 	 * Final block: add padding and total bit count.
-	 * Skip if we have no total byte count in x4. In that case, the input
-	 * size was not a round multiple of the block size, and the padding is
-	 * handled by the C code.
+	 * Skip if the input size was not a round multiple of the block size,
+	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
+	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
 	movi		v9.2d, #0
 	mov		x8, #0x80000000
 	movi		v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	mov		x4, #0
 	mov		v11.d[0], xzr
 	mov		v11.d[1], x7
-	b		2b
+	b		1b
 
 	/* store new state */
-3:	str		dga, [x2]
-	str		dgb, [x2, #16]
+3:	str		dga, [x0]
+	str		dgb, [x0, #16]
 	ret
 ENDPROC(sha1_ce_transform)
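The asm entry point now takes the state struct in x0 and reads the finalize flag and byte count out of it, so the C struct field offsets must be visible to the assembler. The ASM_EXPORT macro in the glue file below does this from inside a function, emitting an absolute symbol per field. A self-contained illustration with a made-up struct and symbol name (example_state and example_offsetof_finalize are hypothetical, only the macro body is from the patch):

	#include <stddef.h>

	/* hypothetical stand-in for sha1_ce_state, for illustration only */
	struct example_state {
		unsigned long long count;	/* bytes hashed so far */
		unsigned int finalize;		/* nonzero: asm appends the padding */
	};

	#define ASM_EXPORT(sym, val) \
		asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));

	/*
	 * Emits: .globl example_offsetof_finalize
	 *        .set   example_offsetof_finalize, 8
	 * so a .S file can use the field offset as an immediate.
	 */
	void export_example_offsets(void)
	{
		ASM_EXPORT(example_offsetof_finalize,
			   offsetof(struct example_state, finalize));
	}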
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..114e7cc5de8c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,144 +12,81 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
+#include <crypto/sha1_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
-asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-				  u8 *head, long bytes);
+struct sha1_ce_state {
+	struct sha1_state	sst;
+	u32			finalize;
+};
 
-static int sha1_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
+asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+				  int blocks);
 
-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-	return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
-		       unsigned int len)
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+			  unsigned int len)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
-	sctx->count += len;
-
-	if ((partial + len) >= SHA1_BLOCK_SIZE) {
-		int blocks;
-
-		if (partial) {
-			int p = SHA1_BLOCK_SIZE - partial;
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
-			memcpy(sctx->buffer + partial, data, p);
-			data += p;
-			len -= p;
-		}
-
-		blocks = len / SHA1_BLOCK_SIZE;
-		len %= SHA1_BLOCK_SIZE;
-
-		kernel_neon_begin_partial(16);
-		sha1_ce_transform(blocks, data, sctx->state,
-				  partial ? sctx->buffer : NULL, 0);
-		kernel_neon_end();
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
 
-		data += blocks * SHA1_BLOCK_SIZE;
-		partial = 0;
-	}
-	if (len)
-		memcpy(sctx->buffer + partial, data, len);
 	return 0;
 }
 
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, u8 *out)
 {
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	__be64 bits = cpu_to_be64(sctx->count << 3);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	u32 padlen = SHA1_BLOCK_SIZE
-		     - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
-
-	sha1_update(desc, padding, padlen);
-	sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
-	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha1_state){};
-	return 0;
-}
-
-static int sha1_finup(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int blocks;
-	int i;
-
-	if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
-		sha1_update(desc, data, len);
-		return sha1_final(desc, out);
-	}
+	ASM_EXPORT(sha1_ce_offsetof_count,
+		   offsetof(struct sha1_ce_state, sst.count));
+	ASM_EXPORT(sha1_ce_offsetof_finalize,
+		   offsetof(struct sha1_ce_state, finalize));
 
 	/*
-	 * Use a fast path if the input is a multiple of 64 bytes. In
-	 * this case, there is no need to copy data around, and we can
-	 * perform the entire digest calculation in a single invocation
-	 * of sha1_ce_transform()
+	 * Allow the asm code to perform the finalization if there is no
+	 * partial data and the input is a round multiple of the block size.
 	 */
-	blocks = len / SHA1_BLOCK_SIZE;
+	sctx->finalize = finalize;
 
 	kernel_neon_begin_partial(16);
-	sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	if (!finalize)
+		sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
-
-	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha1_state){};
-	return 0;
+	return sha1_base_finish(desc, out);
 }
 
-static int sha1_export(struct shash_desc *desc, void *out)
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	struct sha1_state *dst = out;
-
-	*dst = *sctx;
-	return 0;
-}
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	struct sha1_state const *src = in;
-
-	*sctx = *src;
-	return 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
+	return sha1_base_finish(desc, out);
 }
 
 static struct shash_alg alg = {
-	.init			= sha1_init,
-	.update			= sha1_update,
-	.final			= sha1_final,
-	.finup			= sha1_finup,
-	.export			= sha1_export,
-	.import			= sha1_import,
-	.descsize		= sizeof(struct sha1_state),
+	.init			= sha1_base_init,
+	.update			= sha1_ce_update,
+	.final			= sha1_ce_final,
+	.finup			= sha1_ce_finup,
+	.descsize		= sizeof(struct sha1_ce_state),
 	.digestsize		= SHA1_DIGEST_SIZE,
-	.statesize		= sizeof(struct sha1_state),
 	.base			= {
 		.cra_name		= "sha1",
 		.cra_driver_name	= "sha1-ce",
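For reference, a minimal sketch of exercising the registered algorithm through the kernel's shash API of this era; sha1_ce_demo and its error handling are illustrative and not part of the patch. Requesting "sha1" selects the highest-priority implementation, which is "sha1-ce" where the Crypto Extensions are present.

	#include <crypto/hash.h>
	#include <crypto/sha.h>
	#include <linux/err.h>

	/* illustrative helper, not part of the patch */
	static int sha1_ce_demo(const u8 *data, unsigned int len,
				u8 digest[SHA1_DIGEST_SIZE])
	{
		struct crypto_shash *tfm;
		int err;

		/* resolves to the highest-priority "sha1" provider */
		tfm = crypto_alloc_shash("sha1", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;	/* no CRYPTO_TFM_REQ_MAY_SLEEP */
			err = crypto_shash_digest(desc, data, len, digest);
		}

		crypto_free_shash(tfm);
		return err;
	}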