author	Ard Biesheuvel <ard.biesheuvel@linaro.org>	2015-04-09 06:55:44 -0400
committer	Herbert Xu <herbert@gondor.apana.org.au>	2015-04-10 09:39:46 -0400
commit	07eb54d306f4f0efabe0a0d5dd6739d079d90e0e (patch)
tree	9aeaec23ebc92feaf488c57bd27a2bbda0fc3648 /arch/arm64
parent	9205b94923213ee164d7398fdc90826e463c281a (diff)
crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer
This removes all the boilerplate from the existing implementation, and
replaces it with calls into the base layer.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm64')
-rw-r--r--	arch/arm64/crypto/sha1-ce-core.S	33
-rw-r--r--	arch/arm64/crypto/sha1-ce-glue.c	151
2 files changed, 59 insertions(+), 125 deletions(-)
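The boilerplate being removed below is the per-driver block buffering; the shared base layer in <crypto/sha1_base.h> now provides it. As a rough illustration of what that helper does (a simplified sketch under that assumption, not the verbatim kernel header), sha1_base_do_update() buffers partial input in the sha1_state and feeds whole 64-byte blocks to an architecture-specific sha1_block_fn such as sha1_ce_transform():

#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <linux/string.h>

typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks);

/*
 * Simplified sketch of the generic update helper: buffer partial input in
 * the sha1_state and hand complete 64-byte blocks to the architecture
 * specific block function.
 */
static inline int sha1_base_do_update(struct shash_desc *desc, const u8 *data,
				      unsigned int len, sha1_block_fn *block_fn)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;

	sctx->count += len;

	if ((partial + len) >= SHA1_BLOCK_SIZE) {
		int blocks;

		if (partial) {
			int p = SHA1_BLOCK_SIZE - partial;

			/* top up the buffered block and process it */
			memcpy(sctx->buffer + partial, data, p);
			data += p;
			len -= p;

			block_fn(sctx, sctx->buffer, 1);
		}

		blocks = len / SHA1_BLOCK_SIZE;
		len %= SHA1_BLOCK_SIZE;

		if (blocks) {
			block_fn(sctx, data, blocks);
			data += blocks * SHA1_BLOCK_SIZE;
		}
		partial = 0;
	}
	if (len)
		memcpy(sctx->buffer + partial, data, len);

	return 0;
}

sha1_base_do_finalize() and sha1_base_finish() similarly take over the padding and digest-output steps, which is why the driver's final/finup paths shrink to a handful of lines in the glue diff below.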
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..033aae6d732a 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -66,8 +66,8 @@
 	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
 
 	/*
-	 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-	 *			  u8 *head, long bytes)
+	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+	 *			  int blocks)
 	 */
 ENTRY(sha1_ce_transform)
 	/* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
 	ld1r		{k3.4s}, [x6]
 
 	/* load state */
-	ldr		dga, [x2]
-	ldr		dgb, [x2, #16]
+	ldr		dga, [x0]
+	ldr		dgb, [x0, #16]
 
-	/* load partial state (if supplied) */
-	cbz		x3, 0f
-	ld1		{v8.4s-v11.4s}, [x3]
-	b		1f
+	/* load sha1_ce_state::finalize */
+	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
 
 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
-	sub		w0, w0, #1
+	sub		w2, w2, #1
 
-1:
 CPU_LE(	rev32		v8.16b, v8.16b		)
 CPU_LE(	rev32		v9.16b, v9.16b		)
 CPU_LE(	rev32		v10.16b, v10.16b	)
 CPU_LE(	rev32		v11.16b, v11.16b	)
 
-2:	add		t0.4s, v8.4s, k0.4s
+1:	add		t0.4s, v8.4s, k0.4s
 	mov		dg0v.16b, dgav.16b
 
 	add_update	c, ev, k0,  8,  9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE( rev32 v11.16b, v11.16b )
 	add		dgbv.2s, dgbv.2s, dg1v.2s
 	add		dgav.4s, dgav.4s, dg0v.4s
 
-	cbnz		w0, 0b
+	cbnz		w2, 0b
 
 	/*
 	 * Final block: add padding and total bit count.
-	 * Skip if we have no total byte count in x4. In that case, the input
-	 * size was not a round multiple of the block size, and the padding is
-	 * handled by the C code.
+	 * Skip if the input size was not a round multiple of the block size,
+	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
+	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
 	movi		v9.2d, #0
 	mov		x8, #0x80000000
 	movi		v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE( rev32 v11.16b, v11.16b )
 	mov		x4, #0
 	mov		v11.d[0], xzr
 	mov		v11.d[1], x7
-	b		2b
+	b		1b
 
 	/* store new state */
-3:	str		dga, [x2]
-	str		dgb, [x2, #16]
+3:	str		dga, [x0]
+	str		dgb, [x0, #16]
 	ret
 ENDPROC(sha1_ce_transform)
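Note that the finalize path above only runs when the glue code below sets sha1_ce_state::finalize, i.e. when the message ended exactly on a 64-byte boundary with nothing buffered; otherwise the cbz x4, 3f branch skips it and the generic C finalization supplies the padding. Because of that restriction the padding block built in the NEON registers always has the same shape; a C model of it (purely illustrative, not part of this patch) would be:

#include <asm/unaligned.h>
#include <crypto/sha.h>
#include <linux/string.h>

/*
 * C model of the padding block built by the asm finalize path: a 0x80
 * terminator, zero fill, and the big-endian 64-bit message length in bits
 * in the last eight bytes. This shape is only valid because finalization is
 * delegated to the asm code solely when the message ended exactly on a
 * block boundary.
 */
static void sha1_ce_model_final_block(u8 block[SHA1_BLOCK_SIZE], u64 count)
{
	memset(block, 0, SHA1_BLOCK_SIZE);
	block[0] = 0x80;	/* message terminator bit */
	put_unaligned_be64(count << 3, block + SHA1_BLOCK_SIZE - 8);
}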
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..114e7cc5de8c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,144 +12,81 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
+#include <crypto/sha1_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#define ASM_EXPORT(sym, val) \
+	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
-asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-				  u8 *head, long bytes);
+struct sha1_ce_state {
+	struct sha1_state	sst;
+	u32			finalize;
+};
 
-static int sha1_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
+asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+				  int blocks);
 
-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-	return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
-		       unsigned int len)
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+			  unsigned int len)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
-	sctx->count += len;
-
-	if ((partial + len) >= SHA1_BLOCK_SIZE) {
-		int blocks;
-
-		if (partial) {
-			int p = SHA1_BLOCK_SIZE - partial;
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
-			memcpy(sctx->buffer + partial, data, p);
-			data += p;
-			len -= p;
-		}
-
-		blocks = len / SHA1_BLOCK_SIZE;
-		len %= SHA1_BLOCK_SIZE;
-
-		kernel_neon_begin_partial(16);
-		sha1_ce_transform(blocks, data, sctx->state,
-				  partial ? sctx->buffer : NULL, 0);
-		kernel_neon_end();
+	sctx->finalize = 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
 
-		data += blocks * SHA1_BLOCK_SIZE;
-		partial = 0;
-	}
-	if (len)
-		memcpy(sctx->buffer + partial, data, len);
 	return 0;
 }
 
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, u8 *out)
 {
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	__be64 bits = cpu_to_be64(sctx->count << 3);
-	__be32 *dst = (__be32 *)out;
-	int i;
-
-	u32 padlen = SHA1_BLOCK_SIZE
-		     - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
-
-	sha1_update(desc, padding, padlen);
-	sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
-	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha1_state){};
-	return 0;
-}
-
-static int sha1_finup(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	__be32 *dst = (__be32 *)out;
-	int blocks;
-	int i;
-
-	if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
-		sha1_update(desc, data, len);
-		return sha1_final(desc, out);
-	}
+	ASM_EXPORT(sha1_ce_offsetof_count,
+		   offsetof(struct sha1_ce_state, sst.count));
+	ASM_EXPORT(sha1_ce_offsetof_finalize,
+		   offsetof(struct sha1_ce_state, finalize));
 
 	/*
-	 * Use a fast path if the input is a multiple of 64 bytes. In
-	 * this case, there is no need to copy data around, and we can
-	 * perform the entire digest calculation in a single invocation
-	 * of sha1_ce_transform()
+	 * Allow the asm code to perform the finalization if there is no
+	 * partial data and the input is a round multiple of the block size.
 	 */
-	blocks = len / SHA1_BLOCK_SIZE;
+	sctx->finalize = finalize;
 
 	kernel_neon_begin_partial(16);
-	sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_ce_transform);
+	if (!finalize)
+		sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
-
-	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-		put_unaligned_be32(sctx->state[i], dst++);
-
-	*sctx = (struct sha1_state){};
-	return 0;
+	return sha1_base_finish(desc, out);
 }
 
-static int sha1_export(struct shash_desc *desc, void *out)
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	struct sha1_state *dst = out;
-
-	*dst = *sctx;
-	return 0;
-}
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	struct sha1_state const *src = in;
-
-	*sctx = *src;
-	return 0;
+	kernel_neon_begin_partial(16);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+	kernel_neon_end();
+	return sha1_base_finish(desc, out);
 }
 
 static struct shash_alg alg = {
-	.init			= sha1_init,
-	.update			= sha1_update,
-	.final			= sha1_final,
-	.finup			= sha1_finup,
-	.export			= sha1_export,
-	.import			= sha1_import,
-	.descsize		= sizeof(struct sha1_state),
+	.init			= sha1_base_init,
+	.update			= sha1_ce_update,
+	.final			= sha1_ce_final,
+	.finup			= sha1_ce_finup,
+	.descsize		= sizeof(struct sha1_ce_state),
 	.digestsize		= SHA1_DIGEST_SIZE,
-	.statesize		= sizeof(struct sha1_state),
 	.base			= {
 		.cra_name		= "sha1",
 		.cra_driver_name	= "sha1-ce",
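Once registered, the driver is reached through the normal shash interface; nothing in this patch changes how callers use it. For completeness, a minimal hypothetical in-kernel caller computing a SHA-1 digest could look like the sketch below, where the "sha1" lookup resolves to the "sha1-ce" implementation on CPUs with the ARMv8 crypto extensions (sha1_digest_example is an illustrative name, not part of the kernel):

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>
#include <linux/slab.h>

/* Hypothetical example: one-shot SHA-1 digest through the shash API. */
static int sha1_digest_example(const u8 *data, unsigned int len,
			       u8 out[SHA1_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	tfm = crypto_alloc_shash("sha1", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* descriptor carries the per-request state (struct sha1_ce_state here) */
	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}

	desc->tfm = tfm;
	desc->flags = 0;

	err = crypto_shash_digest(desc, data, len, out);

	kzfree(desc);		/* clears the intermediate hash state */
	crypto_free_shash(tfm);
	return err;
}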