diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2018-01-19 07:04:40 -0500 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2018-01-25 09:10:36 -0500 |
commit | fb87127bcefc17efab757606e1b1e333fd614dd0 (patch) | |
tree | 832de08d7e3f6113d245304a6223541d21247646 | |
parent | 140aa50d68ea01e7caea6900f6f51882d4dd65c5 (diff) |
crypto: arm64/sha512 - fix/improve new v8.2 Crypto Extensions code
Add a missing symbol export, without which this code cannot be built as a
module. Also, move the round constant table to the .rodata section,
and use a more optimized version of the core transform.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- | arch/arm64/crypto/sha512-ce-core.S | 145 | ||||
-rw-r--r-- | arch/arm64/crypto/sha512-glue.c | 1 |
2 files changed, 72 insertions, 74 deletions
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S index 6c562f8df0b0..7f3bca5c59a2 100644 --- a/arch/arm64/crypto/sha512-ce-core.S +++ b/arch/arm64/crypto/sha512-ce-core.S | |||
@@ -12,10 +12,7 @@ | |||
12 | #include <linux/linkage.h> | 12 | #include <linux/linkage.h> |
13 | #include <asm/assembler.h> | 13 | #include <asm/assembler.h> |
14 | 14 | ||
15 | // | 15 | .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 |
16 | // Temporary - for testing only. binutils has no support for these yet | ||
17 | // | ||
18 | .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | ||
19 | .set .Lq\b, \b | 16 | .set .Lq\b, \b |
20 | .set .Lv\b\().2d, \b | 17 | .set .Lv\b\().2d, \b |
21 | .endr | 18 | .endr |
@@ -36,12 +33,10 @@ | |||
36 | .inst 0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16) | 33 | .inst 0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16) |
37 | .endm | 34 | .endm |
38 | 35 | ||
39 | .text | ||
40 | .arch armv8-a+crypto | ||
41 | |||
42 | /* | 36 | /* |
43 | * The SHA-512 round constants | 37 | * The SHA-512 round constants |
44 | */ | 38 | */ |
39 | .section ".rodata", "a" | ||
45 | .align 4 | 40 | .align 4 |
46 | .Lsha512_rcon: | 41 | .Lsha512_rcon: |
47 | .quad 0x428a2f98d728ae22, 0x7137449123ef65cd | 42 | .quad 0x428a2f98d728ae22, 0x7137449123ef65cd |
@@ -87,20 +82,20 @@ | |||
87 | 82 | ||
88 | .macro dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4 | 83 | .macro dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4 |
89 | .ifnb \rc1 | 84 | .ifnb \rc1 |
90 | ld1 {v\rc1\().2d}, [x3], #16 | 85 | ld1 {v\rc1\().2d}, [x4], #16 |
91 | .endif | 86 | .endif |
92 | add v\rc0\().2d, v\rc0\().2d, v\in0\().2d | 87 | add v5.2d, v\rc0\().2d, v\in0\().2d |
93 | ext v6.16b, v\i2\().16b, v\i3\().16b, #8 | 88 | ext v6.16b, v\i2\().16b, v\i3\().16b, #8 |
94 | ext v\rc0\().16b, v\rc0\().16b, v\rc0\().16b, #8 | 89 | ext v5.16b, v5.16b, v5.16b, #8 |
95 | ext v7.16b, v\i1\().16b, v\i2\().16b, #8 | 90 | ext v7.16b, v\i1\().16b, v\i2\().16b, #8 |
96 | add v\i3\().2d, v\i3\().2d, v\rc0\().2d | 91 | add v\i3\().2d, v\i3\().2d, v5.2d |
97 | .ifnb \in1 | 92 | .ifnb \in1 |
98 | ext v10.16b, v\in3\().16b, v\in4\().16b, #8 | 93 | ext v5.16b, v\in3\().16b, v\in4\().16b, #8 |
99 | sha512su0 v\in0\().2d, v\in1\().2d | 94 | sha512su0 v\in0\().2d, v\in1\().2d |
100 | .endif | 95 | .endif |
101 | sha512h q\i3, q6, v7.2d | 96 | sha512h q\i3, q6, v7.2d |
102 | .ifnb \in1 | 97 | .ifnb \in1 |
103 | sha512su1 v\in0\().2d, v\in2\().2d, v10.2d | 98 | sha512su1 v\in0\().2d, v\in2\().2d, v5.2d |
104 | .endif | 99 | .endif |
105 | add v\i4\().2d, v\i1\().2d, v\i3\().2d | 100 | add v\i4\().2d, v\i1\().2d, v\i3\().2d |
106 | sha512h2 q\i3, q\i1, v\i0\().2d | 101 | sha512h2 q\i3, q\i1, v\i0\().2d |
@@ -110,18 +105,20 @@ | |||
110 | * void sha512_ce_transform(struct sha512_state *sst, u8 const *src, | 105 | * void sha512_ce_transform(struct sha512_state *sst, u8 const *src, |
111 | * int blocks) | 106 | * int blocks) |
112 | */ | 107 | */ |
108 | .text | ||
113 | ENTRY(sha512_ce_transform) | 109 | ENTRY(sha512_ce_transform) |
114 | /* load state */ | 110 | /* load state */ |
115 | ld1 {v20.2d-v23.2d}, [x0] | 111 | ld1 {v8.2d-v11.2d}, [x0] |
112 | |||
113 | /* load first 4 round constants */ | ||
114 | adr_l x3, .Lsha512_rcon | ||
115 | ld1 {v20.2d-v23.2d}, [x3], #64 | ||
116 | 116 | ||
117 | /* load input */ | 117 | /* load input */ |
118 | 0: ld1 {v12.2d-v15.2d}, [x1], #64 | 118 | 0: ld1 {v12.2d-v15.2d}, [x1], #64 |
119 | ld1 {v16.2d-v19.2d}, [x1], #64 | 119 | ld1 {v16.2d-v19.2d}, [x1], #64 |
120 | sub w2, w2, #1 | 120 | sub w2, w2, #1 |
121 | 121 | ||
122 | /* load round constants */ | ||
123 | adr x3, .Lsha512_rcon | ||
124 | |||
125 | CPU_LE( rev64 v12.16b, v12.16b ) | 122 | CPU_LE( rev64 v12.16b, v12.16b ) |
126 | CPU_LE( rev64 v13.16b, v13.16b ) | 123 | CPU_LE( rev64 v13.16b, v13.16b ) |
127 | CPU_LE( rev64 v14.16b, v14.16b ) | 124 | CPU_LE( rev64 v14.16b, v14.16b ) |
@@ -131,12 +128,12 @@ CPU_LE( rev64 v17.16b, v17.16b ) | |||
131 | CPU_LE( rev64 v18.16b, v18.16b ) | 128 | CPU_LE( rev64 v18.16b, v18.16b ) |
132 | CPU_LE( rev64 v19.16b, v19.16b ) | 129 | CPU_LE( rev64 v19.16b, v19.16b ) |
133 | 130 | ||
134 | ld1 {v8.2d}, [x3], #16 | 131 | mov x4, x3 // rc pointer |
135 | 132 | ||
136 | mov v0.16b, v20.16b | 133 | mov v0.16b, v8.16b |
137 | mov v1.16b, v21.16b | 134 | mov v1.16b, v9.16b |
138 | mov v2.16b, v22.16b | 135 | mov v2.16b, v10.16b |
139 | mov v3.16b, v23.16b | 136 | mov v3.16b, v11.16b |
140 | 137 | ||
141 | // v0 ab cd -- ef gh ab | 138 | // v0 ab cd -- ef gh ab |
142 | // v1 cd -- ef gh ab cd | 139 | // v1 cd -- ef gh ab cd |
@@ -144,64 +141,64 @@ CPU_LE( rev64 v19.16b, v19.16b ) | |||
144 | // v3 gh ab cd -- ef gh | 141 | // v3 gh ab cd -- ef gh |
145 | // v4 -- ef gh ab cd -- | 142 | // v4 -- ef gh ab cd -- |
146 | 143 | ||
147 | dround 0, 1, 2, 3, 4, 8, 9, 12, 13, 19, 16, 17 | 144 | dround 0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17 |
148 | dround 3, 0, 4, 2, 1, 9, 8, 13, 14, 12, 17, 18 | 145 | dround 3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18 |
149 | dround 2, 3, 1, 4, 0, 8, 9, 14, 15, 13, 18, 19 | 146 | dround 2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19 |
150 | dround 4, 2, 0, 1, 3, 9, 8, 15, 16, 14, 19, 12 | 147 | dround 4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12 |
151 | dround 1, 4, 3, 0, 2, 8, 9, 16, 17, 15, 12, 13 | 148 | dround 1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13 |
152 | 149 | ||
153 | dround 0, 1, 2, 3, 4, 9, 8, 17, 18, 16, 13, 14 | 150 | dround 0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14 |
154 | dround 3, 0, 4, 2, 1, 8, 9, 18, 19, 17, 14, 15 | 151 | dround 3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15 |
155 | dround 2, 3, 1, 4, 0, 9, 8, 19, 12, 18, 15, 16 | 152 | dround 2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16 |
156 | dround 4, 2, 0, 1, 3, 8, 9, 12, 13, 19, 16, 17 | 153 | dround 4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17 |
157 | dround 1, 4, 3, 0, 2, 9, 8, 13, 14, 12, 17, 18 | 154 | dround 1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18 |
158 | 155 | ||
159 | dround 0, 1, 2, 3, 4, 8, 9, 14, 15, 13, 18, 19 | 156 | dround 0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19 |
160 | dround 3, 0, 4, 2, 1, 9, 8, 15, 16, 14, 19, 12 | 157 | dround 3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12 |
161 | dround 2, 3, 1, 4, 0, 8, 9, 16, 17, 15, 12, 13 | 158 | dround 2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13 |
162 | dround 4, 2, 0, 1, 3, 9, 8, 17, 18, 16, 13, 14 | 159 | dround 4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14 |
163 | dround 1, 4, 3, 0, 2, 8, 9, 18, 19, 17, 14, 15 | 160 | dround 1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15 |
164 | 161 | ||
165 | dround 0, 1, 2, 3, 4, 9, 8, 19, 12, 18, 15, 16 | 162 | dround 0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16 |
166 | dround 3, 0, 4, 2, 1, 8, 9, 12, 13, 19, 16, 17 | 163 | dround 3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17 |
167 | dround 2, 3, 1, 4, 0, 9, 8, 13, 14, 12, 17, 18 | 164 | dround 2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18 |
168 | dround 4, 2, 0, 1, 3, 8, 9, 14, 15, 13, 18, 19 | 165 | dround 4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19 |
169 | dround 1, 4, 3, 0, 2, 9, 8, 15, 16, 14, 19, 12 | 166 | dround 1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12 |
170 | 167 | ||
171 | dround 0, 1, 2, 3, 4, 8, 9, 16, 17, 15, 12, 13 | 168 | dround 0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13 |
172 | dround 3, 0, 4, 2, 1, 9, 8, 17, 18, 16, 13, 14 | 169 | dround 3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14 |
173 | dround 2, 3, 1, 4, 0, 8, 9, 18, 19, 17, 14, 15 | 170 | dround 2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15 |
174 | dround 4, 2, 0, 1, 3, 9, 8, 19, 12, 18, 15, 16 | 171 | dround 4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16 |
175 | dround 1, 4, 3, 0, 2, 8, 9, 12, 13, 19, 16, 17 | 172 | dround 1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17 |
176 | 173 | ||
177 | dround 0, 1, 2, 3, 4, 9, 8, 13, 14, 12, 17, 18 | 174 | dround 0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18 |
178 | dround 3, 0, 4, 2, 1, 8, 9, 14, 15, 13, 18, 19 | 175 | dround 3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19 |
179 | dround 2, 3, 1, 4, 0, 9, 8, 15, 16, 14, 19, 12 | 176 | dround 2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12 |
180 | dround 4, 2, 0, 1, 3, 8, 9, 16, 17, 15, 12, 13 | 177 | dround 4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13 |
181 | dround 1, 4, 3, 0, 2, 9, 8, 17, 18, 16, 13, 14 | 178 | dround 1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14 |
182 | 179 | ||
183 | dround 0, 1, 2, 3, 4, 8, 9, 18, 19, 17, 14, 15 | 180 | dround 0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15 |
184 | dround 3, 0, 4, 2, 1, 9, 8, 19, 12, 18, 15, 16 | 181 | dround 3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16 |
185 | dround 2, 3, 1, 4, 0, 8, 9, 12 | 182 | dround 2, 3, 1, 4, 0, 28, 24, 12 |
186 | dround 4, 2, 0, 1, 3, 9, 8, 13 | 183 | dround 4, 2, 0, 1, 3, 29, 25, 13 |
187 | dround 1, 4, 3, 0, 2, 8, 9, 14 | 184 | dround 1, 4, 3, 0, 2, 30, 26, 14 |
188 | 185 | ||
189 | dround 0, 1, 2, 3, 4, 9, 8, 15 | 186 | dround 0, 1, 2, 3, 4, 31, 27, 15 |
190 | dround 3, 0, 4, 2, 1, 8, 9, 16 | 187 | dround 3, 0, 4, 2, 1, 24, , 16 |
191 | dround 2, 3, 1, 4, 0, 9, 8, 17 | 188 | dround 2, 3, 1, 4, 0, 25, , 17 |
192 | dround 4, 2, 0, 1, 3, 8, 9, 18 | 189 | dround 4, 2, 0, 1, 3, 26, , 18 |
193 | dround 1, 4, 3, 0, 2, 9, , 19 | 190 | dround 1, 4, 3, 0, 2, 27, , 19 |
194 | 191 | ||
195 | /* update state */ | 192 | /* update state */ |
196 | add v20.2d, v20.2d, v0.2d | 193 | add v8.2d, v8.2d, v0.2d |
197 | add v21.2d, v21.2d, v1.2d | 194 | add v9.2d, v9.2d, v1.2d |
198 | add v22.2d, v22.2d, v2.2d | 195 | add v10.2d, v10.2d, v2.2d |
199 | add v23.2d, v23.2d, v3.2d | 196 | add v11.2d, v11.2d, v3.2d |
200 | 197 | ||
201 | /* handled all input blocks? */ | 198 | /* handled all input blocks? */ |
202 | cbnz w2, 0b | 199 | cbnz w2, 0b |
203 | 200 | ||
204 | /* store new state */ | 201 | /* store new state */ |
205 | 3: st1 {v20.2d-v23.2d}, [x0] | 202 | 3: st1 {v8.2d-v11.2d}, [x0] |
206 | ret | 203 | ret |
207 | ENDPROC(sha512_ce_transform) | 204 | ENDPROC(sha512_ce_transform) |
diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c index aff35c9992a4..27db4851e380 100644 --- a/arch/arm64/crypto/sha512-glue.c +++ b/arch/arm64/crypto/sha512-glue.c | |||
@@ -27,6 +27,7 @@ MODULE_ALIAS_CRYPTO("sha512"); | |||
27 | 27 | ||
28 | asmlinkage void sha512_block_data_order(u32 *digest, const void *data, | 28 | asmlinkage void sha512_block_data_order(u32 *digest, const void *data, |
29 | unsigned int num_blks); | 29 | unsigned int num_blks); |
30 | EXPORT_SYMBOL(sha512_block_data_order); | ||
30 | 31 | ||
31 | static int sha512_update(struct shash_desc *desc, const u8 *data, | 32 | static int sha512_update(struct shash_desc *desc, const u8 *data, |
32 | unsigned int len) | 33 | unsigned int len) |