aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ard Biesheuvel <ard.biesheuvel@linaro.org> 2018-01-19 07:04:40 -0500
committer Herbert Xu <herbert@gondor.apana.org.au> 2018-01-25 09:10:36 -0500
commit fb87127bcefc17efab757606e1b1e333fd614dd0 (patch)
tree 832de08d7e3f6113d245304a6223541d21247646
parent 140aa50d68ea01e7caea6900f6f51882d4dd65c5 (diff)
crypto: arm64/sha512 - fix/improve new v8.2 Crypto Extensions code
Add a missing symbol export that prevents this code from being built as a module. Also, move the round constant table to the .rodata section, and use a more optimized version of the core transform. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- arch/arm64/crypto/sha512-ce-core.S 145
-rw-r--r-- arch/arm64/crypto/sha512-glue.c 1
2 files changed, 72 insertions, 74 deletions
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
index 6c562f8df0b0..7f3bca5c59a2 100644
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -12,10 +12,7 @@
12#include <linux/linkage.h> 12#include <linux/linkage.h>
13#include <asm/assembler.h> 13#include <asm/assembler.h>
14 14
15 // 15 .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
16 // Temporary - for testing only. binutils has no support for these yet
17 //
18 .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
19 .set .Lq\b, \b 16 .set .Lq\b, \b
20 .set .Lv\b\().2d, \b 17 .set .Lv\b\().2d, \b
21 .endr 18 .endr
@@ -36,12 +33,10 @@
36 .inst 0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16) 33 .inst 0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
37 .endm 34 .endm
38 35
39 .text
40 .arch armv8-a+crypto
41
42 /* 36 /*
43 * The SHA-512 round constants 37 * The SHA-512 round constants
44 */ 38 */
39 .section ".rodata", "a"
45 .align 4 40 .align 4
46.Lsha512_rcon: 41.Lsha512_rcon:
47 .quad 0x428a2f98d728ae22, 0x7137449123ef65cd 42 .quad 0x428a2f98d728ae22, 0x7137449123ef65cd
@@ -87,20 +82,20 @@
87 82
88 .macro dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4 83 .macro dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4
89 .ifnb \rc1 84 .ifnb \rc1
90 ld1 {v\rc1\().2d}, [x3], #16 85 ld1 {v\rc1\().2d}, [x4], #16
91 .endif 86 .endif
92 add v\rc0\().2d, v\rc0\().2d, v\in0\().2d 87 add v5.2d, v\rc0\().2d, v\in0\().2d
93 ext v6.16b, v\i2\().16b, v\i3\().16b, #8 88 ext v6.16b, v\i2\().16b, v\i3\().16b, #8
94 ext v\rc0\().16b, v\rc0\().16b, v\rc0\().16b, #8 89 ext v5.16b, v5.16b, v5.16b, #8
95 ext v7.16b, v\i1\().16b, v\i2\().16b, #8 90 ext v7.16b, v\i1\().16b, v\i2\().16b, #8
96 add v\i3\().2d, v\i3\().2d, v\rc0\().2d 91 add v\i3\().2d, v\i3\().2d, v5.2d
97 .ifnb \in1 92 .ifnb \in1
98 ext v10.16b, v\in3\().16b, v\in4\().16b, #8 93 ext v5.16b, v\in3\().16b, v\in4\().16b, #8
99 sha512su0 v\in0\().2d, v\in1\().2d 94 sha512su0 v\in0\().2d, v\in1\().2d
100 .endif 95 .endif
101 sha512h q\i3, q6, v7.2d 96 sha512h q\i3, q6, v7.2d
102 .ifnb \in1 97 .ifnb \in1
103 sha512su1 v\in0\().2d, v\in2\().2d, v10.2d 98 sha512su1 v\in0\().2d, v\in2\().2d, v5.2d
104 .endif 99 .endif
105 add v\i4\().2d, v\i1\().2d, v\i3\().2d 100 add v\i4\().2d, v\i1\().2d, v\i3\().2d
106 sha512h2 q\i3, q\i1, v\i0\().2d 101 sha512h2 q\i3, q\i1, v\i0\().2d
@@ -110,18 +105,20 @@
110 * void sha512_ce_transform(struct sha512_state *sst, u8 const *src, 105 * void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
111 * int blocks) 106 * int blocks)
112 */ 107 */
108 .text
113ENTRY(sha512_ce_transform) 109ENTRY(sha512_ce_transform)
114 /* load state */ 110 /* load state */
115 ld1 {v20.2d-v23.2d}, [x0] 111 ld1 {v8.2d-v11.2d}, [x0]
112
113 /* load first 4 round constants */
114 adr_l x3, .Lsha512_rcon
115 ld1 {v20.2d-v23.2d}, [x3], #64
116 116
117 /* load input */ 117 /* load input */
1180: ld1 {v12.2d-v15.2d}, [x1], #64 1180: ld1 {v12.2d-v15.2d}, [x1], #64
119 ld1 {v16.2d-v19.2d}, [x1], #64 119 ld1 {v16.2d-v19.2d}, [x1], #64
120 sub w2, w2, #1 120 sub w2, w2, #1
121 121
122 /* load round constants */
123 adr x3, .Lsha512_rcon
124
125CPU_LE( rev64 v12.16b, v12.16b ) 122CPU_LE( rev64 v12.16b, v12.16b )
126CPU_LE( rev64 v13.16b, v13.16b ) 123CPU_LE( rev64 v13.16b, v13.16b )
127CPU_LE( rev64 v14.16b, v14.16b ) 124CPU_LE( rev64 v14.16b, v14.16b )
@@ -131,12 +128,12 @@ CPU_LE( rev64 v17.16b, v17.16b )
131CPU_LE( rev64 v18.16b, v18.16b ) 128CPU_LE( rev64 v18.16b, v18.16b )
132CPU_LE( rev64 v19.16b, v19.16b ) 129CPU_LE( rev64 v19.16b, v19.16b )
133 130
134 ld1 {v8.2d}, [x3], #16 131 mov x4, x3 // rc pointer
135 132
136 mov v0.16b, v20.16b 133 mov v0.16b, v8.16b
137 mov v1.16b, v21.16b 134 mov v1.16b, v9.16b
138 mov v2.16b, v22.16b 135 mov v2.16b, v10.16b
139 mov v3.16b, v23.16b 136 mov v3.16b, v11.16b
140 137
141 // v0 ab cd -- ef gh ab 138 // v0 ab cd -- ef gh ab
142 // v1 cd -- ef gh ab cd 139 // v1 cd -- ef gh ab cd
@@ -144,64 +141,64 @@ CPU_LE( rev64 v19.16b, v19.16b )
144 // v3 gh ab cd -- ef gh 141 // v3 gh ab cd -- ef gh
145 // v4 -- ef gh ab cd -- 142 // v4 -- ef gh ab cd --
146 143
147 dround 0, 1, 2, 3, 4, 8, 9, 12, 13, 19, 16, 17 144 dround 0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17
148 dround 3, 0, 4, 2, 1, 9, 8, 13, 14, 12, 17, 18 145 dround 3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18
149 dround 2, 3, 1, 4, 0, 8, 9, 14, 15, 13, 18, 19 146 dround 2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19
150 dround 4, 2, 0, 1, 3, 9, 8, 15, 16, 14, 19, 12 147 dround 4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12
151 dround 1, 4, 3, 0, 2, 8, 9, 16, 17, 15, 12, 13 148 dround 1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13
152 149
153 dround 0, 1, 2, 3, 4, 9, 8, 17, 18, 16, 13, 14 150 dround 0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14
154 dround 3, 0, 4, 2, 1, 8, 9, 18, 19, 17, 14, 15 151 dround 3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15
155 dround 2, 3, 1, 4, 0, 9, 8, 19, 12, 18, 15, 16 152 dround 2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16
156 dround 4, 2, 0, 1, 3, 8, 9, 12, 13, 19, 16, 17 153 dround 4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17
157 dround 1, 4, 3, 0, 2, 9, 8, 13, 14, 12, 17, 18 154 dround 1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18
158 155
159 dround 0, 1, 2, 3, 4, 8, 9, 14, 15, 13, 18, 19 156 dround 0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19
160 dround 3, 0, 4, 2, 1, 9, 8, 15, 16, 14, 19, 12 157 dround 3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12
161 dround 2, 3, 1, 4, 0, 8, 9, 16, 17, 15, 12, 13 158 dround 2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13
162 dround 4, 2, 0, 1, 3, 9, 8, 17, 18, 16, 13, 14 159 dround 4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14
163 dround 1, 4, 3, 0, 2, 8, 9, 18, 19, 17, 14, 15 160 dround 1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15
164 161
165 dround 0, 1, 2, 3, 4, 9, 8, 19, 12, 18, 15, 16 162 dround 0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16
166 dround 3, 0, 4, 2, 1, 8, 9, 12, 13, 19, 16, 17 163 dround 3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17
167 dround 2, 3, 1, 4, 0, 9, 8, 13, 14, 12, 17, 18 164 dround 2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18
168 dround 4, 2, 0, 1, 3, 8, 9, 14, 15, 13, 18, 19 165 dround 4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19
169 dround 1, 4, 3, 0, 2, 9, 8, 15, 16, 14, 19, 12 166 dround 1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12
170 167
171 dround 0, 1, 2, 3, 4, 8, 9, 16, 17, 15, 12, 13 168 dround 0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13
172 dround 3, 0, 4, 2, 1, 9, 8, 17, 18, 16, 13, 14 169 dround 3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14
173 dround 2, 3, 1, 4, 0, 8, 9, 18, 19, 17, 14, 15 170 dround 2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15
174 dround 4, 2, 0, 1, 3, 9, 8, 19, 12, 18, 15, 16 171 dround 4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16
175 dround 1, 4, 3, 0, 2, 8, 9, 12, 13, 19, 16, 17 172 dround 1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17
176 173
177 dround 0, 1, 2, 3, 4, 9, 8, 13, 14, 12, 17, 18 174 dround 0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18
178 dround 3, 0, 4, 2, 1, 8, 9, 14, 15, 13, 18, 19 175 dround 3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19
179 dround 2, 3, 1, 4, 0, 9, 8, 15, 16, 14, 19, 12 176 dround 2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12
180 dround 4, 2, 0, 1, 3, 8, 9, 16, 17, 15, 12, 13 177 dround 4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13
181 dround 1, 4, 3, 0, 2, 9, 8, 17, 18, 16, 13, 14 178 dround 1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14
182 179
183 dround 0, 1, 2, 3, 4, 8, 9, 18, 19, 17, 14, 15 180 dround 0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15
184 dround 3, 0, 4, 2, 1, 9, 8, 19, 12, 18, 15, 16 181 dround 3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16
185 dround 2, 3, 1, 4, 0, 8, 9, 12 182 dround 2, 3, 1, 4, 0, 28, 24, 12
186 dround 4, 2, 0, 1, 3, 9, 8, 13 183 dround 4, 2, 0, 1, 3, 29, 25, 13
187 dround 1, 4, 3, 0, 2, 8, 9, 14 184 dround 1, 4, 3, 0, 2, 30, 26, 14
188 185
189 dround 0, 1, 2, 3, 4, 9, 8, 15 186 dround 0, 1, 2, 3, 4, 31, 27, 15
190 dround 3, 0, 4, 2, 1, 8, 9, 16 187 dround 3, 0, 4, 2, 1, 24, , 16
191 dround 2, 3, 1, 4, 0, 9, 8, 17 188 dround 2, 3, 1, 4, 0, 25, , 17
192 dround 4, 2, 0, 1, 3, 8, 9, 18 189 dround 4, 2, 0, 1, 3, 26, , 18
193 dround 1, 4, 3, 0, 2, 9, , 19 190 dround 1, 4, 3, 0, 2, 27, , 19
194 191
195 /* update state */ 192 /* update state */
196 add v20.2d, v20.2d, v0.2d 193 add v8.2d, v8.2d, v0.2d
197 add v21.2d, v21.2d, v1.2d 194 add v9.2d, v9.2d, v1.2d
198 add v22.2d, v22.2d, v2.2d 195 add v10.2d, v10.2d, v2.2d
199 add v23.2d, v23.2d, v3.2d 196 add v11.2d, v11.2d, v3.2d
200 197
201 /* handled all input blocks? */ 198 /* handled all input blocks? */
202 cbnz w2, 0b 199 cbnz w2, 0b
203 200
204 /* store new state */ 201 /* store new state */
2053: st1 {v20.2d-v23.2d}, [x0] 2023: st1 {v8.2d-v11.2d}, [x0]
206 ret 203 ret
207ENDPROC(sha512_ce_transform) 204ENDPROC(sha512_ce_transform)
diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c
index aff35c9992a4..27db4851e380 100644
--- a/arch/arm64/crypto/sha512-glue.c
+++ b/arch/arm64/crypto/sha512-glue.c
@@ -27,6 +27,7 @@ MODULE_ALIAS_CRYPTO("sha512");
27 27
28asmlinkage void sha512_block_data_order(u32 *digest, const void *data, 28asmlinkage void sha512_block_data_order(u32 *digest, const void *data,
29 unsigned int num_blks); 29 unsigned int num_blks);
30EXPORT_SYMBOL(sha512_block_data_order);
30 31
31static int sha512_update(struct shash_desc *desc, const u8 *data, 32static int sha512_update(struct shash_desc *desc, const u8 *data,
32 unsigned int len) 33 unsigned int len)