diff options
author | tim <tim.c.chen@linux.intel.com> | 2015-09-10 18:27:13 -0400 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2015-09-21 10:01:06 -0400 |
commit | 600a2334e83d22e5c3f7ff2581f545bfc354d206 (patch) | |
tree | cf4da0fd9cd4ea5058623d4f0c80ddc0a7376c5c /arch/x86/crypto/sha256_ni_asm.S | |
parent | c356a7e975a25e8867961c1b7a4a965d506f0a04 (diff) |
crypto: x86/sha - Intel SHA Extensions optimized SHA256 transform function
This patch includes the Intel SHA Extensions optimized implementation
of the SHA-256 update function. This function has been tested on the
Broxton platform, where it measured a speedup of 3.6x over the SSSE3
implementation for 4K blocks.
Originally-by: Chandramouli Narayanan <mouli_7982@yahoo.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/sha256_ni_asm.S')
-rw-r--r-- | arch/x86/crypto/sha256_ni_asm.S | 353 |
1 files changed, 353 insertions, 0 deletions
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S new file mode 100644 index 000000000000..748cdf21a938 --- /dev/null +++ b/arch/x86/crypto/sha256_ni_asm.S | |||
@@ -0,0 +1,353 @@ | |||
1 | /* | ||
2 | * Intel SHA Extensions optimized implementation of a SHA-256 update function | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2015 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Sean Gulley <sean.m.gulley@intel.com> | ||
22 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
23 | * | ||
24 | * BSD LICENSE | ||
25 | * | ||
26 | * Copyright(c) 2015 Intel Corporation. | ||
27 | * | ||
28 | * Redistribution and use in source and binary forms, with or without | ||
29 | * modification, are permitted provided that the following conditions | ||
30 | * are met: | ||
31 | * | ||
32 | * * Redistributions of source code must retain the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer. | ||
34 | * * Redistributions in binary form must reproduce the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer in | ||
36 | * the documentation and/or other materials provided with the | ||
37 | * distribution. | ||
38 | * * Neither the name of Intel Corporation nor the names of its | ||
39 | * contributors may be used to endorse or promote products derived | ||
40 | * from this software without specific prior written permission. | ||
41 | * | ||
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
53 | * | ||
54 | */ | ||
55 | |||
56 | #include <linux/linkage.h> | ||
57 | |||
58 | #define DIGEST_PTR %rdi /* 1st arg: hash state, read and written as two 16-byte halves */ | |
59 | #define DATA_PTR %rsi /* 2nd arg: input data, advanced by 64 bytes per block */ | |
60 | #define NUM_BLKS %rdx /* 3rd arg: block count on entry, reused as end-of-data pointer */ | |
61 | |||
62 | #define SHA256CONSTANTS %rax /* address of the K256 round-constant table */ | |
63 | |||
64 | #define MSG %xmm0 /* message words + round constants; xmm0 is the implicit operand of sha256rnds2 */ | |
65 | #define STATE0 %xmm1 /* working state, ABEF word order inside the loop */ | |
66 | #define STATE1 %xmm2 /* working state, CDGH word order inside the loop */ | |
67 | #define MSGTMP0 %xmm3 /* MSGTMP0-3: four-word groups of the message schedule */ | |
68 | #define MSGTMP1 %xmm4 | |
69 | #define MSGTMP2 %xmm5 | |
70 | #define MSGTMP3 %xmm6 | |
71 | #define MSGTMP4 %xmm7 /* scratch register for shuffles and schedule sums */ | |
72 | |||
73 | #define SHUF_MASK %xmm8 /* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK */ | |
74 | |||
75 | #define ABEF_SAVE %xmm9 /* copy of STATE0 taken at the start of each block */ | |
76 | #define CDGH_SAVE %xmm10 /* copy of STATE1 taken at the start of each block */ | |
77 | |||
78 | /* | ||
79 | * Intel SHA Extensions optimized implementation of a SHA-256 update function | ||
80 | * | ||
81 | * The function takes a pointer to the current hash values, a pointer to the | ||
82 | * input data, and a number of 64 byte blocks to process. Once all blocks have | ||
83 | * been processed, the digest buffer is updated with the resulting hash value. | |
84 | * The function only processes complete blocks, there is no functionality to | ||
85 | * store partial blocks. All message padding and hash value initialization must | ||
86 | * be done outside the update function. | ||
87 | * | ||
88 | * The indented lines in the loop are instructions related to rounds processing. | ||
89 | * The non-indented lines are instructions related to the message schedule. | ||
90 | * | ||
91 | * void sha256_ni_transform(uint32_t *digest, const void *data, | ||
92 | * uint32_t numBlocks); | |
93 | * digest : pointer to digest | ||
94 | * data: pointer to input data | ||
95 | * numBlocks: Number of blocks to process | ||
96 | */ | ||
97 | |||
98 | .text | ||
99 | .align 32 | ||
100 | ENTRY(sha256_ni_transform) | |
101 | |||
102 | shl $6, NUM_BLKS /* convert to bytes; NOTE(review): shifts the full 64-bit %rdx although the prototype comment says uint32_t - confirm callers pass a zero-extended count */ | |
103 | jz .Ldone_hash /* byte count is zero: leave the digest untouched */ | |
104 | add DATA_PTR, NUM_BLKS /* pointer to end of data */ | |
105 | |||
106 | /* | |
107 | * load initial hash values | |
108 | * Need to reorder these appropriately | |
109 | * DCBA, HGFE -> ABEF, CDGH | |
110 | */ | |
111 | movdqu 0*16(DIGEST_PTR), STATE0 /* first 16 bytes of the digest (unaligned load) */ | |
112 | movdqu 1*16(DIGEST_PTR), STATE1 /* second 16 bytes of the digest (unaligned load) */ | |
113 | |||
114 | pshufd $0xB1, STATE0, STATE0 /* CDAB */ | |
115 | pshufd $0x1B, STATE1, STATE1 /* EFGH */ | |
116 | movdqa STATE0, MSGTMP4 /* keep CDAB: palignr below overwrites STATE0 */ | |
117 | palignr $8, STATE1, STATE0 /* ABEF */ | |
118 | pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ | |
119 | |||
120 | movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK /* aligned load: the mask must be 16-byte aligned */ | |
121 | lea K256(%rip), SHA256CONSTANTS /* base address for the i*16(SHA256CONSTANTS) operands */ | |
122 | |||
123 | .Lloop0: | |
124 | /* Save hash values for addition after rounds */ | |
125 | movdqa STATE0, ABEF_SAVE | |
126 | movdqa STATE1, CDGH_SAVE | |
127 | |||
128 | /* Rounds 0-3 */ | |
129 | movdqu 0*16(DATA_PTR), MSG /* message bytes 0-15 of this block (unaligned load) */ | |
130 | pshufb SHUF_MASK, MSG /* reverse the byte order inside each 32-bit word */ | |
131 | movdqa MSG, MSGTMP0 /* keep W[0..3] for the message schedule */ | |
132 | paddd 0*16(SHA256CONSTANTS), MSG /* add the four round constants for these rounds */ | |
133 | sha256rnds2 STATE0, STATE1 /* two rounds, consuming the low two words of MSG (xmm0, implicit) */ | |
134 | pshufd $0x0E, MSG, MSG /* bring the high two words of MSG down to the low half */ | |
135 | sha256rnds2 STATE1, STATE0 /* two more rounds with the roles of STATE0/STATE1 swapped */ | |
136 | |||
137 | /* Rounds 4-7 */ | |
138 | movdqu 1*16(DATA_PTR), MSG | |
139 | pshufb SHUF_MASK, MSG | |
140 | movdqa MSG, MSGTMP1 /* keep W[4..7] */ | |
141 | paddd 1*16(SHA256CONSTANTS), MSG | |
142 | sha256rnds2 STATE0, STATE1 | |
143 | pshufd $0x0E, MSG, MSG | |
144 | sha256rnds2 STATE1, STATE0 | |
145 | sha256msg1 MSGTMP1, MSGTMP0 /* begin W[16..19]: W[t-16] + sigma0(W[t-15]) */ | |
146 | |||
147 | /* Rounds 8-11 */ | |
148 | movdqu 2*16(DATA_PTR), MSG | |
149 | pshufb SHUF_MASK, MSG | |
150 | movdqa MSG, MSGTMP2 /* keep W[8..11] */ | |
151 | paddd 2*16(SHA256CONSTANTS), MSG | |
152 | sha256rnds2 STATE0, STATE1 | |
153 | pshufd $0x0E, MSG, MSG | |
154 | sha256rnds2 STATE1, STATE0 | |
155 | sha256msg1 MSGTMP2, MSGTMP1 | |
156 | |||
157 | /* Rounds 12-15 */ | |
158 | movdqu 3*16(DATA_PTR), MSG | |
159 | pshufb SHUF_MASK, MSG | |
160 | movdqa MSG, MSGTMP3 /* keep W[12..15] */ | |
161 | paddd 3*16(SHA256CONSTANTS), MSG | |
162 | sha256rnds2 STATE0, STATE1 | |
163 | movdqa MSGTMP3, MSGTMP4 | |
164 | palignr $4, MSGTMP2, MSGTMP4 /* MSGTMP4 = W[9..12], the W[t-7] terms for W[16..19] */ | |
165 | paddd MSGTMP4, MSGTMP0 /* add the W[t-7] terms to the partial schedule sum */ | |
166 | sha256msg2 MSGTMP3, MSGTMP0 /* finish W[16..19]: add sigma1(W[t-2]); same pattern repeats below */ | |
167 | pshufd $0x0E, MSG, MSG | |
168 | sha256rnds2 STATE1, STATE0 | |
169 | sha256msg1 MSGTMP3, MSGTMP2 | |
170 | |||
171 | /* Rounds 16-19 */ | |
172 | movdqa MSGTMP0, MSG /* from here on the message words come from the schedule, not from DATA_PTR */ | |
173 | paddd 4*16(SHA256CONSTANTS), MSG | |
174 | sha256rnds2 STATE0, STATE1 | |
175 | movdqa MSGTMP0, MSGTMP4 | |
176 | palignr $4, MSGTMP3, MSGTMP4 | |
177 | paddd MSGTMP4, MSGTMP1 | |
178 | sha256msg2 MSGTMP0, MSGTMP1 | |
179 | pshufd $0x0E, MSG, MSG | |
180 | sha256rnds2 STATE1, STATE0 | |
181 | sha256msg1 MSGTMP0, MSGTMP3 | |
182 | |||
183 | /* Rounds 20-23 */ | |
184 | movdqa MSGTMP1, MSG | |
185 | paddd 5*16(SHA256CONSTANTS), MSG | |
186 | sha256rnds2 STATE0, STATE1 | |
187 | movdqa MSGTMP1, MSGTMP4 | |
188 | palignr $4, MSGTMP0, MSGTMP4 | |
189 | paddd MSGTMP4, MSGTMP2 | |
190 | sha256msg2 MSGTMP1, MSGTMP2 | |
191 | pshufd $0x0E, MSG, MSG | |
192 | sha256rnds2 STATE1, STATE0 | |
193 | sha256msg1 MSGTMP1, MSGTMP0 | |
194 | |||
195 | /* Rounds 24-27 */ | |
196 | movdqa MSGTMP2, MSG | |
197 | paddd 6*16(SHA256CONSTANTS), MSG | |
198 | sha256rnds2 STATE0, STATE1 | |
199 | movdqa MSGTMP2, MSGTMP4 | |
200 | palignr $4, MSGTMP1, MSGTMP4 | |
201 | paddd MSGTMP4, MSGTMP3 | |
202 | sha256msg2 MSGTMP2, MSGTMP3 | |
203 | pshufd $0x0E, MSG, MSG | |
204 | sha256rnds2 STATE1, STATE0 | |
205 | sha256msg1 MSGTMP2, MSGTMP1 | |
206 | |||
207 | /* Rounds 28-31 */ | |
208 | movdqa MSGTMP3, MSG | |
209 | paddd 7*16(SHA256CONSTANTS), MSG | |
210 | sha256rnds2 STATE0, STATE1 | |
211 | movdqa MSGTMP3, MSGTMP4 | |
212 | palignr $4, MSGTMP2, MSGTMP4 | |
213 | paddd MSGTMP4, MSGTMP0 | |
214 | sha256msg2 MSGTMP3, MSGTMP0 | |
215 | pshufd $0x0E, MSG, MSG | |
216 | sha256rnds2 STATE1, STATE0 | |
217 | sha256msg1 MSGTMP3, MSGTMP2 | |
218 | |||
219 | /* Rounds 32-35 */ | |
220 | movdqa MSGTMP0, MSG | |
221 | paddd 8*16(SHA256CONSTANTS), MSG | |
222 | sha256rnds2 STATE0, STATE1 | |
223 | movdqa MSGTMP0, MSGTMP4 | |
224 | palignr $4, MSGTMP3, MSGTMP4 | |
225 | paddd MSGTMP4, MSGTMP1 | |
226 | sha256msg2 MSGTMP0, MSGTMP1 | |
227 | pshufd $0x0E, MSG, MSG | |
228 | sha256rnds2 STATE1, STATE0 | |
229 | sha256msg1 MSGTMP0, MSGTMP3 | |
230 | |||
231 | /* Rounds 36-39 */ | |
232 | movdqa MSGTMP1, MSG | |
233 | paddd 9*16(SHA256CONSTANTS), MSG | |
234 | sha256rnds2 STATE0, STATE1 | |
235 | movdqa MSGTMP1, MSGTMP4 | |
236 | palignr $4, MSGTMP0, MSGTMP4 | |
237 | paddd MSGTMP4, MSGTMP2 | |
238 | sha256msg2 MSGTMP1, MSGTMP2 | |
239 | pshufd $0x0E, MSG, MSG | |
240 | sha256rnds2 STATE1, STATE0 | |
241 | sha256msg1 MSGTMP1, MSGTMP0 | |
242 | |||
243 | /* Rounds 40-43 */ | |
244 | movdqa MSGTMP2, MSG | |
245 | paddd 10*16(SHA256CONSTANTS), MSG | |
246 | sha256rnds2 STATE0, STATE1 | |
247 | movdqa MSGTMP2, MSGTMP4 | |
248 | palignr $4, MSGTMP1, MSGTMP4 | |
249 | paddd MSGTMP4, MSGTMP3 | |
250 | sha256msg2 MSGTMP2, MSGTMP3 | |
251 | pshufd $0x0E, MSG, MSG | |
252 | sha256rnds2 STATE1, STATE0 | |
253 | sha256msg1 MSGTMP2, MSGTMP1 | |
254 | |||
255 | /* Rounds 44-47 */ | |
256 | movdqa MSGTMP3, MSG | |
257 | paddd 11*16(SHA256CONSTANTS), MSG | |
258 | sha256rnds2 STATE0, STATE1 | |
259 | movdqa MSGTMP3, MSGTMP4 | |
260 | palignr $4, MSGTMP2, MSGTMP4 | |
261 | paddd MSGTMP4, MSGTMP0 | |
262 | sha256msg2 MSGTMP3, MSGTMP0 | |
263 | pshufd $0x0E, MSG, MSG | |
264 | sha256rnds2 STATE1, STATE0 | |
265 | sha256msg1 MSGTMP3, MSGTMP2 | |
266 | |||
267 | /* Rounds 48-51 */ | |
268 | movdqa MSGTMP0, MSG | |
269 | paddd 12*16(SHA256CONSTANTS), MSG | |
270 | sha256rnds2 STATE0, STATE1 | |
271 | movdqa MSGTMP0, MSGTMP4 | |
272 | palignr $4, MSGTMP3, MSGTMP4 | |
273 | paddd MSGTMP4, MSGTMP1 | |
274 | sha256msg2 MSGTMP0, MSGTMP1 | |
275 | pshufd $0x0E, MSG, MSG | |
276 | sha256rnds2 STATE1, STATE0 | |
277 | sha256msg1 MSGTMP0, MSGTMP3 | |
278 | |||
279 | /* Rounds 52-55 */ | |
280 | movdqa MSGTMP1, MSG | |
281 | paddd 13*16(SHA256CONSTANTS), MSG | |
282 | sha256rnds2 STATE0, STATE1 | |
283 | movdqa MSGTMP1, MSGTMP4 | |
284 | palignr $4, MSGTMP0, MSGTMP4 | |
285 | paddd MSGTMP4, MSGTMP2 | |
286 | sha256msg2 MSGTMP1, MSGTMP2 | |
287 | pshufd $0x0E, MSG, MSG | |
288 | sha256rnds2 STATE1, STATE0 /* no sha256msg1 from here on: no schedule words beyond W[63] are needed */ | |
289 | |||
290 | /* Rounds 56-59 */ | |
291 | movdqa MSGTMP2, MSG | |
292 | paddd 14*16(SHA256CONSTANTS), MSG | |
293 | sha256rnds2 STATE0, STATE1 | |
294 | movdqa MSGTMP2, MSGTMP4 | |
295 | palignr $4, MSGTMP1, MSGTMP4 | |
296 | paddd MSGTMP4, MSGTMP3 | |
297 | sha256msg2 MSGTMP2, MSGTMP3 | |
298 | pshufd $0x0E, MSG, MSG | |
299 | sha256rnds2 STATE1, STATE0 | |
300 | |||
301 | /* Rounds 60-63 */ | |
302 | movdqa MSGTMP3, MSG | |
303 | paddd 15*16(SHA256CONSTANTS), MSG | |
304 | sha256rnds2 STATE0, STATE1 | |
305 | pshufd $0x0E, MSG, MSG | |
306 | sha256rnds2 STATE1, STATE0 | |
307 | |||
308 | /* Add current hash values with previously saved */ | |
309 | paddd ABEF_SAVE, STATE0 | |
310 | paddd CDGH_SAVE, STATE1 | |
311 | |||
312 | /* Increment data pointer and loop if more to process */ | |
313 | add $64, DATA_PTR | |
314 | cmp NUM_BLKS, DATA_PTR /* NUM_BLKS holds the end-of-data pointer computed at entry */ | |
315 | jne .Lloop0 | |
316 | |||
317 | /* Write hash values back in the correct order */ | |
318 | pshufd $0x1B, STATE0, STATE0 /* FEBA */ | |
319 | pshufd $0xB1, STATE1, STATE1 /* DCHG */ | |
320 | movdqa STATE0, MSGTMP4 /* keep FEBA: pblendw below overwrites STATE0 */ | |
321 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ | |
322 | palignr $8, MSGTMP4, STATE1 /* HGFE */ | |
323 | |||
324 | movdqu STATE0, 0*16(DIGEST_PTR) /* store first 16 bytes of the digest */ | |
325 | movdqu STATE1, 1*16(DIGEST_PTR) /* store second 16 bytes of the digest */ | |
326 | |||
327 | .Ldone_hash: | |
328 | |||
329 | ret | |
330 | ENDPROC(sha256_ni_transform) | |
331 | |||
332 | .section .rodata /* tables below are only ever read, so keep them out of writable .data */ | |
333 | .align 64 | |
334 | K256: /* SHA-256 round constants K[0..63], four per row; row i is addressed as i*16(SHA256CONSTANTS) */ | |
335 | .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | |
336 | .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | |
337 | .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | |
338 | .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | |
339 | .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | |
340 | .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | |
341 | .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | |
342 | .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | |
343 | .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | |
344 | .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | |
345 | .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | |
346 | .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | |
347 | .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | |
348 | .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | |
349 | .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | |
350 | .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | |
351 | .align 16 /* the mask is loaded with movdqa, which faults on a misaligned address */ | |
352 | PSHUFFLE_BYTE_FLIP_MASK: /* pshufb control that reverses the byte order inside each 32-bit word */ | |
353 | .octa 0x0c0d0e0f08090a0b0405060700010203 | |