diff options
Diffstat (limited to 'arch/x86/crypto/sha1_ni_asm.S')
-rw-r--r-- | arch/x86/crypto/sha1_ni_asm.S | 302 |
1 files changed, 302 insertions, 0 deletions
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S new file mode 100644 index 000000000000..874a651b9e7d --- /dev/null +++ b/arch/x86/crypto/sha1_ni_asm.S | |||
@@ -0,0 +1,302 @@ | |||
1 | /* | ||
2 | * Intel SHA Extensions optimized implementation of a SHA-1 update function | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2015 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Sean Gulley <sean.m.gulley@intel.com> | ||
22 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
23 | * | ||
24 | * BSD LICENSE | ||
25 | * | ||
26 | * Copyright(c) 2015 Intel Corporation. | ||
27 | * | ||
28 | * Redistribution and use in source and binary forms, with or without | ||
29 | * modification, are permitted provided that the following conditions | ||
30 | * are met: | ||
31 | * | ||
32 | * * Redistributions of source code must retain the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer. | ||
34 | * * Redistributions in binary form must reproduce the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer in | ||
36 | * the documentation and/or other materials provided with the | ||
37 | * distribution. | ||
38 | * * Neither the name of Intel Corporation nor the names of its | ||
39 | * contributors may be used to endorse or promote products derived | ||
40 | * from this software without specific prior written permission. | ||
41 | * | ||
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
53 | * | ||
54 | */ | ||
55 | |||
56 | #include <linux/linkage.h> | ||
57 | |||
58 | #define DIGEST_PTR %rdi /* 1st arg: pointer to the hash state (read at entry, written at exit) */ | |
59 | #define DATA_PTR %rsi /* 2nd arg: input cursor, advanced by 64 per processed block */ | |
60 | #define NUM_BLKS %rdx /* 3rd arg: block count; turned into the end-of-data pointer */ | |
61 | |
62 | #define RSPSAVE %rax /* holds the caller's %rsp while the aligned frame is in use */ | |
63 | |
64 | /* Stack frame reserved by sha1_ni_transform */ | |
65 | #define FRAME_SIZE 32 /* space for 2x16 bytes: saved E0 at offset 0, saved ABCD at offset 16 */ | |
66 | |
67 | #define ABCD %xmm0 /* first four state words, kept in reversed dword order (pshufd $0x1B) */ | |
68 | #define E0 %xmm1 /* Need two E's b/c they ping pong */ | |
69 | #define E1 %xmm2 /* second E register: alternates with E0 every four rounds */ | |
70 | #define MSG0 %xmm3 /* MSG0-MSG3: the four 16-byte quarters of the block, then schedule words */ | |
71 | #define MSG1 %xmm4 | |
72 | #define MSG2 %xmm5 | |
73 | #define MSG3 %xmm6 | |
74 | #define SHUF_MASK %xmm7 /* loaded once from PSHUFFLE_BYTE_FLIP_MASK, used by every pshufb */ | |
75 | |||
76 | |||
77 | /* | |
78 | * Intel SHA Extensions optimized implementation of a SHA-1 update function | |
79 | * | |
80 | * The function takes a pointer to the current hash values, a pointer to the | |
81 | * input data, and a number of 64 byte blocks to process. Once all blocks have | |
82 | * been processed, the resulting hash value is written back through the digest pointer. | |
83 | * The function only processes complete blocks; there is no functionality to | |
84 | * store partial blocks. All message padding and hash value initialization must | |
85 | * be done outside the update function. | |
86 | * | |
87 | * The indented lines in the loop are instructions related to rounds processing. | |
88 | * The non-indented lines are instructions related to the message schedule. | |
89 | * | |
90 | * void sha1_ni_transform(uint32_t *digest, const void *data, | |
91 | * uint32_t numBlocks) | |
92 | * digest : pointer to digest (five 32-bit words; read on entry, rewritten on exit) | |
93 | * data: pointer to input data (read with unaligned 16-byte loads) | |
94 | * numBlocks: Number of 64 byte blocks to process; zero returns without touching the digest | |
95 | */ | |
96 | .text | |
97 | .align 32 | |
98 | ENTRY(sha1_ni_transform) | |
99 | mov %rsp, RSPSAVE /* remember the incoming %rsp; restored at .Ldone_hash */ | |
100 | sub $FRAME_SIZE, %rsp | |
101 | and $~0xF, %rsp /* 16-byte align the frame: it is accessed with movdqa */ | |
102 | |
103 | shl $6, NUM_BLKS /* convert to bytes; sets ZF when the result is zero */ | |
104 | jz .Ldone_hash /* nothing to hash: the digest is neither read nor written */ | |
105 | add DATA_PTR, NUM_BLKS /* pointer to end of data */ | |
106 | /* NOTE(review): the shift above uses all 64 bits of %rdx although numBlocks is documented as uint32_t; presumably callers pass clean upper bits - confirm */ | |
107 | /* load initial hash values: fifth word into the top dword of E0, first four words into ABCD */ | |
108 | pinsrd $3, 1*16(DIGEST_PTR), E0 | |
109 | movdqu 0*16(DIGEST_PTR), ABCD | |
110 | pand UPPER_WORD_MASK(%rip), E0 /* zero the three lower dwords that pinsrd left unchanged */ | |
111 | pshufd $0x1B, ABCD, ABCD /* reverse the dword order; undone by the same shuffle before the store */ | |
112 | |
113 | movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK /* loop-invariant byte-reversal mask for pshufb */ | |
114 | |
115 | .Lloop0: | |
116 | /* Save hash values for addition after rounds */ | |
117 | movdqa E0, (0*16)(%rsp) | |
118 | movdqa ABCD, (1*16)(%rsp) | |
119 | |
120 | /* Rounds 0-3 (message words are added to E0 with paddd here; later groups use sha1nexte) */ | |
121 | movdqu 0*16(DATA_PTR), MSG0 | |
122 | pshufb SHUF_MASK, MSG0 | |
123 | paddd MSG0, E0 | |
124 | movdqa ABCD, E1 | |
125 | sha1rnds4 $0, E0, ABCD | |
126 | |
127 | /* Rounds 4-7 */ | |
128 | movdqu 1*16(DATA_PTR), MSG1 | |
129 | pshufb SHUF_MASK, MSG1 | |
130 | sha1nexte MSG1, E1 | |
131 | movdqa ABCD, E0 | |
132 | sha1rnds4 $0, E1, ABCD | |
133 | sha1msg1 MSG1, MSG0 | |
134 | |
135 | /* Rounds 8-11 */ | |
136 | movdqu 2*16(DATA_PTR), MSG2 | |
137 | pshufb SHUF_MASK, MSG2 | |
138 | sha1nexte MSG2, E0 | |
139 | movdqa ABCD, E1 | |
140 | sha1rnds4 $0, E0, ABCD | |
141 | sha1msg1 MSG2, MSG1 | |
142 | pxor MSG2, MSG0 | |
143 | |
144 | /* Rounds 12-15 (last 16 bytes of the block are loaded here) */ | |
145 | movdqu 3*16(DATA_PTR), MSG3 | |
146 | pshufb SHUF_MASK, MSG3 | |
147 | sha1nexte MSG3, E1 | |
148 | movdqa ABCD, E0 | |
149 | sha1msg2 MSG3, MSG0 | |
150 | sha1rnds4 $0, E1, ABCD | |
151 | sha1msg1 MSG3, MSG2 | |
152 | pxor MSG3, MSG1 | |
153 | |
154 | /* Rounds 16-19 */ | |
155 | sha1nexte MSG0, E0 | |
156 | movdqa ABCD, E1 | |
157 | sha1msg2 MSG0, MSG1 | |
158 | sha1rnds4 $0, E0, ABCD | |
159 | sha1msg1 MSG0, MSG3 | |
160 | pxor MSG0, MSG2 | |
161 | |
162 | /* Rounds 20-23 (sha1rnds4 immediate switches to 1 for rounds 20-39) */ | |
163 | sha1nexte MSG1, E1 | |
164 | movdqa ABCD, E0 | |
165 | sha1msg2 MSG1, MSG2 | |
166 | sha1rnds4 $1, E1, ABCD | |
167 | sha1msg1 MSG1, MSG0 | |
168 | pxor MSG1, MSG3 | |
169 | |
170 | /* Rounds 24-27 */ | |
171 | sha1nexte MSG2, E0 | |
172 | movdqa ABCD, E1 | |
173 | sha1msg2 MSG2, MSG3 | |
174 | sha1rnds4 $1, E0, ABCD | |
175 | sha1msg1 MSG2, MSG1 | |
176 | pxor MSG2, MSG0 | |
177 | |
178 | /* Rounds 28-31 */ | |
179 | sha1nexte MSG3, E1 | |
180 | movdqa ABCD, E0 | |
181 | sha1msg2 MSG3, MSG0 | |
182 | sha1rnds4 $1, E1, ABCD | |
183 | sha1msg1 MSG3, MSG2 | |
184 | pxor MSG3, MSG1 | |
185 | |
186 | /* Rounds 32-35 */ | |
187 | sha1nexte MSG0, E0 | |
188 | movdqa ABCD, E1 | |
189 | sha1msg2 MSG0, MSG1 | |
190 | sha1rnds4 $1, E0, ABCD | |
191 | sha1msg1 MSG0, MSG3 | |
192 | pxor MSG0, MSG2 | |
193 | |
194 | /* Rounds 36-39 */ | |
195 | sha1nexte MSG1, E1 | |
196 | movdqa ABCD, E0 | |
197 | sha1msg2 MSG1, MSG2 | |
198 | sha1rnds4 $1, E1, ABCD | |
199 | sha1msg1 MSG1, MSG0 | |
200 | pxor MSG1, MSG3 | |
201 | |
202 | /* Rounds 40-43 (sha1rnds4 immediate switches to 2 for rounds 40-59) */ | |
203 | sha1nexte MSG2, E0 | |
204 | movdqa ABCD, E1 | |
205 | sha1msg2 MSG2, MSG3 | |
206 | sha1rnds4 $2, E0, ABCD | |
207 | sha1msg1 MSG2, MSG1 | |
208 | pxor MSG2, MSG0 | |
209 | |
210 | /* Rounds 44-47 */ | |
211 | sha1nexte MSG3, E1 | |
212 | movdqa ABCD, E0 | |
213 | sha1msg2 MSG3, MSG0 | |
214 | sha1rnds4 $2, E1, ABCD | |
215 | sha1msg1 MSG3, MSG2 | |
216 | pxor MSG3, MSG1 | |
217 | |
218 | /* Rounds 48-51 */ | |
219 | sha1nexte MSG0, E0 | |
220 | movdqa ABCD, E1 | |
221 | sha1msg2 MSG0, MSG1 | |
222 | sha1rnds4 $2, E0, ABCD | |
223 | sha1msg1 MSG0, MSG3 | |
224 | pxor MSG0, MSG2 | |
225 | |
226 | /* Rounds 52-55 */ | |
227 | sha1nexte MSG1, E1 | |
228 | movdqa ABCD, E0 | |
229 | sha1msg2 MSG1, MSG2 | |
230 | sha1rnds4 $2, E1, ABCD | |
231 | sha1msg1 MSG1, MSG0 | |
232 | pxor MSG1, MSG3 | |
233 | |
234 | /* Rounds 56-59 */ | |
235 | sha1nexte MSG2, E0 | |
236 | movdqa ABCD, E1 | |
237 | sha1msg2 MSG2, MSG3 | |
238 | sha1rnds4 $2, E0, ABCD | |
239 | sha1msg1 MSG2, MSG1 | |
240 | pxor MSG2, MSG0 | |
241 | |
242 | /* Rounds 60-63 (sha1rnds4 immediate switches to 3 for rounds 60-79) */ | |
243 | sha1nexte MSG3, E1 | |
244 | movdqa ABCD, E0 | |
245 | sha1msg2 MSG3, MSG0 | |
246 | sha1rnds4 $3, E1, ABCD | |
247 | sha1msg1 MSG3, MSG2 | |
248 | pxor MSG3, MSG1 | |
249 | |
250 | /* Rounds 64-67 */ | |
251 | sha1nexte MSG0, E0 | |
252 | movdqa ABCD, E1 | |
253 | sha1msg2 MSG0, MSG1 | |
254 | sha1rnds4 $3, E0, ABCD | |
255 | sha1msg1 MSG0, MSG3 | |
256 | pxor MSG0, MSG2 | |
257 | |
258 | /* Rounds 68-71 (no sha1msg1 is issued from this group onwards) */ | |
259 | sha1nexte MSG1, E1 | |
260 | movdqa ABCD, E0 | |
261 | sha1msg2 MSG1, MSG2 | |
262 | sha1rnds4 $3, E1, ABCD | |
263 | pxor MSG1, MSG3 | |
264 | |
265 | /* Rounds 72-75 (last sha1msg2 of the block) */ | |
266 | sha1nexte MSG2, E0 | |
267 | movdqa ABCD, E1 | |
268 | sha1msg2 MSG2, MSG3 | |
269 | sha1rnds4 $3, E0, ABCD | |
270 | |
271 | /* Rounds 76-79 */ | |
272 | sha1nexte MSG3, E1 | |
273 | movdqa ABCD, E0 | |
274 | sha1rnds4 $3, E1, ABCD | |
275 | |
276 | /* Add current hash values with previously saved (the two slots stored at the top of the loop) */ | |
277 | sha1nexte (0*16)(%rsp), E0 | |
278 | paddd (1*16)(%rsp), ABCD | |
279 | |
280 | /* Increment data pointer and loop if more to process (NUM_BLKS holds the end-of-data pointer) */ | |
281 | add $64, DATA_PTR | |
282 | cmp NUM_BLKS, DATA_PTR | |
283 | jne .Lloop0 | |
284 | |
285 | /* Write hash values back in the correct order: undo the entry shuffle, then store 4 + 1 words */ | |
286 | pshufd $0x1B, ABCD, ABCD | |
287 | movdqu ABCD, 0*16(DIGEST_PTR) | |
288 | pextrd $3, E0, 1*16(DIGEST_PTR) | |
289 | |
290 | .Ldone_hash: | |
291 | mov RSPSAVE, %rsp /* drop the aligned frame; RSPSAVE is not written anywhere in between */ | |
292 | |
293 | ret | |
294 | ENDPROC(sha1_ni_transform) | |
295 | |||
296 | .section .rodata /* both tables are only ever read, so keep them out of writable .data */ | |
297 | |
298 | .align 64 | |
299 | PSHUFFLE_BYTE_FLIP_MASK: /* pshufb control: result byte i takes source byte 15-i (full 16-byte reversal) */ | |
300 | .octa 0x000102030405060708090a0b0c0d0e0f | |
301 | UPPER_WORD_MASK: /* pand mask: keeps the most significant 32 bits, clears the lower 96 */ | |
302 | .octa 0xFFFFFFFF000000000000000000000000 | |