Diffstat (limited to 'arch/arm/crypto/sha1-armv4-large.S')
-rw-r--r--	arch/arm/crypto/sha1-armv4-large.S	503
1 file changed, 503 insertions, 0 deletions
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
new file mode 100644
index 000000000000..7050ab133b9d
--- /dev/null
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -0,0 +1,503 @@
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ sha1_block procedure for ARMv4.
@
@ January 2007.

@ Size/performance trade-off
@ ====================================================================
@ impl		size in bytes	comp cycles[*]	measured performance
@ ====================================================================
@ thumb		304		3212		4420
@ armv4-small	392/+29%	1958/+64%	2250/+96%
@ armv4-compact	740/+89%	1552/+26%	1840/+22%
@ armv4-large	1420/+92%	1307/+19%	1370/+34%[***]
@ full unroll	~5100/+260%	~1260/+4%	~1300/+5%
@ ====================================================================
@ thumb		= same as 'small' but in Thumb instructions[**] and
@		  with recurring code in two private functions;
@ small		= detached Xload/update, loops are folded;
@ compact	= detached Xload/update, 5x unroll;
@ large		= interleaved Xload/update, 5x unroll;
@ full unroll	= interleaved Xload/update, full unroll, estimated[!];
@
@ [*]	Manually counted instructions in "grand" loop body. Measured
@	performance is affected by prologue and epilogue overhead,
@	i-cache availability, branch penalties, etc.
@ [**]	While each Thumb instruction is half the size of its ARM
@	counterpart, the Thumb set is not as diverse: e.g., there are
@	only two arithmetic instructions with 3 arguments, no [fixed]
@	rotate, and addressing modes are limited. As a result it takes
@	more instructions to do the same job in Thumb, so the code is
@	never half the size and is always slower.
@ [***]	which is also ~35% better than compiler generated code. Dual-
@	issue Cortex A8 core was measured to process input block in
@	~990 cycles.

@ August 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 13% improvement on
@ Cortex A8 core and in absolute terms ~870 cycles per input block
@ [or 13.6 cycles per byte].

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 10%
@ improvement on Cortex A8 core and 12.2 cycles per byte.
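
@ Calling convention used below: r0 points at the five-word SHA-1
@ state, r1 at the input data, and r2 holds the number of 64-byte
@ blocks ('add r2,r1,r2,lsl#6' turns it into an end pointer). In C
@ terms the entry point looks roughly like this (a sketch; the exact
@ typing is an assumption, not taken from this file):
@
@	void sha1_block_data_order(u32 *digest, const void *data,
@				   unsigned int blocks);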
.text

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
sha1_block_data_order:
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6		@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr	r8,.LK_00_19
	mov	r14,sp
	sub	sp,sp,#15*4
	mov	r5,r5,ror#30
	mov	r6,r6,ror#30
	mov	r7,r7,ror#30		@ [6]
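@ The 80 expanded message words are stored downwards from the original
@ sp: r14 holds the old sp and every round pushes X[i] with
@ 'str r9,[r14,#-4]!', while the staged 'sub sp' adjustments park sp
@ where r14 will land after the next batch of rounds, so 'teq r14,sp'
@ doubles as the loop terminator. 80*4 bytes in total, released again
@ at .L_done.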
.L_00_15:
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9		@ E+=X[i]
	eor	r10,r10,r6,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10		@ E+=F_00_19(B,C,D)
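@ What one round above computes, in C terms (a sketch; rol32 and
@ get_unaligned_be32 stand for hypothetical helpers):
@
@	X[i] = get_unaligned_be32(data); data += 4;
@	F    = D ^ (B & (C ^ D));	/* Ch(B,C,D), rounds 0-19 */
@	E   += rol32(A,5) + F + K_00_19 + X[i];
@
@ ROR #27 is ROL #5, and the standard B=ROL(B,30) is never performed
@ eagerly: the working variables are kept pre-rotated and the pending
@ ROR #2 is folded into the instructions that consume them (the
@ ',ror#2' operands above and the 'mov rX,rX,ror#30' at .Lloop).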
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r6,r8,r6,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r4,r5		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r6,r8,r6,ror#2		@ E+=K_00_19
	eor	r10,r4,r5		@ F_xx_xx
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r3,r10,ror#2
	add	r6,r6,r9		@ E+=X[i]
	eor	r10,r10,r5,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r6,r6,r10		@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r5,r8,r5,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r3,r4		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r5,r8,r5,ror#2		@ E+=K_00_19
	eor	r10,r3,r4		@ F_xx_xx
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r7,r10,ror#2
	add	r5,r5,r9		@ E+=X[i]
	eor	r10,r10,r4,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r5,r5,r10		@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r4,r8,r4,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r7,r3		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r4,r8,r4,ror#2		@ E+=K_00_19
	eor	r10,r7,r3		@ F_xx_xx
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r6,r10,ror#2
	add	r4,r4,r9		@ E+=X[i]
	eor	r10,r10,r3,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r4,r4,r10		@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r3,r8,r3,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r6,r7		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r3,r8,r3,ror#2		@ E+=K_00_19
	eor	r10,r6,r7		@ F_xx_xx
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r5,r10,ror#2
	add	r3,r3,r9		@ E+=X[i]
	eor	r10,r10,r7,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r3,r3,r10		@ E+=F_00_19(B,C,D)
	teq	r14,sp
	bne	.L_00_15		@ [((11+4)*5+2)*3]
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9		@ E+=X[i]
	eor	r10,r10,r6,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10		@ E+=F_00_19(B,C,D)
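@ From round 16 on, X[i] comes from the schedule instead of memory.
@ The four loads below pick up X[i-16], X[i-14], X[i-8] and X[i-3]
@ (offsets 15, 13, 7 and 2 words above the descending r14 pointer),
@ and ROR #31 == ROL #1 completes the standard recurrence; in C
@ (rol32 is a hypothetical helper):
@
@	X[i] = rol32(X[i-3] ^ X[i-8] ^ X[i-14] ^ X[i-16], 1);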
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r4,r5		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r6,r6,r9		@ E+=X[i]
	eor	r10,r10,r5,ror#2	@ F_00_19(B,C,D)
	add	r6,r6,r10		@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r3,r4		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r5,r5,r9		@ E+=X[i]
	eor	r10,r10,r4,ror#2	@ F_00_19(B,C,D)
	add	r5,r5,r10		@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r7,r3		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r4,r4,r9		@ E+=X[i]
	eor	r10,r10,r3,ror#2	@ F_00_19(B,C,D)
	add	r4,r4,r10		@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r6,r7		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r3,r3,r9		@ E+=X[i]
	eor	r10,r10,r7,ror#2	@ F_00_19(B,C,D)
	add	r3,r3,r10		@ E+=F_00_19(B,C,D)

	ldr	r8,.LK_20_39		@ [+15+16*4]
	sub	sp,sp,#25*4
	cmn	sp,#0			@ [+3], clear carry to denote 20_39
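@ Rounds 20-39 and 60-79 share one loop body: both use the parity
@ function F = B ^ C ^ D and differ only in the K constant, so the
@ carry flag (cleared by 'cmn sp,#0' here, set by 'cmp sp,#0' before
@ the second entry) records which pass this is; 'teq r14,sp' leaves
@ carry untouched and 'bcs .L_done' exits after the 60-79 pass.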
.L_20_39_or_60_79:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r5,r6		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r4,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r7,r7,r9		@ E+=X[i]
	add	r7,r7,r10		@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r4,r5		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r3,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r6,r6,r9		@ E+=X[i]
	add	r6,r6,r10		@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r3,r4		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r7,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r5,r5,r9		@ E+=X[i]
	add	r5,r5,r10		@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r7,r3		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r6,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r4,r4,r9		@ E+=X[i]
	add	r4,r4,r10		@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r6,r7		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r5,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r3,r3,r9		@ E+=X[i]
	add	r3,r3,r10		@ E+=F_20_39(B,C,D)
	teq	r14,sp			@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ [+2]
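@ Rounds 40-59 use the majority function. The loop below computes it
@ as two bitwise-disjoint terms, so the final add cannot carry between
@ them and equals the OR; in C terms (a sketch, rol32 hypothetical):
@
@	F  = (B & (C ^ D)) + (C & D);	/* == (B&C)|(B&D)|(C&D) */
@	E += rol32(A,5) + F + K_40_59 + X[i];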
.L_40_59:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r5,r6		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r4,r10,ror#2	@ F_xx_xx
	and	r11,r5,r6		@ F_xx_xx
	add	r7,r7,r9		@ E+=X[i]
	add	r7,r7,r10		@ E+=F_40_59(B,C,D)
	add	r7,r7,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r4,r5		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2	@ F_xx_xx
	and	r11,r4,r5		@ F_xx_xx
	add	r6,r6,r9		@ E+=X[i]
	add	r6,r6,r10		@ E+=F_40_59(B,C,D)
	add	r6,r6,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r3,r4		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2	@ F_xx_xx
	and	r11,r3,r4		@ F_xx_xx
	add	r5,r5,r9		@ E+=X[i]
	add	r5,r5,r10		@ E+=F_40_59(B,C,D)
	add	r5,r5,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r7,r3		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2	@ F_xx_xx
	and	r11,r7,r3		@ F_xx_xx
	add	r4,r4,r9		@ E+=X[i]
	add	r4,r4,r10		@ E+=F_40_59(B,C,D)
	add	r4,r4,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r6,r7		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2	@ F_xx_xx
	and	r11,r6,r7		@ F_xx_xx
	add	r3,r3,r9		@ E+=X[i]
	add	r3,r3,r10		@ E+=F_40_59(B,C,D)
	add	r3,r3,r11,ror#2
	teq	r14,sp
	bne	.L_40_59		@ [+((12+5)*5+2)*4]

	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
.L_done:
	add	sp,sp,#80*4		@ "deallocate" stack frame
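@ Fold the working variables back into the state. c, d and e still
@ carry the deferred rotation, hence the ',ror#2' on the last three
@ adds below.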
	ldmia	r0,{r8,r9,r10,r11,r12}
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2
	bne	.Lloop			@ [+18], total 1307

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e		@ interoperable with Thumb ISA:-)
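@ (0xe12fff1e is the ARM encoding of 'bx lr', emitted as a .word so
@ the file still assembles for targets whose assembler lacks bx.)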
#endif
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align	2