diff options
author | Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com> | 2015-02-06 11:59:35 -0500 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2015-02-28 05:13:46 -0500 |
commit | 5c380d623ed30b71a2441fb4f2e053a4e1a50794 (patch) | |
tree | 90caf15f95a1adc480bf91b9736a5d16028e6045 | |
parent | cc333cd68dfae191ce02308657a50f21d63b7cd5 (diff) |
crypto: vmx - Add support for VMX instructions by ASM
OpenSSL implements optimized ASM algorithms which support
VMX instructions on Power 8 CPU.
These scripts generate an endian-agnostic ASM implementation
in order to support both big and little-endian.
- aesp8-ppc.pl: implements support for AES instructions
implemented by POWER8 processor.
- ghashp8-ppc.pl: implements support for GHASH for Power8.
- ppc-xlate.pl: ppc assembler distiller.
This code has been adapted from the OpenSSL project in collaboration
with the original author (Andy Polyakov <appro@openssl.org>).
Signed-off-by: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rwxr-xr-x | drivers/crypto/vmx/aesp8-ppc.pl | 1940 | ||||
-rwxr-xr-x | drivers/crypto/vmx/ghashp8-ppc.pl | 234 | ||||
-rwxr-xr-x | drivers/crypto/vmx/ppc-xlate.pl | 226 |
3 files changed, 2400 insertions, 0 deletions
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl new file mode 100755 index 000000000000..3ee8979e7625 --- /dev/null +++ b/drivers/crypto/vmx/aesp8-ppc.pl | |||
@@ -0,0 +1,1940 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | # | ||
10 | # This module implements support for AES instructions as per PowerISA | ||
11 | # specification version 2.07, first implemented by POWER8 processor. | ||
12 | # The module is endian-agnostic in sense that it supports both big- | ||
13 | # and little-endian cases. Data alignment in parallelizable modes is | ||
14 | # handled with VSX loads and stores, which implies MSR.VSX flag being | ||
15 | # set. It should also be noted that ISA specification doesn't prohibit | ||
16 | # alignment exceptions for these instructions on page boundaries. | ||
17 | # Initially alignment was handled in pure AltiVec/VMX way [when data | ||
18 | # is aligned programmatically, which in turn guarantees exception- | ||
19 | # free execution], but it turned to hamper performance when vcipher | ||
20 | # instructions are interleaved. It's reckoned that eventual | ||
21 | # misalignment penalties at page boundaries are in average lower | ||
22 | # than additional overhead in pure AltiVec approach. | ||
23 | |||
24 | $flavour = shift; | ||
25 | |||
26 | if ($flavour =~ /64/) { | ||
27 | $SIZE_T =8; | ||
28 | $LRSAVE =2*$SIZE_T; | ||
29 | $STU ="stdu"; | ||
30 | $POP ="ld"; | ||
31 | $PUSH ="std"; | ||
32 | $UCMP ="cmpld"; | ||
33 | $SHL ="sldi"; | ||
34 | } elsif ($flavour =~ /32/) { | ||
35 | $SIZE_T =4; | ||
36 | $LRSAVE =$SIZE_T; | ||
37 | $STU ="stwu"; | ||
38 | $POP ="lwz"; | ||
39 | $PUSH ="stw"; | ||
40 | $UCMP ="cmplw"; | ||
41 | $SHL ="slwi"; | ||
42 | } else { die "nonsense $flavour"; } | ||
43 | |||
44 | $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; | ||
45 | |||
46 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | ||
47 | ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or | ||
48 | ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or | ||
49 | die "can't locate ppc-xlate.pl"; | ||
50 | |||
51 | open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; | ||
52 | |||
53 | $FRAME=8*$SIZE_T; | ||
54 | $prefix="aes_p8"; | ||
55 | |||
56 | $sp="r1"; | ||
57 | $vrsave="r12"; | ||
58 | |||
59 | ######################################################################### | ||
60 | {{{ # Key setup procedures # | ||
61 | my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); | ||
62 | my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); | ||
63 | my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); | ||
64 | |||
65 | $code.=<<___; | ||
66 | .machine "any" | ||
67 | |||
68 | .text | ||
69 | |||
70 | .align 7 | ||
71 | rcon: | ||
72 | .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev | ||
73 | .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev | ||
74 | .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev | ||
75 | .long 0,0,0,0 ?asis | ||
76 | Lconsts: | ||
77 | mflr r0 | ||
78 | bcl 20,31,\$+4 | ||
79 | mflr $ptr #vvvvv "distance between . and rcon | ||
80 | addi $ptr,$ptr,-0x48 | ||
81 | mtlr r0 | ||
82 | blr | ||
83 | .long 0 | ||
84 | .byte 0,12,0x14,0,0,0,0,0 | ||
85 | .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" | ||
86 | |||
87 | .globl .${prefix}_set_encrypt_key | ||
88 | .align 5 | ||
89 | .${prefix}_set_encrypt_key: | ||
90 | Lset_encrypt_key: | ||
91 | mflr r11 | ||
92 | $PUSH r11,$LRSAVE($sp) | ||
93 | |||
94 | li $ptr,-1 | ||
95 | ${UCMP}i $inp,0 | ||
96 | beq- Lenc_key_abort # if ($inp==0) return -1; | ||
97 | ${UCMP}i $out,0 | ||
98 | beq- Lenc_key_abort # if ($out==0) return -1; | ||
99 | li $ptr,-2 | ||
100 | cmpwi $bits,128 | ||
101 | blt- Lenc_key_abort | ||
102 | cmpwi $bits,256 | ||
103 | bgt- Lenc_key_abort | ||
104 | andi. r0,$bits,0x3f | ||
105 | bne- Lenc_key_abort | ||
106 | |||
107 | lis r0,0xfff0 | ||
108 | mfspr $vrsave,256 | ||
109 | mtspr 256,r0 | ||
110 | |||
111 | bl Lconsts | ||
112 | mtlr r11 | ||
113 | |||
114 | neg r9,$inp | ||
115 | lvx $in0,0,$inp | ||
116 | addi $inp,$inp,15 # 15 is not typo | ||
117 | lvsr $key,0,r9 # borrow $key | ||
118 | li r8,0x20 | ||
119 | cmpwi $bits,192 | ||
120 | lvx $in1,0,$inp | ||
121 | le?vspltisb $mask,0x0f # borrow $mask | ||
122 | lvx $rcon,0,$ptr | ||
123 | le?vxor $key,$key,$mask # adjust for byte swap | ||
124 | lvx $mask,r8,$ptr | ||
125 | addi $ptr,$ptr,0x10 | ||
126 | vperm $in0,$in0,$in1,$key # align [and byte swap in LE] | ||
127 | li $cnt,8 | ||
128 | vxor $zero,$zero,$zero | ||
129 | mtctr $cnt | ||
130 | |||
131 | ?lvsr $outperm,0,$out | ||
132 | vspltisb $outmask,-1 | ||
133 | lvx $outhead,0,$out | ||
134 | ?vperm $outmask,$zero,$outmask,$outperm | ||
135 | |||
136 | blt Loop128 | ||
137 | addi $inp,$inp,8 | ||
138 | beq L192 | ||
139 | addi $inp,$inp,8 | ||
140 | b L256 | ||
141 | |||
142 | .align 4 | ||
143 | Loop128: | ||
144 | vperm $key,$in0,$in0,$mask # rotate-n-splat | ||
145 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
146 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
147 | vsel $stage,$outhead,$outtail,$outmask | ||
148 | vmr $outhead,$outtail | ||
149 | vcipherlast $key,$key,$rcon | ||
150 | stvx $stage,0,$out | ||
151 | addi $out,$out,16 | ||
152 | |||
153 | vxor $in0,$in0,$tmp | ||
154 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
155 | vxor $in0,$in0,$tmp | ||
156 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
157 | vxor $in0,$in0,$tmp | ||
158 | vadduwm $rcon,$rcon,$rcon | ||
159 | vxor $in0,$in0,$key | ||
160 | bdnz Loop128 | ||
161 | |||
162 | lvx $rcon,0,$ptr # last two round keys | ||
163 | |||
164 | vperm $key,$in0,$in0,$mask # rotate-n-splat | ||
165 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
166 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
167 | vsel $stage,$outhead,$outtail,$outmask | ||
168 | vmr $outhead,$outtail | ||
169 | vcipherlast $key,$key,$rcon | ||
170 | stvx $stage,0,$out | ||
171 | addi $out,$out,16 | ||
172 | |||
173 | vxor $in0,$in0,$tmp | ||
174 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
175 | vxor $in0,$in0,$tmp | ||
176 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
177 | vxor $in0,$in0,$tmp | ||
178 | vadduwm $rcon,$rcon,$rcon | ||
179 | vxor $in0,$in0,$key | ||
180 | |||
181 | vperm $key,$in0,$in0,$mask # rotate-n-splat | ||
182 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
183 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
184 | vsel $stage,$outhead,$outtail,$outmask | ||
185 | vmr $outhead,$outtail | ||
186 | vcipherlast $key,$key,$rcon | ||
187 | stvx $stage,0,$out | ||
188 | addi $out,$out,16 | ||
189 | |||
190 | vxor $in0,$in0,$tmp | ||
191 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
192 | vxor $in0,$in0,$tmp | ||
193 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
194 | vxor $in0,$in0,$tmp | ||
195 | vxor $in0,$in0,$key | ||
196 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
197 | vsel $stage,$outhead,$outtail,$outmask | ||
198 | vmr $outhead,$outtail | ||
199 | stvx $stage,0,$out | ||
200 | |||
201 | addi $inp,$out,15 # 15 is not typo | ||
202 | addi $out,$out,0x50 | ||
203 | |||
204 | li $rounds,10 | ||
205 | b Ldone | ||
206 | |||
207 | .align 4 | ||
208 | L192: | ||
209 | lvx $tmp,0,$inp | ||
210 | li $cnt,4 | ||
211 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
212 | vsel $stage,$outhead,$outtail,$outmask | ||
213 | vmr $outhead,$outtail | ||
214 | stvx $stage,0,$out | ||
215 | addi $out,$out,16 | ||
216 | vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] | ||
217 | vspltisb $key,8 # borrow $key | ||
218 | mtctr $cnt | ||
219 | vsububm $mask,$mask,$key # adjust the mask | ||
220 | |||
221 | Loop192: | ||
222 | vperm $key,$in1,$in1,$mask # roate-n-splat | ||
223 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
224 | vcipherlast $key,$key,$rcon | ||
225 | |||
226 | vxor $in0,$in0,$tmp | ||
227 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
228 | vxor $in0,$in0,$tmp | ||
229 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
230 | vxor $in0,$in0,$tmp | ||
231 | |||
232 | vsldoi $stage,$zero,$in1,8 | ||
233 | vspltw $tmp,$in0,3 | ||
234 | vxor $tmp,$tmp,$in1 | ||
235 | vsldoi $in1,$zero,$in1,12 # >>32 | ||
236 | vadduwm $rcon,$rcon,$rcon | ||
237 | vxor $in1,$in1,$tmp | ||
238 | vxor $in0,$in0,$key | ||
239 | vxor $in1,$in1,$key | ||
240 | vsldoi $stage,$stage,$in0,8 | ||
241 | |||
242 | vperm $key,$in1,$in1,$mask # rotate-n-splat | ||
243 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
244 | vperm $outtail,$stage,$stage,$outperm # rotate | ||
245 | vsel $stage,$outhead,$outtail,$outmask | ||
246 | vmr $outhead,$outtail | ||
247 | vcipherlast $key,$key,$rcon | ||
248 | stvx $stage,0,$out | ||
249 | addi $out,$out,16 | ||
250 | |||
251 | vsldoi $stage,$in0,$in1,8 | ||
252 | vxor $in0,$in0,$tmp | ||
253 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
254 | vperm $outtail,$stage,$stage,$outperm # rotate | ||
255 | vsel $stage,$outhead,$outtail,$outmask | ||
256 | vmr $outhead,$outtail | ||
257 | vxor $in0,$in0,$tmp | ||
258 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
259 | vxor $in0,$in0,$tmp | ||
260 | stvx $stage,0,$out | ||
261 | addi $out,$out,16 | ||
262 | |||
263 | vspltw $tmp,$in0,3 | ||
264 | vxor $tmp,$tmp,$in1 | ||
265 | vsldoi $in1,$zero,$in1,12 # >>32 | ||
266 | vadduwm $rcon,$rcon,$rcon | ||
267 | vxor $in1,$in1,$tmp | ||
268 | vxor $in0,$in0,$key | ||
269 | vxor $in1,$in1,$key | ||
270 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
271 | vsel $stage,$outhead,$outtail,$outmask | ||
272 | vmr $outhead,$outtail | ||
273 | stvx $stage,0,$out | ||
274 | addi $inp,$out,15 # 15 is not typo | ||
275 | addi $out,$out,16 | ||
276 | bdnz Loop192 | ||
277 | |||
278 | li $rounds,12 | ||
279 | addi $out,$out,0x20 | ||
280 | b Ldone | ||
281 | |||
282 | .align 4 | ||
283 | L256: | ||
284 | lvx $tmp,0,$inp | ||
285 | li $cnt,7 | ||
286 | li $rounds,14 | ||
287 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
288 | vsel $stage,$outhead,$outtail,$outmask | ||
289 | vmr $outhead,$outtail | ||
290 | stvx $stage,0,$out | ||
291 | addi $out,$out,16 | ||
292 | vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] | ||
293 | mtctr $cnt | ||
294 | |||
295 | Loop256: | ||
296 | vperm $key,$in1,$in1,$mask # rotate-n-splat | ||
297 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
298 | vperm $outtail,$in1,$in1,$outperm # rotate | ||
299 | vsel $stage,$outhead,$outtail,$outmask | ||
300 | vmr $outhead,$outtail | ||
301 | vcipherlast $key,$key,$rcon | ||
302 | stvx $stage,0,$out | ||
303 | addi $out,$out,16 | ||
304 | |||
305 | vxor $in0,$in0,$tmp | ||
306 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
307 | vxor $in0,$in0,$tmp | ||
308 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
309 | vxor $in0,$in0,$tmp | ||
310 | vadduwm $rcon,$rcon,$rcon | ||
311 | vxor $in0,$in0,$key | ||
312 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
313 | vsel $stage,$outhead,$outtail,$outmask | ||
314 | vmr $outhead,$outtail | ||
315 | stvx $stage,0,$out | ||
316 | addi $inp,$out,15 # 15 is not typo | ||
317 | addi $out,$out,16 | ||
318 | bdz Ldone | ||
319 | |||
320 | vspltw $key,$in0,3 # just splat | ||
321 | vsldoi $tmp,$zero,$in1,12 # >>32 | ||
322 | vsbox $key,$key | ||
323 | |||
324 | vxor $in1,$in1,$tmp | ||
325 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
326 | vxor $in1,$in1,$tmp | ||
327 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
328 | vxor $in1,$in1,$tmp | ||
329 | |||
330 | vxor $in1,$in1,$key | ||
331 | b Loop256 | ||
332 | |||
333 | .align 4 | ||
334 | Ldone: | ||
335 | lvx $in1,0,$inp # redundant in aligned case | ||
336 | vsel $in1,$outhead,$in1,$outmask | ||
337 | stvx $in1,0,$inp | ||
338 | li $ptr,0 | ||
339 | mtspr 256,$vrsave | ||
340 | stw $rounds,0($out) | ||
341 | |||
342 | Lenc_key_abort: | ||
343 | mr r3,$ptr | ||
344 | blr | ||
345 | .long 0 | ||
346 | .byte 0,12,0x14,1,0,0,3,0 | ||
347 | .long 0 | ||
348 | .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key | ||
349 | |||
350 | .globl .${prefix}_set_decrypt_key | ||
351 | .align 5 | ||
352 | .${prefix}_set_decrypt_key: | ||
353 | $STU $sp,-$FRAME($sp) | ||
354 | mflr r10 | ||
355 | $PUSH r10,$FRAME+$LRSAVE($sp) | ||
356 | bl Lset_encrypt_key | ||
357 | mtlr r10 | ||
358 | |||
359 | cmpwi r3,0 | ||
360 | bne- Ldec_key_abort | ||
361 | |||
362 | slwi $cnt,$rounds,4 | ||
363 | subi $inp,$out,240 # first round key | ||
364 | srwi $rounds,$rounds,1 | ||
365 | add $out,$inp,$cnt # last round key | ||
366 | mtctr $rounds | ||
367 | |||
368 | Ldeckey: | ||
369 | lwz r0, 0($inp) | ||
370 | lwz r6, 4($inp) | ||
371 | lwz r7, 8($inp) | ||
372 | lwz r8, 12($inp) | ||
373 | addi $inp,$inp,16 | ||
374 | lwz r9, 0($out) | ||
375 | lwz r10,4($out) | ||
376 | lwz r11,8($out) | ||
377 | lwz r12,12($out) | ||
378 | stw r0, 0($out) | ||
379 | stw r6, 4($out) | ||
380 | stw r7, 8($out) | ||
381 | stw r8, 12($out) | ||
382 | subi $out,$out,16 | ||
383 | stw r9, -16($inp) | ||
384 | stw r10,-12($inp) | ||
385 | stw r11,-8($inp) | ||
386 | stw r12,-4($inp) | ||
387 | bdnz Ldeckey | ||
388 | |||
389 | xor r3,r3,r3 # return value | ||
390 | Ldec_key_abort: | ||
391 | addi $sp,$sp,$FRAME | ||
392 | blr | ||
393 | .long 0 | ||
394 | .byte 0,12,4,1,0x80,0,3,0 | ||
395 | .long 0 | ||
396 | .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key | ||
397 | ___ | ||
398 | }}} | ||
399 | ######################################################################### | ||
400 | {{{ # Single block en- and decrypt procedures # | ||
401 | sub gen_block () { | ||
402 | my $dir = shift; | ||
403 | my $n = $dir eq "de" ? "n" : ""; | ||
404 | my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); | ||
405 | |||
406 | $code.=<<___; | ||
407 | .globl .${prefix}_${dir}crypt | ||
408 | .align 5 | ||
409 | .${prefix}_${dir}crypt: | ||
410 | lwz $rounds,240($key) | ||
411 | lis r0,0xfc00 | ||
412 | mfspr $vrsave,256 | ||
413 | li $idx,15 # 15 is not typo | ||
414 | mtspr 256,r0 | ||
415 | |||
416 | lvx v0,0,$inp | ||
417 | neg r11,$out | ||
418 | lvx v1,$idx,$inp | ||
419 | lvsl v2,0,$inp # inpperm | ||
420 | le?vspltisb v4,0x0f | ||
421 | ?lvsl v3,0,r11 # outperm | ||
422 | le?vxor v2,v2,v4 | ||
423 | li $idx,16 | ||
424 | vperm v0,v0,v1,v2 # align [and byte swap in LE] | ||
425 | lvx v1,0,$key | ||
426 | ?lvsl v5,0,$key # keyperm | ||
427 | srwi $rounds,$rounds,1 | ||
428 | lvx v2,$idx,$key | ||
429 | addi $idx,$idx,16 | ||
430 | subi $rounds,$rounds,1 | ||
431 | ?vperm v1,v1,v2,v5 # align round key | ||
432 | |||
433 | vxor v0,v0,v1 | ||
434 | lvx v1,$idx,$key | ||
435 | addi $idx,$idx,16 | ||
436 | mtctr $rounds | ||
437 | |||
438 | Loop_${dir}c: | ||
439 | ?vperm v2,v2,v1,v5 | ||
440 | v${n}cipher v0,v0,v2 | ||
441 | lvx v2,$idx,$key | ||
442 | addi $idx,$idx,16 | ||
443 | ?vperm v1,v1,v2,v5 | ||
444 | v${n}cipher v0,v0,v1 | ||
445 | lvx v1,$idx,$key | ||
446 | addi $idx,$idx,16 | ||
447 | bdnz Loop_${dir}c | ||
448 | |||
449 | ?vperm v2,v2,v1,v5 | ||
450 | v${n}cipher v0,v0,v2 | ||
451 | lvx v2,$idx,$key | ||
452 | ?vperm v1,v1,v2,v5 | ||
453 | v${n}cipherlast v0,v0,v1 | ||
454 | |||
455 | vspltisb v2,-1 | ||
456 | vxor v1,v1,v1 | ||
457 | li $idx,15 # 15 is not typo | ||
458 | ?vperm v2,v1,v2,v3 # outmask | ||
459 | le?vxor v3,v3,v4 | ||
460 | lvx v1,0,$out # outhead | ||
461 | vperm v0,v0,v0,v3 # rotate [and byte swap in LE] | ||
462 | vsel v1,v1,v0,v2 | ||
463 | lvx v4,$idx,$out | ||
464 | stvx v1,0,$out | ||
465 | vsel v0,v0,v4,v2 | ||
466 | stvx v0,$idx,$out | ||
467 | |||
468 | mtspr 256,$vrsave | ||
469 | blr | ||
470 | .long 0 | ||
471 | .byte 0,12,0x14,0,0,0,3,0 | ||
472 | .long 0 | ||
473 | .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt | ||
474 | ___ | ||
475 | } | ||
476 | &gen_block("en"); | ||
477 | &gen_block("de"); | ||
478 | }}} | ||
479 | ######################################################################### | ||
480 | {{{ # CBC en- and decrypt procedures # | ||
481 | my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); | ||
482 | my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); | ||
483 | my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= | ||
484 | map("v$_",(4..10)); | ||
485 | $code.=<<___; | ||
486 | .globl .${prefix}_cbc_encrypt | ||
487 | .align 5 | ||
488 | .${prefix}_cbc_encrypt: | ||
489 | ${UCMP}i $len,16 | ||
490 | bltlr- | ||
491 | |||
492 | cmpwi $enc,0 # test direction | ||
493 | lis r0,0xffe0 | ||
494 | mfspr $vrsave,256 | ||
495 | mtspr 256,r0 | ||
496 | |||
497 | li $idx,15 | ||
498 | vxor $rndkey0,$rndkey0,$rndkey0 | ||
499 | le?vspltisb $tmp,0x0f | ||
500 | |||
501 | lvx $ivec,0,$ivp # load [unaligned] iv | ||
502 | lvsl $inpperm,0,$ivp | ||
503 | lvx $inptail,$idx,$ivp | ||
504 | le?vxor $inpperm,$inpperm,$tmp | ||
505 | vperm $ivec,$ivec,$inptail,$inpperm | ||
506 | |||
507 | neg r11,$inp | ||
508 | ?lvsl $keyperm,0,$key # prepare for unaligned key | ||
509 | lwz $rounds,240($key) | ||
510 | |||
511 | lvsr $inpperm,0,r11 # prepare for unaligned load | ||
512 | lvx $inptail,0,$inp | ||
513 | addi $inp,$inp,15 # 15 is not typo | ||
514 | le?vxor $inpperm,$inpperm,$tmp | ||
515 | |||
516 | ?lvsr $outperm,0,$out # prepare for unaligned store | ||
517 | vspltisb $outmask,-1 | ||
518 | lvx $outhead,0,$out | ||
519 | ?vperm $outmask,$rndkey0,$outmask,$outperm | ||
520 | le?vxor $outperm,$outperm,$tmp | ||
521 | |||
522 | srwi $rounds,$rounds,1 | ||
523 | li $idx,16 | ||
524 | subi $rounds,$rounds,1 | ||
525 | beq Lcbc_dec | ||
526 | |||
527 | Lcbc_enc: | ||
528 | vmr $inout,$inptail | ||
529 | lvx $inptail,0,$inp | ||
530 | addi $inp,$inp,16 | ||
531 | mtctr $rounds | ||
532 | subi $len,$len,16 # len-=16 | ||
533 | |||
534 | lvx $rndkey0,0,$key | ||
535 | vperm $inout,$inout,$inptail,$inpperm | ||
536 | lvx $rndkey1,$idx,$key | ||
537 | addi $idx,$idx,16 | ||
538 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
539 | vxor $inout,$inout,$rndkey0 | ||
540 | lvx $rndkey0,$idx,$key | ||
541 | addi $idx,$idx,16 | ||
542 | vxor $inout,$inout,$ivec | ||
543 | |||
544 | Loop_cbc_enc: | ||
545 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
546 | vcipher $inout,$inout,$rndkey1 | ||
547 | lvx $rndkey1,$idx,$key | ||
548 | addi $idx,$idx,16 | ||
549 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
550 | vcipher $inout,$inout,$rndkey0 | ||
551 | lvx $rndkey0,$idx,$key | ||
552 | addi $idx,$idx,16 | ||
553 | bdnz Loop_cbc_enc | ||
554 | |||
555 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
556 | vcipher $inout,$inout,$rndkey1 | ||
557 | lvx $rndkey1,$idx,$key | ||
558 | li $idx,16 | ||
559 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
560 | vcipherlast $ivec,$inout,$rndkey0 | ||
561 | ${UCMP}i $len,16 | ||
562 | |||
563 | vperm $tmp,$ivec,$ivec,$outperm | ||
564 | vsel $inout,$outhead,$tmp,$outmask | ||
565 | vmr $outhead,$tmp | ||
566 | stvx $inout,0,$out | ||
567 | addi $out,$out,16 | ||
568 | bge Lcbc_enc | ||
569 | |||
570 | b Lcbc_done | ||
571 | |||
572 | .align 4 | ||
573 | Lcbc_dec: | ||
574 | ${UCMP}i $len,128 | ||
575 | bge _aesp8_cbc_decrypt8x | ||
576 | vmr $tmp,$inptail | ||
577 | lvx $inptail,0,$inp | ||
578 | addi $inp,$inp,16 | ||
579 | mtctr $rounds | ||
580 | subi $len,$len,16 # len-=16 | ||
581 | |||
582 | lvx $rndkey0,0,$key | ||
583 | vperm $tmp,$tmp,$inptail,$inpperm | ||
584 | lvx $rndkey1,$idx,$key | ||
585 | addi $idx,$idx,16 | ||
586 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
587 | vxor $inout,$tmp,$rndkey0 | ||
588 | lvx $rndkey0,$idx,$key | ||
589 | addi $idx,$idx,16 | ||
590 | |||
591 | Loop_cbc_dec: | ||
592 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
593 | vncipher $inout,$inout,$rndkey1 | ||
594 | lvx $rndkey1,$idx,$key | ||
595 | addi $idx,$idx,16 | ||
596 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
597 | vncipher $inout,$inout,$rndkey0 | ||
598 | lvx $rndkey0,$idx,$key | ||
599 | addi $idx,$idx,16 | ||
600 | bdnz Loop_cbc_dec | ||
601 | |||
602 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
603 | vncipher $inout,$inout,$rndkey1 | ||
604 | lvx $rndkey1,$idx,$key | ||
605 | li $idx,16 | ||
606 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
607 | vncipherlast $inout,$inout,$rndkey0 | ||
608 | ${UCMP}i $len,16 | ||
609 | |||
610 | vxor $inout,$inout,$ivec | ||
611 | vmr $ivec,$tmp | ||
612 | vperm $tmp,$inout,$inout,$outperm | ||
613 | vsel $inout,$outhead,$tmp,$outmask | ||
614 | vmr $outhead,$tmp | ||
615 | stvx $inout,0,$out | ||
616 | addi $out,$out,16 | ||
617 | bge Lcbc_dec | ||
618 | |||
619 | Lcbc_done: | ||
620 | addi $out,$out,-1 | ||
621 | lvx $inout,0,$out # redundant in aligned case | ||
622 | vsel $inout,$outhead,$inout,$outmask | ||
623 | stvx $inout,0,$out | ||
624 | |||
625 | neg $enc,$ivp # write [unaligned] iv | ||
626 | li $idx,15 # 15 is not typo | ||
627 | vxor $rndkey0,$rndkey0,$rndkey0 | ||
628 | vspltisb $outmask,-1 | ||
629 | le?vspltisb $tmp,0x0f | ||
630 | ?lvsl $outperm,0,$enc | ||
631 | ?vperm $outmask,$rndkey0,$outmask,$outperm | ||
632 | le?vxor $outperm,$outperm,$tmp | ||
633 | lvx $outhead,0,$ivp | ||
634 | vperm $ivec,$ivec,$ivec,$outperm | ||
635 | vsel $inout,$outhead,$ivec,$outmask | ||
636 | lvx $inptail,$idx,$ivp | ||
637 | stvx $inout,0,$ivp | ||
638 | vsel $inout,$ivec,$inptail,$outmask | ||
639 | stvx $inout,$idx,$ivp | ||
640 | |||
641 | mtspr 256,$vrsave | ||
642 | blr | ||
643 | .long 0 | ||
644 | .byte 0,12,0x14,0,0,0,6,0 | ||
645 | .long 0 | ||
646 | ___ | ||
647 | ######################################################################### | ||
648 | {{ # Optimized CBC decrypt procedure # | ||
649 | my $key_="r11"; | ||
650 | my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); | ||
651 | my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); | ||
652 | my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); | ||
653 | my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys | ||
654 | # v26-v31 last 6 round keys | ||
655 | my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment | ||
656 | |||
657 | $code.=<<___; | ||
658 | .align 5 | ||
659 | _aesp8_cbc_decrypt8x: | ||
660 | $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) | ||
661 | li r10,`$FRAME+8*16+15` | ||
662 | li r11,`$FRAME+8*16+31` | ||
663 | stvx v20,r10,$sp # ABI says so | ||
664 | addi r10,r10,32 | ||
665 | stvx v21,r11,$sp | ||
666 | addi r11,r11,32 | ||
667 | stvx v22,r10,$sp | ||
668 | addi r10,r10,32 | ||
669 | stvx v23,r11,$sp | ||
670 | addi r11,r11,32 | ||
671 | stvx v24,r10,$sp | ||
672 | addi r10,r10,32 | ||
673 | stvx v25,r11,$sp | ||
674 | addi r11,r11,32 | ||
675 | stvx v26,r10,$sp | ||
676 | addi r10,r10,32 | ||
677 | stvx v27,r11,$sp | ||
678 | addi r11,r11,32 | ||
679 | stvx v28,r10,$sp | ||
680 | addi r10,r10,32 | ||
681 | stvx v29,r11,$sp | ||
682 | addi r11,r11,32 | ||
683 | stvx v30,r10,$sp | ||
684 | stvx v31,r11,$sp | ||
685 | li r0,-1 | ||
686 | stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave | ||
687 | li $x10,0x10 | ||
688 | $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
689 | li $x20,0x20 | ||
690 | $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
691 | li $x30,0x30 | ||
692 | $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
693 | li $x40,0x40 | ||
694 | $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
695 | li $x50,0x50 | ||
696 | $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
697 | li $x60,0x60 | ||
698 | $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
699 | li $x70,0x70 | ||
700 | mtspr 256,r0 | ||
701 | |||
702 | subi $rounds,$rounds,3 # -4 in total | ||
703 | subi $len,$len,128 # bias | ||
704 | |||
705 | lvx $rndkey0,$x00,$key # load key schedule | ||
706 | lvx v30,$x10,$key | ||
707 | addi $key,$key,0x20 | ||
708 | lvx v31,$x00,$key | ||
709 | ?vperm $rndkey0,$rndkey0,v30,$keyperm | ||
710 | addi $key_,$sp,$FRAME+15 | ||
711 | mtctr $rounds | ||
712 | |||
713 | Load_cbc_dec_key: | ||
714 | ?vperm v24,v30,v31,$keyperm | ||
715 | lvx v30,$x10,$key | ||
716 | addi $key,$key,0x20 | ||
717 | stvx v24,$x00,$key_ # off-load round[1] | ||
718 | ?vperm v25,v31,v30,$keyperm | ||
719 | lvx v31,$x00,$key | ||
720 | stvx v25,$x10,$key_ # off-load round[2] | ||
721 | addi $key_,$key_,0x20 | ||
722 | bdnz Load_cbc_dec_key | ||
723 | |||
724 | lvx v26,$x10,$key | ||
725 | ?vperm v24,v30,v31,$keyperm | ||
726 | lvx v27,$x20,$key | ||
727 | stvx v24,$x00,$key_ # off-load round[3] | ||
728 | ?vperm v25,v31,v26,$keyperm | ||
729 | lvx v28,$x30,$key | ||
730 | stvx v25,$x10,$key_ # off-load round[4] | ||
731 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
732 | ?vperm v26,v26,v27,$keyperm | ||
733 | lvx v29,$x40,$key | ||
734 | ?vperm v27,v27,v28,$keyperm | ||
735 | lvx v30,$x50,$key | ||
736 | ?vperm v28,v28,v29,$keyperm | ||
737 | lvx v31,$x60,$key | ||
738 | ?vperm v29,v29,v30,$keyperm | ||
739 | lvx $out0,$x70,$key # borrow $out0 | ||
740 | ?vperm v30,v30,v31,$keyperm | ||
741 | lvx v24,$x00,$key_ # pre-load round[1] | ||
742 | ?vperm v31,v31,$out0,$keyperm | ||
743 | lvx v25,$x10,$key_ # pre-load round[2] | ||
744 | |||
745 | #lvx $inptail,0,$inp # "caller" already did this | ||
746 | #addi $inp,$inp,15 # 15 is not typo | ||
747 | subi $inp,$inp,15 # undo "caller" | ||
748 | |||
749 | le?li $idx,8 | ||
750 | lvx_u $in0,$x00,$inp # load first 8 "words" | ||
751 | le?lvsl $inpperm,0,$idx | ||
752 | le?vspltisb $tmp,0x0f | ||
753 | lvx_u $in1,$x10,$inp | ||
754 | le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u | ||
755 | lvx_u $in2,$x20,$inp | ||
756 | le?vperm $in0,$in0,$in0,$inpperm | ||
757 | lvx_u $in3,$x30,$inp | ||
758 | le?vperm $in1,$in1,$in1,$inpperm | ||
759 | lvx_u $in4,$x40,$inp | ||
760 | le?vperm $in2,$in2,$in2,$inpperm | ||
761 | vxor $out0,$in0,$rndkey0 | ||
762 | lvx_u $in5,$x50,$inp | ||
763 | le?vperm $in3,$in3,$in3,$inpperm | ||
764 | vxor $out1,$in1,$rndkey0 | ||
765 | lvx_u $in6,$x60,$inp | ||
766 | le?vperm $in4,$in4,$in4,$inpperm | ||
767 | vxor $out2,$in2,$rndkey0 | ||
768 | lvx_u $in7,$x70,$inp | ||
769 | addi $inp,$inp,0x80 | ||
770 | le?vperm $in5,$in5,$in5,$inpperm | ||
771 | vxor $out3,$in3,$rndkey0 | ||
772 | le?vperm $in6,$in6,$in6,$inpperm | ||
773 | vxor $out4,$in4,$rndkey0 | ||
774 | le?vperm $in7,$in7,$in7,$inpperm | ||
775 | vxor $out5,$in5,$rndkey0 | ||
776 | vxor $out6,$in6,$rndkey0 | ||
777 | vxor $out7,$in7,$rndkey0 | ||
778 | |||
779 | mtctr $rounds | ||
780 | b Loop_cbc_dec8x | ||
781 | .align 5 | ||
782 | Loop_cbc_dec8x: | ||
783 | vncipher $out0,$out0,v24 | ||
784 | vncipher $out1,$out1,v24 | ||
785 | vncipher $out2,$out2,v24 | ||
786 | vncipher $out3,$out3,v24 | ||
787 | vncipher $out4,$out4,v24 | ||
788 | vncipher $out5,$out5,v24 | ||
789 | vncipher $out6,$out6,v24 | ||
790 | vncipher $out7,$out7,v24 | ||
791 | lvx v24,$x20,$key_ # round[3] | ||
792 | addi $key_,$key_,0x20 | ||
793 | |||
794 | vncipher $out0,$out0,v25 | ||
795 | vncipher $out1,$out1,v25 | ||
796 | vncipher $out2,$out2,v25 | ||
797 | vncipher $out3,$out3,v25 | ||
798 | vncipher $out4,$out4,v25 | ||
799 | vncipher $out5,$out5,v25 | ||
800 | vncipher $out6,$out6,v25 | ||
801 | vncipher $out7,$out7,v25 | ||
802 | lvx v25,$x10,$key_ # round[4] | ||
803 | bdnz Loop_cbc_dec8x | ||
804 | |||
805 | subic $len,$len,128 # $len-=128 | ||
806 | vncipher $out0,$out0,v24 | ||
807 | vncipher $out1,$out1,v24 | ||
808 | vncipher $out2,$out2,v24 | ||
809 | vncipher $out3,$out3,v24 | ||
810 | vncipher $out4,$out4,v24 | ||
811 | vncipher $out5,$out5,v24 | ||
812 | vncipher $out6,$out6,v24 | ||
813 | vncipher $out7,$out7,v24 | ||
814 | |||
815 | subfe. r0,r0,r0 # borrow?-1:0 | ||
816 | vncipher $out0,$out0,v25 | ||
817 | vncipher $out1,$out1,v25 | ||
818 | vncipher $out2,$out2,v25 | ||
819 | vncipher $out3,$out3,v25 | ||
820 | vncipher $out4,$out4,v25 | ||
821 | vncipher $out5,$out5,v25 | ||
822 | vncipher $out6,$out6,v25 | ||
823 | vncipher $out7,$out7,v25 | ||
824 | |||
825 | and r0,r0,$len | ||
826 | vncipher $out0,$out0,v26 | ||
827 | vncipher $out1,$out1,v26 | ||
828 | vncipher $out2,$out2,v26 | ||
829 | vncipher $out3,$out3,v26 | ||
830 | vncipher $out4,$out4,v26 | ||
831 | vncipher $out5,$out5,v26 | ||
832 | vncipher $out6,$out6,v26 | ||
833 | vncipher $out7,$out7,v26 | ||
834 | |||
835 | add $inp,$inp,r0 # $inp is adjusted in such | ||
836 | # way that at exit from the | ||
837 | # loop inX-in7 are loaded | ||
838 | # with last "words" | ||
839 | vncipher $out0,$out0,v27 | ||
840 | vncipher $out1,$out1,v27 | ||
841 | vncipher $out2,$out2,v27 | ||
842 | vncipher $out3,$out3,v27 | ||
843 | vncipher $out4,$out4,v27 | ||
844 | vncipher $out5,$out5,v27 | ||
845 | vncipher $out6,$out6,v27 | ||
846 | vncipher $out7,$out7,v27 | ||
847 | |||
848 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
849 | vncipher $out0,$out0,v28 | ||
850 | vncipher $out1,$out1,v28 | ||
851 | vncipher $out2,$out2,v28 | ||
852 | vncipher $out3,$out3,v28 | ||
853 | vncipher $out4,$out4,v28 | ||
854 | vncipher $out5,$out5,v28 | ||
855 | vncipher $out6,$out6,v28 | ||
856 | vncipher $out7,$out7,v28 | ||
857 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
858 | |||
859 | vncipher $out0,$out0,v29 | ||
860 | vncipher $out1,$out1,v29 | ||
861 | vncipher $out2,$out2,v29 | ||
862 | vncipher $out3,$out3,v29 | ||
863 | vncipher $out4,$out4,v29 | ||
864 | vncipher $out5,$out5,v29 | ||
865 | vncipher $out6,$out6,v29 | ||
866 | vncipher $out7,$out7,v29 | ||
867 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
868 | |||
869 | vncipher $out0,$out0,v30 | ||
870 | vxor $ivec,$ivec,v31 # xor with last round key | ||
871 | vncipher $out1,$out1,v30 | ||
872 | vxor $in0,$in0,v31 | ||
873 | vncipher $out2,$out2,v30 | ||
874 | vxor $in1,$in1,v31 | ||
875 | vncipher $out3,$out3,v30 | ||
876 | vxor $in2,$in2,v31 | ||
877 | vncipher $out4,$out4,v30 | ||
878 | vxor $in3,$in3,v31 | ||
879 | vncipher $out5,$out5,v30 | ||
880 | vxor $in4,$in4,v31 | ||
881 | vncipher $out6,$out6,v30 | ||
882 | vxor $in5,$in5,v31 | ||
883 | vncipher $out7,$out7,v30 | ||
884 | vxor $in6,$in6,v31 | ||
885 | |||
886 | vncipherlast $out0,$out0,$ivec | ||
887 | vncipherlast $out1,$out1,$in0 | ||
888 | lvx_u $in0,$x00,$inp # load next input block | ||
889 | vncipherlast $out2,$out2,$in1 | ||
890 | lvx_u $in1,$x10,$inp | ||
891 | vncipherlast $out3,$out3,$in2 | ||
892 | le?vperm $in0,$in0,$in0,$inpperm | ||
893 | lvx_u $in2,$x20,$inp | ||
894 | vncipherlast $out4,$out4,$in3 | ||
895 | le?vperm $in1,$in1,$in1,$inpperm | ||
896 | lvx_u $in3,$x30,$inp | ||
897 | vncipherlast $out5,$out5,$in4 | ||
898 | le?vperm $in2,$in2,$in2,$inpperm | ||
899 | lvx_u $in4,$x40,$inp | ||
900 | vncipherlast $out6,$out6,$in5 | ||
901 | le?vperm $in3,$in3,$in3,$inpperm | ||
902 | lvx_u $in5,$x50,$inp | ||
903 | vncipherlast $out7,$out7,$in6 | ||
904 | le?vperm $in4,$in4,$in4,$inpperm | ||
905 | lvx_u $in6,$x60,$inp | ||
906 | vmr $ivec,$in7 | ||
907 | le?vperm $in5,$in5,$in5,$inpperm | ||
908 | lvx_u $in7,$x70,$inp | ||
909 | addi $inp,$inp,0x80 | ||
910 | |||
911 | le?vperm $out0,$out0,$out0,$inpperm | ||
912 | le?vperm $out1,$out1,$out1,$inpperm | ||
913 | stvx_u $out0,$x00,$out | ||
914 | le?vperm $in6,$in6,$in6,$inpperm | ||
915 | vxor $out0,$in0,$rndkey0 | ||
916 | le?vperm $out2,$out2,$out2,$inpperm | ||
917 | stvx_u $out1,$x10,$out | ||
918 | le?vperm $in7,$in7,$in7,$inpperm | ||
919 | vxor $out1,$in1,$rndkey0 | ||
920 | le?vperm $out3,$out3,$out3,$inpperm | ||
921 | stvx_u $out2,$x20,$out | ||
922 | vxor $out2,$in2,$rndkey0 | ||
923 | le?vperm $out4,$out4,$out4,$inpperm | ||
924 | stvx_u $out3,$x30,$out | ||
925 | vxor $out3,$in3,$rndkey0 | ||
926 | le?vperm $out5,$out5,$out5,$inpperm | ||
927 | stvx_u $out4,$x40,$out | ||
928 | vxor $out4,$in4,$rndkey0 | ||
929 | le?vperm $out6,$out6,$out6,$inpperm | ||
930 | stvx_u $out5,$x50,$out | ||
931 | vxor $out5,$in5,$rndkey0 | ||
932 | le?vperm $out7,$out7,$out7,$inpperm | ||
933 | stvx_u $out6,$x60,$out | ||
934 | vxor $out6,$in6,$rndkey0 | ||
935 | stvx_u $out7,$x70,$out | ||
936 | addi $out,$out,0x80 | ||
937 | vxor $out7,$in7,$rndkey0 | ||
938 | |||
939 | mtctr $rounds | ||
940 | beq Loop_cbc_dec8x # did $len-=128 borrow? | ||
941 | |||
942 | addic. $len,$len,128 | ||
943 | beq Lcbc_dec8x_done | ||
944 | nop | ||
945 | nop | ||
946 | |||
947 | Loop_cbc_dec8x_tail: # up to 7 "words" tail... | ||
948 | vncipher $out1,$out1,v24 | ||
949 | vncipher $out2,$out2,v24 | ||
950 | vncipher $out3,$out3,v24 | ||
951 | vncipher $out4,$out4,v24 | ||
952 | vncipher $out5,$out5,v24 | ||
953 | vncipher $out6,$out6,v24 | ||
954 | vncipher $out7,$out7,v24 | ||
955 | lvx v24,$x20,$key_ # round[3] | ||
956 | addi $key_,$key_,0x20 | ||
957 | |||
958 | vncipher $out1,$out1,v25 | ||
959 | vncipher $out2,$out2,v25 | ||
960 | vncipher $out3,$out3,v25 | ||
961 | vncipher $out4,$out4,v25 | ||
962 | vncipher $out5,$out5,v25 | ||
963 | vncipher $out6,$out6,v25 | ||
964 | vncipher $out7,$out7,v25 | ||
965 | lvx v25,$x10,$key_ # round[4] | ||
966 | bdnz Loop_cbc_dec8x_tail | ||
967 | |||
968 | vncipher $out1,$out1,v24 | ||
969 | vncipher $out2,$out2,v24 | ||
970 | vncipher $out3,$out3,v24 | ||
971 | vncipher $out4,$out4,v24 | ||
972 | vncipher $out5,$out5,v24 | ||
973 | vncipher $out6,$out6,v24 | ||
974 | vncipher $out7,$out7,v24 | ||
975 | |||
976 | vncipher $out1,$out1,v25 | ||
977 | vncipher $out2,$out2,v25 | ||
978 | vncipher $out3,$out3,v25 | ||
979 | vncipher $out4,$out4,v25 | ||
980 | vncipher $out5,$out5,v25 | ||
981 | vncipher $out6,$out6,v25 | ||
982 | vncipher $out7,$out7,v25 | ||
983 | |||
984 | vncipher $out1,$out1,v26 | ||
985 | vncipher $out2,$out2,v26 | ||
986 | vncipher $out3,$out3,v26 | ||
987 | vncipher $out4,$out4,v26 | ||
988 | vncipher $out5,$out5,v26 | ||
989 | vncipher $out6,$out6,v26 | ||
990 | vncipher $out7,$out7,v26 | ||
991 | |||
992 | vncipher $out1,$out1,v27 | ||
993 | vncipher $out2,$out2,v27 | ||
994 | vncipher $out3,$out3,v27 | ||
995 | vncipher $out4,$out4,v27 | ||
996 | vncipher $out5,$out5,v27 | ||
997 | vncipher $out6,$out6,v27 | ||
998 | vncipher $out7,$out7,v27 | ||
999 | |||
1000 | vncipher $out1,$out1,v28 | ||
1001 | vncipher $out2,$out2,v28 | ||
1002 | vncipher $out3,$out3,v28 | ||
1003 | vncipher $out4,$out4,v28 | ||
1004 | vncipher $out5,$out5,v28 | ||
1005 | vncipher $out6,$out6,v28 | ||
1006 | vncipher $out7,$out7,v28 | ||
1007 | |||
1008 | vncipher $out1,$out1,v29 | ||
1009 | vncipher $out2,$out2,v29 | ||
1010 | vncipher $out3,$out3,v29 | ||
1011 | vncipher $out4,$out4,v29 | ||
1012 | vncipher $out5,$out5,v29 | ||
1013 | vncipher $out6,$out6,v29 | ||
1014 | vncipher $out7,$out7,v29 | ||
1015 | |||
1016 | vncipher $out1,$out1,v30 | ||
1017 | vxor $ivec,$ivec,v31 # last round key | ||
1018 | vncipher $out2,$out2,v30 | ||
1019 | vxor $in1,$in1,v31 | ||
1020 | vncipher $out3,$out3,v30 | ||
1021 | vxor $in2,$in2,v31 | ||
1022 | vncipher $out4,$out4,v30 | ||
1023 | vxor $in3,$in3,v31 | ||
1024 | vncipher $out5,$out5,v30 | ||
1025 | vxor $in4,$in4,v31 | ||
1026 | vncipher $out6,$out6,v30 | ||
1027 | vxor $in5,$in5,v31 | ||
1028 | vncipher $out7,$out7,v30 | ||
1029 | vxor $in6,$in6,v31 | ||
1030 | |||
1031 | cmplwi $len,32 # switch($len) | ||
1032 | blt Lcbc_dec8x_one | ||
1033 | nop | ||
1034 | beq Lcbc_dec8x_two | ||
1035 | cmplwi $len,64 | ||
1036 | blt Lcbc_dec8x_three | ||
1037 | nop | ||
1038 | beq Lcbc_dec8x_four | ||
1039 | cmplwi $len,96 | ||
1040 | blt Lcbc_dec8x_five | ||
1041 | nop | ||
1042 | beq Lcbc_dec8x_six | ||
1043 | |||
1044 | Lcbc_dec8x_seven: | ||
1045 | vncipherlast $out1,$out1,$ivec | ||
1046 | vncipherlast $out2,$out2,$in1 | ||
1047 | vncipherlast $out3,$out3,$in2 | ||
1048 | vncipherlast $out4,$out4,$in3 | ||
1049 | vncipherlast $out5,$out5,$in4 | ||
1050 | vncipherlast $out6,$out6,$in5 | ||
1051 | vncipherlast $out7,$out7,$in6 | ||
1052 | vmr $ivec,$in7 | ||
1053 | |||
1054 | le?vperm $out1,$out1,$out1,$inpperm | ||
1055 | le?vperm $out2,$out2,$out2,$inpperm | ||
1056 | stvx_u $out1,$x00,$out | ||
1057 | le?vperm $out3,$out3,$out3,$inpperm | ||
1058 | stvx_u $out2,$x10,$out | ||
1059 | le?vperm $out4,$out4,$out4,$inpperm | ||
1060 | stvx_u $out3,$x20,$out | ||
1061 | le?vperm $out5,$out5,$out5,$inpperm | ||
1062 | stvx_u $out4,$x30,$out | ||
1063 | le?vperm $out6,$out6,$out6,$inpperm | ||
1064 | stvx_u $out5,$x40,$out | ||
1065 | le?vperm $out7,$out7,$out7,$inpperm | ||
1066 | stvx_u $out6,$x50,$out | ||
1067 | stvx_u $out7,$x60,$out | ||
1068 | addi $out,$out,0x70 | ||
1069 | b Lcbc_dec8x_done | ||
1070 | |||
1071 | .align 5 | ||
1072 | Lcbc_dec8x_six: | ||
1073 | vncipherlast $out2,$out2,$ivec | ||
1074 | vncipherlast $out3,$out3,$in2 | ||
1075 | vncipherlast $out4,$out4,$in3 | ||
1076 | vncipherlast $out5,$out5,$in4 | ||
1077 | vncipherlast $out6,$out6,$in5 | ||
1078 | vncipherlast $out7,$out7,$in6 | ||
1079 | vmr $ivec,$in7 | ||
1080 | |||
1081 | le?vperm $out2,$out2,$out2,$inpperm | ||
1082 | le?vperm $out3,$out3,$out3,$inpperm | ||
1083 | stvx_u $out2,$x00,$out | ||
1084 | le?vperm $out4,$out4,$out4,$inpperm | ||
1085 | stvx_u $out3,$x10,$out | ||
1086 | le?vperm $out5,$out5,$out5,$inpperm | ||
1087 | stvx_u $out4,$x20,$out | ||
1088 | le?vperm $out6,$out6,$out6,$inpperm | ||
1089 | stvx_u $out5,$x30,$out | ||
1090 | le?vperm $out7,$out7,$out7,$inpperm | ||
1091 | stvx_u $out6,$x40,$out | ||
1092 | stvx_u $out7,$x50,$out | ||
1093 | addi $out,$out,0x60 | ||
1094 | b Lcbc_dec8x_done | ||
1095 | |||
1096 | .align 5 | ||
1097 | Lcbc_dec8x_five: | ||
1098 | vncipherlast $out3,$out3,$ivec | ||
1099 | vncipherlast $out4,$out4,$in3 | ||
1100 | vncipherlast $out5,$out5,$in4 | ||
1101 | vncipherlast $out6,$out6,$in5 | ||
1102 | vncipherlast $out7,$out7,$in6 | ||
1103 | vmr $ivec,$in7 | ||
1104 | |||
1105 | le?vperm $out3,$out3,$out3,$inpperm | ||
1106 | le?vperm $out4,$out4,$out4,$inpperm | ||
1107 | stvx_u $out3,$x00,$out | ||
1108 | le?vperm $out5,$out5,$out5,$inpperm | ||
1109 | stvx_u $out4,$x10,$out | ||
1110 | le?vperm $out6,$out6,$out6,$inpperm | ||
1111 | stvx_u $out5,$x20,$out | ||
1112 | le?vperm $out7,$out7,$out7,$inpperm | ||
1113 | stvx_u $out6,$x30,$out | ||
1114 | stvx_u $out7,$x40,$out | ||
1115 | addi $out,$out,0x50 | ||
1116 | b Lcbc_dec8x_done | ||
1117 | |||
1118 | .align 5 | ||
1119 | Lcbc_dec8x_four: | ||
1120 | vncipherlast $out4,$out4,$ivec | ||
1121 | vncipherlast $out5,$out5,$in4 | ||
1122 | vncipherlast $out6,$out6,$in5 | ||
1123 | vncipherlast $out7,$out7,$in6 | ||
1124 | vmr $ivec,$in7 | ||
1125 | |||
1126 | le?vperm $out4,$out4,$out4,$inpperm | ||
1127 | le?vperm $out5,$out5,$out5,$inpperm | ||
1128 | stvx_u $out4,$x00,$out | ||
1129 | le?vperm $out6,$out6,$out6,$inpperm | ||
1130 | stvx_u $out5,$x10,$out | ||
1131 | le?vperm $out7,$out7,$out7,$inpperm | ||
1132 | stvx_u $out6,$x20,$out | ||
1133 | stvx_u $out7,$x30,$out | ||
1134 | addi $out,$out,0x40 | ||
1135 | b Lcbc_dec8x_done | ||
1136 | |||
1137 | .align 5 | ||
1138 | Lcbc_dec8x_three: | ||
1139 | vncipherlast $out5,$out5,$ivec | ||
1140 | vncipherlast $out6,$out6,$in5 | ||
1141 | vncipherlast $out7,$out7,$in6 | ||
1142 | vmr $ivec,$in7 | ||
1143 | |||
1144 | le?vperm $out5,$out5,$out5,$inpperm | ||
1145 | le?vperm $out6,$out6,$out6,$inpperm | ||
1146 | stvx_u $out5,$x00,$out | ||
1147 | le?vperm $out7,$out7,$out7,$inpperm | ||
1148 | stvx_u $out6,$x10,$out | ||
1149 | stvx_u $out7,$x20,$out | ||
1150 | addi $out,$out,0x30 | ||
1151 | b Lcbc_dec8x_done | ||
1152 | |||
1153 | .align 5 | ||
1154 | Lcbc_dec8x_two: | ||
1155 | vncipherlast $out6,$out6,$ivec | ||
1156 | vncipherlast $out7,$out7,$in6 | ||
1157 | vmr $ivec,$in7 | ||
1158 | |||
1159 | le?vperm $out6,$out6,$out6,$inpperm | ||
1160 | le?vperm $out7,$out7,$out7,$inpperm | ||
1161 | stvx_u $out6,$x00,$out | ||
1162 | stvx_u $out7,$x10,$out | ||
1163 | addi $out,$out,0x20 | ||
1164 | b Lcbc_dec8x_done | ||
1165 | |||
1166 | .align 5 | ||
1167 | Lcbc_dec8x_one: | ||
1168 | vncipherlast $out7,$out7,$ivec | ||
1169 | vmr $ivec,$in7 | ||
1170 | |||
1171 | le?vperm $out7,$out7,$out7,$inpperm | ||
1172 | stvx_u $out7,0,$out | ||
1173 | addi $out,$out,0x10 | ||
1174 | |||
1175 | Lcbc_dec8x_done: | ||
1176 | le?vperm $ivec,$ivec,$ivec,$inpperm | ||
1177 | stvx_u $ivec,0,$ivp # write [unaligned] iv | ||
1178 | |||
1179 | li r10,`$FRAME+15` | ||
1180 | li r11,`$FRAME+31` | ||
1181 | stvx $inpperm,r10,$sp # wipe copies of round keys | ||
1182 | addi r10,r10,32 | ||
1183 | stvx $inpperm,r11,$sp | ||
1184 | addi r11,r11,32 | ||
1185 | stvx $inpperm,r10,$sp | ||
1186 | addi r10,r10,32 | ||
1187 | stvx $inpperm,r11,$sp | ||
1188 | addi r11,r11,32 | ||
1189 | stvx $inpperm,r10,$sp | ||
1190 | addi r10,r10,32 | ||
1191 | stvx $inpperm,r11,$sp | ||
1192 | addi r11,r11,32 | ||
1193 | stvx $inpperm,r10,$sp | ||
1194 | addi r10,r10,32 | ||
1195 | stvx $inpperm,r11,$sp | ||
1196 | addi r11,r11,32 | ||
1197 | |||
1198 | mtspr 256,$vrsave | ||
1199 | lvx v20,r10,$sp # ABI says so | ||
1200 | addi r10,r10,32 | ||
1201 | lvx v21,r11,$sp | ||
1202 | addi r11,r11,32 | ||
1203 | lvx v22,r10,$sp | ||
1204 | addi r10,r10,32 | ||
1205 | lvx v23,r11,$sp | ||
1206 | addi r11,r11,32 | ||
1207 | lvx v24,r10,$sp | ||
1208 | addi r10,r10,32 | ||
1209 | lvx v25,r11,$sp | ||
1210 | addi r11,r11,32 | ||
1211 | lvx v26,r10,$sp | ||
1212 | addi r10,r10,32 | ||
1213 | lvx v27,r11,$sp | ||
1214 | addi r11,r11,32 | ||
1215 | lvx v28,r10,$sp | ||
1216 | addi r10,r10,32 | ||
1217 | lvx v29,r11,$sp | ||
1218 | addi r11,r11,32 | ||
1219 | lvx v30,r10,$sp | ||
1220 | lvx v31,r11,$sp | ||
1221 | $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
1222 | $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
1223 | $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
1224 | $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
1225 | $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
1226 | $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
1227 | addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` | ||
1228 | blr | ||
1229 | .long 0 | ||
1230 | .byte 0,12,0x14,0,0x80,6,6,0 | ||
1231 | .long 0 | ||
1232 | .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt | ||
1233 | ___ | ||
1234 | }} }}} | ||
1235 | |||
#########################################################################
{{{	# CTR procedure[s]						#

# Register assignments for the straightforward (one-block-at-a-time)
# CTR implementation.  r3..r10 are the ABI argument/scratch registers:
# input, output, block count, key schedule, IV pointer, plus offsets.
# v0..v11 carry the rotating round-key pair, the counter block and the
# permutation vectors used for unaligned/endian-agnostic access.
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

# ${prefix}_ctr32_encrypt_blocks(inp,out,len,key,ivp):
#   AES-CTR keystream generation/encryption, $len counted in 16-byte
#   blocks (returns immediately when $len < 1).  The counter is
#   advanced with vadduwm against a vector holding 1 in its lowest
#   word, i.e. only the last 32-bit word of the IV block increments.
#   Inputs of 8 or more blocks divert to the unrolled
#   _aesp8_ctr32_encrypt8x path below.  "le?"-prefixed instructions
#   are emitted only for little-endian builds (see ppc-xlate.pl).
$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduwm		$ivec,$ivec,$one
	vmr		$dat,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	vperm		$dat,$dat,$inptail,$inpperm
	li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	mtctr		$rounds
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure					#

# Register map for the 8x-unrolled CTR loop.  The 0x00..0x70 block
# offsets live in r0, r8 and the nonvolatile r26-r31 (saved by the
# procedure prologue); the on-stack key-schedule copy is addressed
# through $key_ (r11).  $tmp/$keyperm deliberately alias $in3/$in4
# from the enclosing scope, and $two/$three/$four reuse the outer
# scope's output-alignment vectors, which are dead on this path.
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

1363 | $code.=<<___; | ||
1364 | .align 5 | ||
1365 | _aesp8_ctr32_encrypt8x: | ||
1366 | $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) | ||
1367 | li r10,`$FRAME+8*16+15` | ||
1368 | li r11,`$FRAME+8*16+31` | ||
1369 | stvx v20,r10,$sp # ABI says so | ||
1370 | addi r10,r10,32 | ||
1371 | stvx v21,r11,$sp | ||
1372 | addi r11,r11,32 | ||
1373 | stvx v22,r10,$sp | ||
1374 | addi r10,r10,32 | ||
1375 | stvx v23,r11,$sp | ||
1376 | addi r11,r11,32 | ||
1377 | stvx v24,r10,$sp | ||
1378 | addi r10,r10,32 | ||
1379 | stvx v25,r11,$sp | ||
1380 | addi r11,r11,32 | ||
1381 | stvx v26,r10,$sp | ||
1382 | addi r10,r10,32 | ||
1383 | stvx v27,r11,$sp | ||
1384 | addi r11,r11,32 | ||
1385 | stvx v28,r10,$sp | ||
1386 | addi r10,r10,32 | ||
1387 | stvx v29,r11,$sp | ||
1388 | addi r11,r11,32 | ||
1389 | stvx v30,r10,$sp | ||
1390 | stvx v31,r11,$sp | ||
1391 | li r0,-1 | ||
1392 | stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave | ||
1393 | li $x10,0x10 | ||
1394 | $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
1395 | li $x20,0x20 | ||
1396 | $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
1397 | li $x30,0x30 | ||
1398 | $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
1399 | li $x40,0x40 | ||
1400 | $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
1401 | li $x50,0x50 | ||
1402 | $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
1403 | li $x60,0x60 | ||
1404 | $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
1405 | li $x70,0x70 | ||
1406 | mtspr 256,r0 | ||
1407 | |||
1408 | subi $rounds,$rounds,3 # -4 in total | ||
1409 | |||
1410 | lvx $rndkey0,$x00,$key # load key schedule | ||
1411 | lvx v30,$x10,$key | ||
1412 | addi $key,$key,0x20 | ||
1413 | lvx v31,$x00,$key | ||
1414 | ?vperm $rndkey0,$rndkey0,v30,$keyperm | ||
1415 | addi $key_,$sp,$FRAME+15 | ||
1416 | mtctr $rounds | ||
1417 | |||
1418 | Load_ctr32_enc_key: | ||
1419 | ?vperm v24,v30,v31,$keyperm | ||
1420 | lvx v30,$x10,$key | ||
1421 | addi $key,$key,0x20 | ||
1422 | stvx v24,$x00,$key_ # off-load round[1] | ||
1423 | ?vperm v25,v31,v30,$keyperm | ||
1424 | lvx v31,$x00,$key | ||
1425 | stvx v25,$x10,$key_ # off-load round[2] | ||
1426 | addi $key_,$key_,0x20 | ||
1427 | bdnz Load_ctr32_enc_key | ||
1428 | |||
1429 | lvx v26,$x10,$key | ||
1430 | ?vperm v24,v30,v31,$keyperm | ||
1431 | lvx v27,$x20,$key | ||
1432 | stvx v24,$x00,$key_ # off-load round[3] | ||
1433 | ?vperm v25,v31,v26,$keyperm | ||
1434 | lvx v28,$x30,$key | ||
1435 | stvx v25,$x10,$key_ # off-load round[4] | ||
1436 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
1437 | ?vperm v26,v26,v27,$keyperm | ||
1438 | lvx v29,$x40,$key | ||
1439 | ?vperm v27,v27,v28,$keyperm | ||
1440 | lvx v30,$x50,$key | ||
1441 | ?vperm v28,v28,v29,$keyperm | ||
1442 | lvx v31,$x60,$key | ||
1443 | ?vperm v29,v29,v30,$keyperm | ||
1444 | lvx $out0,$x70,$key # borrow $out0 | ||
1445 | ?vperm v30,v30,v31,$keyperm | ||
1446 | lvx v24,$x00,$key_ # pre-load round[1] | ||
1447 | ?vperm v31,v31,$out0,$keyperm | ||
1448 | lvx v25,$x10,$key_ # pre-load round[2] | ||
1449 | |||
1450 | vadduwm $two,$one,$one | ||
1451 | subi $inp,$inp,15 # undo "caller" | ||
1452 | $SHL $len,$len,4 | ||
1453 | |||
1454 | vadduwm $out1,$ivec,$one # counter values ... | ||
1455 | vadduwm $out2,$ivec,$two | ||
1456 | vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] | ||
1457 | le?li $idx,8 | ||
1458 | vadduwm $out3,$out1,$two | ||
1459 | vxor $out1,$out1,$rndkey0 | ||
1460 | le?lvsl $inpperm,0,$idx | ||
1461 | vadduwm $out4,$out2,$two | ||
1462 | vxor $out2,$out2,$rndkey0 | ||
1463 | le?vspltisb $tmp,0x0f | ||
1464 | vadduwm $out5,$out3,$two | ||
1465 | vxor $out3,$out3,$rndkey0 | ||
1466 | le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u | ||
1467 | vadduwm $out6,$out4,$two | ||
1468 | vxor $out4,$out4,$rndkey0 | ||
1469 | vadduwm $out7,$out5,$two | ||
1470 | vxor $out5,$out5,$rndkey0 | ||
1471 | vadduwm $ivec,$out6,$two # next counter value | ||
1472 | vxor $out6,$out6,$rndkey0 | ||
1473 | vxor $out7,$out7,$rndkey0 | ||
1474 | |||
1475 | mtctr $rounds | ||
1476 | b Loop_ctr32_enc8x | ||
1477 | .align 5 | ||
1478 | Loop_ctr32_enc8x: | ||
1479 | vcipher $out0,$out0,v24 | ||
1480 | vcipher $out1,$out1,v24 | ||
1481 | vcipher $out2,$out2,v24 | ||
1482 | vcipher $out3,$out3,v24 | ||
1483 | vcipher $out4,$out4,v24 | ||
1484 | vcipher $out5,$out5,v24 | ||
1485 | vcipher $out6,$out6,v24 | ||
1486 | vcipher $out7,$out7,v24 | ||
1487 | Loop_ctr32_enc8x_middle: | ||
1488 | lvx v24,$x20,$key_ # round[3] | ||
1489 | addi $key_,$key_,0x20 | ||
1490 | |||
1491 | vcipher $out0,$out0,v25 | ||
1492 | vcipher $out1,$out1,v25 | ||
1493 | vcipher $out2,$out2,v25 | ||
1494 | vcipher $out3,$out3,v25 | ||
1495 | vcipher $out4,$out4,v25 | ||
1496 | vcipher $out5,$out5,v25 | ||
1497 | vcipher $out6,$out6,v25 | ||
1498 | vcipher $out7,$out7,v25 | ||
1499 | lvx v25,$x10,$key_ # round[4] | ||
1500 | bdnz Loop_ctr32_enc8x | ||
1501 | |||
1502 | subic r11,$len,256 # $len-256, borrow $key_ | ||
1503 | vcipher $out0,$out0,v24 | ||
1504 | vcipher $out1,$out1,v24 | ||
1505 | vcipher $out2,$out2,v24 | ||
1506 | vcipher $out3,$out3,v24 | ||
1507 | vcipher $out4,$out4,v24 | ||
1508 | vcipher $out5,$out5,v24 | ||
1509 | vcipher $out6,$out6,v24 | ||
1510 | vcipher $out7,$out7,v24 | ||
1511 | |||
1512 | subfe r0,r0,r0 # borrow?-1:0 | ||
1513 | vcipher $out0,$out0,v25 | ||
1514 | vcipher $out1,$out1,v25 | ||
1515 | vcipher $out2,$out2,v25 | ||
1516 | vcipher $out3,$out3,v25 | ||
1517 | vcipher $out4,$out4,v25 | ||
1518 | vcipher $out5,$out5,v25 | ||
1519 | vcipher $out6,$out6,v25 | ||
1520 | vcipher $out7,$out7,v25 | ||
1521 | |||
1522 | and r0,r0,r11 | ||
1523 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
1524 | vcipher $out0,$out0,v26 | ||
1525 | vcipher $out1,$out1,v26 | ||
1526 | vcipher $out2,$out2,v26 | ||
1527 | vcipher $out3,$out3,v26 | ||
1528 | vcipher $out4,$out4,v26 | ||
1529 | vcipher $out5,$out5,v26 | ||
1530 | vcipher $out6,$out6,v26 | ||
1531 | vcipher $out7,$out7,v26 | ||
1532 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
1533 | |||
1534 | subic $len,$len,129 # $len-=129 | ||
1535 | vcipher $out0,$out0,v27 | ||
1536 | addi $len,$len,1 # $len-=128 really | ||
1537 | vcipher $out1,$out1,v27 | ||
1538 | vcipher $out2,$out2,v27 | ||
1539 | vcipher $out3,$out3,v27 | ||
1540 | vcipher $out4,$out4,v27 | ||
1541 | vcipher $out5,$out5,v27 | ||
1542 | vcipher $out6,$out6,v27 | ||
1543 | vcipher $out7,$out7,v27 | ||
1544 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
1545 | |||
1546 | vcipher $out0,$out0,v28 | ||
1547 | lvx_u $in0,$x00,$inp # load input | ||
1548 | vcipher $out1,$out1,v28 | ||
1549 | lvx_u $in1,$x10,$inp | ||
1550 | vcipher $out2,$out2,v28 | ||
1551 | lvx_u $in2,$x20,$inp | ||
1552 | vcipher $out3,$out3,v28 | ||
1553 | lvx_u $in3,$x30,$inp | ||
1554 | vcipher $out4,$out4,v28 | ||
1555 | lvx_u $in4,$x40,$inp | ||
1556 | vcipher $out5,$out5,v28 | ||
1557 | lvx_u $in5,$x50,$inp | ||
1558 | vcipher $out6,$out6,v28 | ||
1559 | lvx_u $in6,$x60,$inp | ||
1560 | vcipher $out7,$out7,v28 | ||
1561 | lvx_u $in7,$x70,$inp | ||
1562 | addi $inp,$inp,0x80 | ||
1563 | |||
1564 | vcipher $out0,$out0,v29 | ||
1565 | le?vperm $in0,$in0,$in0,$inpperm | ||
1566 | vcipher $out1,$out1,v29 | ||
1567 | le?vperm $in1,$in1,$in1,$inpperm | ||
1568 | vcipher $out2,$out2,v29 | ||
1569 | le?vperm $in2,$in2,$in2,$inpperm | ||
1570 | vcipher $out3,$out3,v29 | ||
1571 | le?vperm $in3,$in3,$in3,$inpperm | ||
1572 | vcipher $out4,$out4,v29 | ||
1573 | le?vperm $in4,$in4,$in4,$inpperm | ||
1574 | vcipher $out5,$out5,v29 | ||
1575 | le?vperm $in5,$in5,$in5,$inpperm | ||
1576 | vcipher $out6,$out6,v29 | ||
1577 | le?vperm $in6,$in6,$in6,$inpperm | ||
1578 | vcipher $out7,$out7,v29 | ||
1579 | le?vperm $in7,$in7,$in7,$inpperm | ||
1580 | |||
1581 | add $inp,$inp,r0 # $inp is adjusted in such | ||
1582 | # way that at exit from the | ||
1583 | # loop inX-in7 are loaded | ||
1584 | # with last "words" | ||
1585 | subfe. r0,r0,r0 # borrow?-1:0 | ||
1586 | vcipher $out0,$out0,v30 | ||
1587 | vxor $in0,$in0,v31 # xor with last round key | ||
1588 | vcipher $out1,$out1,v30 | ||
1589 | vxor $in1,$in1,v31 | ||
1590 | vcipher $out2,$out2,v30 | ||
1591 | vxor $in2,$in2,v31 | ||
1592 | vcipher $out3,$out3,v30 | ||
1593 | vxor $in3,$in3,v31 | ||
1594 | vcipher $out4,$out4,v30 | ||
1595 | vxor $in4,$in4,v31 | ||
1596 | vcipher $out5,$out5,v30 | ||
1597 | vxor $in5,$in5,v31 | ||
1598 | vcipher $out6,$out6,v30 | ||
1599 | vxor $in6,$in6,v31 | ||
1600 | vcipher $out7,$out7,v30 | ||
1601 | vxor $in7,$in7,v31 | ||
1602 | |||
1603 | bne Lctr32_enc8x_break # did $len-129 borrow? | ||
1604 | |||
1605 | vcipherlast $in0,$out0,$in0 | ||
1606 | vcipherlast $in1,$out1,$in1 | ||
1607 | vadduwm $out1,$ivec,$one # counter values ... | ||
1608 | vcipherlast $in2,$out2,$in2 | ||
1609 | vadduwm $out2,$ivec,$two | ||
1610 | vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] | ||
1611 | vcipherlast $in3,$out3,$in3 | ||
1612 | vadduwm $out3,$out1,$two | ||
1613 | vxor $out1,$out1,$rndkey0 | ||
1614 | vcipherlast $in4,$out4,$in4 | ||
1615 | vadduwm $out4,$out2,$two | ||
1616 | vxor $out2,$out2,$rndkey0 | ||
1617 | vcipherlast $in5,$out5,$in5 | ||
1618 | vadduwm $out5,$out3,$two | ||
1619 | vxor $out3,$out3,$rndkey0 | ||
1620 | vcipherlast $in6,$out6,$in6 | ||
1621 | vadduwm $out6,$out4,$two | ||
1622 | vxor $out4,$out4,$rndkey0 | ||
1623 | vcipherlast $in7,$out7,$in7 | ||
1624 | vadduwm $out7,$out5,$two | ||
1625 | vxor $out5,$out5,$rndkey0 | ||
1626 | le?vperm $in0,$in0,$in0,$inpperm | ||
1627 | vadduwm $ivec,$out6,$two # next counter value | ||
1628 | vxor $out6,$out6,$rndkey0 | ||
1629 | le?vperm $in1,$in1,$in1,$inpperm | ||
1630 | vxor $out7,$out7,$rndkey0 | ||
1631 | mtctr $rounds | ||
1632 | |||
1633 | vcipher $out0,$out0,v24 | ||
1634 | stvx_u $in0,$x00,$out | ||
1635 | le?vperm $in2,$in2,$in2,$inpperm | ||
1636 | vcipher $out1,$out1,v24 | ||
1637 | stvx_u $in1,$x10,$out | ||
1638 | le?vperm $in3,$in3,$in3,$inpperm | ||
1639 | vcipher $out2,$out2,v24 | ||
1640 | stvx_u $in2,$x20,$out | ||
1641 | le?vperm $in4,$in4,$in4,$inpperm | ||
1642 | vcipher $out3,$out3,v24 | ||
1643 | stvx_u $in3,$x30,$out | ||
1644 | le?vperm $in5,$in5,$in5,$inpperm | ||
1645 | vcipher $out4,$out4,v24 | ||
1646 | stvx_u $in4,$x40,$out | ||
1647 | le?vperm $in6,$in6,$in6,$inpperm | ||
1648 | vcipher $out5,$out5,v24 | ||
1649 | stvx_u $in5,$x50,$out | ||
1650 | le?vperm $in7,$in7,$in7,$inpperm | ||
1651 | vcipher $out6,$out6,v24 | ||
1652 | stvx_u $in6,$x60,$out | ||
1653 | vcipher $out7,$out7,v24 | ||
1654 | stvx_u $in7,$x70,$out | ||
1655 | addi $out,$out,0x80 | ||
1656 | |||
1657 | b Loop_ctr32_enc8x_middle | ||
1658 | |||
1659 | .align 5 | ||
1660 | Lctr32_enc8x_break: | ||
1661 | cmpwi $len,-0x60 | ||
1662 | blt Lctr32_enc8x_one | ||
1663 | nop | ||
1664 | beq Lctr32_enc8x_two | ||
1665 | cmpwi $len,-0x40 | ||
1666 | blt Lctr32_enc8x_three | ||
1667 | nop | ||
1668 | beq Lctr32_enc8x_four | ||
1669 | cmpwi $len,-0x20 | ||
1670 | blt Lctr32_enc8x_five | ||
1671 | nop | ||
1672 | beq Lctr32_enc8x_six | ||
1673 | cmpwi $len,0x00 | ||
1674 | blt Lctr32_enc8x_seven | ||
1675 | |||
1676 | Lctr32_enc8x_eight: | ||
1677 | vcipherlast $out0,$out0,$in0 | ||
1678 | vcipherlast $out1,$out1,$in1 | ||
1679 | vcipherlast $out2,$out2,$in2 | ||
1680 | vcipherlast $out3,$out3,$in3 | ||
1681 | vcipherlast $out4,$out4,$in4 | ||
1682 | vcipherlast $out5,$out5,$in5 | ||
1683 | vcipherlast $out6,$out6,$in6 | ||
1684 | vcipherlast $out7,$out7,$in7 | ||
1685 | |||
1686 | le?vperm $out0,$out0,$out0,$inpperm | ||
1687 | le?vperm $out1,$out1,$out1,$inpperm | ||
1688 | stvx_u $out0,$x00,$out | ||
1689 | le?vperm $out2,$out2,$out2,$inpperm | ||
1690 | stvx_u $out1,$x10,$out | ||
1691 | le?vperm $out3,$out3,$out3,$inpperm | ||
1692 | stvx_u $out2,$x20,$out | ||
1693 | le?vperm $out4,$out4,$out4,$inpperm | ||
1694 | stvx_u $out3,$x30,$out | ||
1695 | le?vperm $out5,$out5,$out5,$inpperm | ||
1696 | stvx_u $out4,$x40,$out | ||
1697 | le?vperm $out6,$out6,$out6,$inpperm | ||
1698 | stvx_u $out5,$x50,$out | ||
1699 | le?vperm $out7,$out7,$out7,$inpperm | ||
1700 | stvx_u $out6,$x60,$out | ||
1701 | stvx_u $out7,$x70,$out | ||
1702 | addi $out,$out,0x80 | ||
1703 | b Lctr32_enc8x_done | ||
1704 | |||
1705 | .align 5 | ||
1706 | Lctr32_enc8x_seven: | ||
1707 | vcipherlast $out0,$out0,$in1 | ||
1708 | vcipherlast $out1,$out1,$in2 | ||
1709 | vcipherlast $out2,$out2,$in3 | ||
1710 | vcipherlast $out3,$out3,$in4 | ||
1711 | vcipherlast $out4,$out4,$in5 | ||
1712 | vcipherlast $out5,$out5,$in6 | ||
1713 | vcipherlast $out6,$out6,$in7 | ||
1714 | |||
1715 | le?vperm $out0,$out0,$out0,$inpperm | ||
1716 | le?vperm $out1,$out1,$out1,$inpperm | ||
1717 | stvx_u $out0,$x00,$out | ||
1718 | le?vperm $out2,$out2,$out2,$inpperm | ||
1719 | stvx_u $out1,$x10,$out | ||
1720 | le?vperm $out3,$out3,$out3,$inpperm | ||
1721 | stvx_u $out2,$x20,$out | ||
1722 | le?vperm $out4,$out4,$out4,$inpperm | ||
1723 | stvx_u $out3,$x30,$out | ||
1724 | le?vperm $out5,$out5,$out5,$inpperm | ||
1725 | stvx_u $out4,$x40,$out | ||
1726 | le?vperm $out6,$out6,$out6,$inpperm | ||
1727 | stvx_u $out5,$x50,$out | ||
1728 | stvx_u $out6,$x60,$out | ||
1729 | addi $out,$out,0x70 | ||
1730 | b Lctr32_enc8x_done | ||
1731 | |||
1732 | .align 5 | ||
1733 | Lctr32_enc8x_six: | ||
1734 | vcipherlast $out0,$out0,$in2 | ||
1735 | vcipherlast $out1,$out1,$in3 | ||
1736 | vcipherlast $out2,$out2,$in4 | ||
1737 | vcipherlast $out3,$out3,$in5 | ||
1738 | vcipherlast $out4,$out4,$in6 | ||
1739 | vcipherlast $out5,$out5,$in7 | ||
1740 | |||
1741 | le?vperm $out0,$out0,$out0,$inpperm | ||
1742 | le?vperm $out1,$out1,$out1,$inpperm | ||
1743 | stvx_u $out0,$x00,$out | ||
1744 | le?vperm $out2,$out2,$out2,$inpperm | ||
1745 | stvx_u $out1,$x10,$out | ||
1746 | le?vperm $out3,$out3,$out3,$inpperm | ||
1747 | stvx_u $out2,$x20,$out | ||
1748 | le?vperm $out4,$out4,$out4,$inpperm | ||
1749 | stvx_u $out3,$x30,$out | ||
1750 | le?vperm $out5,$out5,$out5,$inpperm | ||
1751 | stvx_u $out4,$x40,$out | ||
1752 | stvx_u $out5,$x50,$out | ||
1753 | addi $out,$out,0x60 | ||
1754 | b Lctr32_enc8x_done | ||
1755 | |||
1756 | .align 5 | ||
1757 | Lctr32_enc8x_five: | ||
1758 | vcipherlast $out0,$out0,$in3 | ||
1759 | vcipherlast $out1,$out1,$in4 | ||
1760 | vcipherlast $out2,$out2,$in5 | ||
1761 | vcipherlast $out3,$out3,$in6 | ||
1762 | vcipherlast $out4,$out4,$in7 | ||
1763 | |||
1764 | le?vperm $out0,$out0,$out0,$inpperm | ||
1765 | le?vperm $out1,$out1,$out1,$inpperm | ||
1766 | stvx_u $out0,$x00,$out | ||
1767 | le?vperm $out2,$out2,$out2,$inpperm | ||
1768 | stvx_u $out1,$x10,$out | ||
1769 | le?vperm $out3,$out3,$out3,$inpperm | ||
1770 | stvx_u $out2,$x20,$out | ||
1771 | le?vperm $out4,$out4,$out4,$inpperm | ||
1772 | stvx_u $out3,$x30,$out | ||
1773 | stvx_u $out4,$x40,$out | ||
1774 | addi $out,$out,0x50 | ||
1775 | b Lctr32_enc8x_done | ||
1776 | |||
1777 | .align 5 | ||
1778 | Lctr32_enc8x_four: | ||
1779 | vcipherlast $out0,$out0,$in4 | ||
1780 | vcipherlast $out1,$out1,$in5 | ||
1781 | vcipherlast $out2,$out2,$in6 | ||
1782 | vcipherlast $out3,$out3,$in7 | ||
1783 | |||
1784 | le?vperm $out0,$out0,$out0,$inpperm | ||
1785 | le?vperm $out1,$out1,$out1,$inpperm | ||
1786 | stvx_u $out0,$x00,$out | ||
1787 | le?vperm $out2,$out2,$out2,$inpperm | ||
1788 | stvx_u $out1,$x10,$out | ||
1789 | le?vperm $out3,$out3,$out3,$inpperm | ||
1790 | stvx_u $out2,$x20,$out | ||
1791 | stvx_u $out3,$x30,$out | ||
1792 | addi $out,$out,0x40 | ||
1793 | b Lctr32_enc8x_done | ||
1794 | |||
.align	5
Lctr32_enc8x_three:			# 3 blocks remain in the tail
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lctr32_enc8x_done	# fixed: was Lcbc_dec8x_done, the
						# CBC-decrypt epilogue, which also
						# stores \$ivec back through \$ivp;
						# every other tail case branches to
						# Lctr32_enc8x_done
1809 | |||
.align	5
Lctr32_enc8x_two:			# 2 blocks remain in the tail
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lctr32_enc8x_done	# fixed: was Lcbc_dec8x_done, the
						# CBC-decrypt epilogue, which also
						# stores \$ivec back through \$ivp;
						# every other tail case branches to
						# Lctr32_enc8x_done
1821 | |||
1822 | .align 5 | ||
1823 | Lctr32_enc8x_one: | ||
1824 | vcipherlast $out0,$out0,$in7 | ||
1825 | |||
1826 | le?vperm $out0,$out0,$out0,$inpperm | ||
1827 | stvx_u $out0,0,$out | ||
1828 | addi $out,$out,0x10 | ||
1829 | |||
1830 | Lctr32_enc8x_done: | ||
1831 | li r10,`$FRAME+15` | ||
1832 | li r11,`$FRAME+31` | ||
1833 | stvx $inpperm,r10,$sp # wipe copies of round keys | ||
1834 | addi r10,r10,32 | ||
1835 | stvx $inpperm,r11,$sp | ||
1836 | addi r11,r11,32 | ||
1837 | stvx $inpperm,r10,$sp | ||
1838 | addi r10,r10,32 | ||
1839 | stvx $inpperm,r11,$sp | ||
1840 | addi r11,r11,32 | ||
1841 | stvx $inpperm,r10,$sp | ||
1842 | addi r10,r10,32 | ||
1843 | stvx $inpperm,r11,$sp | ||
1844 | addi r11,r11,32 | ||
1845 | stvx $inpperm,r10,$sp | ||
1846 | addi r10,r10,32 | ||
1847 | stvx $inpperm,r11,$sp | ||
1848 | addi r11,r11,32 | ||
1849 | |||
1850 | mtspr 256,$vrsave | ||
1851 | lvx v20,r10,$sp # ABI says so | ||
1852 | addi r10,r10,32 | ||
1853 | lvx v21,r11,$sp | ||
1854 | addi r11,r11,32 | ||
1855 | lvx v22,r10,$sp | ||
1856 | addi r10,r10,32 | ||
1857 | lvx v23,r11,$sp | ||
1858 | addi r11,r11,32 | ||
1859 | lvx v24,r10,$sp | ||
1860 | addi r10,r10,32 | ||
1861 | lvx v25,r11,$sp | ||
1862 | addi r11,r11,32 | ||
1863 | lvx v26,r10,$sp | ||
1864 | addi r10,r10,32 | ||
1865 | lvx v27,r11,$sp | ||
1866 | addi r11,r11,32 | ||
1867 | lvx v28,r10,$sp | ||
1868 | addi r10,r10,32 | ||
1869 | lvx v29,r11,$sp | ||
1870 | addi r11,r11,32 | ||
1871 | lvx v30,r10,$sp | ||
1872 | lvx v31,r11,$sp | ||
1873 | $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
1874 | $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
1875 | $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
1876 | $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
1877 | $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
1878 | $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
1879 | addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` | ||
1880 | blr | ||
1881 | .long 0 | ||
1882 | .byte 0,12,0x14,0,0x80,6,6,0 | ||
1883 | .long 0 | ||
1884 | .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks | ||
1885 | ___ | ||
1886 | }} }}} | ||
1887 | |||
1888 | my $consts=1; | ||
1889 | foreach(split("\n",$code)) { | ||
1890 | s/\`([^\`]*)\`/eval($1)/geo; | ||
1891 | |||
1892 | # constants table endian-specific conversion | ||
1893 | if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { | ||
1894 | my $conv=$3; | ||
1895 | my @bytes=(); | ||
1896 | |||
1897 | # convert to endian-agnostic format | ||
1898 | if ($1 eq "long") { | ||
1899 | foreach (split(/,\s*/,$2)) { | ||
1900 | my $l = /^0/?oct:int; | ||
1901 | push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; | ||
1902 | } | ||
1903 | } else { | ||
1904 | @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); | ||
1905 | } | ||
1906 | |||
1907 | # little-endian conversion | ||
1908 | if ($flavour =~ /le$/o) { | ||
1909 | SWITCH: for($conv) { | ||
1910 | /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; | ||
1911 | /\?rev/ && do { @bytes=reverse(@bytes); last; }; | ||
1912 | } | ||
1913 | } | ||
1914 | |||
1915 | #emit | ||
1916 | print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; | ||
1917 | next; | ||
1918 | } | ||
1919 | $consts=0 if (m/Lconsts:/o); # end of table | ||
1920 | |||
1921 | # instructions prefixed with '?' are endian-specific and need | ||
1922 | # to be adjusted accordingly... | ||
1923 | if ($flavour =~ /le$/o) { # little-endian | ||
1924 | s/le\?//o or | ||
1925 | s/be\?/#be#/o or | ||
1926 | s/\?lvsr/lvsl/o or | ||
1927 | s/\?lvsl/lvsr/o or | ||
1928 | s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or | ||
1929 | s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or | ||
1930 | s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; | ||
1931 | } else { # big-endian | ||
1932 | s/le\?/#le#/o or | ||
1933 | s/be\?//o or | ||
1934 | s/\?([a-z]+)/$1/o; | ||
1935 | } | ||
1936 | |||
1937 | print $_,"\n"; | ||
1938 | } | ||
1939 | |||
1940 | close STDOUT; | ||
diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl new file mode 100755 index 000000000000..e76a58c343c1 --- /dev/null +++ b/drivers/crypto/vmx/ghashp8-ppc.pl | |||
@@ -0,0 +1,234 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | # | ||
10 | # GHASH for PowerISA v2.07. | ||
11 | # | ||
12 | # July 2014 | ||
13 | # | ||
14 | # Accurate performance measurements are problematic, because it's | ||
15 | # always virtualized setup with possibly throttled processor. | ||
16 | # Relative comparison is therefore more informative. This initial | ||
17 | # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x | ||
18 | # faster than "4-bit" integer-only compiler-generated 64-bit code. | ||
19 | # "Initial version" means that there is room for further improvement. | ||
20 | |||
21 | $flavour=shift; | ||
22 | $output =shift; | ||
23 | |||
24 | if ($flavour =~ /64/) { | ||
25 | $SIZE_T=8; | ||
26 | $LRSAVE=2*$SIZE_T; | ||
27 | $STU="stdu"; | ||
28 | $POP="ld"; | ||
29 | $PUSH="std"; | ||
30 | } elsif ($flavour =~ /32/) { | ||
31 | $SIZE_T=4; | ||
32 | $LRSAVE=$SIZE_T; | ||
33 | $STU="stwu"; | ||
34 | $POP="lwz"; | ||
35 | $PUSH="stw"; | ||
36 | } else { die "nonsense $flavour"; } | ||
37 | |||
38 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | ||
39 | ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or | ||
40 | ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or | ||
41 | die "can't locate ppc-xlate.pl"; | ||
42 | |||
43 | open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!"; # low-precedence 'or': with '||' the die could never fire (bound to the string) | ||
44 | |||
45 | my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block | ||
46 | |||
47 | my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); | ||
48 | my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); | ||
49 | my $vrsave="r12"; | ||
50 | |||
51 | $code=<<___; | ||
52 | .machine "any" | ||
53 | |||
54 | .text | ||
55 | |||
56 | .globl .gcm_init_p8 | ||
57 | .align 5 | ||
58 | .gcm_init_p8: | ||
59 | lis r0,0xfff0 | ||
60 | li r8,0x10 | ||
61 | mfspr $vrsave,256 | ||
62 | li r9,0x20 | ||
63 | mtspr 256,r0 | ||
64 | li r10,0x30 | ||
65 | lvx_u $H,0,r4 # load H | ||
66 | |||
67 | vspltisb $xC2,-16 # 0xf0 | ||
68 | vspltisb $t0,1 # one | ||
69 | vaddubm $xC2,$xC2,$xC2 # 0xe0 | ||
70 | vxor $zero,$zero,$zero | ||
71 | vor $xC2,$xC2,$t0 # 0xe1 | ||
72 | vsldoi $xC2,$xC2,$zero,15 # 0xe1... | ||
73 | vsldoi $t1,$zero,$t0,1 # ...1 | ||
74 | vaddubm $xC2,$xC2,$xC2 # 0xc2... | ||
75 | vspltisb $t2,7 | ||
76 | vor $xC2,$xC2,$t1 # 0xc2....01 | ||
77 | vspltb $t1,$H,0 # most significant byte | ||
78 | vsl $H,$H,$t0 # H<<=1 | ||
79 | vsrab $t1,$t1,$t2 # broadcast carry bit | ||
80 | vand $t1,$t1,$xC2 | ||
81 | vxor $H,$H,$t1 # twisted H | ||
82 | |||
83 | vsldoi $H,$H,$H,8 # twist even more ... | ||
84 | vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 | ||
85 | vsldoi $Hl,$zero,$H,8 # ... and split | ||
86 | vsldoi $Hh,$H,$zero,8 | ||
87 | |||
88 | stvx_u $xC2,0,r3 # save pre-computed table | ||
89 | stvx_u $Hl,r8,r3 | ||
90 | stvx_u $H, r9,r3 | ||
91 | stvx_u $Hh,r10,r3 | ||
92 | |||
93 | mtspr 256,$vrsave | ||
94 | blr | ||
95 | .long 0 | ||
96 | .byte 0,12,0x14,0,0,0,2,0 | ||
97 | .long 0 | ||
98 | .size .gcm_init_p8,.-.gcm_init_p8 | ||
99 | |||
100 | .globl .gcm_gmult_p8 | ||
101 | .align 5 | ||
102 | .gcm_gmult_p8: | ||
103 | lis r0,0xfff8 | ||
104 | li r8,0x10 | ||
105 | mfspr $vrsave,256 | ||
106 | li r9,0x20 | ||
107 | mtspr 256,r0 | ||
108 | li r10,0x30 | ||
109 | lvx_u $IN,0,$Xip # load Xi | ||
110 | |||
111 | lvx_u $Hl,r8,$Htbl # load pre-computed table | ||
112 | le?lvsl $lemask,r0,r0 | ||
113 | lvx_u $H, r9,$Htbl | ||
114 | le?vspltisb $t0,0x07 | ||
115 | lvx_u $Hh,r10,$Htbl | ||
116 | le?vxor $lemask,$lemask,$t0 | ||
117 | lvx_u $xC2,0,$Htbl | ||
118 | le?vperm $IN,$IN,$IN,$lemask | ||
119 | vxor $zero,$zero,$zero | ||
120 | |||
121 | vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo | ||
122 | vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi | ||
123 | vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi | ||
124 | |||
125 | vpmsumd $t2,$Xl,$xC2 # 1st phase | ||
126 | |||
127 | vsldoi $t0,$Xm,$zero,8 | ||
128 | vsldoi $t1,$zero,$Xm,8 | ||
129 | vxor $Xl,$Xl,$t0 | ||
130 | vxor $Xh,$Xh,$t1 | ||
131 | |||
132 | vsldoi $Xl,$Xl,$Xl,8 | ||
133 | vxor $Xl,$Xl,$t2 | ||
134 | |||
135 | vsldoi $t1,$Xl,$Xl,8 # 2nd phase | ||
136 | vpmsumd $Xl,$Xl,$xC2 | ||
137 | vxor $t1,$t1,$Xh | ||
138 | vxor $Xl,$Xl,$t1 | ||
139 | |||
140 | le?vperm $Xl,$Xl,$Xl,$lemask | ||
141 | stvx_u $Xl,0,$Xip # write out Xi | ||
142 | |||
143 | mtspr 256,$vrsave | ||
144 | blr | ||
145 | .long 0 | ||
146 | .byte 0,12,0x14,0,0,0,2,0 | ||
147 | .long 0 | ||
148 | .size .gcm_gmult_p8,.-.gcm_gmult_p8 | ||
149 | |||
150 | .globl .gcm_ghash_p8 | ||
151 | .align 5 | ||
152 | .gcm_ghash_p8: | ||
153 | lis r0,0xfff8 | ||
154 | li r8,0x10 | ||
155 | mfspr $vrsave,256 | ||
156 | li r9,0x20 | ||
157 | mtspr 256,r0 | ||
158 | li r10,0x30 | ||
159 | lvx_u $Xl,0,$Xip # load Xi | ||
160 | |||
161 | lvx_u $Hl,r8,$Htbl # load pre-computed table | ||
162 | le?lvsl $lemask,r0,r0 | ||
163 | lvx_u $H, r9,$Htbl | ||
164 | le?vspltisb $t0,0x07 | ||
165 | lvx_u $Hh,r10,$Htbl | ||
166 | le?vxor $lemask,$lemask,$t0 | ||
167 | lvx_u $xC2,0,$Htbl | ||
168 | le?vperm $Xl,$Xl,$Xl,$lemask | ||
169 | vxor $zero,$zero,$zero | ||
170 | |||
171 | lvx_u $IN,0,$inp | ||
172 | addi $inp,$inp,16 | ||
173 | subi $len,$len,16 | ||
174 | le?vperm $IN,$IN,$IN,$lemask | ||
175 | vxor $IN,$IN,$Xl | ||
176 | b Loop | ||
177 | |||
178 | .align 5 | ||
179 | Loop: | ||
180 | subic $len,$len,16 | ||
181 | vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo | ||
182 | subfe. r0,r0,r0 # borrow?-1:0 | ||
183 | vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi | ||
184 | and r0,r0,$len | ||
185 | vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi | ||
186 | add $inp,$inp,r0 | ||
187 | |||
188 | vpmsumd $t2,$Xl,$xC2 # 1st phase | ||
189 | |||
190 | vsldoi $t0,$Xm,$zero,8 | ||
191 | vsldoi $t1,$zero,$Xm,8 | ||
192 | vxor $Xl,$Xl,$t0 | ||
193 | vxor $Xh,$Xh,$t1 | ||
194 | |||
195 | vsldoi $Xl,$Xl,$Xl,8 | ||
196 | vxor $Xl,$Xl,$t2 | ||
197 | lvx_u $IN,0,$inp | ||
198 | addi $inp,$inp,16 | ||
199 | |||
200 | vsldoi $t1,$Xl,$Xl,8 # 2nd phase | ||
201 | vpmsumd $Xl,$Xl,$xC2 | ||
202 | le?vperm $IN,$IN,$IN,$lemask | ||
203 | vxor $t1,$t1,$Xh | ||
204 | vxor $IN,$IN,$t1 | ||
205 | vxor $IN,$IN,$Xl | ||
206 | beq Loop # did $len-=16 borrow? | ||
207 | |||
208 | vxor $Xl,$Xl,$t1 | ||
209 | le?vperm $Xl,$Xl,$Xl,$lemask | ||
210 | stvx_u $Xl,0,$Xip # write out Xi | ||
211 | |||
212 | mtspr 256,$vrsave | ||
213 | blr | ||
214 | .long 0 | ||
215 | .byte 0,12,0x14,0,0,0,4,0 | ||
216 | .long 0 | ||
217 | .size .gcm_ghash_p8,.-.gcm_ghash_p8 | ||
218 | |||
219 | .asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" | ||
220 | .align 2 | ||
221 | ___ | ||
222 | |||
223 | foreach (split("\n",$code)) { | ||
224 | if ($flavour =~ /le$/o) { # little-endian | ||
225 | s/le\?//o or | ||
226 | s/be\?/#be#/o; | ||
227 | } else { | ||
228 | s/le\?/#le#/o or | ||
229 | s/be\?//o; | ||
230 | } | ||
231 | print $_,"\n"; | ||
232 | } | ||
233 | |||
234 | close STDOUT; # enforce flush | ||
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl new file mode 100755 index 000000000000..f89e81429931 --- /dev/null +++ b/drivers/crypto/vmx/ppc-xlate.pl | |||
@@ -0,0 +1,226 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # PowerPC assembler distiller by <appro>. | ||
4 | |||
5 | my $flavour = shift; | ||
6 | my $output = shift; | ||
7 | open STDOUT,">",$output or die "can't open $output: $!"; # 3-arg open + low-precedence 'or': with '||' the die could never fire (bound to the string) | ||
8 | |||
9 | my %GLOBALS; | ||
10 | my $dotinlocallabels=($flavour=~/linux/)?1:0; | ||
11 | |||
12 | ################################################################ | ||
13 | # directives which need special treatment on different platforms | ||
14 | ################################################################ | ||
15 | my $globl = sub { | ||
16 | my $junk = shift; | ||
17 | my $name = shift; | ||
18 | my $global = \$GLOBALS{$name}; | ||
19 | my $ret; | ||
20 | |||
21 | $name =~ s|^[\.\_]||; | ||
22 | |||
23 | SWITCH: for ($flavour) { | ||
24 | /aix/ && do { $name = ".$name"; | ||
25 | last; | ||
26 | }; | ||
27 | /osx/ && do { $name = "_$name"; | ||
28 | last; | ||
29 | }; | ||
30 | /linux.*(32|64le)/ | ||
31 | && do { $ret .= ".globl $name\n"; | ||
32 | $ret .= ".type $name,\@function"; | ||
33 | last; | ||
34 | }; | ||
35 | /linux.*64/ && do { $ret .= ".globl $name\n"; | ||
36 | $ret .= ".type $name,\@function\n"; | ||
37 | $ret .= ".section \".opd\",\"aw\"\n"; | ||
38 | $ret .= ".align 3\n"; | ||
39 | $ret .= "$name:\n"; | ||
40 | $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; | ||
41 | $ret .= ".previous\n"; | ||
42 | |||
43 | $name = ".$name"; | ||
44 | last; | ||
45 | }; | ||
46 | } | ||
47 | |||
48 | $ret = ".globl $name" if (!$ret); | ||
49 | $$global = $name; | ||
50 | $ret; | ||
51 | }; | ||
52 | my $text = sub { | ||
53 | my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; | ||
54 | $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); | ||
55 | $ret; | ||
56 | }; | ||
57 | my $machine = sub { | ||
58 | my $junk = shift; | ||
59 | my $arch = shift; | ||
60 | if ($flavour =~ /osx/) | ||
61 | { $arch =~ s/\"//g; | ||
62 | $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); | ||
63 | } | ||
64 | ".machine $arch"; | ||
65 | }; | ||
66 | my $size = sub { | ||
67 | if ($flavour =~ /linux/) | ||
68 | { shift; | ||
69 | my $name = shift; $name =~ s|^[\.\_]||; | ||
70 | my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; | ||
71 | $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); | ||
72 | $ret; | ||
73 | } | ||
74 | else | ||
75 | { ""; } | ||
76 | }; | ||
77 | my $asciz = sub { | ||
78 | shift; | ||
79 | my $line = join(",",@_); | ||
80 | if ($line =~ /^"(.*)"$/) | ||
81 | { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } | ||
82 | else | ||
83 | { ""; } | ||
84 | }; | ||
85 | my $quad = sub { | ||
86 | shift; | ||
87 | my @ret; | ||
88 | my ($hi,$lo); | ||
89 | for (@_) { | ||
90 | if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) | ||
91 | { $hi=$1?"0x$1":"0"; $lo="0x$2"; } | ||
92 | elsif (/^([0-9]+)$/o) | ||
93 | { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl | ||
94 | else | ||
95 | { $hi=undef; $lo=$_; } | ||
96 | |||
97 | if (defined($hi)) | ||
98 | { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } | ||
99 | else | ||
100 | { push(@ret,".quad $lo"); } | ||
101 | } | ||
102 | join("\n",@ret); | ||
103 | }; | ||
104 | |||
105 | ################################################################ | ||
106 | # simplified mnemonics not handled by at least one assembler | ||
107 | ################################################################ | ||
108 | my $cmplw = sub { | ||
109 | my $f = shift; | ||
110 | my $cr = 0; $cr = shift if ($#_>1); | ||
111 | # Some out-of-date 32-bit GNU assembler just can't handle cmplw... | ||
112 | ($flavour =~ /linux.*32/) ? | ||
113 | " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : | ||
114 | " cmplw ".join(',',$cr,@_); | ||
115 | }; | ||
116 | my $bdnz = sub { | ||
117 | my $f = shift; | ||
118 | my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint | ||
119 | " bc $bo,0,".shift; | ||
120 | } if ($flavour!~/linux/); | ||
121 | my $bltlr = sub { | ||
122 | my $f = shift; | ||
123 | my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint | ||
124 | ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints | ||
125 | " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : | ||
126 | " bclr $bo,0"; | ||
127 | }; | ||
128 | my $bnelr = sub { | ||
129 | my $f = shift; | ||
130 | my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint | ||
131 | ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints | ||
132 | " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : | ||
133 | " bclr $bo,2"; | ||
134 | }; | ||
135 | my $beqlr = sub { | ||
136 | my $f = shift; | ||
137 | my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint | ||
138 | ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints | ||
139 | " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : | ||
140 | " bclr $bo,2"; | ||
141 | }; | ||
142 | # GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two | ||
143 | # arguments is 64, with "operand out of range" error. | ||
144 | my $extrdi = sub { | ||
145 | my ($f,$ra,$rs,$n,$b) = @_; | ||
146 | $b = ($b+$n)&63; $n = 64-$n; | ||
147 | " rldicl $ra,$rs,$b,$n"; | ||
148 | }; | ||
149 | my $vmr = sub { | ||
150 | my ($f,$vx,$vy) = @_; | ||
151 | " vor $vx,$vy,$vy"; | ||
152 | }; | ||
153 | |||
154 | # PowerISA 2.06 stuff | ||
155 | sub vsxmem_op { | ||
156 | my ($f, $vrt, $ra, $rb, $op) = @_; | ||
157 | " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); | ||
158 | } | ||
159 | # made-up unaligned memory reference AltiVec/VMX instructions | ||
160 | my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x | ||
161 | my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x | ||
162 | my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx | ||
163 | my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx | ||
164 | my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x | ||
165 | my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x | ||
166 | |||
167 | # PowerISA 2.07 stuff | ||
168 | sub vcrypto_op { | ||
169 | my ($f, $vrt, $vra, $vrb, $op) = @_; | ||
170 | " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; | ||
171 | } | ||
172 | my $vcipher = sub { vcrypto_op(@_, 1288); }; | ||
173 | my $vcipherlast = sub { vcrypto_op(@_, 1289); }; | ||
174 | my $vncipher = sub { vcrypto_op(@_, 1352); }; | ||
175 | my $vncipherlast= sub { vcrypto_op(@_, 1353); }; | ||
176 | my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; | ||
177 | my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; | ||
178 | my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; | ||
179 | my $vpmsumb = sub { vcrypto_op(@_, 1032); }; | ||
180 | my $vpmsumd = sub { vcrypto_op(@_, 1224); }; | ||
181 | my $vpmsubh = sub { vcrypto_op(@_, 1096); }; | ||
182 | my $vpmsumw = sub { vcrypto_op(@_, 1160); }; | ||
183 | my $vaddudm = sub { vcrypto_op(@_, 192); }; | ||
184 | |||
185 | my $mtsle = sub { | ||
186 | my ($f, $arg) = @_; | ||
187 | " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); | ||
188 | }; | ||
189 | |||
190 | while($line=<>) { | ||
191 | |||
192 | $line =~ s|[#!;].*$||; # get rid of asm-style comments... | ||
193 | $line =~ s|/\*.*\*/||; # ... and C-style comments... | ||
194 | $line =~ s|^\s+||; # ... and skip white spaces in beginning... | ||
195 | $line =~ s|\s+$||; # ... and at the end | ||
196 | |||
197 | { | ||
198 | $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel | ||
199 | $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); | ||
200 | } | ||
201 | |||
202 | { | ||
203 | $line =~ s|(^[\.\w]+)\:\s*||; | ||
204 | my $label = $1; | ||
205 | if ($label) { | ||
206 | printf "%s:",($GLOBALS{$label} or $label); | ||
207 | printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/); | ||
208 | } | ||
209 | } | ||
210 | |||
211 | { | ||
212 | $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; | ||
213 | my $c = $1; $c = "\t" if ($c eq ""); | ||
214 | my $mnemonic = $2; | ||
215 | my $f = $3; | ||
216 | my $opcode = eval("\$$mnemonic"); | ||
217 | $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); | ||
218 | if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } | ||
219 | elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } | ||
220 | } | ||
221 | |||
222 | print $line if ($line); | ||
223 | print "\n"; | ||
224 | } | ||
225 | |||
226 | close STDOUT; | ||