author    Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>  2015-02-06 11:59:35 -0500
committer Herbert Xu <herbert@gondor.apana.org.au>  2015-02-28 05:13:46 -0500
commit    5c380d623ed30b71a2441fb4f2e053a4e1a50794 (patch)
tree      90caf15f95a1adc480bf91b9736a5d16028e6045
parent    cc333cd68dfae191ce02308657a50f21d63b7cd5 (diff)
crypto: vmx - Add support for VMX instructions by ASM
OpenSSL implements optimized ASM algorithms that support the VMX instructions of the POWER8 CPU. These scripts generate an endian-agnostic ASM implementation, supporting both big- and little-endian builds (an invocation sketch follows the diffstat below).
- aesp8-ppc.pl: implements support for the AES instructions introduced with the POWER8 processor.
- ghashp8-ppc.pl: implements support for GHASH on POWER8.
- ppc-xlate.pl: PPC assembler distiller.
This code has been adopted from the OpenSSL project in collaboration with the original author (Andy Polyakov <appro@openssl.org>).
Signed-off-by: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rwxr-xr-x  drivers/crypto/vmx/aesp8-ppc.pl   | 1940
-rwxr-xr-x  drivers/crypto/vmx/ghashp8-ppc.pl |  234
-rwxr-xr-x  drivers/crypto/vmx/ppc-xlate.pl   |  226
3 files changed, 2400 insertions(+), 0 deletions(-)
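[Editorial note, not part of the commit: each script takes a "flavour" argument that selects 32- vs 64-bit and big- vs little-endian output and pipes the result through the ppc-xlate.pl distiller. Assuming flavour names matching the scripts' own tests (/64/ and /le$/), a by-hand invocation would look like:

    perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S
    perl ghashp8-ppc.pl linux-ppc64 ghashp8-ppc.S

The kernel build invokes the same scripts from a Makefile rule; that rule is outside this diff.]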
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
new file mode 100755
index 000000000000..3ee8979e7625
--- /dev/null
+++ b/drivers/crypto/vmx/aesp8-ppc.pl
@@ -0,0 +1,1940 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# This module implements support for AES instructions as per the
11# PowerISA specification, version 2.07, first implemented by the
12# POWER8 processor. The module is endian-agnostic in the sense that it
13# supports both big- and little-endian cases. Data alignment in
14# parallelizable modes is handled with VSX loads and stores, which
15# implies the MSR.VSX flag being set. Note that the ISA specification
16# doesn't prohibit alignment exceptions for these instructions on page
17# boundaries. Initially alignment was handled in a pure AltiVec/VMX
18# way [when data is aligned programmatically, which in turn guarantees
19# exception-free execution], but that turned out to hamper performance
20# when vcipher instructions are interleaved. It's reckoned that the
21# eventual misalignment penalties at page boundaries are on average
22# lower than the additional overhead of the pure AltiVec approach.
23
24$flavour = shift;
25
26if ($flavour =~ /64/) {
27 $SIZE_T =8;
28 $LRSAVE =2*$SIZE_T;
29 $STU ="stdu";
30 $POP ="ld";
31 $PUSH ="std";
32 $UCMP ="cmpld";
33 $SHL ="sldi";
34} elsif ($flavour =~ /32/) {
35 $SIZE_T =4;
36 $LRSAVE =$SIZE_T;
37 $STU ="stwu";
38 $POP ="lwz";
39 $PUSH ="stw";
40 $UCMP ="cmplw";
41 $SHL ="slwi";
42} else { die "nonsense $flavour"; }
43
44$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
45
46$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
47( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
48( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
49die "can't locate ppc-xlate.pl";
50
51open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
52
53$FRAME=8*$SIZE_T;
54$prefix="aes_p8";
55
56$sp="r1";
57$vrsave="r12";
58
59#########################################################################
60{{{ # Key setup procedures #
61my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
62my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
63my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
64
65$code.=<<___;
66.machine "any"
67
68.text
69
70.align 7
71rcon:
72.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
73.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
74.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
75.long 0,0,0,0 ?asis
76Lconsts:
77 mflr r0
78 bcl 20,31,\$+4
79 mflr $ptr #vvvvv "distance between . and rcon"
80 addi $ptr,$ptr,-0x48
81 mtlr r0
82 blr
83 .long 0
84 .byte 0,12,0x14,0,0,0,0,0
85.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
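# [Editorial note, not part of the commit: Lconsts above obtains the
# address of the rcon table without relocations. "bcl 20,31,\$+4" is an
# always-taken branch-and-link to the very next instruction, so LR ends
# up holding the address of the second mflr; the table starts 0x48
# bytes earlier (4 x 16 bytes of constants plus the mflr/bcl pair),
# hence the addi of -0x48.]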
86
87.globl .${prefix}_set_encrypt_key
88.align 5
89.${prefix}_set_encrypt_key:
90Lset_encrypt_key:
91 mflr r11
92 $PUSH r11,$LRSAVE($sp)
93
94 li $ptr,-1
95 ${UCMP}i $inp,0
96 beq- Lenc_key_abort # if ($inp==0) return -1;
97 ${UCMP}i $out,0
98 beq- Lenc_key_abort # if ($out==0) return -1;
99 li $ptr,-2
100 cmpwi $bits,128
101 blt- Lenc_key_abort
102 cmpwi $bits,256
103 bgt- Lenc_key_abort
104 andi. r0,$bits,0x3f
105 bne- Lenc_key_abort
106
107 lis r0,0xfff0
108 mfspr $vrsave,256
109 mtspr 256,r0
110
111 bl Lconsts
112 mtlr r11
113
114 neg r9,$inp
115 lvx $in0,0,$inp
116 addi $inp,$inp,15 # 15 is not typo
117 lvsr $key,0,r9 # borrow $key
118 li r8,0x20
119 cmpwi $bits,192
120 lvx $in1,0,$inp
121 le?vspltisb $mask,0x0f # borrow $mask
122 lvx $rcon,0,$ptr
123 le?vxor $key,$key,$mask # adjust for byte swap
124 lvx $mask,r8,$ptr
125 addi $ptr,$ptr,0x10
126 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
127 li $cnt,8
128 vxor $zero,$zero,$zero
129 mtctr $cnt
130
131 ?lvsr $outperm,0,$out
132 vspltisb $outmask,-1
133 lvx $outhead,0,$out
134 ?vperm $outmask,$zero,$outmask,$outperm
135
136 blt Loop128
137 addi $inp,$inp,8
138 beq L192
139 addi $inp,$inp,8
140 b L256
141
142.align 4
143Loop128:
144 vperm $key,$in0,$in0,$mask # rotate-n-splat
145 vsldoi $tmp,$zero,$in0,12 # >>32
146 vperm $outtail,$in0,$in0,$outperm # rotate
147 vsel $stage,$outhead,$outtail,$outmask
148 vmr $outhead,$outtail
149 vcipherlast $key,$key,$rcon
150 stvx $stage,0,$out
151 addi $out,$out,16
152
153 vxor $in0,$in0,$tmp
154 vsldoi $tmp,$zero,$tmp,12 # >>32
155 vxor $in0,$in0,$tmp
156 vsldoi $tmp,$zero,$tmp,12 # >>32
157 vxor $in0,$in0,$tmp
158 vadduwm $rcon,$rcon,$rcon
159 vxor $in0,$in0,$key
160 bdnz Loop128
161
162 lvx $rcon,0,$ptr # last two round keys
163
164 vperm $key,$in0,$in0,$mask # rotate-n-splat
165 vsldoi $tmp,$zero,$in0,12 # >>32
166 vperm $outtail,$in0,$in0,$outperm # rotate
167 vsel $stage,$outhead,$outtail,$outmask
168 vmr $outhead,$outtail
169 vcipherlast $key,$key,$rcon
170 stvx $stage,0,$out
171 addi $out,$out,16
172
173 vxor $in0,$in0,$tmp
174 vsldoi $tmp,$zero,$tmp,12 # >>32
175 vxor $in0,$in0,$tmp
176 vsldoi $tmp,$zero,$tmp,12 # >>32
177 vxor $in0,$in0,$tmp
178 vadduwm $rcon,$rcon,$rcon
179 vxor $in0,$in0,$key
180
181 vperm $key,$in0,$in0,$mask # rotate-n-splat
182 vsldoi $tmp,$zero,$in0,12 # >>32
183 vperm $outtail,$in0,$in0,$outperm # rotate
184 vsel $stage,$outhead,$outtail,$outmask
185 vmr $outhead,$outtail
186 vcipherlast $key,$key,$rcon
187 stvx $stage,0,$out
188 addi $out,$out,16
189
190 vxor $in0,$in0,$tmp
191 vsldoi $tmp,$zero,$tmp,12 # >>32
192 vxor $in0,$in0,$tmp
193 vsldoi $tmp,$zero,$tmp,12 # >>32
194 vxor $in0,$in0,$tmp
195 vxor $in0,$in0,$key
196 vperm $outtail,$in0,$in0,$outperm # rotate
197 vsel $stage,$outhead,$outtail,$outmask
198 vmr $outhead,$outtail
199 stvx $stage,0,$out
200
201 addi $inp,$out,15 # 15 is not typo
202 addi $out,$out,0x50
203
204 li $rounds,10
205 b Ldone
206
207.align 4
208L192:
209 lvx $tmp,0,$inp
210 li $cnt,4
211 vperm $outtail,$in0,$in0,$outperm # rotate
212 vsel $stage,$outhead,$outtail,$outmask
213 vmr $outhead,$outtail
214 stvx $stage,0,$out
215 addi $out,$out,16
216 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
217 vspltisb $key,8 # borrow $key
218 mtctr $cnt
219 vsububm $mask,$mask,$key # adjust the mask
220
221Loop192:
222 vperm $key,$in1,$in1,$mask # rotate-n-splat
223 vsldoi $tmp,$zero,$in0,12 # >>32
224 vcipherlast $key,$key,$rcon
225
226 vxor $in0,$in0,$tmp
227 vsldoi $tmp,$zero,$tmp,12 # >>32
228 vxor $in0,$in0,$tmp
229 vsldoi $tmp,$zero,$tmp,12 # >>32
230 vxor $in0,$in0,$tmp
231
232 vsldoi $stage,$zero,$in1,8
233 vspltw $tmp,$in0,3
234 vxor $tmp,$tmp,$in1
235 vsldoi $in1,$zero,$in1,12 # >>32
236 vadduwm $rcon,$rcon,$rcon
237 vxor $in1,$in1,$tmp
238 vxor $in0,$in0,$key
239 vxor $in1,$in1,$key
240 vsldoi $stage,$stage,$in0,8
241
242 vperm $key,$in1,$in1,$mask # rotate-n-splat
243 vsldoi $tmp,$zero,$in0,12 # >>32
244 vperm $outtail,$stage,$stage,$outperm # rotate
245 vsel $stage,$outhead,$outtail,$outmask
246 vmr $outhead,$outtail
247 vcipherlast $key,$key,$rcon
248 stvx $stage,0,$out
249 addi $out,$out,16
250
251 vsldoi $stage,$in0,$in1,8
252 vxor $in0,$in0,$tmp
253 vsldoi $tmp,$zero,$tmp,12 # >>32
254 vperm $outtail,$stage,$stage,$outperm # rotate
255 vsel $stage,$outhead,$outtail,$outmask
256 vmr $outhead,$outtail
257 vxor $in0,$in0,$tmp
258 vsldoi $tmp,$zero,$tmp,12 # >>32
259 vxor $in0,$in0,$tmp
260 stvx $stage,0,$out
261 addi $out,$out,16
262
263 vspltw $tmp,$in0,3
264 vxor $tmp,$tmp,$in1
265 vsldoi $in1,$zero,$in1,12 # >>32
266 vadduwm $rcon,$rcon,$rcon
267 vxor $in1,$in1,$tmp
268 vxor $in0,$in0,$key
269 vxor $in1,$in1,$key
270 vperm $outtail,$in0,$in0,$outperm # rotate
271 vsel $stage,$outhead,$outtail,$outmask
272 vmr $outhead,$outtail
273 stvx $stage,0,$out
274 addi $inp,$out,15 # 15 is not typo
275 addi $out,$out,16
276 bdnz Loop192
277
278 li $rounds,12
279 addi $out,$out,0x20
280 b Ldone
281
282.align 4
283L256:
284 lvx $tmp,0,$inp
285 li $cnt,7
286 li $rounds,14
287 vperm $outtail,$in0,$in0,$outperm # rotate
288 vsel $stage,$outhead,$outtail,$outmask
289 vmr $outhead,$outtail
290 stvx $stage,0,$out
291 addi $out,$out,16
292 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
293 mtctr $cnt
294
295Loop256:
296 vperm $key,$in1,$in1,$mask # rotate-n-splat
297 vsldoi $tmp,$zero,$in0,12 # >>32
298 vperm $outtail,$in1,$in1,$outperm # rotate
299 vsel $stage,$outhead,$outtail,$outmask
300 vmr $outhead,$outtail
301 vcipherlast $key,$key,$rcon
302 stvx $stage,0,$out
303 addi $out,$out,16
304
305 vxor $in0,$in0,$tmp
306 vsldoi $tmp,$zero,$tmp,12 # >>32
307 vxor $in0,$in0,$tmp
308 vsldoi $tmp,$zero,$tmp,12 # >>32
309 vxor $in0,$in0,$tmp
310 vadduwm $rcon,$rcon,$rcon
311 vxor $in0,$in0,$key
312 vperm $outtail,$in0,$in0,$outperm # rotate
313 vsel $stage,$outhead,$outtail,$outmask
314 vmr $outhead,$outtail
315 stvx $stage,0,$out
316 addi $inp,$out,15 # 15 is not typo
317 addi $out,$out,16
318 bdz Ldone
319
320 vspltw $key,$in0,3 # just splat
321 vsldoi $tmp,$zero,$in1,12 # >>32
322 vsbox $key,$key
323
324 vxor $in1,$in1,$tmp
325 vsldoi $tmp,$zero,$tmp,12 # >>32
326 vxor $in1,$in1,$tmp
327 vsldoi $tmp,$zero,$tmp,12 # >>32
328 vxor $in1,$in1,$tmp
329
330 vxor $in1,$in1,$key
331 b Loop256
332
333.align 4
334Ldone:
335 lvx $in1,0,$inp # redundant in aligned case
336 vsel $in1,$outhead,$in1,$outmask
337 stvx $in1,0,$inp
338 li $ptr,0
339 mtspr 256,$vrsave
340 stw $rounds,0($out)
341
342Lenc_key_abort:
343 mr r3,$ptr
344 blr
345 .long 0
346 .byte 0,12,0x14,1,0,0,3,0
347 .long 0
348.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
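# [Editorial note, not part of the commit: the checks at the top of
# Lset_encrypt_key give this routine a small C-visible contract,
# assuming a prototype along the lines of
#   int aes_p8_set_encrypt_key(const u8 *inp, int bits, struct aes_key *out);
# it returns 0 on success, -1 for a NULL input or output pointer and
# -2 for a bit length other than 128/192/256, and stores the round
# count (10/12/14) at offset 240 of the key schedule.]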
349
350.globl .${prefix}_set_decrypt_key
351.align 5
352.${prefix}_set_decrypt_key:
353 $STU $sp,-$FRAME($sp)
354 mflr r10
355 $PUSH r10,$FRAME+$LRSAVE($sp)
356 bl Lset_encrypt_key
357 mtlr r10
358
359 cmpwi r3,0
360 bne- Ldec_key_abort
361
362 slwi $cnt,$rounds,4
363 subi $inp,$out,240 # first round key
364 srwi $rounds,$rounds,1
365 add $out,$inp,$cnt # last round key
366 mtctr $rounds
367
368Ldeckey:
369 lwz r0, 0($inp)
370 lwz r6, 4($inp)
371 lwz r7, 8($inp)
372 lwz r8, 12($inp)
373 addi $inp,$inp,16
374 lwz r9, 0($out)
375 lwz r10,4($out)
376 lwz r11,8($out)
377 lwz r12,12($out)
378 stw r0, 0($out)
379 stw r6, 4($out)
380 stw r7, 8($out)
381 stw r8, 12($out)
382 subi $out,$out,16
383 stw r9, -16($inp)
384 stw r10,-12($inp)
385 stw r11,-8($inp)
386 stw r12,-4($inp)
387 bdnz Ldeckey
388
389 xor r3,r3,r3 # return value
390Ldec_key_abort:
391 addi $sp,$sp,$FRAME
392 blr
393 .long 0
394 .byte 0,12,4,1,0x80,0,3,0
395 .long 0
396.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
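# [Editorial note, not part of the commit: there is no separate decrypt
# schedule; Ldeckey above walks the freshly built encrypt schedule from
# both ends and swaps 16-byte round keys in place, because vncipher
# consumes round keys in the reverse order of vcipher.]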
397___
398}}}
399#########################################################################
400{{{ # Single block en- and decrypt procedures #
401sub gen_block () {
402my $dir = shift;
403my $n = $dir eq "de" ? "n" : "";
404my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
405
406$code.=<<___;
407.globl .${prefix}_${dir}crypt
408.align 5
409.${prefix}_${dir}crypt:
410 lwz $rounds,240($key)
411 lis r0,0xfc00
412 mfspr $vrsave,256
413 li $idx,15 # 15 is not typo
414 mtspr 256,r0
415
416 lvx v0,0,$inp
417 neg r11,$out
418 lvx v1,$idx,$inp
419 lvsl v2,0,$inp # inpperm
420 le?vspltisb v4,0x0f
421 ?lvsl v3,0,r11 # outperm
422 le?vxor v2,v2,v4
423 li $idx,16
424 vperm v0,v0,v1,v2 # align [and byte swap in LE]
425 lvx v1,0,$key
426 ?lvsl v5,0,$key # keyperm
427 srwi $rounds,$rounds,1
428 lvx v2,$idx,$key
429 addi $idx,$idx,16
430 subi $rounds,$rounds,1
431 ?vperm v1,v1,v2,v5 # align round key
432
433 vxor v0,v0,v1
434 lvx v1,$idx,$key
435 addi $idx,$idx,16
436 mtctr $rounds
437
438Loop_${dir}c:
439 ?vperm v2,v2,v1,v5
440 v${n}cipher v0,v0,v2
441 lvx v2,$idx,$key
442 addi $idx,$idx,16
443 ?vperm v1,v1,v2,v5
444 v${n}cipher v0,v0,v1
445 lvx v1,$idx,$key
446 addi $idx,$idx,16
447 bdnz Loop_${dir}c
448
449 ?vperm v2,v2,v1,v5
450 v${n}cipher v0,v0,v2
451 lvx v2,$idx,$key
452 ?vperm v1,v1,v2,v5
453 v${n}cipherlast v0,v0,v1
454
455 vspltisb v2,-1
456 vxor v1,v1,v1
457 li $idx,15 # 15 is not typo
458 ?vperm v2,v1,v2,v3 # outmask
459 le?vxor v3,v3,v4
460 lvx v1,0,$out # outhead
461 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
462 vsel v1,v1,v0,v2
463 lvx v4,$idx,$out
464 stvx v1,0,$out
465 vsel v0,v0,v4,v2
466 stvx v0,$idx,$out
467
468 mtspr 256,$vrsave
469 blr
470 .long 0
471 .byte 0,12,0x14,0,0,0,3,0
472 .long 0
473.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
474___
475}
476&gen_block("en");
477&gen_block("de");
478}}}
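# [Editorial note, not part of the commit: gen_block() stamps out the
# single-block .aes_p8_encrypt and .aes_p8_decrypt entry points from one
# template; the two calls above differ only in the label names and in
# $n, which turns vcipher/vcipherlast into their vncipher/vncipherlast
# inverses for the decrypt variant.]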
479#########################################################################
480{{{ # CBC en- and decrypt procedures #
481my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
482my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
483my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
484 map("v$_",(4..10));
485$code.=<<___;
486.globl .${prefix}_cbc_encrypt
487.align 5
488.${prefix}_cbc_encrypt:
489 ${UCMP}i $len,16
490 bltlr-
491
492 cmpwi $enc,0 # test direction
493 lis r0,0xffe0
494 mfspr $vrsave,256
495 mtspr 256,r0
496
497 li $idx,15
498 vxor $rndkey0,$rndkey0,$rndkey0
499 le?vspltisb $tmp,0x0f
500
501 lvx $ivec,0,$ivp # load [unaligned] iv
502 lvsl $inpperm,0,$ivp
503 lvx $inptail,$idx,$ivp
504 le?vxor $inpperm,$inpperm,$tmp
505 vperm $ivec,$ivec,$inptail,$inpperm
506
507 neg r11,$inp
508 ?lvsl $keyperm,0,$key # prepare for unaligned key
509 lwz $rounds,240($key)
510
511 lvsr $inpperm,0,r11 # prepare for unaligned load
512 lvx $inptail,0,$inp
513 addi $inp,$inp,15 # 15 is not typo
514 le?vxor $inpperm,$inpperm,$tmp
515
516 ?lvsr $outperm,0,$out # prepare for unaligned store
517 vspltisb $outmask,-1
518 lvx $outhead,0,$out
519 ?vperm $outmask,$rndkey0,$outmask,$outperm
520 le?vxor $outperm,$outperm,$tmp
521
522 srwi $rounds,$rounds,1
523 li $idx,16
524 subi $rounds,$rounds,1
525 beq Lcbc_dec
526
527Lcbc_enc:
528 vmr $inout,$inptail
529 lvx $inptail,0,$inp
530 addi $inp,$inp,16
531 mtctr $rounds
532 subi $len,$len,16 # len-=16
533
534 lvx $rndkey0,0,$key
535 vperm $inout,$inout,$inptail,$inpperm
536 lvx $rndkey1,$idx,$key
537 addi $idx,$idx,16
538 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
539 vxor $inout,$inout,$rndkey0
540 lvx $rndkey0,$idx,$key
541 addi $idx,$idx,16
542 vxor $inout,$inout,$ivec
543
544Loop_cbc_enc:
545 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
546 vcipher $inout,$inout,$rndkey1
547 lvx $rndkey1,$idx,$key
548 addi $idx,$idx,16
549 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
550 vcipher $inout,$inout,$rndkey0
551 lvx $rndkey0,$idx,$key
552 addi $idx,$idx,16
553 bdnz Loop_cbc_enc
554
555 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
556 vcipher $inout,$inout,$rndkey1
557 lvx $rndkey1,$idx,$key
558 li $idx,16
559 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
560 vcipherlast $ivec,$inout,$rndkey0
561 ${UCMP}i $len,16
562
563 vperm $tmp,$ivec,$ivec,$outperm
564 vsel $inout,$outhead,$tmp,$outmask
565 vmr $outhead,$tmp
566 stvx $inout,0,$out
567 addi $out,$out,16
568 bge Lcbc_enc
569
570 b Lcbc_done
571
572.align 4
573Lcbc_dec:
574 ${UCMP}i $len,128
575 bge _aesp8_cbc_decrypt8x
576 vmr $tmp,$inptail
577 lvx $inptail,0,$inp
578 addi $inp,$inp,16
579 mtctr $rounds
580 subi $len,$len,16 # len-=16
581
582 lvx $rndkey0,0,$key
583 vperm $tmp,$tmp,$inptail,$inpperm
584 lvx $rndkey1,$idx,$key
585 addi $idx,$idx,16
586 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
587 vxor $inout,$tmp,$rndkey0
588 lvx $rndkey0,$idx,$key
589 addi $idx,$idx,16
590
591Loop_cbc_dec:
592 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
593 vncipher $inout,$inout,$rndkey1
594 lvx $rndkey1,$idx,$key
595 addi $idx,$idx,16
596 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
597 vncipher $inout,$inout,$rndkey0
598 lvx $rndkey0,$idx,$key
599 addi $idx,$idx,16
600 bdnz Loop_cbc_dec
601
602 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
603 vncipher $inout,$inout,$rndkey1
604 lvx $rndkey1,$idx,$key
605 li $idx,16
606 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
607 vncipherlast $inout,$inout,$rndkey0
608 ${UCMP}i $len,16
609
610 vxor $inout,$inout,$ivec
611 vmr $ivec,$tmp
612 vperm $tmp,$inout,$inout,$outperm
613 vsel $inout,$outhead,$tmp,$outmask
614 vmr $outhead,$tmp
615 stvx $inout,0,$out
616 addi $out,$out,16
617 bge Lcbc_dec
618
619Lcbc_done:
620 addi $out,$out,-1
621 lvx $inout,0,$out # redundant in aligned case
622 vsel $inout,$outhead,$inout,$outmask
623 stvx $inout,0,$out
624
625 neg $enc,$ivp # write [unaligned] iv
626 li $idx,15 # 15 is not typo
627 vxor $rndkey0,$rndkey0,$rndkey0
628 vspltisb $outmask,-1
629 le?vspltisb $tmp,0x0f
630 ?lvsl $outperm,0,$enc
631 ?vperm $outmask,$rndkey0,$outmask,$outperm
632 le?vxor $outperm,$outperm,$tmp
633 lvx $outhead,0,$ivp
634 vperm $ivec,$ivec,$ivec,$outperm
635 vsel $inout,$outhead,$ivec,$outmask
636 lvx $inptail,$idx,$ivp
637 stvx $inout,0,$ivp
638 vsel $inout,$ivec,$inptail,$outmask
639 stvx $inout,$idx,$ivp
640
641 mtspr 256,$vrsave
642 blr
643 .long 0
644 .byte 0,12,0x14,0,0,0,6,0
645 .long 0
646___
647#########################################################################
648{{ # Optimized CBC decrypt procedure #
649my $key_="r11";
650my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
651my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
652my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
653my $rndkey0="v23"; # v24-v25 rotating buffer for the first few round keys
654 # v26-v31 last 6 round keys
655my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
656
657$code.=<<___;
658.align 5
659_aesp8_cbc_decrypt8x:
660 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
661 li r10,`$FRAME+8*16+15`
662 li r11,`$FRAME+8*16+31`
663 stvx v20,r10,$sp # ABI says so
664 addi r10,r10,32
665 stvx v21,r11,$sp
666 addi r11,r11,32
667 stvx v22,r10,$sp
668 addi r10,r10,32
669 stvx v23,r11,$sp
670 addi r11,r11,32
671 stvx v24,r10,$sp
672 addi r10,r10,32
673 stvx v25,r11,$sp
674 addi r11,r11,32
675 stvx v26,r10,$sp
676 addi r10,r10,32
677 stvx v27,r11,$sp
678 addi r11,r11,32
679 stvx v28,r10,$sp
680 addi r10,r10,32
681 stvx v29,r11,$sp
682 addi r11,r11,32
683 stvx v30,r10,$sp
684 stvx v31,r11,$sp
685 li r0,-1
686 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
687 li $x10,0x10
688 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
689 li $x20,0x20
690 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
691 li $x30,0x30
692 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
693 li $x40,0x40
694 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
695 li $x50,0x50
696 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
697 li $x60,0x60
698 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
699 li $x70,0x70
700 mtspr 256,r0
701
702 subi $rounds,$rounds,3 # -4 in total
703 subi $len,$len,128 # bias
704
705 lvx $rndkey0,$x00,$key # load key schedule
706 lvx v30,$x10,$key
707 addi $key,$key,0x20
708 lvx v31,$x00,$key
709 ?vperm $rndkey0,$rndkey0,v30,$keyperm
710 addi $key_,$sp,$FRAME+15
711 mtctr $rounds
712
713Load_cbc_dec_key:
714 ?vperm v24,v30,v31,$keyperm
715 lvx v30,$x10,$key
716 addi $key,$key,0x20
717 stvx v24,$x00,$key_ # off-load round[1]
718 ?vperm v25,v31,v30,$keyperm
719 lvx v31,$x00,$key
720 stvx v25,$x10,$key_ # off-load round[2]
721 addi $key_,$key_,0x20
722 bdnz Load_cbc_dec_key
723
724 lvx v26,$x10,$key
725 ?vperm v24,v30,v31,$keyperm
726 lvx v27,$x20,$key
727 stvx v24,$x00,$key_ # off-load round[3]
728 ?vperm v25,v31,v26,$keyperm
729 lvx v28,$x30,$key
730 stvx v25,$x10,$key_ # off-load round[4]
731 addi $key_,$sp,$FRAME+15 # rewind $key_
732 ?vperm v26,v26,v27,$keyperm
733 lvx v29,$x40,$key
734 ?vperm v27,v27,v28,$keyperm
735 lvx v30,$x50,$key
736 ?vperm v28,v28,v29,$keyperm
737 lvx v31,$x60,$key
738 ?vperm v29,v29,v30,$keyperm
739 lvx $out0,$x70,$key # borrow $out0
740 ?vperm v30,v30,v31,$keyperm
741 lvx v24,$x00,$key_ # pre-load round[1]
742 ?vperm v31,v31,$out0,$keyperm
743 lvx v25,$x10,$key_ # pre-load round[2]
744
745 #lvx $inptail,0,$inp # "caller" already did this
746 #addi $inp,$inp,15 # 15 is not typo
747 subi $inp,$inp,15 # undo "caller"
748
749 le?li $idx,8
750 lvx_u $in0,$x00,$inp # load first 8 "words"
751 le?lvsl $inpperm,0,$idx
752 le?vspltisb $tmp,0x0f
753 lvx_u $in1,$x10,$inp
754 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
755 lvx_u $in2,$x20,$inp
756 le?vperm $in0,$in0,$in0,$inpperm
757 lvx_u $in3,$x30,$inp
758 le?vperm $in1,$in1,$in1,$inpperm
759 lvx_u $in4,$x40,$inp
760 le?vperm $in2,$in2,$in2,$inpperm
761 vxor $out0,$in0,$rndkey0
762 lvx_u $in5,$x50,$inp
763 le?vperm $in3,$in3,$in3,$inpperm
764 vxor $out1,$in1,$rndkey0
765 lvx_u $in6,$x60,$inp
766 le?vperm $in4,$in4,$in4,$inpperm
767 vxor $out2,$in2,$rndkey0
768 lvx_u $in7,$x70,$inp
769 addi $inp,$inp,0x80
770 le?vperm $in5,$in5,$in5,$inpperm
771 vxor $out3,$in3,$rndkey0
772 le?vperm $in6,$in6,$in6,$inpperm
773 vxor $out4,$in4,$rndkey0
774 le?vperm $in7,$in7,$in7,$inpperm
775 vxor $out5,$in5,$rndkey0
776 vxor $out6,$in6,$rndkey0
777 vxor $out7,$in7,$rndkey0
778
779 mtctr $rounds
780 b Loop_cbc_dec8x
781.align 5
782Loop_cbc_dec8x:
783 vncipher $out0,$out0,v24
784 vncipher $out1,$out1,v24
785 vncipher $out2,$out2,v24
786 vncipher $out3,$out3,v24
787 vncipher $out4,$out4,v24
788 vncipher $out5,$out5,v24
789 vncipher $out6,$out6,v24
790 vncipher $out7,$out7,v24
791 lvx v24,$x20,$key_ # round[3]
792 addi $key_,$key_,0x20
793
794 vncipher $out0,$out0,v25
795 vncipher $out1,$out1,v25
796 vncipher $out2,$out2,v25
797 vncipher $out3,$out3,v25
798 vncipher $out4,$out4,v25
799 vncipher $out5,$out5,v25
800 vncipher $out6,$out6,v25
801 vncipher $out7,$out7,v25
802 lvx v25,$x10,$key_ # round[4]
803 bdnz Loop_cbc_dec8x
804
805 subic $len,$len,128 # $len-=128
806 vncipher $out0,$out0,v24
807 vncipher $out1,$out1,v24
808 vncipher $out2,$out2,v24
809 vncipher $out3,$out3,v24
810 vncipher $out4,$out4,v24
811 vncipher $out5,$out5,v24
812 vncipher $out6,$out6,v24
813 vncipher $out7,$out7,v24
814
815 subfe. r0,r0,r0 # borrow?-1:0
816 vncipher $out0,$out0,v25
817 vncipher $out1,$out1,v25
818 vncipher $out2,$out2,v25
819 vncipher $out3,$out3,v25
820 vncipher $out4,$out4,v25
821 vncipher $out5,$out5,v25
822 vncipher $out6,$out6,v25
823 vncipher $out7,$out7,v25
824
825 and r0,r0,$len
826 vncipher $out0,$out0,v26
827 vncipher $out1,$out1,v26
828 vncipher $out2,$out2,v26
829 vncipher $out3,$out3,v26
830 vncipher $out4,$out4,v26
831 vncipher $out5,$out5,v26
832 vncipher $out6,$out6,v26
833 vncipher $out7,$out7,v26
834
835 add $inp,$inp,r0 # $inp is adjusted in such
836 # way that at exit from the
837 # loop inX-in7 are loaded
838 # with last "words"
839 vncipher $out0,$out0,v27
840 vncipher $out1,$out1,v27
841 vncipher $out2,$out2,v27
842 vncipher $out3,$out3,v27
843 vncipher $out4,$out4,v27
844 vncipher $out5,$out5,v27
845 vncipher $out6,$out6,v27
846 vncipher $out7,$out7,v27
847
848 addi $key_,$sp,$FRAME+15 # rewind $key_
849 vncipher $out0,$out0,v28
850 vncipher $out1,$out1,v28
851 vncipher $out2,$out2,v28
852 vncipher $out3,$out3,v28
853 vncipher $out4,$out4,v28
854 vncipher $out5,$out5,v28
855 vncipher $out6,$out6,v28
856 vncipher $out7,$out7,v28
857 lvx v24,$x00,$key_ # re-pre-load round[1]
858
859 vncipher $out0,$out0,v29
860 vncipher $out1,$out1,v29
861 vncipher $out2,$out2,v29
862 vncipher $out3,$out3,v29
863 vncipher $out4,$out4,v29
864 vncipher $out5,$out5,v29
865 vncipher $out6,$out6,v29
866 vncipher $out7,$out7,v29
867 lvx v25,$x10,$key_ # re-pre-load round[2]
868
869 vncipher $out0,$out0,v30
870 vxor $ivec,$ivec,v31 # xor with last round key
871 vncipher $out1,$out1,v30
872 vxor $in0,$in0,v31
873 vncipher $out2,$out2,v30
874 vxor $in1,$in1,v31
875 vncipher $out3,$out3,v30
876 vxor $in2,$in2,v31
877 vncipher $out4,$out4,v30
878 vxor $in3,$in3,v31
879 vncipher $out5,$out5,v30
880 vxor $in4,$in4,v31
881 vncipher $out6,$out6,v30
882 vxor $in5,$in5,v31
883 vncipher $out7,$out7,v30
884 vxor $in6,$in6,v31
885
886 vncipherlast $out0,$out0,$ivec
887 vncipherlast $out1,$out1,$in0
888 lvx_u $in0,$x00,$inp # load next input block
889 vncipherlast $out2,$out2,$in1
890 lvx_u $in1,$x10,$inp
891 vncipherlast $out3,$out3,$in2
892 le?vperm $in0,$in0,$in0,$inpperm
893 lvx_u $in2,$x20,$inp
894 vncipherlast $out4,$out4,$in3
895 le?vperm $in1,$in1,$in1,$inpperm
896 lvx_u $in3,$x30,$inp
897 vncipherlast $out5,$out5,$in4
898 le?vperm $in2,$in2,$in2,$inpperm
899 lvx_u $in4,$x40,$inp
900 vncipherlast $out6,$out6,$in5
901 le?vperm $in3,$in3,$in3,$inpperm
902 lvx_u $in5,$x50,$inp
903 vncipherlast $out7,$out7,$in6
904 le?vperm $in4,$in4,$in4,$inpperm
905 lvx_u $in6,$x60,$inp
906 vmr $ivec,$in7
907 le?vperm $in5,$in5,$in5,$inpperm
908 lvx_u $in7,$x70,$inp
909 addi $inp,$inp,0x80
910
911 le?vperm $out0,$out0,$out0,$inpperm
912 le?vperm $out1,$out1,$out1,$inpperm
913 stvx_u $out0,$x00,$out
914 le?vperm $in6,$in6,$in6,$inpperm
915 vxor $out0,$in0,$rndkey0
916 le?vperm $out2,$out2,$out2,$inpperm
917 stvx_u $out1,$x10,$out
918 le?vperm $in7,$in7,$in7,$inpperm
919 vxor $out1,$in1,$rndkey0
920 le?vperm $out3,$out3,$out3,$inpperm
921 stvx_u $out2,$x20,$out
922 vxor $out2,$in2,$rndkey0
923 le?vperm $out4,$out4,$out4,$inpperm
924 stvx_u $out3,$x30,$out
925 vxor $out3,$in3,$rndkey0
926 le?vperm $out5,$out5,$out5,$inpperm
927 stvx_u $out4,$x40,$out
928 vxor $out4,$in4,$rndkey0
929 le?vperm $out6,$out6,$out6,$inpperm
930 stvx_u $out5,$x50,$out
931 vxor $out5,$in5,$rndkey0
932 le?vperm $out7,$out7,$out7,$inpperm
933 stvx_u $out6,$x60,$out
934 vxor $out6,$in6,$rndkey0
935 stvx_u $out7,$x70,$out
936 addi $out,$out,0x80
937 vxor $out7,$in7,$rndkey0
938
939 mtctr $rounds
940 beq Loop_cbc_dec8x # did $len-=128 borrow?
941
942 addic. $len,$len,128
943 beq Lcbc_dec8x_done
944 nop
945 nop
946
947Loop_cbc_dec8x_tail: # up to 7 "words" tail...
948 vncipher $out1,$out1,v24
949 vncipher $out2,$out2,v24
950 vncipher $out3,$out3,v24
951 vncipher $out4,$out4,v24
952 vncipher $out5,$out5,v24
953 vncipher $out6,$out6,v24
954 vncipher $out7,$out7,v24
955 lvx v24,$x20,$key_ # round[3]
956 addi $key_,$key_,0x20
957
958 vncipher $out1,$out1,v25
959 vncipher $out2,$out2,v25
960 vncipher $out3,$out3,v25
961 vncipher $out4,$out4,v25
962 vncipher $out5,$out5,v25
963 vncipher $out6,$out6,v25
964 vncipher $out7,$out7,v25
965 lvx v25,$x10,$key_ # round[4]
966 bdnz Loop_cbc_dec8x_tail
967
968 vncipher $out1,$out1,v24
969 vncipher $out2,$out2,v24
970 vncipher $out3,$out3,v24
971 vncipher $out4,$out4,v24
972 vncipher $out5,$out5,v24
973 vncipher $out6,$out6,v24
974 vncipher $out7,$out7,v24
975
976 vncipher $out1,$out1,v25
977 vncipher $out2,$out2,v25
978 vncipher $out3,$out3,v25
979 vncipher $out4,$out4,v25
980 vncipher $out5,$out5,v25
981 vncipher $out6,$out6,v25
982 vncipher $out7,$out7,v25
983
984 vncipher $out1,$out1,v26
985 vncipher $out2,$out2,v26
986 vncipher $out3,$out3,v26
987 vncipher $out4,$out4,v26
988 vncipher $out5,$out5,v26
989 vncipher $out6,$out6,v26
990 vncipher $out7,$out7,v26
991
992 vncipher $out1,$out1,v27
993 vncipher $out2,$out2,v27
994 vncipher $out3,$out3,v27
995 vncipher $out4,$out4,v27
996 vncipher $out5,$out5,v27
997 vncipher $out6,$out6,v27
998 vncipher $out7,$out7,v27
999
1000 vncipher $out1,$out1,v28
1001 vncipher $out2,$out2,v28
1002 vncipher $out3,$out3,v28
1003 vncipher $out4,$out4,v28
1004 vncipher $out5,$out5,v28
1005 vncipher $out6,$out6,v28
1006 vncipher $out7,$out7,v28
1007
1008 vncipher $out1,$out1,v29
1009 vncipher $out2,$out2,v29
1010 vncipher $out3,$out3,v29
1011 vncipher $out4,$out4,v29
1012 vncipher $out5,$out5,v29
1013 vncipher $out6,$out6,v29
1014 vncipher $out7,$out7,v29
1015
1016 vncipher $out1,$out1,v30
1017 vxor $ivec,$ivec,v31 # last round key
1018 vncipher $out2,$out2,v30
1019 vxor $in1,$in1,v31
1020 vncipher $out3,$out3,v30
1021 vxor $in2,$in2,v31
1022 vncipher $out4,$out4,v30
1023 vxor $in3,$in3,v31
1024 vncipher $out5,$out5,v30
1025 vxor $in4,$in4,v31
1026 vncipher $out6,$out6,v30
1027 vxor $in5,$in5,v31
1028 vncipher $out7,$out7,v30
1029 vxor $in6,$in6,v31
1030
1031 cmplwi $len,32 # switch($len)
1032 blt Lcbc_dec8x_one
1033 nop
1034 beq Lcbc_dec8x_two
1035 cmplwi $len,64
1036 blt Lcbc_dec8x_three
1037 nop
1038 beq Lcbc_dec8x_four
1039 cmplwi $len,96
1040 blt Lcbc_dec8x_five
1041 nop
1042 beq Lcbc_dec8x_six
1043
1044Lcbc_dec8x_seven:
1045 vncipherlast $out1,$out1,$ivec
1046 vncipherlast $out2,$out2,$in1
1047 vncipherlast $out3,$out3,$in2
1048 vncipherlast $out4,$out4,$in3
1049 vncipherlast $out5,$out5,$in4
1050 vncipherlast $out6,$out6,$in5
1051 vncipherlast $out7,$out7,$in6
1052 vmr $ivec,$in7
1053
1054 le?vperm $out1,$out1,$out1,$inpperm
1055 le?vperm $out2,$out2,$out2,$inpperm
1056 stvx_u $out1,$x00,$out
1057 le?vperm $out3,$out3,$out3,$inpperm
1058 stvx_u $out2,$x10,$out
1059 le?vperm $out4,$out4,$out4,$inpperm
1060 stvx_u $out3,$x20,$out
1061 le?vperm $out5,$out5,$out5,$inpperm
1062 stvx_u $out4,$x30,$out
1063 le?vperm $out6,$out6,$out6,$inpperm
1064 stvx_u $out5,$x40,$out
1065 le?vperm $out7,$out7,$out7,$inpperm
1066 stvx_u $out6,$x50,$out
1067 stvx_u $out7,$x60,$out
1068 addi $out,$out,0x70
1069 b Lcbc_dec8x_done
1070
1071.align 5
1072Lcbc_dec8x_six:
1073 vncipherlast $out2,$out2,$ivec
1074 vncipherlast $out3,$out3,$in2
1075 vncipherlast $out4,$out4,$in3
1076 vncipherlast $out5,$out5,$in4
1077 vncipherlast $out6,$out6,$in5
1078 vncipherlast $out7,$out7,$in6
1079 vmr $ivec,$in7
1080
1081 le?vperm $out2,$out2,$out2,$inpperm
1082 le?vperm $out3,$out3,$out3,$inpperm
1083 stvx_u $out2,$x00,$out
1084 le?vperm $out4,$out4,$out4,$inpperm
1085 stvx_u $out3,$x10,$out
1086 le?vperm $out5,$out5,$out5,$inpperm
1087 stvx_u $out4,$x20,$out
1088 le?vperm $out6,$out6,$out6,$inpperm
1089 stvx_u $out5,$x30,$out
1090 le?vperm $out7,$out7,$out7,$inpperm
1091 stvx_u $out6,$x40,$out
1092 stvx_u $out7,$x50,$out
1093 addi $out,$out,0x60
1094 b Lcbc_dec8x_done
1095
1096.align 5
1097Lcbc_dec8x_five:
1098 vncipherlast $out3,$out3,$ivec
1099 vncipherlast $out4,$out4,$in3
1100 vncipherlast $out5,$out5,$in4
1101 vncipherlast $out6,$out6,$in5
1102 vncipherlast $out7,$out7,$in6
1103 vmr $ivec,$in7
1104
1105 le?vperm $out3,$out3,$out3,$inpperm
1106 le?vperm $out4,$out4,$out4,$inpperm
1107 stvx_u $out3,$x00,$out
1108 le?vperm $out5,$out5,$out5,$inpperm
1109 stvx_u $out4,$x10,$out
1110 le?vperm $out6,$out6,$out6,$inpperm
1111 stvx_u $out5,$x20,$out
1112 le?vperm $out7,$out7,$out7,$inpperm
1113 stvx_u $out6,$x30,$out
1114 stvx_u $out7,$x40,$out
1115 addi $out,$out,0x50
1116 b Lcbc_dec8x_done
1117
1118.align 5
1119Lcbc_dec8x_four:
1120 vncipherlast $out4,$out4,$ivec
1121 vncipherlast $out5,$out5,$in4
1122 vncipherlast $out6,$out6,$in5
1123 vncipherlast $out7,$out7,$in6
1124 vmr $ivec,$in7
1125
1126 le?vperm $out4,$out4,$out4,$inpperm
1127 le?vperm $out5,$out5,$out5,$inpperm
1128 stvx_u $out4,$x00,$out
1129 le?vperm $out6,$out6,$out6,$inpperm
1130 stvx_u $out5,$x10,$out
1131 le?vperm $out7,$out7,$out7,$inpperm
1132 stvx_u $out6,$x20,$out
1133 stvx_u $out7,$x30,$out
1134 addi $out,$out,0x40
1135 b Lcbc_dec8x_done
1136
1137.align 5
1138Lcbc_dec8x_three:
1139 vncipherlast $out5,$out5,$ivec
1140 vncipherlast $out6,$out6,$in5
1141 vncipherlast $out7,$out7,$in6
1142 vmr $ivec,$in7
1143
1144 le?vperm $out5,$out5,$out5,$inpperm
1145 le?vperm $out6,$out6,$out6,$inpperm
1146 stvx_u $out5,$x00,$out
1147 le?vperm $out7,$out7,$out7,$inpperm
1148 stvx_u $out6,$x10,$out
1149 stvx_u $out7,$x20,$out
1150 addi $out,$out,0x30
1151 b Lcbc_dec8x_done
1152
1153.align 5
1154Lcbc_dec8x_two:
1155 vncipherlast $out6,$out6,$ivec
1156 vncipherlast $out7,$out7,$in6
1157 vmr $ivec,$in7
1158
1159 le?vperm $out6,$out6,$out6,$inpperm
1160 le?vperm $out7,$out7,$out7,$inpperm
1161 stvx_u $out6,$x00,$out
1162 stvx_u $out7,$x10,$out
1163 addi $out,$out,0x20
1164 b Lcbc_dec8x_done
1165
1166.align 5
1167Lcbc_dec8x_one:
1168 vncipherlast $out7,$out7,$ivec
1169 vmr $ivec,$in7
1170
1171 le?vperm $out7,$out7,$out7,$inpperm
1172 stvx_u $out7,0,$out
1173 addi $out,$out,0x10
1174
1175Lcbc_dec8x_done:
1176 le?vperm $ivec,$ivec,$ivec,$inpperm
1177 stvx_u $ivec,0,$ivp # write [unaligned] iv
1178
1179 li r10,`$FRAME+15`
1180 li r11,`$FRAME+31`
1181 stvx $inpperm,r10,$sp # wipe copies of round keys
1182 addi r10,r10,32
1183 stvx $inpperm,r11,$sp
1184 addi r11,r11,32
1185 stvx $inpperm,r10,$sp
1186 addi r10,r10,32
1187 stvx $inpperm,r11,$sp
1188 addi r11,r11,32
1189 stvx $inpperm,r10,$sp
1190 addi r10,r10,32
1191 stvx $inpperm,r11,$sp
1192 addi r11,r11,32
1193 stvx $inpperm,r10,$sp
1194 addi r10,r10,32
1195 stvx $inpperm,r11,$sp
1196 addi r11,r11,32
1197
1198 mtspr 256,$vrsave
1199 lvx v20,r10,$sp # ABI says so
1200 addi r10,r10,32
1201 lvx v21,r11,$sp
1202 addi r11,r11,32
1203 lvx v22,r10,$sp
1204 addi r10,r10,32
1205 lvx v23,r11,$sp
1206 addi r11,r11,32
1207 lvx v24,r10,$sp
1208 addi r10,r10,32
1209 lvx v25,r11,$sp
1210 addi r11,r11,32
1211 lvx v26,r10,$sp
1212 addi r10,r10,32
1213 lvx v27,r11,$sp
1214 addi r11,r11,32
1215 lvx v28,r10,$sp
1216 addi r10,r10,32
1217 lvx v29,r11,$sp
1218 addi r11,r11,32
1219 lvx v30,r10,$sp
1220 lvx v31,r11,$sp
1221 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1222 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1223 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1224 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1225 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1226 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1227 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1228 blr
1229 .long 0
1230 .byte 0,12,0x14,0,0x80,6,6,0
1231 .long 0
1232.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1233___
1234}} }}}
1235
1236#########################################################################
1237{{{ # CTR procedure[s] #
1238my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1239my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1240my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1241 map("v$_",(4..11));
1242my $dat=$tmp;
1243
1244$code.=<<___;
1245.globl .${prefix}_ctr32_encrypt_blocks
1246.align 5
1247.${prefix}_ctr32_encrypt_blocks:
1248 ${UCMP}i $len,1
1249 bltlr-
1250
1251 lis r0,0xfff0
1252 mfspr $vrsave,256
1253 mtspr 256,r0
1254
1255 li $idx,15
1256 vxor $rndkey0,$rndkey0,$rndkey0
1257 le?vspltisb $tmp,0x0f
1258
1259 lvx $ivec,0,$ivp # load [unaligned] iv
1260 lvsl $inpperm,0,$ivp
1261 lvx $inptail,$idx,$ivp
1262 vspltisb $one,1
1263 le?vxor $inpperm,$inpperm,$tmp
1264 vperm $ivec,$ivec,$inptail,$inpperm
1265 vsldoi $one,$rndkey0,$one,1
1266
1267 neg r11,$inp
1268 ?lvsl $keyperm,0,$key # prepare for unaligned key
1269 lwz $rounds,240($key)
1270
1271 lvsr $inpperm,0,r11 # prepare for unaligned load
1272 lvx $inptail,0,$inp
1273 addi $inp,$inp,15 # 15 is not typo
1274 le?vxor $inpperm,$inpperm,$tmp
1275
1276 srwi $rounds,$rounds,1
1277 li $idx,16
1278 subi $rounds,$rounds,1
1279
1280 ${UCMP}i $len,8
1281 bge _aesp8_ctr32_encrypt8x
1282
1283 ?lvsr $outperm,0,$out # prepare for unaligned store
1284 vspltisb $outmask,-1
1285 lvx $outhead,0,$out
1286 ?vperm $outmask,$rndkey0,$outmask,$outperm
1287 le?vxor $outperm,$outperm,$tmp
1288
1289 lvx $rndkey0,0,$key
1290 mtctr $rounds
1291 lvx $rndkey1,$idx,$key
1292 addi $idx,$idx,16
1293 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1294 vxor $inout,$ivec,$rndkey0
1295 lvx $rndkey0,$idx,$key
1296 addi $idx,$idx,16
1297 b Loop_ctr32_enc
1298
1299.align 5
1300Loop_ctr32_enc:
1301 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1302 vcipher $inout,$inout,$rndkey1
1303 lvx $rndkey1,$idx,$key
1304 addi $idx,$idx,16
1305 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1306 vcipher $inout,$inout,$rndkey0
1307 lvx $rndkey0,$idx,$key
1308 addi $idx,$idx,16
1309 bdnz Loop_ctr32_enc
1310
1311 vadduwm $ivec,$ivec,$one
1312 vmr $dat,$inptail
1313 lvx $inptail,0,$inp
1314 addi $inp,$inp,16
1315 subic. $len,$len,1 # blocks--
1316
1317 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1318 vcipher $inout,$inout,$rndkey1
1319 lvx $rndkey1,$idx,$key
1320 vperm $dat,$dat,$inptail,$inpperm
1321 li $idx,16
1322 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1323 lvx $rndkey0,0,$key
1324 vxor $dat,$dat,$rndkey1 # last round key
1325 vcipherlast $inout,$inout,$dat
1326
1327 lvx $rndkey1,$idx,$key
1328 addi $idx,$idx,16
1329 vperm $inout,$inout,$inout,$outperm
1330 vsel $dat,$outhead,$inout,$outmask
1331 mtctr $rounds
1332 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1333 vmr $outhead,$inout
1334 vxor $inout,$ivec,$rndkey0
1335 lvx $rndkey0,$idx,$key
1336 addi $idx,$idx,16
1337 stvx $dat,0,$out
1338 addi $out,$out,16
1339 bne Loop_ctr32_enc
1340
1341 addi $out,$out,-1
1342 lvx $inout,0,$out # redundant in aligned case
1343 vsel $inout,$outhead,$inout,$outmask
1344 stvx $inout,0,$out
1345
1346 mtspr 256,$vrsave
1347 blr
1348 .long 0
1349 .byte 0,12,0x14,0,0,0,6,0
1350 .long 0
1351___
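# [Editorial note, not part of the commit: as the ctr32 name suggests,
# the counter is incremented as a 32-bit word ($one is a vector holding
# ...0001, built by the vsldoi above, and vadduwm adds it word-wise),
# so the caller is expected to deal with the low 32 bits of the IV
# wrapping. CTR has no block-to-block dependency, which is why counts
# of 8 blocks or more divert to the optimized 8x procedure that
# follows.]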
1352#########################################################################
1353{{ # Optimized CTR procedure #
1354my $key_="r11";
1355my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1356my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1357my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1358my $rndkey0="v23"; # v24-v25 rotating buffer for the first few round keys
1359 # v26-v31 last 6 round keys
1360my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1361my ($two,$three,$four)=($outhead,$outperm,$outmask);
1362
1363$code.=<<___;
1364.align 5
1365_aesp8_ctr32_encrypt8x:
1366 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1367 li r10,`$FRAME+8*16+15`
1368 li r11,`$FRAME+8*16+31`
1369 stvx v20,r10,$sp # ABI says so
1370 addi r10,r10,32
1371 stvx v21,r11,$sp
1372 addi r11,r11,32
1373 stvx v22,r10,$sp
1374 addi r10,r10,32
1375 stvx v23,r11,$sp
1376 addi r11,r11,32
1377 stvx v24,r10,$sp
1378 addi r10,r10,32
1379 stvx v25,r11,$sp
1380 addi r11,r11,32
1381 stvx v26,r10,$sp
1382 addi r10,r10,32
1383 stvx v27,r11,$sp
1384 addi r11,r11,32
1385 stvx v28,r10,$sp
1386 addi r10,r10,32
1387 stvx v29,r11,$sp
1388 addi r11,r11,32
1389 stvx v30,r10,$sp
1390 stvx v31,r11,$sp
1391 li r0,-1
1392 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1393 li $x10,0x10
1394 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1395 li $x20,0x20
1396 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1397 li $x30,0x30
1398 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1399 li $x40,0x40
1400 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1401 li $x50,0x50
1402 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1403 li $x60,0x60
1404 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1405 li $x70,0x70
1406 mtspr 256,r0
1407
1408 subi $rounds,$rounds,3 # -4 in total
1409
1410 lvx $rndkey0,$x00,$key # load key schedule
1411 lvx v30,$x10,$key
1412 addi $key,$key,0x20
1413 lvx v31,$x00,$key
1414 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1415 addi $key_,$sp,$FRAME+15
1416 mtctr $rounds
1417
1418Load_ctr32_enc_key:
1419 ?vperm v24,v30,v31,$keyperm
1420 lvx v30,$x10,$key
1421 addi $key,$key,0x20
1422 stvx v24,$x00,$key_ # off-load round[1]
1423 ?vperm v25,v31,v30,$keyperm
1424 lvx v31,$x00,$key
1425 stvx v25,$x10,$key_ # off-load round[2]
1426 addi $key_,$key_,0x20
1427 bdnz Load_ctr32_enc_key
1428
1429 lvx v26,$x10,$key
1430 ?vperm v24,v30,v31,$keyperm
1431 lvx v27,$x20,$key
1432 stvx v24,$x00,$key_ # off-load round[3]
1433 ?vperm v25,v31,v26,$keyperm
1434 lvx v28,$x30,$key
1435 stvx v25,$x10,$key_ # off-load round[4]
1436 addi $key_,$sp,$FRAME+15 # rewind $key_
1437 ?vperm v26,v26,v27,$keyperm
1438 lvx v29,$x40,$key
1439 ?vperm v27,v27,v28,$keyperm
1440 lvx v30,$x50,$key
1441 ?vperm v28,v28,v29,$keyperm
1442 lvx v31,$x60,$key
1443 ?vperm v29,v29,v30,$keyperm
1444 lvx $out0,$x70,$key # borrow $out0
1445 ?vperm v30,v30,v31,$keyperm
1446 lvx v24,$x00,$key_ # pre-load round[1]
1447 ?vperm v31,v31,$out0,$keyperm
1448 lvx v25,$x10,$key_ # pre-load round[2]
1449
1450 vadduwm $two,$one,$one
1451 subi $inp,$inp,15 # undo "caller"
1452 $SHL $len,$len,4
1453
1454 vadduwm $out1,$ivec,$one # counter values ...
1455 vadduwm $out2,$ivec,$two
1456 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1457 le?li $idx,8
1458 vadduwm $out3,$out1,$two
1459 vxor $out1,$out1,$rndkey0
1460 le?lvsl $inpperm,0,$idx
1461 vadduwm $out4,$out2,$two
1462 vxor $out2,$out2,$rndkey0
1463 le?vspltisb $tmp,0x0f
1464 vadduwm $out5,$out3,$two
1465 vxor $out3,$out3,$rndkey0
1466 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1467 vadduwm $out6,$out4,$two
1468 vxor $out4,$out4,$rndkey0
1469 vadduwm $out7,$out5,$two
1470 vxor $out5,$out5,$rndkey0
1471 vadduwm $ivec,$out6,$two # next counter value
1472 vxor $out6,$out6,$rndkey0
1473 vxor $out7,$out7,$rndkey0
1474
1475 mtctr $rounds
1476 b Loop_ctr32_enc8x
1477.align 5
1478Loop_ctr32_enc8x:
1479 vcipher $out0,$out0,v24
1480 vcipher $out1,$out1,v24
1481 vcipher $out2,$out2,v24
1482 vcipher $out3,$out3,v24
1483 vcipher $out4,$out4,v24
1484 vcipher $out5,$out5,v24
1485 vcipher $out6,$out6,v24
1486 vcipher $out7,$out7,v24
1487Loop_ctr32_enc8x_middle:
1488 lvx v24,$x20,$key_ # round[3]
1489 addi $key_,$key_,0x20
1490
1491 vcipher $out0,$out0,v25
1492 vcipher $out1,$out1,v25
1493 vcipher $out2,$out2,v25
1494 vcipher $out3,$out3,v25
1495 vcipher $out4,$out4,v25
1496 vcipher $out5,$out5,v25
1497 vcipher $out6,$out6,v25
1498 vcipher $out7,$out7,v25
1499 lvx v25,$x10,$key_ # round[4]
1500 bdnz Loop_ctr32_enc8x
1501
1502 subic r11,$len,256 # $len-256, borrow $key_
1503 vcipher $out0,$out0,v24
1504 vcipher $out1,$out1,v24
1505 vcipher $out2,$out2,v24
1506 vcipher $out3,$out3,v24
1507 vcipher $out4,$out4,v24
1508 vcipher $out5,$out5,v24
1509 vcipher $out6,$out6,v24
1510 vcipher $out7,$out7,v24
1511
1512 subfe r0,r0,r0 # borrow?-1:0
1513 vcipher $out0,$out0,v25
1514 vcipher $out1,$out1,v25
1515 vcipher $out2,$out2,v25
1516 vcipher $out3,$out3,v25
1517 vcipher $out4,$out4,v25
1518 vcipher $out5,$out5,v25
1519 vcipher $out6,$out6,v25
1520 vcipher $out7,$out7,v25
1521
1522 and r0,r0,r11
1523 addi $key_,$sp,$FRAME+15 # rewind $key_
1524 vcipher $out0,$out0,v26
1525 vcipher $out1,$out1,v26
1526 vcipher $out2,$out2,v26
1527 vcipher $out3,$out3,v26
1528 vcipher $out4,$out4,v26
1529 vcipher $out5,$out5,v26
1530 vcipher $out6,$out6,v26
1531 vcipher $out7,$out7,v26
1532 lvx v24,$x00,$key_ # re-pre-load round[1]
1533
1534 subic $len,$len,129 # $len-=129
1535 vcipher $out0,$out0,v27
1536 addi $len,$len,1 # $len-=128 really
1537 vcipher $out1,$out1,v27
1538 vcipher $out2,$out2,v27
1539 vcipher $out3,$out3,v27
1540 vcipher $out4,$out4,v27
1541 vcipher $out5,$out5,v27
1542 vcipher $out6,$out6,v27
1543 vcipher $out7,$out7,v27
1544 lvx v25,$x10,$key_ # re-pre-load round[2]
1545
1546 vcipher $out0,$out0,v28
1547 lvx_u $in0,$x00,$inp # load input
1548 vcipher $out1,$out1,v28
1549 lvx_u $in1,$x10,$inp
1550 vcipher $out2,$out2,v28
1551 lvx_u $in2,$x20,$inp
1552 vcipher $out3,$out3,v28
1553 lvx_u $in3,$x30,$inp
1554 vcipher $out4,$out4,v28
1555 lvx_u $in4,$x40,$inp
1556 vcipher $out5,$out5,v28
1557 lvx_u $in5,$x50,$inp
1558 vcipher $out6,$out6,v28
1559 lvx_u $in6,$x60,$inp
1560 vcipher $out7,$out7,v28
1561 lvx_u $in7,$x70,$inp
1562 addi $inp,$inp,0x80
1563
1564 vcipher $out0,$out0,v29
1565 le?vperm $in0,$in0,$in0,$inpperm
1566 vcipher $out1,$out1,v29
1567 le?vperm $in1,$in1,$in1,$inpperm
1568 vcipher $out2,$out2,v29
1569 le?vperm $in2,$in2,$in2,$inpperm
1570 vcipher $out3,$out3,v29
1571 le?vperm $in3,$in3,$in3,$inpperm
1572 vcipher $out4,$out4,v29
1573 le?vperm $in4,$in4,$in4,$inpperm
1574 vcipher $out5,$out5,v29
1575 le?vperm $in5,$in5,$in5,$inpperm
1576 vcipher $out6,$out6,v29
1577 le?vperm $in6,$in6,$in6,$inpperm
1578 vcipher $out7,$out7,v29
1579 le?vperm $in7,$in7,$in7,$inpperm
1580
1581 add $inp,$inp,r0 # $inp is adjusted in such
1582 # way that at exit from the
1583 # loop inX-in7 are loaded
1584 # with last "words"
1585 subfe. r0,r0,r0 # borrow?-1:0
1586 vcipher $out0,$out0,v30
1587 vxor $in0,$in0,v31 # xor with last round key
1588 vcipher $out1,$out1,v30
1589 vxor $in1,$in1,v31
1590 vcipher $out2,$out2,v30
1591 vxor $in2,$in2,v31
1592 vcipher $out3,$out3,v30
1593 vxor $in3,$in3,v31
1594 vcipher $out4,$out4,v30
1595 vxor $in4,$in4,v31
1596 vcipher $out5,$out5,v30
1597 vxor $in5,$in5,v31
1598 vcipher $out6,$out6,v30
1599 vxor $in6,$in6,v31
1600 vcipher $out7,$out7,v30
1601 vxor $in7,$in7,v31
1602
1603 bne Lctr32_enc8x_break # did $len-129 borrow?
1604
1605 vcipherlast $in0,$out0,$in0
1606 vcipherlast $in1,$out1,$in1
1607 vadduwm $out1,$ivec,$one # counter values ...
1608 vcipherlast $in2,$out2,$in2
1609 vadduwm $out2,$ivec,$two
1610 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1611 vcipherlast $in3,$out3,$in3
1612 vadduwm $out3,$out1,$two
1613 vxor $out1,$out1,$rndkey0
1614 vcipherlast $in4,$out4,$in4
1615 vadduwm $out4,$out2,$two
1616 vxor $out2,$out2,$rndkey0
1617 vcipherlast $in5,$out5,$in5
1618 vadduwm $out5,$out3,$two
1619 vxor $out3,$out3,$rndkey0
1620 vcipherlast $in6,$out6,$in6
1621 vadduwm $out6,$out4,$two
1622 vxor $out4,$out4,$rndkey0
1623 vcipherlast $in7,$out7,$in7
1624 vadduwm $out7,$out5,$two
1625 vxor $out5,$out5,$rndkey0
1626 le?vperm $in0,$in0,$in0,$inpperm
1627 vadduwm $ivec,$out6,$two # next counter value
1628 vxor $out6,$out6,$rndkey0
1629 le?vperm $in1,$in1,$in1,$inpperm
1630 vxor $out7,$out7,$rndkey0
1631 mtctr $rounds
1632
1633 vcipher $out0,$out0,v24
1634 stvx_u $in0,$x00,$out
1635 le?vperm $in2,$in2,$in2,$inpperm
1636 vcipher $out1,$out1,v24
1637 stvx_u $in1,$x10,$out
1638 le?vperm $in3,$in3,$in3,$inpperm
1639 vcipher $out2,$out2,v24
1640 stvx_u $in2,$x20,$out
1641 le?vperm $in4,$in4,$in4,$inpperm
1642 vcipher $out3,$out3,v24
1643 stvx_u $in3,$x30,$out
1644 le?vperm $in5,$in5,$in5,$inpperm
1645 vcipher $out4,$out4,v24
1646 stvx_u $in4,$x40,$out
1647 le?vperm $in6,$in6,$in6,$inpperm
1648 vcipher $out5,$out5,v24
1649 stvx_u $in5,$x50,$out
1650 le?vperm $in7,$in7,$in7,$inpperm
1651 vcipher $out6,$out6,v24
1652 stvx_u $in6,$x60,$out
1653 vcipher $out7,$out7,v24
1654 stvx_u $in7,$x70,$out
1655 addi $out,$out,0x80
1656
1657 b Loop_ctr32_enc8x_middle
1658
1659.align 5
1660Lctr32_enc8x_break:
1661 cmpwi $len,-0x60
1662 blt Lctr32_enc8x_one
1663 nop
1664 beq Lctr32_enc8x_two
1665 cmpwi $len,-0x40
1666 blt Lctr32_enc8x_three
1667 nop
1668 beq Lctr32_enc8x_four
1669 cmpwi $len,-0x20
1670 blt Lctr32_enc8x_five
1671 nop
1672 beq Lctr32_enc8x_six
1673 cmpwi $len,0x00
1674 blt Lctr32_enc8x_seven
1675
1676Lctr32_enc8x_eight:
1677 vcipherlast $out0,$out0,$in0
1678 vcipherlast $out1,$out1,$in1
1679 vcipherlast $out2,$out2,$in2
1680 vcipherlast $out3,$out3,$in3
1681 vcipherlast $out4,$out4,$in4
1682 vcipherlast $out5,$out5,$in5
1683 vcipherlast $out6,$out6,$in6
1684 vcipherlast $out7,$out7,$in7
1685
1686 le?vperm $out0,$out0,$out0,$inpperm
1687 le?vperm $out1,$out1,$out1,$inpperm
1688 stvx_u $out0,$x00,$out
1689 le?vperm $out2,$out2,$out2,$inpperm
1690 stvx_u $out1,$x10,$out
1691 le?vperm $out3,$out3,$out3,$inpperm
1692 stvx_u $out2,$x20,$out
1693 le?vperm $out4,$out4,$out4,$inpperm
1694 stvx_u $out3,$x30,$out
1695 le?vperm $out5,$out5,$out5,$inpperm
1696 stvx_u $out4,$x40,$out
1697 le?vperm $out6,$out6,$out6,$inpperm
1698 stvx_u $out5,$x50,$out
1699 le?vperm $out7,$out7,$out7,$inpperm
1700 stvx_u $out6,$x60,$out
1701 stvx_u $out7,$x70,$out
1702 addi $out,$out,0x80
1703 b Lctr32_enc8x_done
1704
1705.align 5
1706Lctr32_enc8x_seven:
1707 vcipherlast $out0,$out0,$in1
1708 vcipherlast $out1,$out1,$in2
1709 vcipherlast $out2,$out2,$in3
1710 vcipherlast $out3,$out3,$in4
1711 vcipherlast $out4,$out4,$in5
1712 vcipherlast $out5,$out5,$in6
1713 vcipherlast $out6,$out6,$in7
1714
1715 le?vperm $out0,$out0,$out0,$inpperm
1716 le?vperm $out1,$out1,$out1,$inpperm
1717 stvx_u $out0,$x00,$out
1718 le?vperm $out2,$out2,$out2,$inpperm
1719 stvx_u $out1,$x10,$out
1720 le?vperm $out3,$out3,$out3,$inpperm
1721 stvx_u $out2,$x20,$out
1722 le?vperm $out4,$out4,$out4,$inpperm
1723 stvx_u $out3,$x30,$out
1724 le?vperm $out5,$out5,$out5,$inpperm
1725 stvx_u $out4,$x40,$out
1726 le?vperm $out6,$out6,$out6,$inpperm
1727 stvx_u $out5,$x50,$out
1728 stvx_u $out6,$x60,$out
1729 addi $out,$out,0x70
1730 b Lctr32_enc8x_done
1731
1732.align 5
1733Lctr32_enc8x_six:
1734 vcipherlast $out0,$out0,$in2
1735 vcipherlast $out1,$out1,$in3
1736 vcipherlast $out2,$out2,$in4
1737 vcipherlast $out3,$out3,$in5
1738 vcipherlast $out4,$out4,$in6
1739 vcipherlast $out5,$out5,$in7
1740
1741 le?vperm $out0,$out0,$out0,$inpperm
1742 le?vperm $out1,$out1,$out1,$inpperm
1743 stvx_u $out0,$x00,$out
1744 le?vperm $out2,$out2,$out2,$inpperm
1745 stvx_u $out1,$x10,$out
1746 le?vperm $out3,$out3,$out3,$inpperm
1747 stvx_u $out2,$x20,$out
1748 le?vperm $out4,$out4,$out4,$inpperm
1749 stvx_u $out3,$x30,$out
1750 le?vperm $out5,$out5,$out5,$inpperm
1751 stvx_u $out4,$x40,$out
1752 stvx_u $out5,$x50,$out
1753 addi $out,$out,0x60
1754 b Lctr32_enc8x_done
1755
1756.align 5
1757Lctr32_enc8x_five:
1758 vcipherlast $out0,$out0,$in3
1759 vcipherlast $out1,$out1,$in4
1760 vcipherlast $out2,$out2,$in5
1761 vcipherlast $out3,$out3,$in6
1762 vcipherlast $out4,$out4,$in7
1763
1764 le?vperm $out0,$out0,$out0,$inpperm
1765 le?vperm $out1,$out1,$out1,$inpperm
1766 stvx_u $out0,$x00,$out
1767 le?vperm $out2,$out2,$out2,$inpperm
1768 stvx_u $out1,$x10,$out
1769 le?vperm $out3,$out3,$out3,$inpperm
1770 stvx_u $out2,$x20,$out
1771 le?vperm $out4,$out4,$out4,$inpperm
1772 stvx_u $out3,$x30,$out
1773 stvx_u $out4,$x40,$out
1774 addi $out,$out,0x50
1775 b Lctr32_enc8x_done
1776
1777.align 5
1778Lctr32_enc8x_four:
1779 vcipherlast $out0,$out0,$in4
1780 vcipherlast $out1,$out1,$in5
1781 vcipherlast $out2,$out2,$in6
1782 vcipherlast $out3,$out3,$in7
1783
1784 le?vperm $out0,$out0,$out0,$inpperm
1785 le?vperm $out1,$out1,$out1,$inpperm
1786 stvx_u $out0,$x00,$out
1787 le?vperm $out2,$out2,$out2,$inpperm
1788 stvx_u $out1,$x10,$out
1789 le?vperm $out3,$out3,$out3,$inpperm
1790 stvx_u $out2,$x20,$out
1791 stvx_u $out3,$x30,$out
1792 addi $out,$out,0x40
1793 b Lctr32_enc8x_done
1794
1795.align 5
1796Lctr32_enc8x_three:
1797 vcipherlast $out0,$out0,$in5
1798 vcipherlast $out1,$out1,$in6
1799 vcipherlast $out2,$out2,$in7
1800
1801 le?vperm $out0,$out0,$out0,$inpperm
1802 le?vperm $out1,$out1,$out1,$inpperm
1803 stvx_u $out0,$x00,$out
1804 le?vperm $out2,$out2,$out2,$inpperm
1805 stvx_u $out1,$x10,$out
1806 stvx_u $out2,$x20,$out
1807 addi $out,$out,0x30
1808 b Lctr32_enc8x_done
1809
1810.align 5
1811Lctr32_enc8x_two:
1812 vcipherlast $out0,$out0,$in6
1813 vcipherlast $out1,$out1,$in7
1814
1815 le?vperm $out0,$out0,$out0,$inpperm
1816 le?vperm $out1,$out1,$out1,$inpperm
1817 stvx_u $out0,$x00,$out
1818 stvx_u $out1,$x10,$out
1819 addi $out,$out,0x20
1820 b Lctr32_enc8x_done
1821
1822.align 5
1823Lctr32_enc8x_one:
1824 vcipherlast $out0,$out0,$in7
1825
1826 le?vperm $out0,$out0,$out0,$inpperm
1827 stvx_u $out0,0,$out
1828 addi $out,$out,0x10
1829
1830Lctr32_enc8x_done:
1831 li r10,`$FRAME+15`
1832 li r11,`$FRAME+31`
1833 stvx $inpperm,r10,$sp # wipe copies of round keys
1834 addi r10,r10,32
1835 stvx $inpperm,r11,$sp
1836 addi r11,r11,32
1837 stvx $inpperm,r10,$sp
1838 addi r10,r10,32
1839 stvx $inpperm,r11,$sp
1840 addi r11,r11,32
1841 stvx $inpperm,r10,$sp
1842 addi r10,r10,32
1843 stvx $inpperm,r11,$sp
1844 addi r11,r11,32
1845 stvx $inpperm,r10,$sp
1846 addi r10,r10,32
1847 stvx $inpperm,r11,$sp
1848 addi r11,r11,32
1849
1850 mtspr 256,$vrsave
1851 lvx v20,r10,$sp # ABI says so
1852 addi r10,r10,32
1853 lvx v21,r11,$sp
1854 addi r11,r11,32
1855 lvx v22,r10,$sp
1856 addi r10,r10,32
1857 lvx v23,r11,$sp
1858 addi r11,r11,32
1859 lvx v24,r10,$sp
1860 addi r10,r10,32
1861 lvx v25,r11,$sp
1862 addi r11,r11,32
1863 lvx v26,r10,$sp
1864 addi r10,r10,32
1865 lvx v27,r11,$sp
1866 addi r11,r11,32
1867 lvx v28,r10,$sp
1868 addi r10,r10,32
1869 lvx v29,r11,$sp
1870 addi r11,r11,32
1871 lvx v30,r10,$sp
1872 lvx v31,r11,$sp
1873 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1874 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1875 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1876 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1877 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1878 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1879 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1880 blr
1881 .long 0
1882 .byte 0,12,0x14,0,0x80,6,6,0
1883 .long 0
1884.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1885___
1886}} }}}
1887
1888my $consts=1;
1889foreach(split("\n",$code)) {
1890 s/\`([^\`]*)\`/eval($1)/geo;
1891
1892 # constants table endian-specific conversion
1893 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1894 my $conv=$3;
1895 my @bytes=();
1896
1897 # convert to endian-agnostic format
1898 if ($1 eq "long") {
1899 foreach (split(/,\s*/,$2)) {
1900 my $l = /^0/?oct:int;
1901 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1902 }
1903 } else {
1904 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
1905 }
1906
1907 # little-endian conversion
1908 if ($flavour =~ /le$/o) {
1909 SWITCH: for($conv) {
1910 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1911 /\?rev/ && do { @bytes=reverse(@bytes); last; };
1912 }
1913 }
1914
1915 #emit
1916 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1917 next;
1918 }
1919 $consts=0 if (m/Lconsts:/o); # end of table
1920
1921 # instructions prefixed with '?' are endian-specific and need
1922 # to be adjusted accordingly...
1923 if ($flavour =~ /le$/o) { # little-endian
1924 s/le\?//o or
1925 s/be\?/#be#/o or
1926 s/\?lvsr/lvsl/o or
1927 s/\?lvsl/lvsr/o or
1928 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1929 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1930 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1931 } else { # big-endian
1932 s/le\?/#le#/o or
1933 s/be\?//o or
1934 s/\?([a-z]+)/$1/o;
1935 }
1936
1937 print $_,"\n";
1938}
1939
1940close STDOUT;
diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl
new file mode 100755
index 000000000000..e76a58c343c1
--- /dev/null
+++ b/drivers/crypto/vmx/ghashp8-ppc.pl
@@ -0,0 +1,234 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# GHASH for PowerISA v2.07.
11#
12# July 2014
13#
14# Accurate performance measurements are problematic, because it's
15# always a virtualized setup with a possibly throttled processor.
16# Relative comparison is therefore more informative. This initial
17# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
18# faster than "4-bit" integer-only compiler-generated 64-bit code.
19# "Initial version" means that there is room for further improvement.
20
21$flavour=shift;
22$output =shift;
23
24if ($flavour =~ /64/) {
25 $SIZE_T=8;
26 $LRSAVE=2*$SIZE_T;
27 $STU="stdu";
28 $POP="ld";
29 $PUSH="std";
30} elsif ($flavour =~ /32/) {
31 $SIZE_T=4;
32 $LRSAVE=$SIZE_T;
33 $STU="stwu";
34 $POP="lwz";
35 $PUSH="stw";
36} else { die "nonsense $flavour"; }
37
38$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
39( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
40( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
41die "can't locate ppc-xlate.pl";
42
43open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
44
45my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
46
47my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
48my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
49my $vrsave="r12";
50
51$code=<<___;
52.machine "any"
53
54.text
55
56.globl .gcm_init_p8
57.align 5
58.gcm_init_p8:
59 lis r0,0xfff0
60 li r8,0x10
61 mfspr $vrsave,256
62 li r9,0x20
63 mtspr 256,r0
64 li r10,0x30
65 lvx_u $H,0,r4 # load H
66
67 vspltisb $xC2,-16 # 0xf0
68 vspltisb $t0,1 # one
69 vaddubm $xC2,$xC2,$xC2 # 0xe0
70 vxor $zero,$zero,$zero
71 vor $xC2,$xC2,$t0 # 0xe1
72 vsldoi $xC2,$xC2,$zero,15 # 0xe1...
73 vsldoi $t1,$zero,$t0,1 # ...1
74 vaddubm $xC2,$xC2,$xC2 # 0xc2...
75 vspltisb $t2,7
76 vor $xC2,$xC2,$t1 # 0xc2....01
77 vspltb $t1,$H,0 # most significant byte
78 vsl $H,$H,$t0 # H<<=1
79 vsrab $t1,$t1,$t2 # broadcast carry bit
80 vand $t1,$t1,$xC2
81 vxor $H,$H,$t1 # twisted H
82
83 vsldoi $H,$H,$H,8 # twist even more ...
84 vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
85 vsldoi $Hl,$zero,$H,8 # ... and split
86 vsldoi $Hh,$H,$zero,8
87
88 stvx_u $xC2,0,r3 # save pre-computed table
89 stvx_u $Hl,r8,r3
90 stvx_u $H, r9,r3
91 stvx_u $Hh,r10,r3
92
93 mtspr 256,$vrsave
94 blr
95 .long 0
96 .byte 0,12,0x14,0,0,0,2,0
97 .long 0
98.size .gcm_init_p8,.-.gcm_init_p8
99
100.globl .gcm_gmult_p8
101.align 5
102.gcm_gmult_p8:
103 lis r0,0xfff8
104 li r8,0x10
105 mfspr $vrsave,256
106 li r9,0x20
107 mtspr 256,r0
108 li r10,0x30
109 lvx_u $IN,0,$Xip # load Xi
110
111 lvx_u $Hl,r8,$Htbl # load pre-computed table
112 le?lvsl $lemask,r0,r0
113 lvx_u $H, r9,$Htbl
114 le?vspltisb $t0,0x07
115 lvx_u $Hh,r10,$Htbl
116 le?vxor $lemask,$lemask,$t0
117 lvx_u $xC2,0,$Htbl
118 le?vperm $IN,$IN,$IN,$lemask
119 vxor $zero,$zero,$zero
120
121 vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
122 vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
123 vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
124
125 vpmsumd $t2,$Xl,$xC2 # 1st phase
126
127 vsldoi $t0,$Xm,$zero,8
128 vsldoi $t1,$zero,$Xm,8
129 vxor $Xl,$Xl,$t0
130 vxor $Xh,$Xh,$t1
131
132 vsldoi $Xl,$Xl,$Xl,8
133 vxor $Xl,$Xl,$t2
134
135 vsldoi $t1,$Xl,$Xl,8 # 2nd phase
136 vpmsumd $Xl,$Xl,$xC2
137 vxor $t1,$t1,$Xh
138 vxor $Xl,$Xl,$t1
139
140 le?vperm $Xl,$Xl,$Xl,$lemask
141 stvx_u $Xl,0,$Xip # write out Xi
142
143 mtspr 256,$vrsave
144 blr
145 .long 0
146 .byte 0,12,0x14,0,0,0,2,0
147 .long 0
148.size .gcm_gmult_p8,.-.gcm_gmult_p8
149
150.globl .gcm_ghash_p8
151.align 5
152.gcm_ghash_p8:
153 lis r0,0xfff8
154 li r8,0x10
155 mfspr $vrsave,256
156 li r9,0x20
157 mtspr 256,r0
158 li r10,0x30
159 lvx_u $Xl,0,$Xip # load Xi
160
161 lvx_u $Hl,r8,$Htbl # load pre-computed table
162 le?lvsl $lemask,r0,r0
163 lvx_u $H, r9,$Htbl
164 le?vspltisb $t0,0x07
165 lvx_u $Hh,r10,$Htbl
166 le?vxor $lemask,$lemask,$t0
167 lvx_u $xC2,0,$Htbl
168 le?vperm $Xl,$Xl,$Xl,$lemask
169 vxor $zero,$zero,$zero
170
171 lvx_u $IN,0,$inp
172 addi $inp,$inp,16
173 subi $len,$len,16
174 le?vperm $IN,$IN,$IN,$lemask
175 vxor $IN,$IN,$Xl
176 b Loop
177
178.align 5
179Loop:
180 subic $len,$len,16
181 vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
182 subfe. r0,r0,r0 # borrow?-1:0
183 vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
184 and r0,r0,$len
185 vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
186 add $inp,$inp,r0
187
188 vpmsumd $t2,$Xl,$xC2 # 1st phase
189
190 vsldoi $t0,$Xm,$zero,8
191 vsldoi $t1,$zero,$Xm,8
192 vxor $Xl,$Xl,$t0
193 vxor $Xh,$Xh,$t1
194
195 vsldoi $Xl,$Xl,$Xl,8
196 vxor $Xl,$Xl,$t2
197 lvx_u $IN,0,$inp
198 addi $inp,$inp,16
199
200 vsldoi $t1,$Xl,$Xl,8 # 2nd phase
201 vpmsumd $Xl,$Xl,$xC2
202 le?vperm $IN,$IN,$IN,$lemask
203 vxor $t1,$t1,$Xh
204 vxor $IN,$IN,$t1
205 vxor $IN,$IN,$Xl
206 beq Loop # did $len-=16 borrow?
207
208 vxor $Xl,$Xl,$t1
209 le?vperm $Xl,$Xl,$Xl,$lemask
210 stvx_u $Xl,0,$Xip # write out Xi
211
212 mtspr 256,$vrsave
213 blr
214 .long 0
215 .byte 0,12,0x14,0,0,0,4,0
216 .long 0
217.size .gcm_ghash_p8,.-.gcm_ghash_p8
218
219.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
220.align 2
221___
222
223foreach (split("\n",$code)) {
224 if ($flavour =~ /le$/o) { # little-endian
225 s/le\?//o or
226 s/be\?/#be#/o;
227 } else {
228 s/le\?/#le#/o or
229 s/be\?//o;
230 }
231 print $_,"\n";
232}
233
234close STDOUT; # enforce flush
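
The le?-guarded lvsl/vspltisb/vxor sequence in the three routines above
builds $lemask, the vperm control used to byte-reverse each doubleword
after lvx_u loads and before stvx_u stores: those map to lxvd2x/stxvd2x,
whose in-register byte order within each doubleword needs this
compensation on little-endian. A quick standalone sketch (not part of
the patch) of the mask it produces:

#!/usr/bin/env perl
# lvsl with a 16-byte-aligned effective address yields bytes 0..15;
# xoring each byte with 0x07 (the vspltisb/vxor pair) reverses the
# byte order within each 8-byte doubleword.
my @lemask = map { $_ ^ 0x07 } (0 .. 15);
print join(',', @lemask), "\n";	# 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8
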
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
new file mode 100755
index 000000000000..f89e81429931
--- /dev/null
+++ b/drivers/crypto/vmx/ppc-xlate.pl
@@ -0,0 +1,226 @@
1#!/usr/bin/env perl
2
3# PowerPC assembler distiller by <appro>.
4
5my $flavour = shift;
6my $output = shift;
7open STDOUT,">$output" or die "can't open $output: $!";
8
9my %GLOBALS;
10my $dotinlocallabels=($flavour=~/linux/)?1:0;
11
12################################################################
13# directives which need special treatment on different platforms
14################################################################
15my $globl = sub {
16 my $junk = shift;
17 my $name = shift;
18 my $global = \$GLOBALS{$name};
19 my $ret;
20
21 $name =~ s|^[\.\_]||;
22
23 SWITCH: for ($flavour) {
24 /aix/ && do { $name = ".$name";
25 last;
26 };
27 /osx/ && do { $name = "_$name";
28 last;
29 };
30 /linux.*(32|64le)/
31 && do { $ret .= ".globl $name\n";
32 $ret .= ".type $name,\@function";
33 last;
34 };
35 /linux.*64/ && do { $ret .= ".globl $name\n";
36 $ret .= ".type $name,\@function\n";
37 $ret .= ".section \".opd\",\"aw\"\n";
38 $ret .= ".align 3\n";
39 $ret .= "$name:\n";
40 $ret .= ".quad .$name,.TOC.\@tocbase,0\n";
41 $ret .= ".previous\n";
42
43 $name = ".$name";
44 last;
45 };
46 }
47
48 $ret = ".globl $name" if (!$ret);
49 $$global = $name;
50 $ret;
51};
52my $text = sub {
53 my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
54 $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
55 $ret;
56};
57my $machine = sub {
58 my $junk = shift;
59 my $arch = shift;
60 if ($flavour =~ /osx/)
61 { $arch =~ s/\"//g;
62 $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
63 }
64 ".machine $arch";
65};
66my $size = sub {
67 if ($flavour =~ /linux/)
68 { shift;
69 my $name = shift; $name =~ s|^[\.\_]||;
70 my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
71 $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
72 $ret;
73 }
74 else
75 { ""; }
76};
77my $asciz = sub {
78 shift;
79 my $line = join(",",@_);
80 if ($line =~ /^"(.*)"$/)
81 { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
82 else
83 { ""; }
84};
85my $quad = sub {
86 shift;
87 my @ret;
88 my ($hi,$lo);
89 for (@_) {
90 if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
91 { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
92 elsif (/^([0-9]+)$/o)
93 { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
94 else
95 { $hi=undef; $lo=$_; }
96
97 if (defined($hi))
98 { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
99 else
100 { push(@ret,".quad $lo"); }
101 }
102 join("\n",@ret);
103};
104
105################################################################
106# simplified mnemonics not handled by at least one assembler
107################################################################
108my $cmplw = sub {
109 my $f = shift;
110 my $cr = 0; $cr = shift if ($#_>1);
111 # Some out-of-date 32-bit GNU assemblers just can't handle cmplw...
112 ($flavour =~ /linux.*32/) ?
113 " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
114 " cmplw ".join(',',$cr,@_);
115};
116my $bdnz = sub {
117 my $f = shift;
118 my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
119 " bc $bo,0,".shift;
120} if ($flavour!~/linux/);
121my $bltlr = sub {
122 my $f = shift;
123 my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
124 ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
125 " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
126 " bclr $bo,0";
127};
128my $bnelr = sub {
129 my $f = shift;
130 my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
131 ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
132 " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
133 " bclr $bo,2";
134};
135my $beqlr = sub {
136 my $f = shift;
137 my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
138 ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
139 " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
140 " bclr $bo,2";
141};
142# GNU assembler can't handle extrdi rA,rS,16,48, i.e. any case where the
143# sum of the last two arguments is 64; it fails with "operand out of range".
144my $extrdi = sub {
145 my ($f,$ra,$rs,$n,$b) = @_;
146 $b = ($b+$n)&63; $n = 64-$n;
147 " rldicl $ra,$rs,$b,$n";
148};
149my $vmr = sub {
150 my ($f,$vx,$vy) = @_;
151 " vor $vx,$vy,$vy";
152};
153
154# PowerISA 2.06 stuff
155sub vsxmem_op {
156 my ($f, $vrt, $ra, $rb, $op) = @_;
157 " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
158}
159# made-up AltiVec/VMX-style mnemonics for unaligned memory references
160my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
161my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
162my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
163my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
164my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
165my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
166
167# PowerISA 2.07 stuff
168sub vcrypto_op {
169 my ($f, $vrt, $vra, $vrb, $op) = @_;
170 " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
171}
172my $vcipher = sub { vcrypto_op(@_, 1288); };
173my $vcipherlast = sub { vcrypto_op(@_, 1289); };
174my $vncipher = sub { vcrypto_op(@_, 1352); };
175my $vncipherlast= sub { vcrypto_op(@_, 1353); };
176my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
177my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
178my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
179my $vpmsumb = sub { vcrypto_op(@_, 1032); };
180my $vpmsumd = sub { vcrypto_op(@_, 1224); };
181my $vpmsumh = sub { vcrypto_op(@_, 1096); };
182my $vpmsumw = sub { vcrypto_op(@_, 1160); };
183my $vaddudm = sub { vcrypto_op(@_, 192); };
184
185my $mtsle = sub {
186 my ($f, $arg) = @_;
187 " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
188};
189
190while($line=<>) {
191
192 $line =~ s|[#!;].*$||; # get rid of asm-style comments...
193 $line =~ s|/\*.*\*/||; # ... and C-style comments...
194 $line =~ s|^\s+||; # ... and skip white spaces in beginning...
195 $line =~ s|\s+$||; # ... and at the end
196
197 {
198 $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
199 $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
200 }
201
202 {
203 $line =~ s|(^[\.\w]+)\:\s*||;
204 my $label = $1;
205 if ($label) {
206 printf "%s:",($GLOBALS{$label} or $label);
207 printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/);
208 }
209 }
210
211 {
212 $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
213 my $c = $1; $c = "\t" if ($c eq "");
214 my $mnemonic = $2;
215 my $f = $3;
216 my $opcode = eval("\$$mnemonic");
217 $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
218 if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
219 elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
220 }
221
222 print $line if ($line);
223 print "\n";
224}
225
226close STDOUT;
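
To illustrate the two instruction encoders above, a standalone sketch
(not part of the patch) that reproduces the words they emit; the operand
values are arbitrary examples.

#!/usr/bin/env perl
# VX form used by vcrypto_op(): major opcode 4, then VRT/VRA/VRB and XO.
sub vx { my ($vrt,$vra,$vrb,$xo) = @_;
	 (4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$xo; }
# Form used by vsxmem_op(): major opcode 31; the XO is shifted left by
# one with the low (TX) bit set, selecting VSRs 32..63, which alias the
# VMX registers.
sub xx1 { my ($vrt,$ra,$rb,$op) = @_;
	  (31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); }
printf ".long 0x%08X\t# vpmsumd v0,v1,v2 (XO 1224)\n", vx(0,1,2,1224);
printf ".long 0x%08X\t# vcipher v3,v4,v5 (XO 1288)\n", vx(3,4,5,1288);
printf ".long 0x%08X\t# lvx_u v0,0,r3, i.e. lxvd2x (XO 844)\n", xx1(0,0,3,844);
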