author		Dave Martin <dave.martin@linaro.org>		2013-01-10 06:20:15 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2013-01-13 07:41:22 -0500
commit		638591cd7b601d403ed703d55062b48c32ea8cfb (patch)
tree		eb7f735b59d7609976b768fc7deb020b6f3d955a /arch/arm
parent		9931faca02c604c22335f5a935a501bb2ace6e20 (diff)
ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible
This patch fixes aes-armv4.S and sha1-armv4-large.S to work
natively in Thumb. This allows ARM/Thumb interworking workarounds
to be removed.
I also take the opportunity to convert some explicit assembler
directives for exported functions to the standard ENTRY()/ENDPROC()
macros from <linux/linkage.h>.
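As an abridged before/after of that conversion (taken from the
AES_encrypt hunks below; ENTRY() and ENDPROC() are the macros
provided by <linux/linkage.h>):

	@ before: hand-rolled export directives
	.global	AES_encrypt
	.type	AES_encrypt,%function
	.align	5
	AES_encrypt:
		...
	.size	AES_encrypt,.-AES_encrypt

	@ after: the standard linkage macros; ENDPROC() also marks the
	@ symbol as a function so the linker can fix up ARM/Thumb calls
	.align	5
	ENTRY(AES_encrypt)
		...
	ENDPROC(AES_encrypt)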
For the code itself:
* In sha1_block_data_order, use of TEQ with sp is deprecated in
ARMv7 and not supported in Thumb. For the branches back to
.L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the
assumption that clobbering the C flag here will not cause
incorrect behaviour.
For the first branch back to .L_20_39_or_60_79 the C flag is
important, so sp is temporarily moved into another register and
TEQ is done against that copy (see the first sketch after this
list).
* In the AES code, most forms of register-indexed addressing with
shifts and rotates are not permitted for loads and stores in
Thumb, so for Thumb the address calculation is done with a
separate instruction before a plain load (an excerpt appears
after the next paragraph).
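As a sketch, the two sp-comparison workarounds look like this
(excerpted from the sha1-armv4-large.S hunks below):

	@ where the C flag is dead, CMP replaces the deprecated TEQ-with-sp
	-	teq	r14,sp
	+	cmp	r14,sp
		bne	.L_00_15

	@ where the following bcs needs the carry, sp is first copied into
	@ a scratch register so TEQ (which leaves C intact) can still be used
	 ARM(	teq	r14,sp		)	@ preserve carry
	 THUMB(	mov	r11,sp		)
	 THUMB(	teq	r14,r11		)	@ preserve carry
		bne	.L_20_39_or_60_79
		bcs	.L_done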
The resulting code is unlikely to be optimally scheduled, but it
should not have a large impact given the overall size of the code.
I haven't run any benchmarks.
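Concretely, the extra instruction comes from splitting each
shifted-index table load into an address calculation plus a plain
byte load for Thumb, as in this excerpt from the AES hunks below:

	-	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
	+ ARM(	ldrb	r1,[r10,r1,lsr#24]	)  @ Td4[s1>>24]
	+ THUMB(	add	r1,r10,r1,lsr#24	)  @ Td4[s1>>24]
	+ THUMB(	ldrb	r1,[r1]			)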
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Tested-by: David McCullough <ucdevel@gmail.com> (ARM only)
Acked-by: David McCullough <ucdevel@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm')
-rw-r--r--	arch/arm/crypto/aes-armv4.S		64
-rw-r--r--	arch/arm/crypto/sha1-armv4-large.S	24
2 files changed, 29 insertions, 59 deletions
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
index e59b1d505d6c..19d6cd6f29f9 100644
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -34,8 +34,9 @@
 @ A little glue here to select the correct code below for the ARM CPU
 @ that is being targetted.
 
+#include <linux/linkage.h>
+
 .text
-.code	32
 
 .type	AES_Te,%object
 .align	5
@@ -145,10 +146,8 @@ AES_Te:
 
 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
 @ 		const AES_KEY *key) {
-.global AES_encrypt
-.type   AES_encrypt,%function
 .align	5
-AES_encrypt:
+ENTRY(AES_encrypt)
 	sub	r3,pc,#8		@ AES_encrypt
 	stmdb	sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -239,15 +238,8 @@ AES_encrypt:
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_encrypt,.-AES_encrypt
+ENDPROC(AES_encrypt)
 
 .type   _armv4_AES_encrypt,%function
 .align	2
@@ -386,10 +378,8 @@ _armv4_AES_encrypt:
 	ldr	pc,[sp],#4	@ pop and return
 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
 
-.global private_AES_set_encrypt_key
-.type   private_AES_set_encrypt_key,%function
 .align	5
-private_AES_set_encrypt_key:
+ENTRY(private_AES_set_encrypt_key)
 _armv4_AES_set_encrypt_key:
 	sub	r3,pc,#8		@ AES_set_encrypt_key
 	teq	r0,#0
@@ -658,15 +648,11 @@ _armv4_AES_set_encrypt_key:
 
 .Ldone:	mov	r0,#0
 	ldmia	sp!,{r4-r12,lr}
-.Labrt:	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+.Labrt:	mov	pc,lr
+ENDPROC(private_AES_set_encrypt_key)
 
-.global private_AES_set_decrypt_key
-.type   private_AES_set_decrypt_key,%function
 .align	5
-private_AES_set_decrypt_key:
+ENTRY(private_AES_set_decrypt_key)
 	str	lr,[sp,#-4]!		@ push lr
 #if 0
 	@ kernel does both of these in setkey so optimise this bit out by
@@ -748,15 +734,8 @@ private_AES_set_decrypt_key:
 	bne	.Lmix
 
 	mov	r0,#0
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+ENDPROC(private_AES_set_decrypt_key)
 
 .type	AES_Td,%object
 .align	5
@@ -862,10 +841,8 @@ AES_Td:
 
 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
 @ 		const AES_KEY *key) {
-.global AES_decrypt
-.type   AES_decrypt,%function
 .align	5
-AES_decrypt:
+ENTRY(AES_decrypt)
 	sub	r3,pc,#8		@ AES_decrypt
 	stmdb	sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -956,15 +933,8 @@ AES_decrypt:
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_decrypt,.-AES_decrypt
+ENDPROC(AES_decrypt)
 
 .type   _armv4_AES_decrypt,%function
 .align	2
@@ -1064,7 +1034,9 @@ _armv4_AES_decrypt:
 	and	r9,lr,r1,lsr#8
 
 	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
-	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
+ ARM(	ldrb	r1,[r10,r1,lsr#24]	)  @ Td4[s1>>24]
+ THUMB(	add	r1,r10,r1,lsr#24	)  @ Td4[s1>>24]
+ THUMB(	ldrb	r1,[r1]			)
 	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
 	eor	r0,r7,r0,lsl#24
 	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
@@ -1077,7 +1049,9 @@ _armv4_AES_decrypt:
 	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
 	and	r9,lr,r2,lsr#16
 
-	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
+ ARM(	ldrb	r2,[r10,r2,lsr#24]	)  @ Td4[s2>>24]
+ THUMB(	add	r2,r10,r2,lsr#24	)  @ Td4[s2>>24]
+ THUMB(	ldrb	r2,[r2]			)
 	eor	r0,r0,r7,lsl#8
 	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
 	eor	r1,r8,r1,lsl#16
@@ -1090,7 +1064,9 @@ _armv4_AES_decrypt:
 	and	r9,lr,r3		@ i2
 
 	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
-	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
+ ARM(	ldrb	r3,[r10,r3,lsr#24]	)  @ Td4[s3>>24]
+ THUMB(	add	r3,r10,r3,lsr#24	)  @ Td4[s3>>24]
+ THUMB(	ldrb	r3,[r3]			)
 	eor	r0,r0,r7,lsl#16
 	ldr	r7,[r11,#0]
 	eor	r1,r1,r8,lsl#8
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
index 7050ab133b9d..92c6eed7aac9 100644
--- a/arch/arm/crypto/sha1-armv4-large.S
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -51,13 +51,12 @@
 @ Profiler-assisted and platform-specific optimization resulted in 10%
 @ improvement on Cortex A8 core and 12.2 cycles per byte.
 
-.text
+#include <linux/linkage.h>
 
-.global	sha1_block_data_order
-.type	sha1_block_data_order,%function
+.text
 
 .align	2
-sha1_block_data_order:
+ENTRY(sha1_block_data_order)
 	stmdb	sp!,{r4-r12,lr}
 	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
 	ldmia	r0,{r3,r4,r5,r6,r7}
@@ -194,7 +193,7 @@ sha1_block_data_order:
 	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
 	str	r9,[r14,#-4]!
 	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
-	teq	r14,sp
+	cmp	r14,sp
 	bne	.L_00_15		@ [((11+4)*5+2)*3]
 #if __ARM_ARCH__<7
 	ldrb	r10,[r1,#2]
@@ -374,7 +373,9 @@ sha1_block_data_order:
 	@ F_xx_xx
 	add	r3,r3,r9			@ E+=X[i]
 	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
-	teq	r14,sp			@ preserve carry
+ ARM(	teq	r14,sp		)	@ preserve carry
+ THUMB(	mov	r11,sp		)
+ THUMB(	teq	r14,r11		)	@ preserve carry
 	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
 	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
 
@@ -466,7 +467,7 @@ sha1_block_data_order:
 	add	r3,r3,r9			@ E+=X[i]
 	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
 	add	r3,r3,r11,ror#2
-	teq	r14,sp
+	cmp	r14,sp
 	bne	.L_40_59		@ [+((12+5)*5+2)*4]
 
 	ldr	r8,.LK_60_79
@@ -485,19 +486,12 @@ sha1_block_data_order:
 	teq	r1,r2
 	bne	.Lloop			@ [+18], total 1307
 
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
 .align	2
 .LK_00_19:	.word	0x5a827999
 .LK_20_39:	.word	0x6ed9eba1
 .LK_40_59:	.word	0x8f1bbcdc
 .LK_60_79:	.word	0xca62c1d6
-.size	sha1_block_data_order,.-sha1_block_data_order
+ENDPROC(sha1_block_data_order)
 .asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
 .align	2