author		Dave Martin <dave.martin@linaro.org>		2013-01-10 06:20:15 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2013-01-13 07:41:22 -0500
commit		638591cd7b601d403ed703d55062b48c32ea8cfb (patch)
tree		eb7f735b59d7609976b768fc7deb020b6f3d955a /arch/arm/crypto/aes-armv4.S
parent		9931faca02c604c22335f5a935a501bb2ace6e20 (diff)
ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible
This patch fixes aes-armv4.S and sha1-armv4-large.S to work
natively in Thumb. This allows ARM/Thumb interworking workarounds
to be removed.
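For reference, the interworking workaround being removed is the return
sequence visible in the deleted lines of the diff below:

	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1			@ Thumb bit set in the return address?
	moveq	pc,lr			@ if clear, a plain ARMv4-compatible return
	.word	0xe12fff1e		@ raw encoding of "bx lr", so the file
					@ still assembles when targetting pre-v4T CPUs

Callers within the kernel do not need the ARMv4 fallback, so the simpler
ldmia sp!,{...,pc} and mov pc,lr returns that replace this sequence are
sufficient.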
I also take the opportunity to convert some explicit assembler
directives for exported functions to the standard
ENTRY()/ENDPROC().
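A minimal before/after sketch of that conversion, using AES_encrypt as
the example (ENTRY() and ENDPROC() come from <linux/linkage.h>, which is
why the diff adds that include):

	@ before: open-coded directives
	.global	AES_encrypt
	.type	AES_encrypt,%function
AES_encrypt:
	...
	.size	AES_encrypt,.-AES_encrypt

	@ after: the macros emit the .globl/.type/.size boilerplate
ENTRY(AES_encrypt)
	...
ENDPROC(AES_encrypt)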
For the code itself:
* In sha1_block_data_order, use of TEQ with sp is deprecated in
ARMv7 and not supported in Thumb.  For the branches back to
.L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the
assumption that clobbering the C flag here will not cause
incorrect behaviour.
For the first branch back to .L_20_39_or_60_79 the C flag is
important, so sp is moved temporarily into another register so
that TEQ can be used for the comparison.  (Both patterns are
sketched below, after this list.)
* In the AES code, most forms of register-indexed addressing with
shifts and rotates are not permitted for loads and stores in
Thumb, so the address calculation is done using a separate
instruction for the Thumb case (see the second sketch below).
The resulting code is unlikely to be optimally scheduled, but it
should not have a large impact given the overall size of the code.
I haven't run any benchmarks.
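sha1-armv4-large.S is outside this diffstat, so the following is only an
illustrative sketch of the two patterns in the first bullet; the register
choices (r14 as the loop bound, r12 as scratch) are assumptions, not
quotes from the file:

	@ C flag dead here: CMP is fine even though it sets C
	cmp	r14,sp			@ was: teq r14,sp
	bne	.L_00_15

	@ C flag live here: stage sp through a scratch register so that
	@ TEQ, which with a plain register operand updates only N and Z,
	@ can still be used
	mov	r12,sp
	teq	r14,r12
	bne	.L_20_39_or_60_79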
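The second bullet's pattern can be seen directly in the Td4 byte lookups
in the diff below; the kernel's ARM()/THUMB() assembler macros assemble
each line only when building for the corresponding instruction set:

 ARM(	ldrb	r1,[r10,r1,lsr#24]	)	@ ARM: one shifted register-indexed load
 THUMB(	add	r1,r10,r1,lsr#24	)	@ Thumb: compute the address first...
 THUMB(	ldrb	r1,[r1]			)	@ ...then do a plain byte load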
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Tested-by: David McCullough <ucdevel@gmail.com> (ARM only)
Acked-by: David McCullough <ucdevel@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/crypto/aes-armv4.S')
-rw-r--r--	arch/arm/crypto/aes-armv4.S	64
1 file changed, 20 insertions(+), 44 deletions(-)
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
index e59b1d505d6c..19d6cd6f29f9 100644
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -34,8 +34,9 @@
 @ A little glue here to select the correct code below for the ARM CPU
 @ that is being targetted.
 
+#include <linux/linkage.h>
+
 .text
-.code	32
 
 .type	AES_Te,%object
 .align	5
@@ -145,10 +146,8 @@ AES_Te:
 
 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
 @ 		const AES_KEY *key) {
-.global	AES_encrypt
-.type	AES_encrypt,%function
 .align	5
-AES_encrypt:
+ENTRY(AES_encrypt)
 	sub	r3,pc,#8		@ AES_encrypt
 	stmdb	sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -239,15 +238,8 @@ AES_encrypt:
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_encrypt,.-AES_encrypt
+ENDPROC(AES_encrypt)
 
 .type	_armv4_AES_encrypt,%function
 .align	2
@@ -386,10 +378,8 @@ _armv4_AES_encrypt:
 	ldr	pc,[sp],#4		@ pop and return
 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
 
-.global	private_AES_set_encrypt_key
-.type	private_AES_set_encrypt_key,%function
 .align	5
-private_AES_set_encrypt_key:
+ENTRY(private_AES_set_encrypt_key)
 _armv4_AES_set_encrypt_key:
 	sub	r3,pc,#8		@ AES_set_encrypt_key
 	teq	r0,#0
@@ -658,15 +648,11 @@ _armv4_AES_set_encrypt_key:
 
 .Ldone:	mov	r0,#0
 	ldmia	sp!,{r4-r12,lr}
-.Labrt:	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+.Labrt:	mov	pc,lr
+ENDPROC(private_AES_set_encrypt_key)
 
-.global	private_AES_set_decrypt_key
-.type	private_AES_set_decrypt_key,%function
 .align	5
-private_AES_set_decrypt_key:
+ENTRY(private_AES_set_decrypt_key)
 	str	lr,[sp,#-4]!		@ push lr
 #if 0
 	@ kernel does both of these in setkey so optimise this bit out by
@@ -748,15 +734,8 @@ private_AES_set_decrypt_key:
 	bne	.Lmix
 
 	mov	r0,#0
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+ENDPROC(private_AES_set_decrypt_key)
 
 .type	AES_Td,%object
 .align	5
@@ -862,10 +841,8 @@ AES_Td:
 
 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
 @ 		const AES_KEY *key) {
-.global	AES_decrypt
-.type	AES_decrypt,%function
 .align	5
-AES_decrypt:
+ENTRY(AES_decrypt)
 	sub	r3,pc,#8		@ AES_decrypt
 	stmdb	sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -956,15 +933,8 @@ AES_decrypt:
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_decrypt,.-AES_decrypt
+ENDPROC(AES_decrypt)
 
 .type	_armv4_AES_decrypt,%function
 .align	2
@@ -1064,7 +1034,9 @@ _armv4_AES_decrypt:
 	and	r9,lr,r1,lsr#8
 
 	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
-	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
+ ARM(	ldrb	r1,[r10,r1,lsr#24]	)	@ Td4[s1>>24]
+ THUMB(	add	r1,r10,r1,lsr#24	)	@ Td4[s1>>24]
+ THUMB(	ldrb	r1,[r1]			)
 	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
 	eor	r0,r7,r0,lsl#24
 	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
@@ -1077,7 +1049,9 @@ _armv4_AES_decrypt:
 	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
 	and	r9,lr,r2,lsr#16
 
-	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
+ ARM(	ldrb	r2,[r10,r2,lsr#24]	)	@ Td4[s2>>24]
+ THUMB(	add	r2,r10,r2,lsr#24	)	@ Td4[s2>>24]
+ THUMB(	ldrb	r2,[r2]			)
 	eor	r0,r0,r7,lsl#8
 	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
 	eor	r1,r8,r1,lsl#16
@@ -1090,7 +1064,9 @@ _armv4_AES_decrypt:
 	and	r9,lr,r3		@ i2
 
 	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
-	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
+ ARM(	ldrb	r3,[r10,r3,lsr#24]	)	@ Td4[s3>>24]
+ THUMB(	add	r3,r10,r3,lsr#24	)	@ Td4[s3>>24]
+ THUMB(	ldrb	r3,[r3]			)
 	eor	r0,r0,r7,lsl#16
 	ldr	r7,[r11,#0]
 	eor	r1,r1,r8,lsl#8