aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm
diff options
context:
space:
mode:
authorDave Martin <dave.martin@linaro.org>2013-01-10 06:20:15 -0500
committerRussell King <rmk+kernel@arm.linux.org.uk>2013-01-13 07:41:22 -0500
commit638591cd7b601d403ed703d55062b48c32ea8cfb (patch)
treeeb7f735b59d7609976b768fc7deb020b6f3d955a /arch/arm
parent9931faca02c604c22335f5a935a501bb2ace6e20 (diff)
ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible

This patch fixes aes-armv4.S and sha1-armv4-large.S to work natively in
Thumb. This allows ARM/Thumb interworking workarounds to be removed.

I also take the opportunity to convert some explicit assembler directives
for exported functions to the standard ENTRY()/ENDPROC().

For the code itself:

  * In sha1_block_data_order, use of TEQ with sp is deprecated in ARMv7
    and not supported in Thumb. For the branches back to .L_00_15 and
    .L_40_59, the TEQ is converted to a CMP, under the assumption that
    clobbering the C flag here will not cause incorrect behaviour.

    For the first branch back to .L_20_39_or_60_79 the C flag is
    important, so sp is moved temporarily into another register so that
    TEQ can be used for the comparison.

  * In the AES code, most forms of register-indexed addressing with
    shifts and rotates are not permitted for loads and stores in Thumb,
    so the address calculation is done using a separate instruction for
    the Thumb case.

    The resulting code is unlikely to be optimally scheduled, but it
    should not have a large impact given the overall size of the code.
    I haven't run any benchmarks.

Signed-off-by: Dave Martin <dave.martin@linaro.org>
Tested-by: David McCullough <ucdevel@gmail.com> (ARM only)
Acked-by: David McCullough <ucdevel@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm')
-rw-r--r--arch/arm/crypto/aes-armv4.S64
-rw-r--r--arch/arm/crypto/sha1-armv4-large.S24
2 files changed, 29 insertions(+), 59 deletions(-)
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
index e59b1d505d6c..19d6cd6f29f9 100644
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -34,8 +34,9 @@
34@ A little glue here to select the correct code below for the ARM CPU 34@ A little glue here to select the correct code below for the ARM CPU
35@ that is being targetted. 35@ that is being targetted.
36 36
37#include <linux/linkage.h>
38
37.text 39.text
38.code 32
39 40
40.type AES_Te,%object 41.type AES_Te,%object
41.align 5 42.align 5
@@ -145,10 +146,8 @@ AES_Te:
145 146
146@ void AES_encrypt(const unsigned char *in, unsigned char *out, 147@ void AES_encrypt(const unsigned char *in, unsigned char *out,
147@ const AES_KEY *key) { 148@ const AES_KEY *key) {
148.global AES_encrypt
149.type AES_encrypt,%function
150.align 5 149.align 5
151AES_encrypt: 150ENTRY(AES_encrypt)
152 sub r3,pc,#8 @ AES_encrypt 151 sub r3,pc,#8 @ AES_encrypt
153 stmdb sp!,{r1,r4-r12,lr} 152 stmdb sp!,{r1,r4-r12,lr}
154 mov r12,r0 @ inp 153 mov r12,r0 @ inp
@@ -239,15 +238,8 @@ AES_encrypt:
239 strb r6,[r12,#14] 238 strb r6,[r12,#14]
240 strb r3,[r12,#15] 239 strb r3,[r12,#15]
241#endif 240#endif
242#if __ARM_ARCH__>=5
243 ldmia sp!,{r4-r12,pc} 241 ldmia sp!,{r4-r12,pc}
244#else 242ENDPROC(AES_encrypt)
245 ldmia sp!,{r4-r12,lr}
246 tst lr,#1
247 moveq pc,lr @ be binary compatible with V4, yet
248 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
249#endif
250.size AES_encrypt,.-AES_encrypt
251 243
252.type _armv4_AES_encrypt,%function 244.type _armv4_AES_encrypt,%function
253.align 2 245.align 2
@@ -386,10 +378,8 @@ _armv4_AES_encrypt:
386 ldr pc,[sp],#4 @ pop and return 378 ldr pc,[sp],#4 @ pop and return
387.size _armv4_AES_encrypt,.-_armv4_AES_encrypt 379.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
388 380
389.global private_AES_set_encrypt_key
390.type private_AES_set_encrypt_key,%function
391.align 5 381.align 5
392private_AES_set_encrypt_key: 382ENTRY(private_AES_set_encrypt_key)
393_armv4_AES_set_encrypt_key: 383_armv4_AES_set_encrypt_key:
394 sub r3,pc,#8 @ AES_set_encrypt_key 384 sub r3,pc,#8 @ AES_set_encrypt_key
395 teq r0,#0 385 teq r0,#0
@@ -658,15 +648,11 @@ _armv4_AES_set_encrypt_key:
658 648
659.Ldone: mov r0,#0 649.Ldone: mov r0,#0
660 ldmia sp!,{r4-r12,lr} 650 ldmia sp!,{r4-r12,lr}
661.Labrt: tst lr,#1 651.Labrt: mov pc,lr
662 moveq pc,lr @ be binary compatible with V4, yet 652ENDPROC(private_AES_set_encrypt_key)
663 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
664.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
665 653
666.global private_AES_set_decrypt_key
667.type private_AES_set_decrypt_key,%function
668.align 5 654.align 5
669private_AES_set_decrypt_key: 655ENTRY(private_AES_set_decrypt_key)
670 str lr,[sp,#-4]! @ push lr 656 str lr,[sp,#-4]! @ push lr
671#if 0 657#if 0
672 @ kernel does both of these in setkey so optimise this bit out by 658 @ kernel does both of these in setkey so optimise this bit out by
@@ -748,15 +734,8 @@ private_AES_set_decrypt_key:
748 bne .Lmix 734 bne .Lmix
749 735
750 mov r0,#0 736 mov r0,#0
751#if __ARM_ARCH__>=5
752 ldmia sp!,{r4-r12,pc} 737 ldmia sp!,{r4-r12,pc}
753#else 738ENDPROC(private_AES_set_decrypt_key)
754 ldmia sp!,{r4-r12,lr}
755 tst lr,#1
756 moveq pc,lr @ be binary compatible with V4, yet
757 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
758#endif
759.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
760 739
761.type AES_Td,%object 740.type AES_Td,%object
762.align 5 741.align 5
@@ -862,10 +841,8 @@ AES_Td:
862 841
863@ void AES_decrypt(const unsigned char *in, unsigned char *out, 842@ void AES_decrypt(const unsigned char *in, unsigned char *out,
864@ const AES_KEY *key) { 843@ const AES_KEY *key) {
865.global AES_decrypt
866.type AES_decrypt,%function
867.align 5 844.align 5
868AES_decrypt: 845ENTRY(AES_decrypt)
869 sub r3,pc,#8 @ AES_decrypt 846 sub r3,pc,#8 @ AES_decrypt
870 stmdb sp!,{r1,r4-r12,lr} 847 stmdb sp!,{r1,r4-r12,lr}
871 mov r12,r0 @ inp 848 mov r12,r0 @ inp
@@ -956,15 +933,8 @@ AES_decrypt:
956 strb r6,[r12,#14] 933 strb r6,[r12,#14]
957 strb r3,[r12,#15] 934 strb r3,[r12,#15]
958#endif 935#endif
959#if __ARM_ARCH__>=5
960 ldmia sp!,{r4-r12,pc} 936 ldmia sp!,{r4-r12,pc}
961#else 937ENDPROC(AES_decrypt)
962 ldmia sp!,{r4-r12,lr}
963 tst lr,#1
964 moveq pc,lr @ be binary compatible with V4, yet
965 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
966#endif
967.size AES_decrypt,.-AES_decrypt
968 938
969.type _armv4_AES_decrypt,%function 939.type _armv4_AES_decrypt,%function
970.align 2 940.align 2
@@ -1064,7 +1034,9 @@ _armv4_AES_decrypt:
1064 and r9,lr,r1,lsr#8 1034 and r9,lr,r1,lsr#8
1065 1035
1066 ldrb r7,[r10,r7] @ Td4[s1>>0] 1036 ldrb r7,[r10,r7] @ Td4[s1>>0]
1067 ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24] 1037 ARM( ldrb r1,[r10,r1,lsr#24] ) @ Td4[s1>>24]
1038 THUMB( add r1,r10,r1,lsr#24 ) @ Td4[s1>>24]
1039 THUMB( ldrb r1,[r1] )
1068 ldrb r8,[r10,r8] @ Td4[s1>>16] 1040 ldrb r8,[r10,r8] @ Td4[s1>>16]
1069 eor r0,r7,r0,lsl#24 1041 eor r0,r7,r0,lsl#24
1070 ldrb r9,[r10,r9] @ Td4[s1>>8] 1042 ldrb r9,[r10,r9] @ Td4[s1>>8]
@@ -1077,7 +1049,9 @@ _armv4_AES_decrypt:
1077 ldrb r8,[r10,r8] @ Td4[s2>>0] 1049 ldrb r8,[r10,r8] @ Td4[s2>>0]
1078 and r9,lr,r2,lsr#16 1050 and r9,lr,r2,lsr#16
1079 1051
1080 ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24] 1052 ARM( ldrb r2,[r10,r2,lsr#24] ) @ Td4[s2>>24]
1053 THUMB( add r2,r10,r2,lsr#24 ) @ Td4[s2>>24]
1054 THUMB( ldrb r2,[r2] )
1081 eor r0,r0,r7,lsl#8 1055 eor r0,r0,r7,lsl#8
1082 ldrb r9,[r10,r9] @ Td4[s2>>16] 1056 ldrb r9,[r10,r9] @ Td4[s2>>16]
1083 eor r1,r8,r1,lsl#16 1057 eor r1,r8,r1,lsl#16
@@ -1090,7 +1064,9 @@ _armv4_AES_decrypt:
1090 and r9,lr,r3 @ i2 1064 and r9,lr,r3 @ i2
1091 1065
1092 ldrb r9,[r10,r9] @ Td4[s3>>0] 1066 ldrb r9,[r10,r9] @ Td4[s3>>0]
1093 ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24] 1067 ARM( ldrb r3,[r10,r3,lsr#24] ) @ Td4[s3>>24]
1068 THUMB( add r3,r10,r3,lsr#24 ) @ Td4[s3>>24]
1069 THUMB( ldrb r3,[r3] )
1094 eor r0,r0,r7,lsl#16 1070 eor r0,r0,r7,lsl#16
1095 ldr r7,[r11,#0] 1071 ldr r7,[r11,#0]
1096 eor r1,r1,r8,lsl#8 1072 eor r1,r1,r8,lsl#8
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
index 7050ab133b9d..92c6eed7aac9 100644
--- a/arch/arm/crypto/sha1-armv4-large.S
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -51,13 +51,12 @@
51@ Profiler-assisted and platform-specific optimization resulted in 10% 51@ Profiler-assisted and platform-specific optimization resulted in 10%
52@ improvement on Cortex A8 core and 12.2 cycles per byte. 52@ improvement on Cortex A8 core and 12.2 cycles per byte.
53 53
54.text 54#include <linux/linkage.h>
55 55
56.global sha1_block_data_order 56.text
57.type sha1_block_data_order,%function
58 57
59.align 2 58.align 2
60sha1_block_data_order: 59ENTRY(sha1_block_data_order)
61 stmdb sp!,{r4-r12,lr} 60 stmdb sp!,{r4-r12,lr}
62 add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 61 add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
63 ldmia r0,{r3,r4,r5,r6,r7} 62 ldmia r0,{r3,r4,r5,r6,r7}
@@ -194,7 +193,7 @@ sha1_block_data_order:
194 eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) 193 eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
195 str r9,[r14,#-4]! 194 str r9,[r14,#-4]!
196 add r3,r3,r10 @ E+=F_00_19(B,C,D) 195 add r3,r3,r10 @ E+=F_00_19(B,C,D)
197 teq r14,sp 196 cmp r14,sp
198 bne .L_00_15 @ [((11+4)*5+2)*3] 197 bne .L_00_15 @ [((11+4)*5+2)*3]
199#if __ARM_ARCH__<7 198#if __ARM_ARCH__<7
200 ldrb r10,[r1,#2] 199 ldrb r10,[r1,#2]
@@ -374,7 +373,9 @@ sha1_block_data_order:
374 @ F_xx_xx 373 @ F_xx_xx
375 add r3,r3,r9 @ E+=X[i] 374 add r3,r3,r9 @ E+=X[i]
376 add r3,r3,r10 @ E+=F_20_39(B,C,D) 375 add r3,r3,r10 @ E+=F_20_39(B,C,D)
377 teq r14,sp @ preserve carry 376 ARM( teq r14,sp ) @ preserve carry
377 THUMB( mov r11,sp )
378 THUMB( teq r14,r11 ) @ preserve carry
378 bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] 379 bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
379 bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes 380 bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
380 381
@@ -466,7 +467,7 @@ sha1_block_data_order:
466 add r3,r3,r9 @ E+=X[i] 467 add r3,r3,r9 @ E+=X[i]
467 add r3,r3,r10 @ E+=F_40_59(B,C,D) 468 add r3,r3,r10 @ E+=F_40_59(B,C,D)
468 add r3,r3,r11,ror#2 469 add r3,r3,r11,ror#2
469 teq r14,sp 470 cmp r14,sp
470 bne .L_40_59 @ [+((12+5)*5+2)*4] 471 bne .L_40_59 @ [+((12+5)*5+2)*4]
471 472
472 ldr r8,.LK_60_79 473 ldr r8,.LK_60_79
@@ -485,19 +486,12 @@ sha1_block_data_order:
485 teq r1,r2 486 teq r1,r2
486 bne .Lloop @ [+18], total 1307 487 bne .Lloop @ [+18], total 1307
487 488
488#if __ARM_ARCH__>=5
489 ldmia sp!,{r4-r12,pc} 489 ldmia sp!,{r4-r12,pc}
490#else
491 ldmia sp!,{r4-r12,lr}
492 tst lr,#1
493 moveq pc,lr @ be binary compatible with V4, yet
494 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
495#endif
496.align 2 490.align 2
497.LK_00_19: .word 0x5a827999 491.LK_00_19: .word 0x5a827999
498.LK_20_39: .word 0x6ed9eba1 492.LK_20_39: .word 0x6ed9eba1
499.LK_40_59: .word 0x8f1bbcdc 493.LK_40_59: .word 0x8f1bbcdc
500.LK_60_79: .word 0xca62c1d6 494.LK_60_79: .word 0xca62c1d6
501.size sha1_block_data_order,.-sha1_block_data_order 495ENDPROC(sha1_block_data_order)
502.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>" 496.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
503.align 2 497.align 2