diff options
author | David McCullough <david_mccullough@mcafee.com> | 2012-09-06 16:17:02 -0400 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2012-09-06 16:17:02 -0400 |
commit | f0be44f4fb1faee42635ca5ea06dc9c3e820a35d (patch) | |
tree | 866c6016f7153ddff778a5eda036934e7eb426ac /arch | |
parent | 956c203c5e370c7beb766400b5c1a32ec570ce96 (diff) |
arm/crypto: Add optimized AES and SHA1 routines
Add assembler versions of AES and SHA1 for ARM platforms. This has provided
up to a 50% improvement in IPsec/TCP throughput for tunnels using AES128/SHA1.
Platform CPU Speed Endian Before (bps) After (bps) Improvement
IXP425 533 MHz big 11217042 15566294 ~38%
KS8695 166 MHz little 3828549 5795373 ~51%
Signed-off-by: David McCullough <ucdevel@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/Makefile | 1 | ||||
-rw-r--r-- | arch/arm/crypto/Makefile | 9 | ||||
-rw-r--r-- | arch/arm/crypto/aes-armv4.S | 1112 | ||||
-rw-r--r-- | arch/arm/crypto/aes_glue.c | 108 | ||||
-rw-r--r-- | arch/arm/crypto/sha1-armv4-large.S | 503 | ||||
-rw-r--r-- | arch/arm/crypto/sha1_glue.c | 179 |
6 files changed, 1912 insertions, 0 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 30eae87ead6d..46038d756bf6 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile | |||
@@ -255,6 +255,7 @@ core-$(CONFIG_VFP) += arch/arm/vfp/ | |||
255 | # If we have a machine-specific directory, then include it in the build. | 255 | # If we have a machine-specific directory, then include it in the build. |
256 | core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ | 256 | core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ |
257 | core-y += arch/arm/net/ | 257 | core-y += arch/arm/net/ |
258 | core-y += arch/arm/crypto/ | ||
258 | core-y += $(machdirs) $(platdirs) | 259 | core-y += $(machdirs) $(platdirs) |
259 | 260 | ||
260 | drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/ | 261 | drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/ |
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile new file mode 100644 index 000000000000..a2c83851bc90 --- /dev/null +++ b/arch/arm/crypto/Makefile | |||
@@ -0,0 +1,9 @@ | |||
1 | # | ||
2 | # Arch-specific CryptoAPI modules. | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o | ||
6 | obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o | ||
7 | |||
8 | aes-arm-y := aes-armv4.o aes_glue.o | ||
9 | sha1-arm-y := sha1-armv4-large.o sha1_glue.o | ||
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S new file mode 100644 index 000000000000..e59b1d505d6c --- /dev/null +++ b/arch/arm/crypto/aes-armv4.S | |||
@@ -0,0 +1,1112 @@ | |||
1 | #define __ARM_ARCH__ __LINUX_ARM_ARCH__ | ||
2 | @ ==================================================================== | ||
3 | @ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
4 | @ project. The module is, however, dual licensed under OpenSSL and | ||
5 | @ CRYPTOGAMS licenses depending on where you obtain it. For further | ||
6 | @ details see http://www.openssl.org/~appro/cryptogams/. | ||
7 | @ ==================================================================== | ||
8 | |||
9 | @ AES for ARMv4 | ||
10 | |||
11 | @ January 2007. | ||
12 | @ | ||
13 | @ Code uses single 1K S-box and is >2 times faster than code generated | ||
14 | @ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which | ||
15 | @ allows to merge logical or arithmetic operation with shift or rotate | ||
16 | @ in one instruction and emit combined result every cycle. The module | ||
17 | @ is endian-neutral. The performance is ~42 cycles/byte for 128-bit | ||
18 | @ key [on single-issue Xscale PXA250 core]. | ||
19 | |||
20 | @ May 2007. | ||
21 | @ | ||
22 | @ AES_set_[en|de]crypt_key is added. | ||
23 | |||
24 | @ July 2010. | ||
25 | @ | ||
26 | @ Rescheduling for dual-issue pipeline resulted in 12% improvement on | ||
27 | @ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. | ||
28 | |||
29 | @ February 2011. | ||
30 | @ | ||
31 | @ Profiler-assisted and platform-specific optimization resulted in 16% | ||
32 | @ improvement on Cortex A8 core and ~21.5 cycles per byte. | ||
33 | |||
34 | @ A little glue here to select the correct code below for the ARM CPU | ||
35 | @ that is being targetted. | ||
36 | |||
37 | .text | ||
38 | .code 32 | ||
39 | |||
40 | .type AES_Te,%object | ||
41 | .align 5 | ||
42 | AES_Te: | ||
43 | .word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d | ||
44 | .word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 | ||
45 | .word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d | ||
46 | .word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a | ||
47 | .word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 | ||
48 | .word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b | ||
49 | .word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea | ||
50 | .word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b | ||
51 | .word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a | ||
52 | .word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f | ||
53 | .word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 | ||
54 | .word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f | ||
55 | .word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e | ||
56 | .word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 | ||
57 | .word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d | ||
58 | .word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f | ||
59 | .word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e | ||
60 | .word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb | ||
61 | .word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce | ||
62 | .word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 | ||
63 | .word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c | ||
64 | .word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed | ||
65 | .word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b | ||
66 | .word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a | ||
67 | .word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 | ||
68 | .word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 | ||
69 | .word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 | ||
70 | .word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 | ||
71 | .word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a | ||
72 | .word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 | ||
73 | .word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 | ||
74 | .word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d | ||
75 | .word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f | ||
76 | .word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 | ||
77 | .word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 | ||
78 | .word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 | ||
79 | .word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f | ||
80 | .word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 | ||
81 | .word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c | ||
82 | .word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 | ||
83 | .word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e | ||
84 | .word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 | ||
85 | .word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 | ||
86 | .word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b | ||
87 | .word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 | ||
88 | .word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 | ||
89 | .word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 | ||
90 | .word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 | ||
91 | .word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 | ||
92 | .word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 | ||
93 | .word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 | ||
94 | .word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 | ||
95 | .word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa | ||
96 | .word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 | ||
97 | .word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 | ||
98 | .word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 | ||
99 | .word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 | ||
100 | .word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 | ||
101 | .word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 | ||
102 | .word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a | ||
103 | .word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 | ||
104 | .word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 | ||
105 | .word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 | ||
106 | .word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a | ||
107 | @ Te4[256] | ||
108 | .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 | ||
109 | .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 | ||
110 | .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 | ||
111 | .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 | ||
112 | .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc | ||
113 | .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 | ||
114 | .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a | ||
115 | .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 | ||
116 | .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 | ||
117 | .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 | ||
118 | .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b | ||
119 | .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf | ||
120 | .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 | ||
121 | .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 | ||
122 | .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 | ||
123 | .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 | ||
124 | .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 | ||
125 | .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 | ||
126 | .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 | ||
127 | .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb | ||
128 | .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c | ||
129 | .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 | ||
130 | .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 | ||
131 | .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 | ||
132 | .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 | ||
133 | .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a | ||
134 | .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e | ||
135 | .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e | ||
136 | .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 | ||
137 | .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf | ||
138 | .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 | ||
139 | .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 | ||
140 | @ rcon[] | ||
141 | .word 0x01000000, 0x02000000, 0x04000000, 0x08000000 | ||
142 | .word 0x10000000, 0x20000000, 0x40000000, 0x80000000 | ||
143 | .word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 | ||
144 | .size AES_Te,.-AES_Te | ||
145 | |||
146 | @ void AES_encrypt(const unsigned char *in, unsigned char *out, | ||
147 | @ const AES_KEY *key) { | ||
148 | .global AES_encrypt | ||
149 | .type AES_encrypt,%function | ||
150 | .align 5 | ||
151 | AES_encrypt: | ||
152 | sub r3,pc,#8 @ AES_encrypt | ||
153 | stmdb sp!,{r1,r4-r12,lr} | ||
154 | mov r12,r0 @ inp | ||
155 | mov r11,r2 | ||
156 | sub r10,r3,#AES_encrypt-AES_Te @ Te | ||
157 | #if __ARM_ARCH__<7 | ||
158 | ldrb r0,[r12,#3] @ load input data in endian-neutral | ||
159 | ldrb r4,[r12,#2] @ manner... | ||
160 | ldrb r5,[r12,#1] | ||
161 | ldrb r6,[r12,#0] | ||
162 | orr r0,r0,r4,lsl#8 | ||
163 | ldrb r1,[r12,#7] | ||
164 | orr r0,r0,r5,lsl#16 | ||
165 | ldrb r4,[r12,#6] | ||
166 | orr r0,r0,r6,lsl#24 | ||
167 | ldrb r5,[r12,#5] | ||
168 | ldrb r6,[r12,#4] | ||
169 | orr r1,r1,r4,lsl#8 | ||
170 | ldrb r2,[r12,#11] | ||
171 | orr r1,r1,r5,lsl#16 | ||
172 | ldrb r4,[r12,#10] | ||
173 | orr r1,r1,r6,lsl#24 | ||
174 | ldrb r5,[r12,#9] | ||
175 | ldrb r6,[r12,#8] | ||
176 | orr r2,r2,r4,lsl#8 | ||
177 | ldrb r3,[r12,#15] | ||
178 | orr r2,r2,r5,lsl#16 | ||
179 | ldrb r4,[r12,#14] | ||
180 | orr r2,r2,r6,lsl#24 | ||
181 | ldrb r5,[r12,#13] | ||
182 | ldrb r6,[r12,#12] | ||
183 | orr r3,r3,r4,lsl#8 | ||
184 | orr r3,r3,r5,lsl#16 | ||
185 | orr r3,r3,r6,lsl#24 | ||
186 | #else | ||
187 | ldr r0,[r12,#0] | ||
188 | ldr r1,[r12,#4] | ||
189 | ldr r2,[r12,#8] | ||
190 | ldr r3,[r12,#12] | ||
191 | #ifdef __ARMEL__ | ||
192 | rev r0,r0 | ||
193 | rev r1,r1 | ||
194 | rev r2,r2 | ||
195 | rev r3,r3 | ||
196 | #endif | ||
197 | #endif | ||
198 | bl _armv4_AES_encrypt | ||
199 | |||
200 | ldr r12,[sp],#4 @ pop out | ||
201 | #if __ARM_ARCH__>=7 | ||
202 | #ifdef __ARMEL__ | ||
203 | rev r0,r0 | ||
204 | rev r1,r1 | ||
205 | rev r2,r2 | ||
206 | rev r3,r3 | ||
207 | #endif | ||
208 | str r0,[r12,#0] | ||
209 | str r1,[r12,#4] | ||
210 | str r2,[r12,#8] | ||
211 | str r3,[r12,#12] | ||
212 | #else | ||
213 | mov r4,r0,lsr#24 @ write output in endian-neutral | ||
214 | mov r5,r0,lsr#16 @ manner... | ||
215 | mov r6,r0,lsr#8 | ||
216 | strb r4,[r12,#0] | ||
217 | strb r5,[r12,#1] | ||
218 | mov r4,r1,lsr#24 | ||
219 | strb r6,[r12,#2] | ||
220 | mov r5,r1,lsr#16 | ||
221 | strb r0,[r12,#3] | ||
222 | mov r6,r1,lsr#8 | ||
223 | strb r4,[r12,#4] | ||
224 | strb r5,[r12,#5] | ||
225 | mov r4,r2,lsr#24 | ||
226 | strb r6,[r12,#6] | ||
227 | mov r5,r2,lsr#16 | ||
228 | strb r1,[r12,#7] | ||
229 | mov r6,r2,lsr#8 | ||
230 | strb r4,[r12,#8] | ||
231 | strb r5,[r12,#9] | ||
232 | mov r4,r3,lsr#24 | ||
233 | strb r6,[r12,#10] | ||
234 | mov r5,r3,lsr#16 | ||
235 | strb r2,[r12,#11] | ||
236 | mov r6,r3,lsr#8 | ||
237 | strb r4,[r12,#12] | ||
238 | strb r5,[r12,#13] | ||
239 | strb r6,[r12,#14] | ||
240 | strb r3,[r12,#15] | ||
241 | #endif | ||
242 | #if __ARM_ARCH__>=5 | ||
243 | ldmia sp!,{r4-r12,pc} | ||
244 | #else | ||
245 | ldmia sp!,{r4-r12,lr} | ||
246 | tst lr,#1 | ||
247 | moveq pc,lr @ be binary compatible with V4, yet | ||
248 | .word 0xe12fff1e @ interoperable with Thumb ISA:-) | ||
249 | #endif | ||
250 | .size AES_encrypt,.-AES_encrypt | ||
251 | |||
252 | .type _armv4_AES_encrypt,%function | ||
253 | .align 2 | ||
254 | _armv4_AES_encrypt: | ||
255 | str lr,[sp,#-4]! @ push lr | ||
256 | ldmia r11!,{r4-r7} | ||
257 | eor r0,r0,r4 | ||
258 | ldr r12,[r11,#240-16] | ||
259 | eor r1,r1,r5 | ||
260 | eor r2,r2,r6 | ||
261 | eor r3,r3,r7 | ||
262 | sub r12,r12,#1 | ||
263 | mov lr,#255 | ||
264 | |||
265 | and r7,lr,r0 | ||
266 | and r8,lr,r0,lsr#8 | ||
267 | and r9,lr,r0,lsr#16 | ||
268 | mov r0,r0,lsr#24 | ||
269 | .Lenc_loop: | ||
270 | ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] | ||
271 | and r7,lr,r1,lsr#16 @ i0 | ||
272 | ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] | ||
273 | and r8,lr,r1 | ||
274 | ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] | ||
275 | and r9,lr,r1,lsr#8 | ||
276 | ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] | ||
277 | mov r1,r1,lsr#24 | ||
278 | |||
279 | ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] | ||
280 | ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] | ||
281 | ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] | ||
282 | eor r0,r0,r7,ror#8 | ||
283 | ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] | ||
284 | and r7,lr,r2,lsr#8 @ i0 | ||
285 | eor r5,r5,r8,ror#8 | ||
286 | and r8,lr,r2,lsr#16 @ i1 | ||
287 | eor r6,r6,r9,ror#8 | ||
288 | and r9,lr,r2 | ||
289 | ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] | ||
290 | eor r1,r1,r4,ror#24 | ||
291 | ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] | ||
292 | mov r2,r2,lsr#24 | ||
293 | |||
294 | ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] | ||
295 | eor r0,r0,r7,ror#16 | ||
296 | ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] | ||
297 | and r7,lr,r3 @ i0 | ||
298 | eor r1,r1,r8,ror#8 | ||
299 | and r8,lr,r3,lsr#8 @ i1 | ||
300 | eor r6,r6,r9,ror#16 | ||
301 | and r9,lr,r3,lsr#16 @ i2 | ||
302 | ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] | ||
303 | eor r2,r2,r5,ror#16 | ||
304 | ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] | ||
305 | mov r3,r3,lsr#24 | ||
306 | |||
307 | ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] | ||
308 | eor r0,r0,r7,ror#24 | ||
309 | ldr r7,[r11],#16 | ||
310 | eor r1,r1,r8,ror#16 | ||
311 | ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] | ||
312 | eor r2,r2,r9,ror#8 | ||
313 | ldr r4,[r11,#-12] | ||
314 | eor r3,r3,r6,ror#8 | ||
315 | |||
316 | ldr r5,[r11,#-8] | ||
317 | eor r0,r0,r7 | ||
318 | ldr r6,[r11,#-4] | ||
319 | and r7,lr,r0 | ||
320 | eor r1,r1,r4 | ||
321 | and r8,lr,r0,lsr#8 | ||
322 | eor r2,r2,r5 | ||
323 | and r9,lr,r0,lsr#16 | ||
324 | eor r3,r3,r6 | ||
325 | mov r0,r0,lsr#24 | ||
326 | |||
327 | subs r12,r12,#1 | ||
328 | bne .Lenc_loop | ||
329 | |||
330 | add r10,r10,#2 | ||
331 | |||
332 | ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] | ||
333 | and r7,lr,r1,lsr#16 @ i0 | ||
334 | ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] | ||
335 | and r8,lr,r1 | ||
336 | ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] | ||
337 | and r9,lr,r1,lsr#8 | ||
338 | ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] | ||
339 | mov r1,r1,lsr#24 | ||
340 | |||
341 | ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] | ||
342 | ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] | ||
343 | ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] | ||
344 | eor r0,r7,r0,lsl#8 | ||
345 | ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] | ||
346 | and r7,lr,r2,lsr#8 @ i0 | ||
347 | eor r5,r8,r5,lsl#8 | ||
348 | and r8,lr,r2,lsr#16 @ i1 | ||
349 | eor r6,r9,r6,lsl#8 | ||
350 | and r9,lr,r2 | ||
351 | ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] | ||
352 | eor r1,r4,r1,lsl#24 | ||
353 | ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] | ||
354 | mov r2,r2,lsr#24 | ||
355 | |||
356 | ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] | ||
357 | eor r0,r7,r0,lsl#8 | ||
358 | ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] | ||
359 | and r7,lr,r3 @ i0 | ||
360 | eor r1,r1,r8,lsl#16 | ||
361 | and r8,lr,r3,lsr#8 @ i1 | ||
362 | eor r6,r9,r6,lsl#8 | ||
363 | and r9,lr,r3,lsr#16 @ i2 | ||
364 | ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] | ||
365 | eor r2,r5,r2,lsl#24 | ||
366 | ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] | ||
367 | mov r3,r3,lsr#24 | ||
368 | |||
369 | ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] | ||
370 | eor r0,r7,r0,lsl#8 | ||
371 | ldr r7,[r11,#0] | ||
372 | ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] | ||
373 | eor r1,r1,r8,lsl#8 | ||
374 | ldr r4,[r11,#4] | ||
375 | eor r2,r2,r9,lsl#16 | ||
376 | ldr r5,[r11,#8] | ||
377 | eor r3,r6,r3,lsl#24 | ||
378 | ldr r6,[r11,#12] | ||
379 | |||
380 | eor r0,r0,r7 | ||
381 | eor r1,r1,r4 | ||
382 | eor r2,r2,r5 | ||
383 | eor r3,r3,r6 | ||
384 | |||
385 | sub r10,r10,#2 | ||
386 | ldr pc,[sp],#4 @ pop and return | ||
387 | .size _armv4_AES_encrypt,.-_armv4_AES_encrypt | ||
388 | |||
389 | .global private_AES_set_encrypt_key | ||
390 | .type private_AES_set_encrypt_key,%function | ||
391 | .align 5 | ||
392 | private_AES_set_encrypt_key: | ||
393 | _armv4_AES_set_encrypt_key: | ||
394 | sub r3,pc,#8 @ AES_set_encrypt_key | ||
395 | teq r0,#0 | ||
396 | moveq r0,#-1 | ||
397 | beq .Labrt | ||
398 | teq r2,#0 | ||
399 | moveq r0,#-1 | ||
400 | beq .Labrt | ||
401 | |||
402 | teq r1,#128 | ||
403 | beq .Lok | ||
404 | teq r1,#192 | ||
405 | beq .Lok | ||
406 | teq r1,#256 | ||
407 | movne r0,#-1 | ||
408 | bne .Labrt | ||
409 | |||
410 | .Lok: stmdb sp!,{r4-r12,lr} | ||
411 | sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 | ||
412 | |||
413 | mov r12,r0 @ inp | ||
414 | mov lr,r1 @ bits | ||
415 | mov r11,r2 @ key | ||
416 | |||
417 | #if __ARM_ARCH__<7 | ||
418 | ldrb r0,[r12,#3] @ load input data in endian-neutral | ||
419 | ldrb r4,[r12,#2] @ manner... | ||
420 | ldrb r5,[r12,#1] | ||
421 | ldrb r6,[r12,#0] | ||
422 | orr r0,r0,r4,lsl#8 | ||
423 | ldrb r1,[r12,#7] | ||
424 | orr r0,r0,r5,lsl#16 | ||
425 | ldrb r4,[r12,#6] | ||
426 | orr r0,r0,r6,lsl#24 | ||
427 | ldrb r5,[r12,#5] | ||
428 | ldrb r6,[r12,#4] | ||
429 | orr r1,r1,r4,lsl#8 | ||
430 | ldrb r2,[r12,#11] | ||
431 | orr r1,r1,r5,lsl#16 | ||
432 | ldrb r4,[r12,#10] | ||
433 | orr r1,r1,r6,lsl#24 | ||
434 | ldrb r5,[r12,#9] | ||
435 | ldrb r6,[r12,#8] | ||
436 | orr r2,r2,r4,lsl#8 | ||
437 | ldrb r3,[r12,#15] | ||
438 | orr r2,r2,r5,lsl#16 | ||
439 | ldrb r4,[r12,#14] | ||
440 | orr r2,r2,r6,lsl#24 | ||
441 | ldrb r5,[r12,#13] | ||
442 | ldrb r6,[r12,#12] | ||
443 | orr r3,r3,r4,lsl#8 | ||
444 | str r0,[r11],#16 | ||
445 | orr r3,r3,r5,lsl#16 | ||
446 | str r1,[r11,#-12] | ||
447 | orr r3,r3,r6,lsl#24 | ||
448 | str r2,[r11,#-8] | ||
449 | str r3,[r11,#-4] | ||
450 | #else | ||
451 | ldr r0,[r12,#0] | ||
452 | ldr r1,[r12,#4] | ||
453 | ldr r2,[r12,#8] | ||
454 | ldr r3,[r12,#12] | ||
455 | #ifdef __ARMEL__ | ||
456 | rev r0,r0 | ||
457 | rev r1,r1 | ||
458 | rev r2,r2 | ||
459 | rev r3,r3 | ||
460 | #endif | ||
461 | str r0,[r11],#16 | ||
462 | str r1,[r11,#-12] | ||
463 | str r2,[r11,#-8] | ||
464 | str r3,[r11,#-4] | ||
465 | #endif | ||
466 | |||
467 | teq lr,#128 | ||
468 | bne .Lnot128 | ||
469 | mov r12,#10 | ||
470 | str r12,[r11,#240-16] | ||
471 | add r6,r10,#256 @ rcon | ||
472 | mov lr,#255 | ||
473 | |||
474 | .L128_loop: | ||
475 | and r5,lr,r3,lsr#24 | ||
476 | and r7,lr,r3,lsr#16 | ||
477 | ldrb r5,[r10,r5] | ||
478 | and r8,lr,r3,lsr#8 | ||
479 | ldrb r7,[r10,r7] | ||
480 | and r9,lr,r3 | ||
481 | ldrb r8,[r10,r8] | ||
482 | orr r5,r5,r7,lsl#24 | ||
483 | ldrb r9,[r10,r9] | ||
484 | orr r5,r5,r8,lsl#16 | ||
485 | ldr r4,[r6],#4 @ rcon[i++] | ||
486 | orr r5,r5,r9,lsl#8 | ||
487 | eor r5,r5,r4 | ||
488 | eor r0,r0,r5 @ rk[4]=rk[0]^... | ||
489 | eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] | ||
490 | str r0,[r11],#16 | ||
491 | eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] | ||
492 | str r1,[r11,#-12] | ||
493 | eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] | ||
494 | str r2,[r11,#-8] | ||
495 | subs r12,r12,#1 | ||
496 | str r3,[r11,#-4] | ||
497 | bne .L128_loop | ||
498 | sub r2,r11,#176 | ||
499 | b .Ldone | ||
500 | |||
501 | .Lnot128: | ||
502 | #if __ARM_ARCH__<7 | ||
503 | ldrb r8,[r12,#19] | ||
504 | ldrb r4,[r12,#18] | ||
505 | ldrb r5,[r12,#17] | ||
506 | ldrb r6,[r12,#16] | ||
507 | orr r8,r8,r4,lsl#8 | ||
508 | ldrb r9,[r12,#23] | ||
509 | orr r8,r8,r5,lsl#16 | ||
510 | ldrb r4,[r12,#22] | ||
511 | orr r8,r8,r6,lsl#24 | ||
512 | ldrb r5,[r12,#21] | ||
513 | ldrb r6,[r12,#20] | ||
514 | orr r9,r9,r4,lsl#8 | ||
515 | orr r9,r9,r5,lsl#16 | ||
516 | str r8,[r11],#8 | ||
517 | orr r9,r9,r6,lsl#24 | ||
518 | str r9,[r11,#-4] | ||
519 | #else | ||
520 | ldr r8,[r12,#16] | ||
521 | ldr r9,[r12,#20] | ||
522 | #ifdef __ARMEL__ | ||
523 | rev r8,r8 | ||
524 | rev r9,r9 | ||
525 | #endif | ||
526 | str r8,[r11],#8 | ||
527 | str r9,[r11,#-4] | ||
528 | #endif | ||
529 | |||
530 | teq lr,#192 | ||
531 | bne .Lnot192 | ||
532 | mov r12,#12 | ||
533 | str r12,[r11,#240-24] | ||
534 | add r6,r10,#256 @ rcon | ||
535 | mov lr,#255 | ||
536 | mov r12,#8 | ||
537 | |||
538 | .L192_loop: | ||
539 | and r5,lr,r9,lsr#24 | ||
540 | and r7,lr,r9,lsr#16 | ||
541 | ldrb r5,[r10,r5] | ||
542 | and r8,lr,r9,lsr#8 | ||
543 | ldrb r7,[r10,r7] | ||
544 | and r9,lr,r9 | ||
545 | ldrb r8,[r10,r8] | ||
546 | orr r5,r5,r7,lsl#24 | ||
547 | ldrb r9,[r10,r9] | ||
548 | orr r5,r5,r8,lsl#16 | ||
549 | ldr r4,[r6],#4 @ rcon[i++] | ||
550 | orr r5,r5,r9,lsl#8 | ||
551 | eor r9,r5,r4 | ||
552 | eor r0,r0,r9 @ rk[6]=rk[0]^... | ||
553 | eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] | ||
554 | str r0,[r11],#24 | ||
555 | eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] | ||
556 | str r1,[r11,#-20] | ||
557 | eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] | ||
558 | str r2,[r11,#-16] | ||
559 | subs r12,r12,#1 | ||
560 | str r3,[r11,#-12] | ||
561 | subeq r2,r11,#216 | ||
562 | beq .Ldone | ||
563 | |||
564 | ldr r7,[r11,#-32] | ||
565 | ldr r8,[r11,#-28] | ||
566 | eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] | ||
567 | eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] | ||
568 | str r7,[r11,#-8] | ||
569 | str r9,[r11,#-4] | ||
570 | b .L192_loop | ||
571 | |||
572 | .Lnot192: | ||
573 | #if __ARM_ARCH__<7 | ||
574 | ldrb r8,[r12,#27] | ||
575 | ldrb r4,[r12,#26] | ||
576 | ldrb r5,[r12,#25] | ||
577 | ldrb r6,[r12,#24] | ||
578 | orr r8,r8,r4,lsl#8 | ||
579 | ldrb r9,[r12,#31] | ||
580 | orr r8,r8,r5,lsl#16 | ||
581 | ldrb r4,[r12,#30] | ||
582 | orr r8,r8,r6,lsl#24 | ||
583 | ldrb r5,[r12,#29] | ||
584 | ldrb r6,[r12,#28] | ||
585 | orr r9,r9,r4,lsl#8 | ||
586 | orr r9,r9,r5,lsl#16 | ||
587 | str r8,[r11],#8 | ||
588 | orr r9,r9,r6,lsl#24 | ||
589 | str r9,[r11,#-4] | ||
590 | #else | ||
591 | ldr r8,[r12,#24] | ||
592 | ldr r9,[r12,#28] | ||
593 | #ifdef __ARMEL__ | ||
594 | rev r8,r8 | ||
595 | rev r9,r9 | ||
596 | #endif | ||
597 | str r8,[r11],#8 | ||
598 | str r9,[r11,#-4] | ||
599 | #endif | ||
600 | |||
601 | mov r12,#14 | ||
602 | str r12,[r11,#240-32] | ||
603 | add r6,r10,#256 @ rcon | ||
604 | mov lr,#255 | ||
605 | mov r12,#7 | ||
606 | |||
607 | .L256_loop: | ||
608 | and r5,lr,r9,lsr#24 | ||
609 | and r7,lr,r9,lsr#16 | ||
610 | ldrb r5,[r10,r5] | ||
611 | and r8,lr,r9,lsr#8 | ||
612 | ldrb r7,[r10,r7] | ||
613 | and r9,lr,r9 | ||
614 | ldrb r8,[r10,r8] | ||
615 | orr r5,r5,r7,lsl#24 | ||
616 | ldrb r9,[r10,r9] | ||
617 | orr r5,r5,r8,lsl#16 | ||
618 | ldr r4,[r6],#4 @ rcon[i++] | ||
619 | orr r5,r5,r9,lsl#8 | ||
620 | eor r9,r5,r4 | ||
621 | eor r0,r0,r9 @ rk[8]=rk[0]^... | ||
622 | eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] | ||
623 | str r0,[r11],#32 | ||
624 | eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] | ||
625 | str r1,[r11,#-28] | ||
626 | eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] | ||
627 | str r2,[r11,#-24] | ||
628 | subs r12,r12,#1 | ||
629 | str r3,[r11,#-20] | ||
630 | subeq r2,r11,#256 | ||
631 | beq .Ldone | ||
632 | |||
633 | and r5,lr,r3 | ||
634 | and r7,lr,r3,lsr#8 | ||
635 | ldrb r5,[r10,r5] | ||
636 | and r8,lr,r3,lsr#16 | ||
637 | ldrb r7,[r10,r7] | ||
638 | and r9,lr,r3,lsr#24 | ||
639 | ldrb r8,[r10,r8] | ||
640 | orr r5,r5,r7,lsl#8 | ||
641 | ldrb r9,[r10,r9] | ||
642 | orr r5,r5,r8,lsl#16 | ||
643 | ldr r4,[r11,#-48] | ||
644 | orr r5,r5,r9,lsl#24 | ||
645 | |||
646 | ldr r7,[r11,#-44] | ||
647 | ldr r8,[r11,#-40] | ||
648 | eor r4,r4,r5 @ rk[12]=rk[4]^... | ||
649 | ldr r9,[r11,#-36] | ||
650 | eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] | ||
651 | str r4,[r11,#-16] | ||
652 | eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] | ||
653 | str r7,[r11,#-12] | ||
654 | eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] | ||
655 | str r8,[r11,#-8] | ||
656 | str r9,[r11,#-4] | ||
657 | b .L256_loop | ||
658 | |||
659 | .Ldone: mov r0,#0 | ||
660 | ldmia sp!,{r4-r12,lr} | ||
661 | .Labrt: tst lr,#1 | ||
662 | moveq pc,lr @ be binary compatible with V4, yet | ||
663 | .word 0xe12fff1e @ interoperable with Thumb ISA:-) | ||
664 | .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key | ||
665 | |||
666 | .global private_AES_set_decrypt_key | ||
667 | .type private_AES_set_decrypt_key,%function | ||
668 | .align 5 | ||
669 | private_AES_set_decrypt_key: | ||
670 | str lr,[sp,#-4]! @ push lr | ||
671 | #if 0 | ||
672 | @ kernel does both of these in setkey so optimise this bit out by | ||
673 | @ expecting the key to already have the enc_key work done (see aes_glue.c) | ||
674 | bl _armv4_AES_set_encrypt_key | ||
675 | #else | ||
676 | mov r0,#0 | ||
677 | #endif | ||
678 | teq r0,#0 | ||
679 | ldrne lr,[sp],#4 @ pop lr | ||
680 | bne .Labrt | ||
681 | |||
682 | stmdb sp!,{r4-r12} | ||
683 | |||
684 | ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, | ||
685 | mov r11,r2 @ which is AES_KEY *key | ||
686 | mov r7,r2 | ||
687 | add r8,r2,r12,lsl#4 | ||
688 | |||
689 | .Linv: ldr r0,[r7] | ||
690 | ldr r1,[r7,#4] | ||
691 | ldr r2,[r7,#8] | ||
692 | ldr r3,[r7,#12] | ||
693 | ldr r4,[r8] | ||
694 | ldr r5,[r8,#4] | ||
695 | ldr r6,[r8,#8] | ||
696 | ldr r9,[r8,#12] | ||
697 | str r0,[r8],#-16 | ||
698 | str r1,[r8,#16+4] | ||
699 | str r2,[r8,#16+8] | ||
700 | str r3,[r8,#16+12] | ||
701 | str r4,[r7],#16 | ||
702 | str r5,[r7,#-12] | ||
703 | str r6,[r7,#-8] | ||
704 | str r9,[r7,#-4] | ||
705 | teq r7,r8 | ||
706 | bne .Linv | ||
707 | ldr r0,[r11,#16]! @ prefetch tp1 | ||
708 | mov r7,#0x80 | ||
709 | mov r8,#0x1b | ||
710 | orr r7,r7,#0x8000 | ||
711 | orr r8,r8,#0x1b00 | ||
712 | orr r7,r7,r7,lsl#16 | ||
713 | orr r8,r8,r8,lsl#16 | ||
714 | sub r12,r12,#1 | ||
715 | mvn r9,r7 | ||
716 | mov r12,r12,lsl#2 @ (rounds-1)*4 | ||
717 | |||
718 | .Lmix: and r4,r0,r7 | ||
719 | and r1,r0,r9 | ||
720 | sub r4,r4,r4,lsr#7 | ||
721 | and r4,r4,r8 | ||
722 | eor r1,r4,r1,lsl#1 @ tp2 | ||
723 | |||
724 | and r4,r1,r7 | ||
725 | and r2,r1,r9 | ||
726 | sub r4,r4,r4,lsr#7 | ||
727 | and r4,r4,r8 | ||
728 | eor r2,r4,r2,lsl#1 @ tp4 | ||
729 | |||
730 | and r4,r2,r7 | ||
731 | and r3,r2,r9 | ||
732 | sub r4,r4,r4,lsr#7 | ||
733 | and r4,r4,r8 | ||
734 | eor r3,r4,r3,lsl#1 @ tp8 | ||
735 | |||
736 | eor r4,r1,r2 | ||
737 | eor r5,r0,r3 @ tp9 | ||
738 | eor r4,r4,r3 @ tpe | ||
739 | eor r4,r4,r1,ror#24 | ||
740 | eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) | ||
741 | eor r4,r4,r2,ror#16 | ||
742 | eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) | ||
743 | eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) | ||
744 | |||
745 | ldr r0,[r11,#4] @ prefetch tp1 | ||
746 | str r4,[r11],#4 | ||
747 | subs r12,r12,#1 | ||
748 | bne .Lmix | ||
749 | |||
750 | mov r0,#0 | ||
751 | #if __ARM_ARCH__>=5 | ||
752 | ldmia sp!,{r4-r12,pc} | ||
753 | #else | ||
754 | ldmia sp!,{r4-r12,lr} | ||
755 | tst lr,#1 | ||
756 | moveq pc,lr @ be binary compatible with V4, yet | ||
757 | .word 0xe12fff1e @ interoperable with Thumb ISA:-) | ||
758 | #endif | ||
759 | .size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key | ||
760 | |||
761 | .type AES_Td,%object | ||
762 | .align 5 | ||
763 | AES_Td: | ||
764 | .word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 | ||
765 | .word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 | ||
766 | .word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 | ||
767 | .word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f | ||
768 | .word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 | ||
769 | .word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 | ||
770 | .word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da | ||
771 | .word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 | ||
772 | .word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd | ||
773 | .word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 | ||
774 | .word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 | ||
775 | .word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 | ||
776 | .word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 | ||
777 | .word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a | ||
778 | .word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 | ||
779 | .word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c | ||
780 | .word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 | ||
781 | .word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a | ||
782 | .word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 | ||
783 | .word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 | ||
784 | .word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 | ||
785 | .word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff | ||
786 | .word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 | ||
787 | .word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb | ||
788 | .word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 | ||
789 | .word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e | ||
790 | .word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 | ||
791 | .word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a | ||
792 | .word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e | ||
793 | .word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 | ||
794 | .word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d | ||
795 | .word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 | ||
796 | .word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd | ||
797 | .word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 | ||
798 | .word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 | ||
799 | .word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 | ||
800 | .word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d | ||
801 | .word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 | ||
802 | .word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 | ||
803 | .word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef | ||
804 | .word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 | ||
805 | .word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 | ||
806 | .word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 | ||
807 | .word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 | ||
808 | .word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 | ||
809 | .word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b | ||
810 | .word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 | ||
811 | .word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 | ||
812 | .word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 | ||
813 | .word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 | ||
814 | .word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 | ||
815 | .word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f | ||
816 | .word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df | ||
817 | .word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f | ||
818 | .word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e | ||
819 | .word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 | ||
820 | .word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 | ||
821 | .word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c | ||
822 | .word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf | ||
823 | .word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 | ||
824 | .word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f | ||
825 | .word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 | ||
826 | .word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 | ||
827 | .word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 | ||
828 | @ Td4[256] | ||
829 | .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 | ||
830 | .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb | ||
831 | .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 | ||
832 | .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb | ||
833 | .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d | ||
834 | .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e | ||
835 | .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 | ||
836 | .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 | ||
837 | .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 | ||
838 | .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 | ||
839 | .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda | ||
840 | .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 | ||
841 | .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a | ||
842 | .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 | ||
843 | .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 | ||
844 | .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b | ||
845 | .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea | ||
846 | .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 | ||
847 | .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 | ||
848 | .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e | ||
849 | .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 | ||
850 | .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b | ||
851 | .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 | ||
852 | .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 | ||
853 | .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 | ||
854 | .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f | ||
855 | .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d | ||
856 | .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef | ||
857 | .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 | ||
858 | .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 | ||
859 | .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 | ||
860 | .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d | ||
861 | .size AES_Td,.-AES_Td | ||
862 | |||
@ void AES_decrypt(const unsigned char *in, unsigned char *out,
@ 		 const AES_KEY *key) {
@
@ Public entry point: decrypts one 16-byte block from *in to *out with
@ the expanded schedule in *key.  On pre-ARMv7 the input/output are
@ accessed byte-wise (endian-neutral, alignment-safe); on ARMv7 word
@ accesses are used (unaligned loads are architecturally supported
@ there) with REV byte-swaps on little-endian.
.global AES_decrypt
.type   AES_decrypt,%function
.align	5
AES_decrypt:
	sub	r3,pc,#8		@ AES_decrypt entry address (PC-relative)
	stmdb   sp!,{r1,r4-r12,lr}	@ save out-pointer (r1) and callee-saved regs
	mov	r12,r0		@ inp
	mov	r11,r2		@ key schedule
	sub	r10,r3,#AES_decrypt-AES_Td		@ Td (position-independent table base)
#if __ARM_ARCH__<7
	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
	ldrb	r4,[r12,#2]	@ manner...
	ldrb	r5,[r12,#1]
	ldrb	r6,[r12,#0]
	orr	r0,r0,r4,lsl#8
	ldrb	r1,[r12,#7]
	orr	r0,r0,r5,lsl#16
	ldrb	r4,[r12,#6]
	orr	r0,r0,r6,lsl#24
	ldrb	r5,[r12,#5]
	ldrb	r6,[r12,#4]
	orr	r1,r1,r4,lsl#8
	ldrb	r2,[r12,#11]
	orr	r1,r1,r5,lsl#16
	ldrb	r4,[r12,#10]
	orr	r1,r1,r6,lsl#24
	ldrb	r5,[r12,#9]
	ldrb	r6,[r12,#8]
	orr	r2,r2,r4,lsl#8
	ldrb	r3,[r12,#15]
	orr	r2,r2,r5,lsl#16
	ldrb	r4,[r12,#14]
	orr	r2,r2,r6,lsl#24
	ldrb	r5,[r12,#13]
	ldrb	r6,[r12,#12]
	orr	r3,r3,r4,lsl#8
	orr	r3,r3,r5,lsl#16
	orr	r3,r3,r6,lsl#24
#else
	ldr	r0,[r12,#0]	@ handles unaligned on ARMv7
	ldr	r1,[r12,#4]
	ldr	r2,[r12,#8]
	ldr	r3,[r12,#12]
#ifdef __ARMEL__
	rev	r0,r0		@ byte swap to the lane order the rounds expect
	rev	r1,r1
	rev	r2,r2
	rev	r3,r3
#endif
#endif
	bl	_armv4_AES_decrypt	@ state in r0-r3, Td in r10, key in r11

	ldr	r12,[sp],#4	@ pop out (the r1 saved on entry)
#if __ARM_ARCH__>=7
#ifdef __ARMEL__
	rev	r0,r0
	rev	r1,r1
	rev	r2,r2
	rev	r3,r3
#endif
	str	r0,[r12,#0]
	str	r1,[r12,#4]
	str	r2,[r12,#8]
	str	r3,[r12,#12]
#else
	mov	r4,r0,lsr#24		@ write output in endian-neutral
	mov	r5,r0,lsr#16		@ manner...
	mov	r6,r0,lsr#8
	strb	r4,[r12,#0]
	strb	r5,[r12,#1]
	mov	r4,r1,lsr#24
	strb	r6,[r12,#2]
	mov	r5,r1,lsr#16
	strb	r0,[r12,#3]
	mov	r6,r1,lsr#8
	strb	r4,[r12,#4]
	strb	r5,[r12,#5]
	mov	r4,r2,lsr#24
	strb	r6,[r12,#6]
	mov	r5,r2,lsr#16
	strb	r1,[r12,#7]
	mov	r6,r2,lsr#8
	strb	r4,[r12,#8]
	strb	r5,[r12,#9]
	mov	r4,r3,lsr#24
	strb	r6,[r12,#10]
	mov	r5,r3,lsr#16
	strb	r2,[r12,#11]
	mov	r6,r3,lsr#8
	strb	r4,[r12,#12]
	strb	r5,[r12,#13]
	strb	r6,[r12,#14]
	strb	r3,[r12,#15]
#endif
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}		@ ARMv5+: load pc directly (interworking)
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	AES_decrypt,.-AES_decrypt
968 | |||
.type   _armv4_AES_decrypt,%function
.align	2
@ Core decryption.  On entry: r0-r3 hold the state words, r10 = Td,
@ r11 = key schedule, lr is clobbered (used as the 0xff byte mask).
@ Main rounds use the 1KB word table Td0..Td3 (one table read at four
@ rotations via ror); the final round uses the byte table Td4 located
@ 1024 bytes past Td.  Returns the final state in r0-r3.
_armv4_AES_decrypt:
	str	lr,[sp,#-4]!		@ push lr
	ldmia	r11!,{r4-r7}		@ first round key, AddRoundKey below
	eor	r0,r0,r4
	ldr	r12,[r11,#240-16]	@ rounds field of AES_KEY (offset 240)
	eor	r1,r1,r5
	eor	r2,r2,r6
	eor	r3,r3,r7
	sub	r12,r12,#1		@ loop executes rounds-1 times; last round below
	mov	lr,#255			@ byte-extraction mask

	and	r7,lr,r0,lsr#16
	and	r8,lr,r0,lsr#8
	and	r9,lr,r0
	mov	r0,r0,lsr#24
.Ldec_loop:
	ldr	r4,[r10,r7,lsl#2]	@ Td1[s0>>16]
	and	r7,lr,r1		@ i0
	ldr	r5,[r10,r8,lsl#2]	@ Td2[s0>>8]
	and	r8,lr,r1,lsr#16
	ldr	r6,[r10,r9,lsl#2]	@ Td3[s0>>0]
	and	r9,lr,r1,lsr#8
	ldr	r0,[r10,r0,lsl#2]	@ Td0[s0>>24]
	mov	r1,r1,lsr#24

	ldr	r7,[r10,r7,lsl#2]	@ Td3[s1>>0]
	ldr	r8,[r10,r8,lsl#2]	@ Td1[s1>>16]
	ldr	r9,[r10,r9,lsl#2]	@ Td2[s1>>8]
	eor	r0,r0,r7,ror#24
	ldr	r1,[r10,r1,lsl#2]	@ Td0[s1>>24]
	and	r7,lr,r2,lsr#8	@ i0
	eor	r5,r8,r5,ror#8
	and	r8,lr,r2		@ i1
	eor	r6,r9,r6,ror#8
	and	r9,lr,r2,lsr#16
	ldr	r7,[r10,r7,lsl#2]	@ Td2[s2>>8]
	eor	r1,r1,r4,ror#8
	ldr	r8,[r10,r8,lsl#2]	@ Td3[s2>>0]
	mov	r2,r2,lsr#24

	ldr	r9,[r10,r9,lsl#2]	@ Td1[s2>>16]
	eor	r0,r0,r7,ror#16
	ldr	r2,[r10,r2,lsl#2]	@ Td0[s2>>24]
	and	r7,lr,r3,lsr#16	@ i0
	eor	r1,r1,r8,ror#24
	and	r8,lr,r3,lsr#8	@ i1
	eor	r6,r9,r6,ror#8
	and	r9,lr,r3		@ i2
	ldr	r7,[r10,r7,lsl#2]	@ Td1[s3>>16]
	eor	r2,r2,r5,ror#8
	ldr	r8,[r10,r8,lsl#2]	@ Td2[s3>>8]
	mov	r3,r3,lsr#24

	ldr	r9,[r10,r9,lsl#2]	@ Td3[s3>>0]
	eor	r0,r0,r7,ror#8
	ldr	r7,[r11],#16		@ next round key, advance schedule
	eor	r1,r1,r8,ror#16
	ldr	r3,[r10,r3,lsl#2]	@ Td0[s3>>24]
	eor	r2,r2,r9,ror#24

	ldr	r4,[r11,#-12]
	eor	r0,r0,r7
	ldr	r5,[r11,#-8]
	eor	r3,r3,r6,ror#8
	ldr	r6,[r11,#-4]
	and	r7,lr,r0,lsr#16
	eor	r1,r1,r4
	and	r8,lr,r0,lsr#8
	eor	r2,r2,r5
	and	r9,lr,r0
	eor	r3,r3,r6
	mov	r0,r0,lsr#24

	subs	r12,r12,#1
	bne	.Ldec_loop

	add	r10,r10,#1024		@ r10 now points at Td4 (byte table)

	ldr	r5,[r10,#0]		@ prefetch Td4
	ldr	r6,[r10,#32]
	ldr	r4,[r10,#64]
	ldr	r5,[r10,#96]
	ldr	r6,[r10,#128]
	ldr	r4,[r10,#160]
	ldr	r5,[r10,#192]
	ldr	r6,[r10,#224]

	ldrb	r0,[r10,r0]		@ Td4[s0>>24]
	ldrb	r4,[r10,r7]		@ Td4[s0>>16]
	and	r7,lr,r1		@ i0
	ldrb	r5,[r10,r8]		@ Td4[s0>>8]
	and	r8,lr,r1,lsr#16
	ldrb	r6,[r10,r9]		@ Td4[s0>>0]
	and	r9,lr,r1,lsr#8

	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
	eor	r0,r7,r0,lsl#24
	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
	eor	r1,r4,r1,lsl#8
	and	r7,lr,r2,lsr#8	@ i0
	eor	r5,r5,r8,lsl#8
	and	r8,lr,r2		@ i1
	ldrb	r7,[r10,r7]		@ Td4[s2>>8]
	eor	r6,r6,r9,lsl#8
	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
	and	r9,lr,r2,lsr#16

	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
	eor	r0,r0,r7,lsl#8
	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
	eor	r1,r8,r1,lsl#16
	and	r7,lr,r3,lsr#16	@ i0
	eor	r2,r5,r2,lsl#16
	and	r8,lr,r3,lsr#8	@ i1
	ldrb	r7,[r10,r7]		@ Td4[s3>>16]
	eor	r6,r6,r9,lsl#16
	ldrb	r8,[r10,r8]		@ Td4[s3>>8]
	and	r9,lr,r3		@ i2

	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
	eor	r0,r0,r7,lsl#16
	ldr	r7,[r11,#0]		@ final AddRoundKey
	eor	r1,r1,r8,lsl#8
	ldr	r4,[r11,#4]
	eor	r2,r9,r2,lsl#8
	ldr	r5,[r11,#8]
	eor	r3,r6,r3,lsl#24
	ldr	r6,[r11,#12]

	eor	r0,r0,r7
	eor	r1,r1,r4
	eor	r2,r2,r5
	eor	r3,r3,r6

	sub	r10,r10,#1024		@ restore r10 to Td for the caller
	ldr	pc,[sp],#4		@ pop and return
.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
1111 | .asciz "AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>" | ||
1112 | .align 2 | ||
diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c new file mode 100644 index 000000000000..59f7877ead6a --- /dev/null +++ b/arch/arm/crypto/aes_glue.c | |||
@@ -0,0 +1,108 @@ | |||
1 | /* | ||
2 | * Glue Code for the asm optimized version of the AES Cipher Algorithm | ||
3 | */ | ||
4 | |||
5 | #include <linux/module.h> | ||
6 | #include <linux/crypto.h> | ||
7 | #include <crypto/aes.h> | ||
8 | |||
#define AES_MAXNR 14	/* max AES round count (reached with 256-bit keys) */

/*
 * Expanded key schedule in the layout the assembler routines expect
 * (same as OpenSSL's AES_KEY): 4*(AES_MAXNR+1) round-key words
 * followed by the actual round count.
 */
typedef struct {
	unsigned int rd_key[4 *(AES_MAXNR + 1)];
	int rounds;
} AES_KEY;

/* Per-tfm context: a separate expanded schedule for each direction. */
struct AES_CTX {
	AES_KEY enc_key;
	AES_KEY dec_key;
};

/*
 * Implemented in aes-armv4.S.  The set_key helpers take the key size
 * in bits and return -1 on failure (OpenSSL convention; see the == -1
 * checks in aes_set_key below).
 */
asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx);
asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx);
asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
25 | |||
26 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
27 | { | ||
28 | struct AES_CTX *ctx = crypto_tfm_ctx(tfm); | ||
29 | AES_encrypt(src, dst, &ctx->enc_key); | ||
30 | } | ||
31 | |||
32 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
33 | { | ||
34 | struct AES_CTX *ctx = crypto_tfm_ctx(tfm); | ||
35 | AES_decrypt(src, dst, &ctx->dec_key); | ||
36 | } | ||
37 | |||
38 | static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
39 | unsigned int key_len) | ||
40 | { | ||
41 | struct AES_CTX *ctx = crypto_tfm_ctx(tfm); | ||
42 | |||
43 | switch (key_len) { | ||
44 | case AES_KEYSIZE_128: | ||
45 | key_len = 128; | ||
46 | break; | ||
47 | case AES_KEYSIZE_192: | ||
48 | key_len = 192; | ||
49 | break; | ||
50 | case AES_KEYSIZE_256: | ||
51 | key_len = 256; | ||
52 | break; | ||
53 | default: | ||
54 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
55 | return -EINVAL; | ||
56 | } | ||
57 | |||
58 | if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) { | ||
59 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
60 | return -EINVAL; | ||
61 | } | ||
62 | /* private_AES_set_decrypt_key expects an encryption key as input */ | ||
63 | ctx->dec_key = ctx->enc_key; | ||
64 | if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) { | ||
65 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
66 | return -EINVAL; | ||
67 | } | ||
68 | return 0; | ||
69 | } | ||
70 | |||
/*
 * Single-block cipher registration.  Priority 200 ranks this driver
 * above the generic C implementation so the assembler version is
 * preferred when both are available.
 */
static struct crypto_alg aes_alg = {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-asm",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct AES_CTX),
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey			= aes_set_key,
			.cia_encrypt		= aes_encrypt,
			.cia_decrypt		= aes_decrypt
		}
	}
};
90 | |||
/* Module entry: register the cipher with the crypto API. */
static int __init aes_init(void)
{
	return crypto_register_alg(&aes_alg);
}
95 | |||
/* Module exit: unregister the cipher. */
static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}
100 | |||
101 | module_init(aes_init); | ||
102 | module_exit(aes_fini); | ||
103 | |||
104 | MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)"); | ||
105 | MODULE_LICENSE("GPL"); | ||
106 | MODULE_ALIAS("aes"); | ||
107 | MODULE_ALIAS("aes-asm"); | ||
108 | MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>"); | ||
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S new file mode 100644 index 000000000000..7050ab133b9d --- /dev/null +++ b/arch/arm/crypto/sha1-armv4-large.S | |||
@@ -0,0 +1,503 @@ | |||
1 | #define __ARM_ARCH__ __LINUX_ARM_ARCH__ | ||
2 | @ ==================================================================== | ||
3 | @ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
4 | @ project. The module is, however, dual licensed under OpenSSL and | ||
5 | @ CRYPTOGAMS licenses depending on where you obtain it. For further | ||
6 | @ details see http://www.openssl.org/~appro/cryptogams/. | ||
7 | @ ==================================================================== | ||
8 | |||
9 | @ sha1_block procedure for ARMv4. | ||
10 | @ | ||
11 | @ January 2007. | ||
12 | |||
13 | @ Size/performance trade-off | ||
14 | @ ==================================================================== | ||
15 | @ impl size in bytes comp cycles[*] measured performance | ||
16 | @ ==================================================================== | ||
17 | @ thumb 304 3212 4420 | ||
18 | @ armv4-small 392/+29% 1958/+64% 2250/+96% | ||
19 | @ armv4-compact 740/+89% 1552/+26% 1840/+22% | ||
20 | @ armv4-large 1420/+92% 1307/+19% 1370/+34%[***] | ||
21 | @ full unroll ~5100/+260% ~1260/+4% ~1300/+5% | ||
22 | @ ==================================================================== | ||
23 | @ thumb = same as 'small' but in Thumb instructions[**] and | ||
24 | @ with recurring code in two private functions; | ||
25 | @ small = detached Xload/update, loops are folded; | ||
26 | @ compact = detached Xload/update, 5x unroll; | ||
27 | @ large = interleaved Xload/update, 5x unroll; | ||
28 | @ full unroll = interleaved Xload/update, full unroll, estimated[!]; | ||
29 | @ | ||
30 | @ [*] Manually counted instructions in "grand" loop body. Measured | ||
31 | @ performance is affected by prologue and epilogue overhead, | ||
32 | @ i-cache availability, branch penalties, etc. | ||
33 | @ [**] While each Thumb instruction is twice smaller, they are not as | ||
34 | @ diverse as ARM ones: e.g., there are only two arithmetic | ||
35 | @ instructions with 3 arguments, no [fixed] rotate, addressing | ||
36 | @ modes are limited. As result it takes more instructions to do | ||
37 | @ the same job in Thumb, therefore the code is never twice as | ||
38 | @ small and always slower. | ||
39 | @ [***] which is also ~35% better than compiler generated code. Dual- | ||
40 | @ issue Cortex A8 core was measured to process input block in | ||
41 | @ ~990 cycles. | ||
42 | |||
43 | @ August 2010. | ||
44 | @ | ||
45 | @ Rescheduling for dual-issue pipeline resulted in 13% improvement on | ||
46 | @ Cortex A8 core and in absolute terms ~870 cycles per input block | ||
47 | @ [or 13.6 cycles per byte]. | ||
48 | |||
49 | @ February 2011. | ||
50 | @ | ||
51 | @ Profiler-assisted and platform-specific optimization resulted in 10% | ||
52 | @ improvement on Cortex A8 core and 12.2 cycles per byte. | ||
53 | |||
54 | .text | ||
55 | |||
.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
@ sha1_block_data_order(state, data, num)
@ r0 = 5-word SHA-1 chaining state, r1 = input data, r2 = number of
@ 64-byte blocks (r2 is turned into an end pointer below).  The 80-word
@ message schedule is built on the stack; r14 walks it downward and
@ r8 holds the round constant for the current phase.
sha1_block_data_order:
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr	r8,.LK_00_19
	mov	r14,sp
	sub	sp,sp,#15*4
	mov	r5,r5,ror#30
	mov	r6,r6,ror#30
	mov	r7,r7,ror#30		@ [6]
@ Rounds 0..14 (5x unrolled): load X[i] from the input, push it on the
@ stack schedule, F = Ch(B,C,D) with K_00_19.
.L_00_15:
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r4,r5			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	eor	r10,r4,r5			@ F_xx_xx
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r3,r10,ror#2
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r3,r4			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	eor	r10,r3,r4			@ F_xx_xx
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r7,r10,ror#2
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r7,r3			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	eor	r10,r7,r3			@ F_xx_xx
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r6,r10,ror#2
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r6,r7			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	eor	r10,r6,r7			@ F_xx_xx
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r5,r10,ror#2
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
	teq	r14,sp
	bne	.L_00_15			@ [((11+4)*5+2)*3]
@ Round 15 (last load from the input stream), then rounds 16..19 which
@ switch to computing X[i] from the stack schedule.
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)

@ Rounds 20..39 and 60..79 share one loop body (F = parity); the carry
@ flag distinguishes the two phases (clear = 20..39, set = 60..79).
	ldr	r8,.LK_20_39		@ [+15+16*4]
	sub	sp,sp,#25*4
	cmn	sp,#0			@ [+3], clear carry to denote 20_39
.L_20_39_or_60_79:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r4,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r3,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r7,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r6,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r5,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
	teq	r14,sp			@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

@ Rounds 40..59: F = Maj(B,C,D) with K_40_59.
	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ [+2]
.L_40_59:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r4,r10,ror#2		@ F_xx_xx
	and	r11,r5,r6			@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
	add	r7,r7,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
	and	r11,r4,r5			@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
	add	r6,r6,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
	and	r11,r3,r4			@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
	add	r5,r5,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
	and	r11,r7,r3			@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
	add	r4,r4,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
	and	r11,r6,r7			@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
	add	r3,r3,r11,ror#2
	teq	r14,sp
	bne	.L_40_59		@ [+((12+5)*5+2)*4]

	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
.L_done:
	add	sp,sp,#80*4		@ "deallocate" stack frame
	ldmia	r0,{r8,r9,r10,r11,r12}
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2			@ more input blocks left?
	bne	.Lloop			@ [+18], total 1307

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.align	2
@ SHA-1 round constants, one per 20-round phase.
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
502 | .asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>" | ||
503 | .align 2 | ||
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c new file mode 100644 index 000000000000..76cd976230bc --- /dev/null +++ b/arch/arm/crypto/sha1_glue.c | |||
@@ -0,0 +1,179 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * Glue code for the SHA1 Secure Hash Algorithm assembler implementation | ||
4 | * | ||
5 | * This file is based on sha1_generic.c and sha1_ssse3_glue.c | ||
6 | * | ||
7 | * Copyright (c) Alan Smithee. | ||
8 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
9 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
10 | * Copyright (c) Mathias Krause <minipli@googlemail.com> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify it | ||
13 | * under the terms of the GNU General Public License as published by the Free | ||
14 | * Software Foundation; either version 2 of the License, or (at your option) | ||
15 | * any later version. | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <crypto/internal/hash.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/cryptohash.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <crypto/sha.h> | ||
25 | #include <asm/byteorder.h> | ||
26 | |||
/*
 * Private hash state for the shash descriptor: the five SHA1 chaining
 * variables, the running byte count, and a carry-over buffer for a
 * partial block.  Layout matters: the assembler core loads/stores the
 * five state words from the start of this struct (ldmia/stmia on r0),
 * and sha1_final() reads them out in this order.
 */
struct SHA1_CTX {
	uint32_t h0,h1,h2,h3,h4;	/* SHA1 state A..E */
	u64 count;			/* total bytes hashed (not bits) */
	u8 data[SHA1_BLOCK_SIZE];	/* buffered partial block */
};

/*
 * Assembler core (sha1-armv4-large.S).  "rounds" is the number of
 * whole 64-byte blocks to process from "data" (see __sha1_update()).
 */
asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
		const unsigned char *data, unsigned int rounds);
35 | |||
36 | |||
37 | static int sha1_init(struct shash_desc *desc) | ||
38 | { | ||
39 | struct SHA1_CTX *sctx = shash_desc_ctx(desc); | ||
40 | memset(sctx, 0, sizeof(*sctx)); | ||
41 | sctx->h0 = SHA1_H0; | ||
42 | sctx->h1 = SHA1_H1; | ||
43 | sctx->h2 = SHA1_H2; | ||
44 | sctx->h3 = SHA1_H3; | ||
45 | sctx->h4 = SHA1_H4; | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | |||
50 | static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data, | ||
51 | unsigned int len, unsigned int partial) | ||
52 | { | ||
53 | unsigned int done = 0; | ||
54 | |||
55 | sctx->count += len; | ||
56 | |||
57 | if (partial) { | ||
58 | done = SHA1_BLOCK_SIZE - partial; | ||
59 | memcpy(sctx->data + partial, data, done); | ||
60 | sha1_block_data_order(sctx, sctx->data, 1); | ||
61 | } | ||
62 | |||
63 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
64 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
65 | sha1_block_data_order(sctx, data + done, rounds); | ||
66 | done += rounds * SHA1_BLOCK_SIZE; | ||
67 | } | ||
68 | |||
69 | memcpy(sctx->data, data + done, len - done); | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | |||
74 | static int sha1_update(struct shash_desc *desc, const u8 *data, | ||
75 | unsigned int len) | ||
76 | { | ||
77 | struct SHA1_CTX *sctx = shash_desc_ctx(desc); | ||
78 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
79 | int res; | ||
80 | |||
81 | /* Handle the fast case right here */ | ||
82 | if (partial + len < SHA1_BLOCK_SIZE) { | ||
83 | sctx->count += len; | ||
84 | memcpy(sctx->data + partial, data, len); | ||
85 | return 0; | ||
86 | } | ||
87 | res = __sha1_update(sctx, data, len, partial); | ||
88 | return res; | ||
89 | } | ||
90 | |||
91 | |||
92 | /* Add padding and return the message digest. */ | ||
93 | static int sha1_final(struct shash_desc *desc, u8 *out) | ||
94 | { | ||
95 | struct SHA1_CTX *sctx = shash_desc_ctx(desc); | ||
96 | unsigned int i, index, padlen; | ||
97 | __be32 *dst = (__be32 *)out; | ||
98 | __be64 bits; | ||
99 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
100 | |||
101 | bits = cpu_to_be64(sctx->count << 3); | ||
102 | |||
103 | /* Pad out to 56 mod 64 and append length */ | ||
104 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
105 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
106 | /* We need to fill a whole block for __sha1_update() */ | ||
107 | if (padlen <= 56) { | ||
108 | sctx->count += padlen; | ||
109 | memcpy(sctx->data + index, padding, padlen); | ||
110 | } else { | ||
111 | __sha1_update(sctx, padding, padlen, index); | ||
112 | } | ||
113 | __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56); | ||
114 | |||
115 | /* Store state in digest */ | ||
116 | for (i = 0; i < 5; i++) | ||
117 | dst[i] = cpu_to_be32(((u32 *)sctx)[i]); | ||
118 | |||
119 | /* Wipe context */ | ||
120 | memset(sctx, 0, sizeof(*sctx)); | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | |||
125 | static int sha1_export(struct shash_desc *desc, void *out) | ||
126 | { | ||
127 | struct SHA1_CTX *sctx = shash_desc_ctx(desc); | ||
128 | memcpy(out, sctx, sizeof(*sctx)); | ||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | |||
133 | static int sha1_import(struct shash_desc *desc, const void *in) | ||
134 | { | ||
135 | struct SHA1_CTX *sctx = shash_desc_ctx(desc); | ||
136 | memcpy(sctx, in, sizeof(*sctx)); | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | |||
/*
 * Registration record for the crypto API.  Priority 150 beats the
 * generic C "sha1" implementation so this assembler version is chosen
 * when both are available; export/import state is simply the raw
 * SHA1_CTX, hence statesize == descsize.
 */
static struct shash_alg alg = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	sha1_init,
	.update		=	sha1_update,
	.final		=	sha1_final,
	.export		=	sha1_export,
	.import		=	sha1_import,
	.descsize	=	sizeof(struct SHA1_CTX),
	.statesize	=	sizeof(struct SHA1_CTX),
	.base		=	{
		.cra_name	=	"sha1",
		.cra_driver_name=	"sha1-asm",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA1_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
159 | |||
160 | |||
161 | static int __init sha1_mod_init(void) | ||
162 | { | ||
163 | return crypto_register_shash(&alg); | ||
164 | } | ||
165 | |||
166 | |||
167 | static void __exit sha1_mod_fini(void) | ||
168 | { | ||
169 | crypto_unregister_shash(&alg); | ||
170 | } | ||
171 | |||
172 | |||
/* Standard module plumbing; MODULE_ALIAS lets this load on request
 * for the generic "sha1" algorithm name. */
module_init(sha1_mod_init);
module_exit(sha1_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)");
MODULE_ALIAS("sha1");
MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");