author    David McCullough <david_mccullough@mcafee.com>    2012-09-06 16:17:02 -0400
committer Herbert Xu <herbert@gondor.apana.org.au>          2012-09-06 16:17:02 -0400
commit    f0be44f4fb1faee42635ca5ea06dc9c3e820a35d (patch)
tree      866c6016f7153ddff778a5eda036934e7eb426ac /arch/arm/crypto
parent    956c203c5e370c7beb766400b5c1a32ec570ce96 (diff)
arm/crypto: Add optimized AES and SHA1 routines
Add assembler versions of AES and SHA1 for ARM platforms.  This has provided
up to a 50% improvement in IPsec/TCP throughput for tunnels using AES128/SHA1.

Platform   CPU Speed   Endian   Before (bps)   After (bps)   Improvement
IXP425     533 MHz     big        11217042       15566294      ~38%
KS8695     166 MHz     little      3828549        5795373      ~51%

Signed-off-by: David McCullough <ucdevel@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm/crypto')
-rw-r--r--  arch/arm/crypto/Makefile               9
-rw-r--r--  arch/arm/crypto/aes-armv4.S         1112
-rw-r--r--  arch/arm/crypto/aes_glue.c           108
-rw-r--r--  arch/arm/crypto/sha1-armv4-large.S   503
-rw-r--r--  arch/arm/crypto/sha1_glue.c          179
5 files changed, 1911 insertions, 0 deletions
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
new file mode 100644
index 000000000000..a2c83851bc90
--- /dev/null
+++ b/arch/arm/crypto/Makefile
@@ -0,0 +1,9 @@
#
# Arch-specific CryptoAPI modules.
#

obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o

aes-arm-y := aes-armv4.o aes_glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
new file mode 100644
index 000000000000..e59b1d505d6c
--- /dev/null
+++ b/arch/arm/crypto/aes-armv4.S
@@ -0,0 +1,1112 @@
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ AES for ARMv4

@ January 2007.
@
@ Code uses a single 1K S-box and is >2 times faster than code generated
@ by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
@ allows merging a logical or arithmetic operation with a shift or rotate
@ in one instruction and emitting the combined result every cycle. The
@ module is endian-neutral. The performance is ~42 cycles/byte for 128-bit
@ key [on single-issue Xscale PXA250 core].

@ May 2007.
@
@ AES_set_[en|de]crypt_key is added.

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 12% improvement on
@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~21.5 cycles per byte.

@ A little glue here to select the correct code below for the ARM CPU
@ that is being targeted.

.text
.code 32

.type AES_Te,%object
.align 5
AES_Te:
.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
@ Te4[256]
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
@ rcon[]
.word 0x01000000, 0x02000000, 0x04000000, 0x08000000
.word 0x10000000, 0x20000000, 0x40000000, 0x80000000
.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.size AES_Te,.-AES_Te

@ void AES_encrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
.global AES_encrypt
.type AES_encrypt,%function
.align 5
AES_encrypt:
	sub r3,pc,#8 @ AES_encrypt
	stmdb sp!,{r1,r4-r12,lr}
	mov r12,r0 @ inp
	mov r11,r2
	sub r10,r3,#AES_encrypt-AES_Te @ Te
#if __ARM_ARCH__<7
	ldrb r0,[r12,#3] @ load input data in endian-neutral
	ldrb r4,[r12,#2] @ manner...
	ldrb r5,[r12,#1]
	ldrb r6,[r12,#0]
	orr r0,r0,r4,lsl#8
	ldrb r1,[r12,#7]
	orr r0,r0,r5,lsl#16
	ldrb r4,[r12,#6]
	orr r0,r0,r6,lsl#24
	ldrb r5,[r12,#5]
	ldrb r6,[r12,#4]
	orr r1,r1,r4,lsl#8
	ldrb r2,[r12,#11]
	orr r1,r1,r5,lsl#16
	ldrb r4,[r12,#10]
	orr r1,r1,r6,lsl#24
	ldrb r5,[r12,#9]
	ldrb r6,[r12,#8]
	orr r2,r2,r4,lsl#8
	ldrb r3,[r12,#15]
	orr r2,r2,r5,lsl#16
	ldrb r4,[r12,#14]
	orr r2,r2,r6,lsl#24
	ldrb r5,[r12,#13]
	ldrb r6,[r12,#12]
	orr r3,r3,r4,lsl#8
	orr r3,r3,r5,lsl#16
	orr r3,r3,r6,lsl#24
#else
	ldr r0,[r12,#0]
	ldr r1,[r12,#4]
	ldr r2,[r12,#8]
	ldr r3,[r12,#12]
#ifdef __ARMEL__
	rev r0,r0
	rev r1,r1
	rev r2,r2
	rev r3,r3
#endif
#endif
	bl _armv4_AES_encrypt

	ldr r12,[sp],#4 @ pop out
#if __ARM_ARCH__>=7
#ifdef __ARMEL__
	rev r0,r0
	rev r1,r1
	rev r2,r2
	rev r3,r3
#endif
	str r0,[r12,#0]
	str r1,[r12,#4]
	str r2,[r12,#8]
	str r3,[r12,#12]
#else
	mov r4,r0,lsr#24 @ write output in endian-neutral
	mov r5,r0,lsr#16 @ manner...
	mov r6,r0,lsr#8
	strb r4,[r12,#0]
	strb r5,[r12,#1]
	mov r4,r1,lsr#24
	strb r6,[r12,#2]
	mov r5,r1,lsr#16
	strb r0,[r12,#3]
	mov r6,r1,lsr#8
	strb r4,[r12,#4]
	strb r5,[r12,#5]
	mov r4,r2,lsr#24
	strb r6,[r12,#6]
	mov r5,r2,lsr#16
	strb r1,[r12,#7]
	mov r6,r2,lsr#8
	strb r4,[r12,#8]
	strb r5,[r12,#9]
	mov r4,r3,lsr#24
	strb r6,[r12,#10]
	mov r5,r3,lsr#16
	strb r2,[r12,#11]
	mov r6,r3,lsr#8
	strb r4,[r12,#12]
	strb r5,[r12,#13]
	strb r6,[r12,#14]
	strb r3,[r12,#15]
#endif
#if __ARM_ARCH__>=5
	ldmia sp!,{r4-r12,pc}
#else
	ldmia sp!,{r4-r12,lr}
	tst lr,#1
	moveq pc,lr @ be binary compatible with V4, yet
	.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size AES_encrypt,.-AES_encrypt

.type _armv4_AES_encrypt,%function
.align 2
_armv4_AES_encrypt:
	str lr,[sp,#-4]! @ push lr
	ldmia r11!,{r4-r7}
	eor r0,r0,r4
	ldr r12,[r11,#240-16]
	eor r1,r1,r5
	eor r2,r2,r6
	eor r3,r3,r7
	sub r12,r12,#1
	mov lr,#255

	and r7,lr,r0
	and r8,lr,r0,lsr#8
	and r9,lr,r0,lsr#16
	mov r0,r0,lsr#24
.Lenc_loop:
	ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0]
	and r7,lr,r1,lsr#16 @ i0
	ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8]
	and r8,lr,r1
	ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16]
	and r9,lr,r1,lsr#8
	ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24]
	mov r1,r1,lsr#24

	ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16]
	ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0]
	ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8]
	eor r0,r0,r7,ror#8
	ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24]
	and r7,lr,r2,lsr#8 @ i0
	eor r5,r5,r8,ror#8
	and r8,lr,r2,lsr#16 @ i1
	eor r6,r6,r9,ror#8
	and r9,lr,r2
	ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8]
	eor r1,r1,r4,ror#24
	ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16]
	mov r2,r2,lsr#24

	ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0]
	eor r0,r0,r7,ror#16
	ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24]
	and r7,lr,r3 @ i0
	eor r1,r1,r8,ror#8
	and r8,lr,r3,lsr#8 @ i1
	eor r6,r6,r9,ror#16
	and r9,lr,r3,lsr#16 @ i2
	ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0]
	eor r2,r2,r5,ror#16
	ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8]
	mov r3,r3,lsr#24

	ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16]
	eor r0,r0,r7,ror#24
	ldr r7,[r11],#16
	eor r1,r1,r8,ror#16
	ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24]
	eor r2,r2,r9,ror#8
	ldr r4,[r11,#-12]
	eor r3,r3,r6,ror#8

	ldr r5,[r11,#-8]
	eor r0,r0,r7
	ldr r6,[r11,#-4]
	and r7,lr,r0
	eor r1,r1,r4
	and r8,lr,r0,lsr#8
	eor r2,r2,r5
	and r9,lr,r0,lsr#16
	eor r3,r3,r6
	mov r0,r0,lsr#24

	subs r12,r12,#1
	bne .Lenc_loop

	add r10,r10,#2

	ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0]
	and r7,lr,r1,lsr#16 @ i0
	ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8]
	and r8,lr,r1
	ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16]
	and r9,lr,r1,lsr#8
	ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24]
	mov r1,r1,lsr#24

	ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16]
	ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0]
	ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8]
	eor r0,r7,r0,lsl#8
	ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24]
	and r7,lr,r2,lsr#8 @ i0
	eor r5,r8,r5,lsl#8
	and r8,lr,r2,lsr#16 @ i1
	eor r6,r9,r6,lsl#8
	and r9,lr,r2
	ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8]
	eor r1,r4,r1,lsl#24
	ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16]
	mov r2,r2,lsr#24

	ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0]
	eor r0,r7,r0,lsl#8
	ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24]
	and r7,lr,r3 @ i0
	eor r1,r1,r8,lsl#16
	and r8,lr,r3,lsr#8 @ i1
	eor r6,r9,r6,lsl#8
	and r9,lr,r3,lsr#16 @ i2
	ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0]
	eor r2,r5,r2,lsl#24
	ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8]
	mov r3,r3,lsr#24

	ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16]
	eor r0,r7,r0,lsl#8
	ldr r7,[r11,#0]
	ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24]
	eor r1,r1,r8,lsl#8
	ldr r4,[r11,#4]
	eor r2,r2,r9,lsl#16
	ldr r5,[r11,#8]
	eor r3,r6,r3,lsl#24
	ldr r6,[r11,#12]

	eor r0,r0,r7
	eor r1,r1,r4
	eor r2,r2,r5
	eor r3,r3,r6

	sub r10,r10,#2
	ldr pc,[sp],#4 @ pop and return
.size _armv4_AES_encrypt,.-_armv4_AES_encrypt

.global private_AES_set_encrypt_key
.type private_AES_set_encrypt_key,%function
.align 5
private_AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
	sub r3,pc,#8 @ AES_set_encrypt_key
	teq r0,#0
	moveq r0,#-1
	beq .Labrt
	teq r2,#0
	moveq r0,#-1
	beq .Labrt

	teq r1,#128
	beq .Lok
	teq r1,#192
	beq .Lok
	teq r1,#256
	movne r0,#-1
	bne .Labrt

.Lok:	stmdb sp!,{r4-r12,lr}
	sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4

	mov r12,r0 @ inp
	mov lr,r1 @ bits
	mov r11,r2 @ key

#if __ARM_ARCH__<7
	ldrb r0,[r12,#3] @ load input data in endian-neutral
	ldrb r4,[r12,#2] @ manner...
	ldrb r5,[r12,#1]
	ldrb r6,[r12,#0]
	orr r0,r0,r4,lsl#8
	ldrb r1,[r12,#7]
	orr r0,r0,r5,lsl#16
	ldrb r4,[r12,#6]
	orr r0,r0,r6,lsl#24
	ldrb r5,[r12,#5]
	ldrb r6,[r12,#4]
	orr r1,r1,r4,lsl#8
	ldrb r2,[r12,#11]
	orr r1,r1,r5,lsl#16
	ldrb r4,[r12,#10]
	orr r1,r1,r6,lsl#24
	ldrb r5,[r12,#9]
	ldrb r6,[r12,#8]
	orr r2,r2,r4,lsl#8
	ldrb r3,[r12,#15]
	orr r2,r2,r5,lsl#16
	ldrb r4,[r12,#14]
	orr r2,r2,r6,lsl#24
	ldrb r5,[r12,#13]
	ldrb r6,[r12,#12]
	orr r3,r3,r4,lsl#8
	str r0,[r11],#16
	orr r3,r3,r5,lsl#16
	str r1,[r11,#-12]
	orr r3,r3,r6,lsl#24
	str r2,[r11,#-8]
	str r3,[r11,#-4]
#else
	ldr r0,[r12,#0]
	ldr r1,[r12,#4]
	ldr r2,[r12,#8]
	ldr r3,[r12,#12]
#ifdef __ARMEL__
	rev r0,r0
	rev r1,r1
	rev r2,r2
	rev r3,r3
#endif
	str r0,[r11],#16
	str r1,[r11,#-12]
	str r2,[r11,#-8]
	str r3,[r11,#-4]
#endif

	teq lr,#128
	bne .Lnot128
	mov r12,#10
	str r12,[r11,#240-16]
	add r6,r10,#256 @ rcon
	mov lr,#255

.L128_loop:
	and r5,lr,r3,lsr#24
	and r7,lr,r3,lsr#16
	ldrb r5,[r10,r5]
	and r8,lr,r3,lsr#8
	ldrb r7,[r10,r7]
	and r9,lr,r3
	ldrb r8,[r10,r8]
	orr r5,r5,r7,lsl#24
	ldrb r9,[r10,r9]
	orr r5,r5,r8,lsl#16
	ldr r4,[r6],#4 @ rcon[i++]
	orr r5,r5,r9,lsl#8
	eor r5,r5,r4
	eor r0,r0,r5 @ rk[4]=rk[0]^...
	eor r1,r1,r0 @ rk[5]=rk[1]^rk[4]
	str r0,[r11],#16
	eor r2,r2,r1 @ rk[6]=rk[2]^rk[5]
	str r1,[r11,#-12]
	eor r3,r3,r2 @ rk[7]=rk[3]^rk[6]
	str r2,[r11,#-8]
	subs r12,r12,#1
	str r3,[r11,#-4]
	bne .L128_loop
	sub r2,r11,#176
	b .Ldone

.Lnot128:
#if __ARM_ARCH__<7
	ldrb r8,[r12,#19]
	ldrb r4,[r12,#18]
	ldrb r5,[r12,#17]
	ldrb r6,[r12,#16]
	orr r8,r8,r4,lsl#8
	ldrb r9,[r12,#23]
	orr r8,r8,r5,lsl#16
	ldrb r4,[r12,#22]
	orr r8,r8,r6,lsl#24
	ldrb r5,[r12,#21]
	ldrb r6,[r12,#20]
	orr r9,r9,r4,lsl#8
	orr r9,r9,r5,lsl#16
	str r8,[r11],#8
	orr r9,r9,r6,lsl#24
	str r9,[r11,#-4]
#else
	ldr r8,[r12,#16]
	ldr r9,[r12,#20]
#ifdef __ARMEL__
	rev r8,r8
	rev r9,r9
#endif
	str r8,[r11],#8
	str r9,[r11,#-4]
#endif

	teq lr,#192
	bne .Lnot192
	mov r12,#12
	str r12,[r11,#240-24]
	add r6,r10,#256 @ rcon
	mov lr,#255
	mov r12,#8

.L192_loop:
	and r5,lr,r9,lsr#24
	and r7,lr,r9,lsr#16
	ldrb r5,[r10,r5]
	and r8,lr,r9,lsr#8
	ldrb r7,[r10,r7]
	and r9,lr,r9
	ldrb r8,[r10,r8]
	orr r5,r5,r7,lsl#24
	ldrb r9,[r10,r9]
	orr r5,r5,r8,lsl#16
	ldr r4,[r6],#4 @ rcon[i++]
	orr r5,r5,r9,lsl#8
	eor r9,r5,r4
	eor r0,r0,r9 @ rk[6]=rk[0]^...
	eor r1,r1,r0 @ rk[7]=rk[1]^rk[6]
	str r0,[r11],#24
	eor r2,r2,r1 @ rk[8]=rk[2]^rk[7]
	str r1,[r11,#-20]
	eor r3,r3,r2 @ rk[9]=rk[3]^rk[8]
	str r2,[r11,#-16]
	subs r12,r12,#1
	str r3,[r11,#-12]
	subeq r2,r11,#216
	beq .Ldone

	ldr r7,[r11,#-32]
	ldr r8,[r11,#-28]
	eor r7,r7,r3 @ rk[10]=rk[4]^rk[9]
	eor r9,r8,r7 @ rk[11]=rk[5]^rk[10]
	str r7,[r11,#-8]
	str r9,[r11,#-4]
	b .L192_loop

.Lnot192:
#if __ARM_ARCH__<7
	ldrb r8,[r12,#27]
	ldrb r4,[r12,#26]
	ldrb r5,[r12,#25]
	ldrb r6,[r12,#24]
	orr r8,r8,r4,lsl#8
	ldrb r9,[r12,#31]
	orr r8,r8,r5,lsl#16
	ldrb r4,[r12,#30]
	orr r8,r8,r6,lsl#24
	ldrb r5,[r12,#29]
	ldrb r6,[r12,#28]
	orr r9,r9,r4,lsl#8
	orr r9,r9,r5,lsl#16
	str r8,[r11],#8
	orr r9,r9,r6,lsl#24
	str r9,[r11,#-4]
#else
	ldr r8,[r12,#24]
	ldr r9,[r12,#28]
#ifdef __ARMEL__
	rev r8,r8
	rev r9,r9
#endif
	str r8,[r11],#8
	str r9,[r11,#-4]
#endif

	mov r12,#14
	str r12,[r11,#240-32]
	add r6,r10,#256 @ rcon
	mov lr,#255
	mov r12,#7

.L256_loop:
	and r5,lr,r9,lsr#24
	and r7,lr,r9,lsr#16
	ldrb r5,[r10,r5]
	and r8,lr,r9,lsr#8
	ldrb r7,[r10,r7]
	and r9,lr,r9
	ldrb r8,[r10,r8]
	orr r5,r5,r7,lsl#24
	ldrb r9,[r10,r9]
	orr r5,r5,r8,lsl#16
	ldr r4,[r6],#4 @ rcon[i++]
	orr r5,r5,r9,lsl#8
	eor r9,r5,r4
	eor r0,r0,r9 @ rk[8]=rk[0]^...
	eor r1,r1,r0 @ rk[9]=rk[1]^rk[8]
	str r0,[r11],#32
	eor r2,r2,r1 @ rk[10]=rk[2]^rk[9]
	str r1,[r11,#-28]
	eor r3,r3,r2 @ rk[11]=rk[3]^rk[10]
	str r2,[r11,#-24]
	subs r12,r12,#1
	str r3,[r11,#-20]
	subeq r2,r11,#256
	beq .Ldone

	and r5,lr,r3
	and r7,lr,r3,lsr#8
	ldrb r5,[r10,r5]
	and r8,lr,r3,lsr#16
	ldrb r7,[r10,r7]
	and r9,lr,r3,lsr#24
	ldrb r8,[r10,r8]
	orr r5,r5,r7,lsl#8
	ldrb r9,[r10,r9]
	orr r5,r5,r8,lsl#16
	ldr r4,[r11,#-48]
	orr r5,r5,r9,lsl#24

	ldr r7,[r11,#-44]
	ldr r8,[r11,#-40]
	eor r4,r4,r5 @ rk[12]=rk[4]^...
	ldr r9,[r11,#-36]
	eor r7,r7,r4 @ rk[13]=rk[5]^rk[12]
	str r4,[r11,#-16]
	eor r8,r8,r7 @ rk[14]=rk[6]^rk[13]
	str r7,[r11,#-12]
	eor r9,r9,r8 @ rk[15]=rk[7]^rk[14]
	str r8,[r11,#-8]
	str r9,[r11,#-4]
	b .L256_loop

.Ldone:	mov r0,#0
	ldmia sp!,{r4-r12,lr}
.Labrt:	tst lr,#1
	moveq pc,lr @ be binary compatible with V4, yet
	.word 0xe12fff1e @ interoperable with Thumb ISA:-)
.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key

.global private_AES_set_decrypt_key
.type private_AES_set_decrypt_key,%function
.align 5
private_AES_set_decrypt_key:
	str lr,[sp,#-4]! @ push lr
#if 0
	@ kernel does both of these in setkey so optimise this bit out by
	@ expecting the key to already have the enc_key work done (see aes_glue.c)
	bl _armv4_AES_set_encrypt_key
#else
	mov r0,#0
#endif
	teq r0,#0
	ldrne lr,[sp],#4 @ pop lr
	bne .Labrt

	stmdb sp!,{r4-r12}

	ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2,
	mov r11,r2 @ which is AES_KEY *key
	mov r7,r2
	add r8,r2,r12,lsl#4

.Linv:	ldr r0,[r7]
	ldr r1,[r7,#4]
	ldr r2,[r7,#8]
	ldr r3,[r7,#12]
	ldr r4,[r8]
	ldr r5,[r8,#4]
	ldr r6,[r8,#8]
	ldr r9,[r8,#12]
	str r0,[r8],#-16
	str r1,[r8,#16+4]
	str r2,[r8,#16+8]
	str r3,[r8,#16+12]
	str r4,[r7],#16
	str r5,[r7,#-12]
	str r6,[r7,#-8]
	str r9,[r7,#-4]
	teq r7,r8
	bne .Linv
	ldr r0,[r11,#16]! @ prefetch tp1
	mov r7,#0x80
	mov r8,#0x1b
	orr r7,r7,#0x8000
	orr r8,r8,#0x1b00
	orr r7,r7,r7,lsl#16
	orr r8,r8,r8,lsl#16
	sub r12,r12,#1
	mvn r9,r7
	mov r12,r12,lsl#2 @ (rounds-1)*4

.Lmix:	and r4,r0,r7
	and r1,r0,r9
	sub r4,r4,r4,lsr#7
	and r4,r4,r8
	eor r1,r4,r1,lsl#1 @ tp2

	and r4,r1,r7
	and r2,r1,r9
	sub r4,r4,r4,lsr#7
	and r4,r4,r8
	eor r2,r4,r2,lsl#1 @ tp4

	and r4,r2,r7
	and r3,r2,r9
	sub r4,r4,r4,lsr#7
	and r4,r4,r8
	eor r3,r4,r3,lsl#1 @ tp8

	eor r4,r1,r2
	eor r5,r0,r3 @ tp9
	eor r4,r4,r3 @ tpe
	eor r4,r4,r1,ror#24
	eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
	eor r4,r4,r2,ror#16
	eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
	eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24)

	ldr r0,[r11,#4] @ prefetch tp1
	str r4,[r11],#4
	subs r12,r12,#1
	bne .Lmix

	mov r0,#0
#if __ARM_ARCH__>=5
	ldmia sp!,{r4-r12,pc}
#else
	ldmia sp!,{r4-r12,lr}
	tst lr,#1
	moveq pc,lr @ be binary compatible with V4, yet
	.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key

.type AES_Td,%object
.align 5
AES_Td:
.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
@ Td4[256]
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size AES_Td,.-AES_Td

@ void AES_decrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
.global AES_decrypt
.type AES_decrypt,%function
.align 5
AES_decrypt:
	sub r3,pc,#8 @ AES_decrypt
	stmdb sp!,{r1,r4-r12,lr}
	mov r12,r0 @ inp
	mov r11,r2
	sub r10,r3,#AES_decrypt-AES_Td @ Td
#if __ARM_ARCH__<7
	ldrb r0,[r12,#3] @ load input data in endian-neutral
	ldrb r4,[r12,#2] @ manner...
	ldrb r5,[r12,#1]
	ldrb r6,[r12,#0]
	orr r0,r0,r4,lsl#8
	ldrb r1,[r12,#7]
	orr r0,r0,r5,lsl#16
	ldrb r4,[r12,#6]
	orr r0,r0,r6,lsl#24
	ldrb r5,[r12,#5]
	ldrb r6,[r12,#4]
	orr r1,r1,r4,lsl#8
	ldrb r2,[r12,#11]
	orr r1,r1,r5,lsl#16
	ldrb r4,[r12,#10]
	orr r1,r1,r6,lsl#24
	ldrb r5,[r12,#9]
	ldrb r6,[r12,#8]
	orr r2,r2,r4,lsl#8
	ldrb r3,[r12,#15]
	orr r2,r2,r5,lsl#16
	ldrb r4,[r12,#14]
	orr r2,r2,r6,lsl#24
	ldrb r5,[r12,#13]
	ldrb r6,[r12,#12]
	orr r3,r3,r4,lsl#8
	orr r3,r3,r5,lsl#16
	orr r3,r3,r6,lsl#24
#else
	ldr r0,[r12,#0]
	ldr r1,[r12,#4]
	ldr r2,[r12,#8]
	ldr r3,[r12,#12]
#ifdef __ARMEL__
	rev r0,r0
	rev r1,r1
	rev r2,r2
	rev r3,r3
#endif
#endif
	bl _armv4_AES_decrypt

	ldr r12,[sp],#4 @ pop out
#if __ARM_ARCH__>=7
#ifdef __ARMEL__
	rev r0,r0
	rev r1,r1
	rev r2,r2
	rev r3,r3
#endif
	str r0,[r12,#0]
	str r1,[r12,#4]
	str r2,[r12,#8]
	str r3,[r12,#12]
#else
	mov r4,r0,lsr#24 @ write output in endian-neutral
	mov r5,r0,lsr#16 @ manner...
	mov r6,r0,lsr#8
	strb r4,[r12,#0]
	strb r5,[r12,#1]
	mov r4,r1,lsr#24
	strb r6,[r12,#2]
	mov r5,r1,lsr#16
	strb r0,[r12,#3]
	mov r6,r1,lsr#8
	strb r4,[r12,#4]
	strb r5,[r12,#5]
	mov r4,r2,lsr#24
	strb r6,[r12,#6]
	mov r5,r2,lsr#16
	strb r1,[r12,#7]
	mov r6,r2,lsr#8
	strb r4,[r12,#8]
	strb r5,[r12,#9]
	mov r4,r3,lsr#24
	strb r6,[r12,#10]
	mov r5,r3,lsr#16
	strb r2,[r12,#11]
	mov r6,r3,lsr#8
	strb r4,[r12,#12]
	strb r5,[r12,#13]
	strb r6,[r12,#14]
	strb r3,[r12,#15]
#endif
#if __ARM_ARCH__>=5
	ldmia sp!,{r4-r12,pc}
#else
	ldmia sp!,{r4-r12,lr}
	tst lr,#1
	moveq pc,lr @ be binary compatible with V4, yet
	.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size AES_decrypt,.-AES_decrypt

.type _armv4_AES_decrypt,%function
.align 2
_armv4_AES_decrypt:
	str lr,[sp,#-4]! @ push lr
	ldmia r11!,{r4-r7}
	eor r0,r0,r4
	ldr r12,[r11,#240-16]
	eor r1,r1,r5
	eor r2,r2,r6
	eor r3,r3,r7
	sub r12,r12,#1
	mov lr,#255

	and r7,lr,r0,lsr#16
	and r8,lr,r0,lsr#8
	and r9,lr,r0
	mov r0,r0,lsr#24
.Ldec_loop:
	ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16]
	and r7,lr,r1 @ i0
	ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8]
	and r8,lr,r1,lsr#16
	ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0]
	and r9,lr,r1,lsr#8
	ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24]
	mov r1,r1,lsr#24

	ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0]
	ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16]
	ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8]
	eor r0,r0,r7,ror#24
	ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24]
	and r7,lr,r2,lsr#8 @ i0
	eor r5,r8,r5,ror#8
	and r8,lr,r2 @ i1
	eor r6,r9,r6,ror#8
	and r9,lr,r2,lsr#16
	ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8]
	eor r1,r1,r4,ror#8
	ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0]
	mov r2,r2,lsr#24

	ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16]
	eor r0,r0,r7,ror#16
	ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24]
	and r7,lr,r3,lsr#16 @ i0
	eor r1,r1,r8,ror#24
	and r8,lr,r3,lsr#8 @ i1
	eor r6,r9,r6,ror#8
	and r9,lr,r3 @ i2
	ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16]
	eor r2,r2,r5,ror#8
	ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8]
	mov r3,r3,lsr#24

	ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0]
	eor r0,r0,r7,ror#8
	ldr r7,[r11],#16
	eor r1,r1,r8,ror#16
	ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24]
	eor r2,r2,r9,ror#24

	ldr r4,[r11,#-12]
	eor r0,r0,r7
	ldr r5,[r11,#-8]
	eor r3,r3,r6,ror#8
	ldr r6,[r11,#-4]
	and r7,lr,r0,lsr#16
	eor r1,r1,r4
	and r8,lr,r0,lsr#8
	eor r2,r2,r5
	and r9,lr,r0
	eor r3,r3,r6
	mov r0,r0,lsr#24

	subs r12,r12,#1
	bne .Ldec_loop

	add r10,r10,#1024

	ldr r5,[r10,#0] @ prefetch Td4
	ldr r6,[r10,#32]
	ldr r4,[r10,#64]
	ldr r5,[r10,#96]
	ldr r6,[r10,#128]
	ldr r4,[r10,#160]
	ldr r5,[r10,#192]
	ldr r6,[r10,#224]

	ldrb r0,[r10,r0] @ Td4[s0>>24]
	ldrb r4,[r10,r7] @ Td4[s0>>16]
	and r7,lr,r1 @ i0
	ldrb r5,[r10,r8] @ Td4[s0>>8]
	and r8,lr,r1,lsr#16
	ldrb r6,[r10,r9] @ Td4[s0>>0]
	and r9,lr,r1,lsr#8

	ldrb r7,[r10,r7] @ Td4[s1>>0]
	ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
	ldrb r8,[r10,r8] @ Td4[s1>>16]
	eor r0,r7,r0,lsl#24
	ldrb r9,[r10,r9] @ Td4[s1>>8]
	eor r1,r4,r1,lsl#8
	and r7,lr,r2,lsr#8 @ i0
	eor r5,r5,r8,lsl#8
	and r8,lr,r2 @ i1
	ldrb r7,[r10,r7] @ Td4[s2>>8]
	eor r6,r6,r9,lsl#8
	ldrb r8,[r10,r8] @ Td4[s2>>0]
	and r9,lr,r2,lsr#16

	ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
	eor r0,r0,r7,lsl#8
	ldrb r9,[r10,r9] @ Td4[s2>>16]
	eor r1,r8,r1,lsl#16
	and r7,lr,r3,lsr#16 @ i0
	eor r2,r5,r2,lsl#16
	and r8,lr,r3,lsr#8 @ i1
	ldrb r7,[r10,r7] @ Td4[s3>>16]
	eor r6,r6,r9,lsl#16
	ldrb r8,[r10,r8] @ Td4[s3>>8]
	and r9,lr,r3 @ i2

	ldrb r9,[r10,r9] @ Td4[s3>>0]
	ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
	eor r0,r0,r7,lsl#16
	ldr r7,[r11,#0]
	eor r1,r1,r8,lsl#8
	ldr r4,[r11,#4]
	eor r2,r9,r2,lsl#8
	ldr r5,[r11,#8]
	eor r3,r6,r3,lsl#24
	ldr r6,[r11,#12]

	eor r0,r0,r7
	eor r1,r1,r4
	eor r2,r2,r5
	eor r3,r3,r6

	sub r10,r10,#1024
	ldr pc,[sp],#4 @ pop and return
.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
.asciz "AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align 2
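
The round structure the assembler above implements can be sketched in C. This is
illustrative only and not part of the patch: the names Te0 and aes_enc_round are
assumptions, with Te0 standing for the 1 KB table at AES_Te (the 256 precomputed
S-box-times-MixColumns words). The ror#8/ror#16/ror#24 operands in
_armv4_AES_encrypt recover the three companion tables from that single table,
which is what ror32() expresses here:

#include <stdint.h>
#include <string.h>

extern const uint32_t Te0[256];	/* assumed: the 1 KB table, filled in elsewhere */

static inline uint32_t ror32(uint32_t x, unsigned int n)
{
	return (x >> n) | (x << (32 - n));
}

/* One inner encryption round over the four state words s[0..3] with this
 * round's key words rk[0..3]; Te1..Te3 are derived by rotating Te0 entries
 * instead of storing three more 1 KB tables. */
static void aes_enc_round(uint32_t s[4], const uint32_t *rk)
{
	uint32_t t[4];
	int i;

	for (i = 0; i < 4; i++)
		t[i] = Te0[s[i] >> 24] ^
		       ror32(Te0[(s[(i + 1) & 3] >> 16) & 0xff], 8) ^
		       ror32(Te0[(s[(i + 2) & 3] >> 8) & 0xff], 16) ^
		       ror32(Te0[s[(i + 3) & 3] & 0xff], 24) ^
		       rk[i];
	memcpy(s, t, sizeof(t));
}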
diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c
new file mode 100644
index 000000000000..59f7877ead6a
--- /dev/null
+++ b/arch/arm/crypto/aes_glue.c
@@ -0,0 +1,108 @@
/*
 * Glue Code for the asm optimized version of the AES Cipher Algorithm
 */

#include <linux/module.h>
#include <linux/crypto.h>
#include <crypto/aes.h>

#define AES_MAXNR 14

typedef struct {
	unsigned int rd_key[4 *(AES_MAXNR + 1)];
	int rounds;
} AES_KEY;

struct AES_CTX {
	AES_KEY enc_key;
	AES_KEY dec_key;
};

asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx);
asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx);
asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);

static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
	AES_encrypt(src, dst, &ctx->enc_key);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
	AES_decrypt(src, dst, &ctx->dec_key);
}

static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		unsigned int key_len)
{
	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);

	switch (key_len) {
	case AES_KEYSIZE_128:
		key_len = 128;
		break;
	case AES_KEYSIZE_192:
		key_len = 192;
		break;
	case AES_KEYSIZE_256:
		key_len = 256;
		break;
	default:
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}
	/* private_AES_set_decrypt_key expects an encryption key as input */
	ctx->dec_key = ctx->enc_key;
	if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}
	return 0;
}

static struct crypto_alg aes_alg = {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-asm",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct AES_CTX),
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey		= aes_set_key,
			.cia_encrypt		= aes_encrypt,
			.cia_decrypt		= aes_decrypt
		}
	}
};

static int __init aes_init(void)
{
	return crypto_register_alg(&aes_alg);
}

static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}

module_init(aes_init);
module_exit(aes_fini);

MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
MODULE_LICENSE("GPL");
MODULE_ALIAS("aes");
MODULE_ALIAS("aes-asm");
MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");
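
A minimal sketch of how other kernel code would exercise the registered cipher
through the crypto API of this era; the function name and the all-zero key and
plaintext are made up for illustration. Because "aes-asm" registers with
cra_priority 200, allocating by the generic name "aes" prefers it over
aes-generic (priority 100):

#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/aes.h>

static int aes_asm_demo(void)
{
	struct crypto_cipher *tfm;
	u8 key[AES_KEYSIZE_128] = { 0 };
	u8 in[AES_BLOCK_SIZE] = { 0 }, out[AES_BLOCK_SIZE];
	int err;

	/* "aes" resolves to the highest-priority provider of that name */
	tfm = crypto_alloc_cipher("aes", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* runs aes_set_key() above: expands both enc_key and dec_key */
	err = crypto_cipher_setkey(tfm, key, sizeof(key));
	if (!err)
		crypto_cipher_encrypt_one(tfm, out, in);	/* calls AES_encrypt */

	crypto_free_cipher(tfm);
	return err;
}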
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
new file mode 100644
index 000000000000..7050ab133b9d
--- /dev/null
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -0,0 +1,503 @@
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ sha1_block procedure for ARMv4.
@
@ January 2007.

@ Size/performance trade-off
@ ====================================================================
@ impl		size in bytes	comp cycles[*]	measured performance
@ ====================================================================
@ thumb		304		3212		4420
@ armv4-small	392/+29%	1958/+64%	2250/+96%
@ armv4-compact	740/+89%	1552/+26%	1840/+22%
@ armv4-large	1420/+92%	1307/+19%	1370/+34%[***]
@ full unroll	~5100/+260%	~1260/+4%	~1300/+5%
@ ====================================================================
@ thumb = same as 'small' but in Thumb instructions[**] and
@ with recurring code in two private functions;
@ small = detached Xload/update, loops are folded;
@ compact = detached Xload/update, 5x unroll;
@ large = interleaved Xload/update, 5x unroll;
@ full unroll = interleaved Xload/update, full unroll, estimated[!];
@
@ [*] Manually counted instructions in "grand" loop body. Measured
@ performance is affected by prologue and epilogue overhead,
@ i-cache availability, branch penalties, etc.
@ [**] While each Thumb instruction is half the size, they are not as
@ diverse as ARM ones: e.g., there are only two arithmetic
@ instructions with 3 arguments, no [fixed] rotate, addressing
@ modes are limited. As a result it takes more instructions to do
@ the same job in Thumb, so the code is never half the size and
@ is always slower.
@ [***] which is also ~35% better than compiler generated code. Dual-
@ issue Cortex A8 core was measured to process input block in
@ ~990 cycles.

@ August 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 13% improvement on
@ Cortex A8 core and in absolute terms ~870 cycles per input block
@ [or 13.6 cycles per byte].

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 10%
@ improvement on Cortex A8 core and 12.2 cycles per byte.

.text

.global sha1_block_data_order
.type sha1_block_data_order,%function

.align 2
sha1_block_data_order:
	stmdb sp!,{r4-r12,lr}
	add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
	ldmia r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr r8,.LK_00_19
	mov r14,sp
	sub sp,sp,#15*4
	mov r5,r5,ror#30
	mov r6,r6,ror#30
	mov r7,r7,ror#30 @ [6]
.L_00_15:
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r7,r8,r7,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r5,r6 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r7,r8,r7,ror#2 @ E+=K_00_19
	eor r10,r5,r6 @ F_xx_xx
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r4,r10,ror#2
	add r7,r7,r9 @ E+=X[i]
	eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r7,r7,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r6,r8,r6,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r4,r5 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r6,r8,r6,ror#2 @ E+=K_00_19
	eor r10,r4,r5 @ F_xx_xx
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r3,r10,ror#2
	add r6,r6,r9 @ E+=X[i]
	eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r6,r6,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r5,r8,r5,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r3,r4 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r5,r8,r5,ror#2 @ E+=K_00_19
	eor r10,r3,r4 @ F_xx_xx
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r7,r10,ror#2
	add r5,r5,r9 @ E+=X[i]
	eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r5,r5,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r4,r8,r4,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r7,r3 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r4,r8,r4,ror#2 @ E+=K_00_19
	eor r10,r7,r3 @ F_xx_xx
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r6,r10,ror#2
	add r4,r4,r9 @ E+=X[i]
	eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r4,r4,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r3,r8,r3,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r6,r7 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r3,r8,r3,ror#2 @ E+=K_00_19
	eor r10,r6,r7 @ F_xx_xx
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r5,r10,ror#2
	add r3,r3,r9 @ E+=X[i]
	eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r3,r3,r10 @ E+=F_00_19(B,C,D)
	teq r14,sp
	bne .L_00_15 @ [((11+4)*5+2)*3]
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r7,r8,r7,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r5,r6 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r7,r8,r7,ror#2 @ E+=K_00_19
	eor r10,r5,r6 @ F_xx_xx
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r4,r10,ror#2
	add r7,r7,r9 @ E+=X[i]
	eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r7,r7,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r6,r8,r6,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r4,r5 @ F_xx_xx
	mov r9,r9,ror#31
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r3,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r6,r6,r9 @ E+=X[i]
	eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
	add r6,r6,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r5,r8,r5,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r3,r4 @ F_xx_xx
	mov r9,r9,ror#31
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r7,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r5,r5,r9 @ E+=X[i]
	eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
	add r5,r5,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r4,r8,r4,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r7,r3 @ F_xx_xx
	mov r9,r9,ror#31
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r6,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r4,r4,r9 @ E+=X[i]
	eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
	add r4,r4,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r3,r8,r3,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r6,r7 @ F_xx_xx
	mov r9,r9,ror#31
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r5,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r3,r3,r9 @ E+=X[i]
	eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
	add r3,r3,r10 @ E+=F_00_19(B,C,D)

	ldr r8,.LK_20_39 @ [+15+16*4]
	sub sp,sp,#25*4
	cmn sp,#0 @ [+3], clear carry to denote 20_39
.L_20_39_or_60_79:
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r7,r8,r7,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r5,r6 @ F_xx_xx
	mov r9,r9,ror#31
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r4,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r7,r7,r9 @ E+=X[i]
	add r7,r7,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r6,r8,r6,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r4,r5 @ F_xx_xx
	mov r9,r9,ror#31
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r3,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r6,r6,r9 @ E+=X[i]
	add r6,r6,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r5,r8,r5,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r3,r4 @ F_xx_xx
	mov r9,r9,ror#31
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r7,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r5,r5,r9 @ E+=X[i]
	add r5,r5,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r4,r8,r4,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r7,r3 @ F_xx_xx
	mov r9,r9,ror#31
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r6,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r4,r4,r9 @ E+=X[i]
	add r4,r4,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r3,r8,r3,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r6,r7 @ F_xx_xx
	mov r9,r9,ror#31
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r5,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r3,r3,r9 @ E+=X[i]
	add r3,r3,r10 @ E+=F_20_39(B,C,D)
	teq r14,sp @ preserve carry
	bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
	bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes

	ldr r8,.LK_40_59
	sub sp,sp,#20*4 @ [+2]
.L_40_59:
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r7,r8,r7,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r5,r6 @ F_xx_xx
	mov r9,r9,ror#31
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r4,r10,ror#2 @ F_xx_xx
	and r11,r5,r6 @ F_xx_xx
	add r7,r7,r9 @ E+=X[i]
	add r7,r7,r10 @ E+=F_40_59(B,C,D)
	add r7,r7,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r6,r8,r6,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r4,r5 @ F_xx_xx
	mov r9,r9,ror#31
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r3,r10,ror#2 @ F_xx_xx
	and r11,r4,r5 @ F_xx_xx
	add r6,r6,r9 @ E+=X[i]
	add r6,r6,r10 @ E+=F_40_59(B,C,D)
	add r6,r6,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r5,r8,r5,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r3,r4 @ F_xx_xx
	mov r9,r9,ror#31
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r7,r10,ror#2 @ F_xx_xx
	and r11,r3,r4 @ F_xx_xx
	add r5,r5,r9 @ E+=X[i]
	add r5,r5,r10 @ E+=F_40_59(B,C,D)
	add r5,r5,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r4,r8,r4,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r7,r3 @ F_xx_xx
	mov r9,r9,ror#31
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r6,r10,ror#2 @ F_xx_xx
	and r11,r7,r3 @ F_xx_xx
	add r4,r4,r9 @ E+=X[i]
	add r4,r4,r10 @ E+=F_40_59(B,C,D)
	add r4,r4,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r3,r8,r3,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r6,r7 @ F_xx_xx
	mov r9,r9,ror#31
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r5,r10,ror#2 @ F_xx_xx
	and r11,r6,r7 @ F_xx_xx
	add r3,r3,r9 @ E+=X[i]
	add r3,r3,r10 @ E+=F_40_59(B,C,D)
	add r3,r3,r11,ror#2
	teq r14,sp
	bne .L_40_59 @ [+((12+5)*5+2)*4]

	ldr r8,.LK_60_79
	sub sp,sp,#20*4
	cmp sp,#0 @ set carry to denote 60_79
	b .L_20_39_or_60_79 @ [+4], spare 300 bytes
.L_done:
	add sp,sp,#80*4 @ "deallocate" stack frame
	ldmia r0,{r8,r9,r10,r11,r12}
	add r3,r8,r3
	add r4,r9,r4
	add r5,r10,r5,ror#2
	add r6,r11,r6,ror#2
	add r7,r12,r7,ror#2
	stmia r0,{r3,r4,r5,r6,r7}
	teq r1,r2
	bne .Lloop @ [+18], total 1307

#if __ARM_ARCH__>=5
	ldmia sp!,{r4-r12,pc}
#else
	ldmia sp!,{r4-r12,lr}
	tst lr,#1
	moveq pc,lr @ be binary compatible with V4, yet
	.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.align 2
.LK_00_19:	.word 0x5a827999
.LK_20_39:	.word 0x6ed9eba1
.LK_40_59:	.word 0x8f1bbcdc
.LK_60_79:	.word 0xca62c1d6
.size sha1_block_data_order,.-sha1_block_data_order
.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align 2
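
The repeated four-load groups above (offsets #15*4, #13*4, #7*4 and #2*4 off the
moving r14 pointer, combined with ror#31, i.e. a left rotate by one) implement
the standard SHA-1 message expansion over a sliding 16-word window. A C
rendering of that recurrence, as an illustrative sketch with assumed names:

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, unsigned int n)
{
	return (x << n) | (x >> (32 - n));
}

/* W[0..15] hold the byte-swapped input block; the remaining 64 entries are
 * derived exactly as the assembly derives each new X[i] before storing it
 * with "str r9,[r14,#-4]!". */
static void sha1_schedule(uint32_t W[80])
{
	int i;

	for (i = 16; i < 80; i++)
		W[i] = rol32(W[i - 3] ^ W[i - 8] ^ W[i - 14] ^ W[i - 16], 1);
}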
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
new file mode 100644
index 000000000000..76cd976230bc
--- /dev/null
+++ b/arch/arm/crypto/sha1_glue.c
@@ -0,0 +1,179 @@
/*
 * Cryptographic API.
 * Glue code for the SHA1 Secure Hash Algorithm assembler implementation
 *
 * This file is based on sha1_generic.c and sha1_ssse3_glue.c
 *
 * Copyright (c) Alan Smithee.
 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
 * Copyright (c) Mathias Krause <minipli@googlemail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>

struct SHA1_CTX {
	uint32_t h0,h1,h2,h3,h4;
	u64 count;
	u8 data[SHA1_BLOCK_SIZE];
};

asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
		const unsigned char *data, unsigned int rounds);


static int sha1_init(struct shash_desc *desc)
{
	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
	memset(sctx, 0, sizeof(*sctx));
	sctx->h0 = SHA1_H0;
	sctx->h1 = SHA1_H1;
	sctx->h2 = SHA1_H2;
	sctx->h3 = SHA1_H3;
	sctx->h4 = SHA1_H4;
	return 0;
}


static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
			 unsigned int len, unsigned int partial)
{
	unsigned int done = 0;

	sctx->count += len;

	if (partial) {
		done = SHA1_BLOCK_SIZE - partial;
		memcpy(sctx->data + partial, data, done);
		sha1_block_data_order(sctx, sctx->data, 1);
	}

	if (len - done >= SHA1_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
		sha1_block_data_order(sctx, data + done, rounds);
		done += rounds * SHA1_BLOCK_SIZE;
	}

	memcpy(sctx->data, data + done, len - done);
	return 0;
}


static int sha1_update(struct shash_desc *desc, const u8 *data,
		       unsigned int len)
{
	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
	int res;

	/* Handle the fast case right here */
	if (partial + len < SHA1_BLOCK_SIZE) {
		sctx->count += len;
		memcpy(sctx->data + partial, data, len);
		return 0;
	}
	res = __sha1_update(sctx, data, len, partial);
	return res;
}


/* Add padding and return the message digest. */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };

	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA1_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
	/* We need to fill a whole block for __sha1_update() */
	if (padlen <= 56) {
		sctx->count += padlen;
		memcpy(sctx->data + index, padding, padlen);
	} else {
		__sha1_update(sctx, padding, padlen, index);
	}
	__sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56);

	/* Store state in digest */
	for (i = 0; i < 5; i++)
		dst[i] = cpu_to_be32(((u32 *)sctx)[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));
	return 0;
}


static int sha1_export(struct shash_desc *desc, void *out)
{
	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
	memcpy(out, sctx, sizeof(*sctx));
	return 0;
}


static int sha1_import(struct shash_desc *desc, const void *in)
{
	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
	memcpy(sctx, in, sizeof(*sctx));
	return 0;
}


static struct shash_alg alg = {
	.digestsize	= SHA1_DIGEST_SIZE,
	.init		= sha1_init,
	.update		= sha1_update,
	.final		= sha1_final,
	.export		= sha1_export,
	.import		= sha1_import,
	.descsize	= sizeof(struct SHA1_CTX),
	.statesize	= sizeof(struct SHA1_CTX),
	.base		= {
		.cra_name	= "sha1",
		.cra_driver_name = "sha1-asm",
		.cra_priority	= 150,
		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	= SHA1_BLOCK_SIZE,
		.cra_module	= THIS_MODULE,
	}
};


static int __init sha1_mod_init(void)
{
	return crypto_register_shash(&alg);
}


static void __exit sha1_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}


module_init(sha1_mod_init);
module_exit(sha1_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)");
MODULE_ALIAS("sha1");
MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");
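
To close, a hedged sketch of driving the registered hash from other kernel code
with the shash API of this era; the function name is illustrative and not part
of the patch. With cra_priority 150, "sha1" resolves to sha1-asm ahead of the
generic C implementation:

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

static int sha1_asm_demo(const u8 *data, unsigned int len, u8 *digest)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	tfm = crypto_alloc_shash("sha1", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* descsize covers struct SHA1_CTX declared in the glue code above */
	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}

	desc->tfm = tfm;
	desc->flags = 0;

	/* init + update + final in one call; digest gets 20 bytes */
	err = crypto_shash_digest(desc, data, len, digest);

	kfree(desc);
	crypto_free_shash(tfm);
	return err;
}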