aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/aesni-intel_asm.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/crypto/aesni-intel_asm.S')
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S896
1 files changed, 896 insertions, 0 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
new file mode 100644
index 000000000000..caba99601703
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -0,0 +1,896 @@
1/*
2 * Implement AES algorithm in Intel AES-NI instructions.
3 *
4 * The white paper of AES-NI instructions can be downloaded from:
5 * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
6 *
7 * Copyright (C) 2008, Intel Corp.
8 * Author: Huang Ying <ying.huang@intel.com>
9 * Vinodh Gopal <vinodh.gopal@intel.com>
10 * Kahraman Akdemir
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 */
17
18#include <linux/linkage.h>
19
20.text
21
22#define STATE1 %xmm0
23#define STATE2 %xmm4
24#define STATE3 %xmm5
25#define STATE4 %xmm6
26#define STATE STATE1
27#define IN1 %xmm1
28#define IN2 %xmm7
29#define IN3 %xmm8
30#define IN4 %xmm9
31#define IN IN1
32#define KEY %xmm2
33#define IV %xmm3
34
35#define KEYP %rdi
36#define OUTP %rsi
37#define INP %rdx
38#define LEN %rcx
39#define IVP %r8
40#define KLEN %r9d
41#define T1 %r10
42#define TKEYP T1
43#define T2 %r11
44
45_key_expansion_128:
46_key_expansion_256a:
47 pshufd $0b11111111, %xmm1, %xmm1
48 shufps $0b00010000, %xmm0, %xmm4
49 pxor %xmm4, %xmm0
50 shufps $0b10001100, %xmm0, %xmm4
51 pxor %xmm4, %xmm0
52 pxor %xmm1, %xmm0
53 movaps %xmm0, (%rcx)
54 add $0x10, %rcx
55 ret
56
57_key_expansion_192a:
58 pshufd $0b01010101, %xmm1, %xmm1
59 shufps $0b00010000, %xmm0, %xmm4
60 pxor %xmm4, %xmm0
61 shufps $0b10001100, %xmm0, %xmm4
62 pxor %xmm4, %xmm0
63 pxor %xmm1, %xmm0
64
65 movaps %xmm2, %xmm5
66 movaps %xmm2, %xmm6
67 pslldq $4, %xmm5
68 pshufd $0b11111111, %xmm0, %xmm3
69 pxor %xmm3, %xmm2
70 pxor %xmm5, %xmm2
71
72 movaps %xmm0, %xmm1
73 shufps $0b01000100, %xmm0, %xmm6
74 movaps %xmm6, (%rcx)
75 shufps $0b01001110, %xmm2, %xmm1
76 movaps %xmm1, 16(%rcx)
77 add $0x20, %rcx
78 ret
79
80_key_expansion_192b:
81 pshufd $0b01010101, %xmm1, %xmm1
82 shufps $0b00010000, %xmm0, %xmm4
83 pxor %xmm4, %xmm0
84 shufps $0b10001100, %xmm0, %xmm4
85 pxor %xmm4, %xmm0
86 pxor %xmm1, %xmm0
87
88 movaps %xmm2, %xmm5
89 pslldq $4, %xmm5
90 pshufd $0b11111111, %xmm0, %xmm3
91 pxor %xmm3, %xmm2
92 pxor %xmm5, %xmm2
93
94 movaps %xmm0, (%rcx)
95 add $0x10, %rcx
96 ret
97
98_key_expansion_256b:
99 pshufd $0b10101010, %xmm1, %xmm1
100 shufps $0b00010000, %xmm2, %xmm4
101 pxor %xmm4, %xmm2
102 shufps $0b10001100, %xmm2, %xmm4
103 pxor %xmm4, %xmm2
104 pxor %xmm1, %xmm2
105 movaps %xmm2, (%rcx)
106 add $0x10, %rcx
107 ret
108
109/*
110 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
111 * unsigned int key_len)
112 */
113ENTRY(aesni_set_key)
114 movups (%rsi), %xmm0 # user key (first 16 bytes)
115 movaps %xmm0, (%rdi)
116 lea 0x10(%rdi), %rcx # key addr
117 movl %edx, 480(%rdi)
118 pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
119 cmp $24, %dl
120 jb .Lenc_key128
121 je .Lenc_key192
122 movups 0x10(%rsi), %xmm2 # other user key
123 movaps %xmm2, (%rcx)
124 add $0x10, %rcx
125 # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
126 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
127 call _key_expansion_256a
128 # aeskeygenassist $0x1, %xmm0, %xmm1
129 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
130 call _key_expansion_256b
131 # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
132 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
133 call _key_expansion_256a
134 # aeskeygenassist $0x2, %xmm0, %xmm1
135 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
136 call _key_expansion_256b
137 # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
138 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
139 call _key_expansion_256a
140 # aeskeygenassist $0x4, %xmm0, %xmm1
141 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
142 call _key_expansion_256b
143 # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
144 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
145 call _key_expansion_256a
146 # aeskeygenassist $0x8, %xmm0, %xmm1
147 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
148 call _key_expansion_256b
149 # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
150 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
151 call _key_expansion_256a
152 # aeskeygenassist $0x10, %xmm0, %xmm1
153 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
154 call _key_expansion_256b
155 # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
156 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
157 call _key_expansion_256a
158 # aeskeygenassist $0x20, %xmm0, %xmm1
159 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
160 call _key_expansion_256b
161 # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
162 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
163 call _key_expansion_256a
164 jmp .Ldec_key
165.Lenc_key192:
166 movq 0x10(%rsi), %xmm2 # other user key
167 # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
168 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
169 call _key_expansion_192a
170 # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
171 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
172 call _key_expansion_192b
173 # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
174 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
175 call _key_expansion_192a
176 # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
177 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
178 call _key_expansion_192b
179 # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
180 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
181 call _key_expansion_192a
182 # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
183 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
184 call _key_expansion_192b
185 # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
186 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
187 call _key_expansion_192a
188 # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
189 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
190 call _key_expansion_192b
191 jmp .Ldec_key
192.Lenc_key128:
193 # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
194 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
195 call _key_expansion_128
196 # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
197 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
198 call _key_expansion_128
199 # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
200 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
201 call _key_expansion_128
202 # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
203 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
204 call _key_expansion_128
205 # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
206 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
207 call _key_expansion_128
208 # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
209 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
210 call _key_expansion_128
211 # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
212 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
213 call _key_expansion_128
214 # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
215 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
216 call _key_expansion_128
217 # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
218 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
219 call _key_expansion_128
220 # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
221 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
222 call _key_expansion_128
223.Ldec_key:
224 sub $0x10, %rcx
225 movaps (%rdi), %xmm0
226 movaps (%rcx), %xmm1
227 movaps %xmm0, 240(%rcx)
228 movaps %xmm1, 240(%rdi)
229 add $0x10, %rdi
230 lea 240-16(%rcx), %rsi
231.align 4
232.Ldec_key_loop:
233 movaps (%rdi), %xmm0
234 # aesimc %xmm0, %xmm1
235 .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
236 movaps %xmm1, (%rsi)
237 add $0x10, %rdi
238 sub $0x10, %rsi
239 cmp %rcx, %rdi
240 jb .Ldec_key_loop
241 xor %rax, %rax
242 ret
243
244/*
245 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
246 */
247ENTRY(aesni_enc)
248 movl 480(KEYP), KLEN # key length
249 movups (INP), STATE # input
250 call _aesni_enc1
251 movups STATE, (OUTP) # output
252 ret
253
254/*
255 * _aesni_enc1: internal ABI
256 * input:
257 * KEYP: key struct pointer
258 * KLEN: round count
259 * STATE: initial state (input)
260 * output:
261 * STATE: finial state (output)
262 * changed:
263 * KEY
264 * TKEYP (T1)
265 */
266_aesni_enc1:
267 movaps (KEYP), KEY # key
268 mov KEYP, TKEYP
269 pxor KEY, STATE # round 0
270 add $0x30, TKEYP
271 cmp $24, KLEN
272 jb .Lenc128
273 lea 0x20(TKEYP), TKEYP
274 je .Lenc192
275 add $0x20, TKEYP
276 movaps -0x60(TKEYP), KEY
277 # aesenc KEY, STATE
278 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
279 movaps -0x50(TKEYP), KEY
280 # aesenc KEY, STATE
281 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
282.align 4
283.Lenc192:
284 movaps -0x40(TKEYP), KEY
285 # aesenc KEY, STATE
286 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
287 movaps -0x30(TKEYP), KEY
288 # aesenc KEY, STATE
289 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
290.align 4
291.Lenc128:
292 movaps -0x20(TKEYP), KEY
293 # aesenc KEY, STATE
294 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
295 movaps -0x10(TKEYP), KEY
296 # aesenc KEY, STATE
297 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
298 movaps (TKEYP), KEY
299 # aesenc KEY, STATE
300 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
301 movaps 0x10(TKEYP), KEY
302 # aesenc KEY, STATE
303 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
304 movaps 0x20(TKEYP), KEY
305 # aesenc KEY, STATE
306 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
307 movaps 0x30(TKEYP), KEY
308 # aesenc KEY, STATE
309 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
310 movaps 0x40(TKEYP), KEY
311 # aesenc KEY, STATE
312 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
313 movaps 0x50(TKEYP), KEY
314 # aesenc KEY, STATE
315 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
316 movaps 0x60(TKEYP), KEY
317 # aesenc KEY, STATE
318 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
319 movaps 0x70(TKEYP), KEY
320 # aesenclast KEY, STATE # last round
321 .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
322 ret
323
324/*
325 * _aesni_enc4: internal ABI
326 * input:
327 * KEYP: key struct pointer
328 * KLEN: round count
329 * STATE1: initial state (input)
330 * STATE2
331 * STATE3
332 * STATE4
333 * output:
334 * STATE1: finial state (output)
335 * STATE2
336 * STATE3
337 * STATE4
338 * changed:
339 * KEY
340 * TKEYP (T1)
341 */
342_aesni_enc4:
343 movaps (KEYP), KEY # key
344 mov KEYP, TKEYP
345 pxor KEY, STATE1 # round 0
346 pxor KEY, STATE2
347 pxor KEY, STATE3
348 pxor KEY, STATE4
349 add $0x30, TKEYP
350 cmp $24, KLEN
351 jb .L4enc128
352 lea 0x20(TKEYP), TKEYP
353 je .L4enc192
354 add $0x20, TKEYP
355 movaps -0x60(TKEYP), KEY
356 # aesenc KEY, STATE1
357 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
358 # aesenc KEY, STATE2
359 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
360 # aesenc KEY, STATE3
361 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
362 # aesenc KEY, STATE4
363 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
364 movaps -0x50(TKEYP), KEY
365 # aesenc KEY, STATE1
366 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
367 # aesenc KEY, STATE2
368 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
369 # aesenc KEY, STATE3
370 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
371 # aesenc KEY, STATE4
372 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
373#.align 4
374.L4enc192:
375 movaps -0x40(TKEYP), KEY
376 # aesenc KEY, STATE1
377 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
378 # aesenc KEY, STATE2
379 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
380 # aesenc KEY, STATE3
381 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
382 # aesenc KEY, STATE4
383 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
384 movaps -0x30(TKEYP), KEY
385 # aesenc KEY, STATE1
386 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
387 # aesenc KEY, STATE2
388 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
389 # aesenc KEY, STATE3
390 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
391 # aesenc KEY, STATE4
392 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
393#.align 4
394.L4enc128:
395 movaps -0x20(TKEYP), KEY
396 # aesenc KEY, STATE1
397 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
398 # aesenc KEY, STATE2
399 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
400 # aesenc KEY, STATE3
401 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
402 # aesenc KEY, STATE4
403 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
404 movaps -0x10(TKEYP), KEY
405 # aesenc KEY, STATE1
406 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
407 # aesenc KEY, STATE2
408 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
409 # aesenc KEY, STATE3
410 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
411 # aesenc KEY, STATE4
412 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
413 movaps (TKEYP), KEY
414 # aesenc KEY, STATE1
415 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
416 # aesenc KEY, STATE2
417 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
418 # aesenc KEY, STATE3
419 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
420 # aesenc KEY, STATE4
421 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
422 movaps 0x10(TKEYP), KEY
423 # aesenc KEY, STATE1
424 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
425 # aesenc KEY, STATE2
426 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
427 # aesenc KEY, STATE3
428 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
429 # aesenc KEY, STATE4
430 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
431 movaps 0x20(TKEYP), KEY
432 # aesenc KEY, STATE1
433 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
434 # aesenc KEY, STATE2
435 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
436 # aesenc KEY, STATE3
437 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
438 # aesenc KEY, STATE4
439 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
440 movaps 0x30(TKEYP), KEY
441 # aesenc KEY, STATE1
442 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
443 # aesenc KEY, STATE2
444 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
445 # aesenc KEY, STATE3
446 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
447 # aesenc KEY, STATE4
448 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
449 movaps 0x40(TKEYP), KEY
450 # aesenc KEY, STATE1
451 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
452 # aesenc KEY, STATE2
453 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
454 # aesenc KEY, STATE3
455 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
456 # aesenc KEY, STATE4
457 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
458 movaps 0x50(TKEYP), KEY
459 # aesenc KEY, STATE1
460 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
461 # aesenc KEY, STATE2
462 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
463 # aesenc KEY, STATE3
464 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
465 # aesenc KEY, STATE4
466 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
467 movaps 0x60(TKEYP), KEY
468 # aesenc KEY, STATE1
469 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
470 # aesenc KEY, STATE2
471 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
472 # aesenc KEY, STATE3
473 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
474 # aesenc KEY, STATE4
475 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
476 movaps 0x70(TKEYP), KEY
477 # aesenclast KEY, STATE1 # last round
478 .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
479 # aesenclast KEY, STATE2
480 .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
481 # aesenclast KEY, STATE3
482 .byte 0x66, 0x0f, 0x38, 0xdd, 0xea
483 # aesenclast KEY, STATE4
484 .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
485 ret
486
487/*
488 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
489 */
490ENTRY(aesni_dec)
491 mov 480(KEYP), KLEN # key length
492 add $240, KEYP
493 movups (INP), STATE # input
494 call _aesni_dec1
495 movups STATE, (OUTP) #output
496 ret
497
498/*
499 * _aesni_dec1: internal ABI
500 * input:
501 * KEYP: key struct pointer
502 * KLEN: key length
503 * STATE: initial state (input)
504 * output:
505 * STATE: finial state (output)
506 * changed:
507 * KEY
508 * TKEYP (T1)
509 */
510_aesni_dec1:
511 movaps (KEYP), KEY # key
512 mov KEYP, TKEYP
513 pxor KEY, STATE # round 0
514 add $0x30, TKEYP
515 cmp $24, KLEN
516 jb .Ldec128
517 lea 0x20(TKEYP), TKEYP
518 je .Ldec192
519 add $0x20, TKEYP
520 movaps -0x60(TKEYP), KEY
521 # aesdec KEY, STATE
522 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
523 movaps -0x50(TKEYP), KEY
524 # aesdec KEY, STATE
525 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
526.align 4
527.Ldec192:
528 movaps -0x40(TKEYP), KEY
529 # aesdec KEY, STATE
530 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
531 movaps -0x30(TKEYP), KEY
532 # aesdec KEY, STATE
533 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
534.align 4
535.Ldec128:
536 movaps -0x20(TKEYP), KEY
537 # aesdec KEY, STATE
538 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
539 movaps -0x10(TKEYP), KEY
540 # aesdec KEY, STATE
541 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
542 movaps (TKEYP), KEY
543 # aesdec KEY, STATE
544 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
545 movaps 0x10(TKEYP), KEY
546 # aesdec KEY, STATE
547 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
548 movaps 0x20(TKEYP), KEY
549 # aesdec KEY, STATE
550 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
551 movaps 0x30(TKEYP), KEY
552 # aesdec KEY, STATE
553 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
554 movaps 0x40(TKEYP), KEY
555 # aesdec KEY, STATE
556 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
557 movaps 0x50(TKEYP), KEY
558 # aesdec KEY, STATE
559 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
560 movaps 0x60(TKEYP), KEY
561 # aesdec KEY, STATE
562 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
563 movaps 0x70(TKEYP), KEY
564 # aesdeclast KEY, STATE # last round
565 .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
566 ret
567
568/*
569 * _aesni_dec4: internal ABI
570 * input:
571 * KEYP: key struct pointer
572 * KLEN: key length
573 * STATE1: initial state (input)
574 * STATE2
575 * STATE3
576 * STATE4
577 * output:
578 * STATE1: finial state (output)
579 * STATE2
580 * STATE3
581 * STATE4
582 * changed:
583 * KEY
584 * TKEYP (T1)
585 */
586_aesni_dec4:
587 movaps (KEYP), KEY # key
588 mov KEYP, TKEYP
589 pxor KEY, STATE1 # round 0
590 pxor KEY, STATE2
591 pxor KEY, STATE3
592 pxor KEY, STATE4
593 add $0x30, TKEYP
594 cmp $24, KLEN
595 jb .L4dec128
596 lea 0x20(TKEYP), TKEYP
597 je .L4dec192
598 add $0x20, TKEYP
599 movaps -0x60(TKEYP), KEY
600 # aesdec KEY, STATE1
601 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
602 # aesdec KEY, STATE2
603 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
604 # aesdec KEY, STATE3
605 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
606 # aesdec KEY, STATE4
607 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
608 movaps -0x50(TKEYP), KEY
609 # aesdec KEY, STATE1
610 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
611 # aesdec KEY, STATE2
612 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
613 # aesdec KEY, STATE3
614 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
615 # aesdec KEY, STATE4
616 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
617.align 4
618.L4dec192:
619 movaps -0x40(TKEYP), KEY
620 # aesdec KEY, STATE1
621 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
622 # aesdec KEY, STATE2
623 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
624 # aesdec KEY, STATE3
625 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
626 # aesdec KEY, STATE4
627 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
628 movaps -0x30(TKEYP), KEY
629 # aesdec KEY, STATE1
630 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
631 # aesdec KEY, STATE2
632 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
633 # aesdec KEY, STATE3
634 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
635 # aesdec KEY, STATE4
636 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
637.align 4
638.L4dec128:
639 movaps -0x20(TKEYP), KEY
640 # aesdec KEY, STATE1
641 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
642 # aesdec KEY, STATE2
643 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
644 # aesdec KEY, STATE3
645 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
646 # aesdec KEY, STATE4
647 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
648 movaps -0x10(TKEYP), KEY
649 # aesdec KEY, STATE1
650 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
651 # aesdec KEY, STATE2
652 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
653 # aesdec KEY, STATE3
654 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
655 # aesdec KEY, STATE4
656 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
657 movaps (TKEYP), KEY
658 # aesdec KEY, STATE1
659 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
660 # aesdec KEY, STATE2
661 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
662 # aesdec KEY, STATE3
663 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
664 # aesdec KEY, STATE4
665 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
666 movaps 0x10(TKEYP), KEY
667 # aesdec KEY, STATE1
668 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
669 # aesdec KEY, STATE2
670 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
671 # aesdec KEY, STATE3
672 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
673 # aesdec KEY, STATE4
674 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
675 movaps 0x20(TKEYP), KEY
676 # aesdec KEY, STATE1
677 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
678 # aesdec KEY, STATE2
679 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
680 # aesdec KEY, STATE3
681 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
682 # aesdec KEY, STATE4
683 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
684 movaps 0x30(TKEYP), KEY
685 # aesdec KEY, STATE1
686 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
687 # aesdec KEY, STATE2
688 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
689 # aesdec KEY, STATE3
690 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
691 # aesdec KEY, STATE4
692 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
693 movaps 0x40(TKEYP), KEY
694 # aesdec KEY, STATE1
695 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
696 # aesdec KEY, STATE2
697 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
698 # aesdec KEY, STATE3
699 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
700 # aesdec KEY, STATE4
701 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
702 movaps 0x50(TKEYP), KEY
703 # aesdec KEY, STATE1
704 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
705 # aesdec KEY, STATE2
706 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
707 # aesdec KEY, STATE3
708 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
709 # aesdec KEY, STATE4
710 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
711 movaps 0x60(TKEYP), KEY
712 # aesdec KEY, STATE1
713 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
714 # aesdec KEY, STATE2
715 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
716 # aesdec KEY, STATE3
717 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
718 # aesdec KEY, STATE4
719 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
720 movaps 0x70(TKEYP), KEY
721 # aesdeclast KEY, STATE1 # last round
722 .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
723 # aesdeclast KEY, STATE2
724 .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
725 # aesdeclast KEY, STATE3
726 .byte 0x66, 0x0f, 0x38, 0xdf, 0xea
727 # aesdeclast KEY, STATE4
728 .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
729 ret
730
731/*
732 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
733 * size_t len)
734 */
735ENTRY(aesni_ecb_enc)
736 test LEN, LEN # check length
737 jz .Lecb_enc_ret
738 mov 480(KEYP), KLEN
739 cmp $16, LEN
740 jb .Lecb_enc_ret
741 cmp $64, LEN
742 jb .Lecb_enc_loop1
743.align 4
744.Lecb_enc_loop4:
745 movups (INP), STATE1
746 movups 0x10(INP), STATE2
747 movups 0x20(INP), STATE3
748 movups 0x30(INP), STATE4
749 call _aesni_enc4
750 movups STATE1, (OUTP)
751 movups STATE2, 0x10(OUTP)
752 movups STATE3, 0x20(OUTP)
753 movups STATE4, 0x30(OUTP)
754 sub $64, LEN
755 add $64, INP
756 add $64, OUTP
757 cmp $64, LEN
758 jge .Lecb_enc_loop4
759 cmp $16, LEN
760 jb .Lecb_enc_ret
761.align 4
762.Lecb_enc_loop1:
763 movups (INP), STATE1
764 call _aesni_enc1
765 movups STATE1, (OUTP)
766 sub $16, LEN
767 add $16, INP
768 add $16, OUTP
769 cmp $16, LEN
770 jge .Lecb_enc_loop1
771.Lecb_enc_ret:
772 ret
773
774/*
775 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
776 * size_t len);
777 */
778ENTRY(aesni_ecb_dec)
779 test LEN, LEN
780 jz .Lecb_dec_ret
781 mov 480(KEYP), KLEN
782 add $240, KEYP
783 cmp $16, LEN
784 jb .Lecb_dec_ret
785 cmp $64, LEN
786 jb .Lecb_dec_loop1
787.align 4
788.Lecb_dec_loop4:
789 movups (INP), STATE1
790 movups 0x10(INP), STATE2
791 movups 0x20(INP), STATE3
792 movups 0x30(INP), STATE4
793 call _aesni_dec4
794 movups STATE1, (OUTP)
795 movups STATE2, 0x10(OUTP)
796 movups STATE3, 0x20(OUTP)
797 movups STATE4, 0x30(OUTP)
798 sub $64, LEN
799 add $64, INP
800 add $64, OUTP
801 cmp $64, LEN
802 jge .Lecb_dec_loop4
803 cmp $16, LEN
804 jb .Lecb_dec_ret
805.align 4
806.Lecb_dec_loop1:
807 movups (INP), STATE1
808 call _aesni_dec1
809 movups STATE1, (OUTP)
810 sub $16, LEN
811 add $16, INP
812 add $16, OUTP
813 cmp $16, LEN
814 jge .Lecb_dec_loop1
815.Lecb_dec_ret:
816 ret
817
818/*
819 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
820 * size_t len, u8 *iv)
821 */
822ENTRY(aesni_cbc_enc)
823 cmp $16, LEN
824 jb .Lcbc_enc_ret
825 mov 480(KEYP), KLEN
826 movups (IVP), STATE # load iv as initial state
827.align 4
828.Lcbc_enc_loop:
829 movups (INP), IN # load input
830 pxor IN, STATE
831 call _aesni_enc1
832 movups STATE, (OUTP) # store output
833 sub $16, LEN
834 add $16, INP
835 add $16, OUTP
836 cmp $16, LEN
837 jge .Lcbc_enc_loop
838 movups STATE, (IVP)
839.Lcbc_enc_ret:
840 ret
841
842/*
843 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
844 * size_t len, u8 *iv)
845 */
846ENTRY(aesni_cbc_dec)
847 cmp $16, LEN
848 jb .Lcbc_dec_ret
849 mov 480(KEYP), KLEN
850 add $240, KEYP
851 movups (IVP), IV
852 cmp $64, LEN
853 jb .Lcbc_dec_loop1
854.align 4
855.Lcbc_dec_loop4:
856 movups (INP), IN1
857 movaps IN1, STATE1
858 movups 0x10(INP), IN2
859 movaps IN2, STATE2
860 movups 0x20(INP), IN3
861 movaps IN3, STATE3
862 movups 0x30(INP), IN4
863 movaps IN4, STATE4
864 call _aesni_dec4
865 pxor IV, STATE1
866 pxor IN1, STATE2
867 pxor IN2, STATE3
868 pxor IN3, STATE4
869 movaps IN4, IV
870 movups STATE1, (OUTP)
871 movups STATE2, 0x10(OUTP)
872 movups STATE3, 0x20(OUTP)
873 movups STATE4, 0x30(OUTP)
874 sub $64, LEN
875 add $64, INP
876 add $64, OUTP
877 cmp $64, LEN
878 jge .Lcbc_dec_loop4
879 cmp $16, LEN
880 jb .Lcbc_dec_ret
881.align 4
882.Lcbc_dec_loop1:
883 movups (INP), IN
884 movaps IN, STATE
885 call _aesni_dec1
886 pxor IV, STATE
887 movups STATE, (OUTP)
888 movaps IN, IV
889 sub $16, LEN
890 add $16, INP
891 add $16, OUTP
892 cmp $16, LEN
893 jge .Lcbc_dec_loop1
894 movups IV, (IVP)
895.Lcbc_dec_ret:
896 ret