diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 13:25:58 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 13:25:58 -0500 |
commit | 27d189c02ba25851973c8582e419c0bded9f7e5b (patch) | |
tree | be142d664bc4e3cec7ab2878a243343f46e897ee /arch/x86/crypto | |
parent | a1703154200c390ab03c10224c586e815d3e31e8 (diff) | |
parent | 55db8387a5e8d07407f0b7c6b2526417a2bc6243 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (46 commits)
hwrng: via_rng - Fix memory scribbling on some CPUs
crypto: padlock - Move padlock.h into include/crypto
hwrng: via_rng - Fix asm constraints
crypto: n2 - use __devexit not __exit in n2_unregister_algs
crypto: mark crypto workqueues CPU_INTENSIVE
crypto: mv_cesa - dont return PTR_ERR() of wrong pointer
crypto: ripemd - Set module author and update email address
crypto: omap-sham - backlog handling fix
crypto: gf128mul - Remove experimental tag
crypto: af_alg - fix af_alg memory_allocated data type
crypto: aesni-intel - Fixed build with binutils 2.16
crypto: af_alg - Make sure sk_security is initialized on accept()ed sockets
net: Add missing lockdep class names for af_alg
include: Install linux/if_alg.h for user-space crypto API
crypto: omap-aes - checkpatch --file warning fixes
crypto: omap-aes - initialize aes module once per request
crypto: omap-aes - unnecessary code removed
crypto: omap-aes - error handling implementation improved
crypto: omap-aes - redundant locking is removed
crypto: omap-aes - DMA initialization fixes for OMAP off mode
...
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- | arch/x86/crypto/aesni-intel_asm.S | 1832 | ||||
-rw-r--r-- | arch/x86/crypto/aesni-intel_glue.c | 540 |
2 files changed, 2335 insertions, 37 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index ff16756a51c1..8fe2a4966b7a 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -9,6 +9,20 @@ | |||
9 | * Vinodh Gopal <vinodh.gopal@intel.com> | 9 | * Vinodh Gopal <vinodh.gopal@intel.com> |
10 | * Kahraman Akdemir | 10 | * Kahraman Akdemir |
11 | * | 11 | * |
12 | * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD | ||
13 | * interface for 64-bit kernels. | ||
14 | * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com) | ||
15 | * Aidan O'Mahony (aidan.o.mahony@intel.com) | ||
16 | * Adrian Hoban <adrian.hoban@intel.com> | ||
17 | * James Guilford (james.guilford@intel.com) | ||
18 | * Gabriele Paoloni <gabriele.paoloni@intel.com> | ||
19 | * Tadeusz Struk (tadeusz.struk@intel.com) | ||
20 | * Wajdi Feghali (wajdi.k.feghali@intel.com) | ||
21 | * Copyright (c) 2010, Intel Corporation. | ||
22 | * | ||
23 | * Ported x86_64 version to x86: | ||
24 | * Author: Mathias Krause <minipli@googlemail.com> | ||
25 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | 26 | * This program is free software; you can redistribute it and/or modify |
13 | * it under the terms of the GNU General Public License as published by | 27 | * it under the terms of the GNU General Public License as published by |
14 | * the Free Software Foundation; either version 2 of the License, or | 28 | * the Free Software Foundation; either version 2 of the License, or |
@@ -18,8 +32,62 @@ | |||
18 | #include <linux/linkage.h> | 32 | #include <linux/linkage.h> |
19 | #include <asm/inst.h> | 33 | #include <asm/inst.h> |
20 | 34 | ||
35 | #ifdef __x86_64__ | ||
36 | .data | ||
37 | POLY: .octa 0xC2000000000000000000000000000001 | ||
38 | TWOONE: .octa 0x00000001000000000000000000000001 | ||
39 | |||
40 | # The order of the constants below must not change. | ||
41 | # More specifically, ALL_F must directly follow SHIFT_MASK, | ||
42 | # and ZERO must directly follow ALL_F. | ||
43 | |||
44 | SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F | ||
45 | MASK1: .octa 0x0000000000000000ffffffffffffffff | ||
46 | MASK2: .octa 0xffffffffffffffff0000000000000000 | ||
47 | SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 | ||
48 | ALL_F: .octa 0xffffffffffffffffffffffffffffffff | ||
49 | ZERO: .octa 0x00000000000000000000000000000000 | ||
50 | ONE: .octa 0x00000000000000000000000000000001 | ||
51 | F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 # NOTE(review): only 31 hex digits, top nibble is implicitly 0 - confirm intended | ||
52 | dec: .octa 0x1 | ||
53 | enc: .octa 0x2 | ||
54 | |||
55 | |||
21 | .text | 56 | .text |
22 | 57 | ||
58 | |||
59 | #define STACK_OFFSET 8*3 // presumably 3 pushed 8-byte registers between %r14 and the return address - TODO confirm against the function prologue | ||
60 | #define HashKey 16*0 // store HashKey <<1 mod poly here | ||
61 | #define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here | ||
62 | #define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here | ||
63 | #define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here | ||
64 | #define HashKey_k 16*4 // store XOR of High 64 bits and Low 64 | ||
65 | // bits of HashKey <<1 mod poly here | ||
66 | //(for Karatsuba purposes) | ||
67 | #define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64 | ||
68 | // bits of HashKey^2 <<1 mod poly here | ||
69 | // (for Karatsuba purposes) | ||
70 | #define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64 | ||
71 | // bits of HashKey^3 <<1 mod poly here | ||
72 | // (for Karatsuba purposes) | ||
73 | #define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64 | ||
74 | // bits of HashKey^4 <<1 mod poly here | ||
75 | // (for Karatsuba purposes) | ||
76 | #define VARIABLE_OFFSET 16*8 | ||
77 | |||
78 | #define arg1 rdi | ||
79 | #define arg2 rsi | ||
80 | #define arg3 rdx | ||
81 | #define arg4 rcx | ||
82 | #define arg5 r8 | ||
83 | #define arg6 r9 | ||
84 | #define arg7 STACK_OFFSET+8(%r14) // arg7..arg10 are memory operands addressed relative to %r14, not registers | ||
85 | #define arg8 STACK_OFFSET+16(%r14) | ||
86 | #define arg9 STACK_OFFSET+24(%r14) | ||
87 | #define arg10 STACK_OFFSET+32(%r14) | ||
88 | #endif | ||
89 | |||
90 | |||
23 | #define STATE1 %xmm0 | 91 | #define STATE1 %xmm0 |
24 | #define STATE2 %xmm4 | 92 | #define STATE2 %xmm4 |
25 | #define STATE3 %xmm5 | 93 | #define STATE3 %xmm5 |
@@ -32,12 +100,16 @@ | |||
32 | #define IN IN1 | 100 | #define IN IN1 |
33 | #define KEY %xmm2 | 101 | #define KEY %xmm2 |
34 | #define IV %xmm3 | 102 | #define IV %xmm3 |
103 | |||
35 | #define BSWAP_MASK %xmm10 | 104 | #define BSWAP_MASK %xmm10 |
36 | #define CTR %xmm11 | 105 | #define CTR %xmm11 |
37 | #define INC %xmm12 | 106 | #define INC %xmm12 |
38 | 107 | ||
108 | #ifdef __x86_64__ | ||
109 | #define AREG %rax | ||
39 | #define KEYP %rdi | 110 | #define KEYP %rdi |
40 | #define OUTP %rsi | 111 | #define OUTP %rsi |
112 | #define UKEYP OUTP | ||
41 | #define INP %rdx | 113 | #define INP %rdx |
42 | #define LEN %rcx | 114 | #define LEN %rcx |
43 | #define IVP %r8 | 115 | #define IVP %r8 |
@@ -46,6 +118,1588 @@ | |||
46 | #define TKEYP T1 | 118 | #define TKEYP T1 |
47 | #define T2 %r11 | 119 | #define T2 %r11 |
48 | #define TCTR_LOW T2 | 120 | #define TCTR_LOW T2 |
121 | #else | ||
122 | #define AREG %eax | ||
123 | #define KEYP %edi | ||
124 | #define OUTP AREG | ||
125 | #define UKEYP OUTP | ||
126 | #define INP %edx | ||
127 | #define LEN %esi | ||
128 | #define IVP %ebp | ||
129 | #define KLEN %ebx | ||
130 | #define T1 %ecx | ||
131 | #define TKEYP T1 | ||
132 | #endif | ||
133 | |||
134 | |||
135 | #ifdef __x86_64__ | ||
136 | /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) | ||
137 | * The product is left in GH; HK is only read. | ||
138 | * TMP1-TMP5 are scratch registers and are overwritten. | ||
139 | * Input: A and B (128-bits each, bit-reflected) | ||
140 | * Output: C = A*B*x mod poly, (i.e. >>1 ) | ||
141 | * To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input | ||
142 | * GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. | ||
143 | * | ||
144 | */ | ||
145 | .macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5 | ||
146 | movdqa \GH, \TMP1 | ||
147 | pshufd $78, \GH, \TMP2 | ||
148 | pshufd $78, \HK, \TMP3 | ||
149 | pxor \GH, \TMP2 # TMP2 = a1+a0 | ||
150 | pxor \HK, \TMP3 # TMP3 = b1+b0 | ||
151 | PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1 | ||
152 | PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0 | ||
153 | PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) | ||
154 | pxor \GH, \TMP2 | ||
155 | pxor \TMP1, \TMP2 # TMP2 = (a0*b1)+(a1*b0) | ||
156 | movdqa \TMP2, \TMP3 | ||
157 | pslldq $8, \TMP3 # left shift TMP3 2 DWs | ||
158 | psrldq $8, \TMP2 # right shift TMP2 2 DWs | ||
159 | pxor \TMP3, \GH | ||
160 | pxor \TMP2, \TMP1 # TMP1:GH holds the result of GH*HK | ||
161 | |||
162 | # first phase of the reduction | ||
163 | |||
164 | movdqa \GH, \TMP2 | ||
165 | movdqa \GH, \TMP3 | ||
166 | movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4 | ||
167 | # in order to perform | ||
168 | # independent shifts | ||
169 | pslld $31, \TMP2 # packed left shift <<31 | ||
170 | pslld $30, \TMP3 # packed left shift <<30 | ||
171 | pslld $25, \TMP4 # packed left shift <<25 | ||
172 | pxor \TMP3, \TMP2 # xor the shifted versions | ||
173 | pxor \TMP4, \TMP2 | ||
174 | movdqa \TMP2, \TMP5 | ||
175 | psrldq $4, \TMP5 # right shift TMP5 1 DW | ||
176 | pslldq $12, \TMP2 # left shift TMP2 3 DWs | ||
177 | pxor \TMP2, \GH | ||
178 | |||
179 | # second phase of the reduction | ||
180 | |||
181 | movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4 | ||
182 | # in order to perform | ||
183 | # independent shifts | ||
184 | movdqa \GH,\TMP3 | ||
185 | movdqa \GH,\TMP4 | ||
186 | psrld $1,\TMP2 # packed right shift >>1 | ||
187 | psrld $2,\TMP3 # packed right shift >>2 | ||
188 | psrld $7,\TMP4 # packed right shift >>7 | ||
189 | pxor \TMP3,\TMP2 # xor the shifted versions | ||
190 | pxor \TMP4,\TMP2 | ||
191 | pxor \TMP5, \TMP2 # fold in the DW saved from the first phase | ||
192 | pxor \TMP2, \GH | ||
193 | pxor \TMP1, \GH # result is in GH | ||
194 | .endm | ||
195 | |||
196 | /* | ||
197 | * if a = number of total plaintext bytes | ||
198 | * b = floor(a/16) | ||
199 | * num_initial_blocks = b mod 4 | ||
200 | * decrypt the initial num_initial_blocks blocks and apply ghash on | ||
201 | * the ciphertext | ||
202 | * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers | ||
203 | * are clobbered (%xmm14 is also overwritten with SHUF_MASK) | ||
204 | * %arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified | ||
205 | */ | ||
206 | |||
207 | |||
208 | .macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ | ||
209 | XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation | ||
210 | mov arg7, %r10 # %r10 = AAD | ||
211 | mov arg8, %r12 # %r12 = aadLen | ||
212 | mov %r12, %r11 # keep a copy of aadLen in %r11 | ||
213 | pxor %xmm\i, %xmm\i | ||
214 | _get_AAD_loop\num_initial_blocks\operation: | ||
215 | movd (%r10), \TMP1 # load the next 4 AAD bytes | ||
216 | pslldq $12, \TMP1 # move them to the top DW | ||
217 | psrldq $4, %xmm\i # make room by shifting the accumulator down 1 DW | ||
218 | pxor \TMP1, %xmm\i | ||
219 | add $4, %r10 | ||
220 | sub $4, %r12 # NOTE(review): the loop only exits when this reaches exactly 0, so aadLen must be a nonzero multiple of 4 | ||
221 | jne _get_AAD_loop\num_initial_blocks\operation | ||
222 | cmp $16, %r11 | ||
223 | je _get_AAD_loop2_done\num_initial_blocks\operation | ||
224 | mov $16, %r12 | ||
225 | _get_AAD_loop2\num_initial_blocks\operation: | ||
226 | psrldq $4, %xmm\i # shift down 1 DW per iteration until %r12 equals aadLen | ||
227 | sub $4, %r12 | ||
228 | cmp %r11, %r12 | ||
229 | jne _get_AAD_loop2\num_initial_blocks\operation | ||
230 | _get_AAD_loop2_done\num_initial_blocks\operation: | ||
231 | movdqa SHUF_MASK(%rip), %xmm14 | ||
232 | PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data | ||
233 | |||
234 | xor %r11, %r11 # initialise the data pointer offset as zero | ||
235 | |||
236 | # start AES for num_initial_blocks blocks | ||
237 | |||
238 | mov %arg5, %rax # %rax = pointer to Y0 (arg5) | ||
239 | movdqu (%rax), \XMM0 # XMM0 = Y0 | ||
240 | movdqa SHUF_MASK(%rip), %xmm14 | ||
241 | PSHUFB_XMM %xmm14, \XMM0 | ||
242 | |||
243 | .if (\i == 5) || (\i == 6) || (\i == 7) | ||
244 | .irpc index, \i_seq | ||
245 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
246 | movdqa \XMM0, %xmm\index | ||
247 | movdqa SHUF_MASK(%rip), %xmm14 | ||
248 | PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap | ||
249 | |||
250 | .endr | ||
251 | .irpc index, \i_seq | ||
252 | pxor 16*0(%arg1), %xmm\index | ||
253 | .endr | ||
254 | .irpc index, \i_seq | ||
255 | movaps 0x10(%rdi), \TMP1 | ||
256 | AESENC \TMP1, %xmm\index # Round 1 | ||
257 | .endr | ||
258 | .irpc index, \i_seq | ||
259 | movaps 0x20(%arg1), \TMP1 | ||
260 | AESENC \TMP1, %xmm\index # Round 2 | ||
261 | .endr | ||
262 | .irpc index, \i_seq | ||
263 | movaps 0x30(%arg1), \TMP1 | ||
264 | AESENC \TMP1, %xmm\index # Round 3 | ||
265 | .endr | ||
266 | .irpc index, \i_seq | ||
267 | movaps 0x40(%arg1), \TMP1 | ||
268 | AESENC \TMP1, %xmm\index # Round 4 | ||
269 | .endr | ||
270 | .irpc index, \i_seq | ||
271 | movaps 0x50(%arg1), \TMP1 | ||
272 | AESENC \TMP1, %xmm\index # Round 5 | ||
273 | .endr | ||
274 | .irpc index, \i_seq | ||
275 | movaps 0x60(%arg1), \TMP1 | ||
276 | AESENC \TMP1, %xmm\index # Round 6 | ||
277 | .endr | ||
278 | .irpc index, \i_seq | ||
279 | movaps 0x70(%arg1), \TMP1 | ||
280 | AESENC \TMP1, %xmm\index # Round 7 | ||
281 | .endr | ||
282 | .irpc index, \i_seq | ||
283 | movaps 0x80(%arg1), \TMP1 | ||
284 | AESENC \TMP1, %xmm\index # Round 8 | ||
285 | .endr | ||
286 | .irpc index, \i_seq | ||
287 | movaps 0x90(%arg1), \TMP1 | ||
288 | AESENC \TMP1, %xmm\index # Round 9 | ||
289 | .endr | ||
290 | .irpc index, \i_seq | ||
291 | movaps 0xa0(%arg1), \TMP1 | ||
292 | AESENCLAST \TMP1, %xmm\index # Round 10 | ||
293 | .endr | ||
294 | .irpc index, \i_seq | ||
295 | movdqu (%arg3 , %r11, 1), \TMP1 | ||
296 | pxor \TMP1, %xmm\index | ||
297 | movdqu %xmm\index, (%arg2 , %r11, 1) | ||
298 | # write back plaintext/ciphertext for num_initial_blocks | ||
299 | add $16, %r11 | ||
300 | |||
301 | movdqa \TMP1, %xmm\index # GHASH runs over the input block just read, not the output | ||
302 | movdqa SHUF_MASK(%rip), %xmm14 | ||
303 | PSHUFB_XMM %xmm14, %xmm\index | ||
304 | |||
305 | # prepare plaintext/ciphertext for GHASH computation | ||
306 | .endr | ||
307 | .endif | ||
308 | GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
309 | # apply GHASH on num_initial_blocks blocks | ||
310 | |||
311 | .if \i == 5 | ||
312 | pxor %xmm5, %xmm6 | ||
313 | GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
314 | pxor %xmm6, %xmm7 | ||
315 | GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
316 | pxor %xmm7, %xmm8 | ||
317 | GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
318 | .elseif \i == 6 | ||
319 | pxor %xmm6, %xmm7 | ||
320 | GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
321 | pxor %xmm7, %xmm8 | ||
322 | GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
323 | .elseif \i == 7 | ||
324 | pxor %xmm7, %xmm8 | ||
325 | GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
326 | .endif | ||
327 | cmp $64, %r13 # skip the 4-block setup below when %r13 < 64 | ||
328 | jl _initial_blocks_done\num_initial_blocks\operation | ||
329 | # no need for precomputed values | ||
330 | /* | ||
331 | * | ||
332 | * Precomputations for HashKey parallel with encryption of first 4 blocks. | ||
333 | * HashKey_i_k holds XORed values of the low and high parts of the HashKey_i | ||
334 | */ | ||
335 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
336 | movdqa \XMM0, \XMM1 | ||
337 | movdqa SHUF_MASK(%rip), %xmm14 | ||
338 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | ||
339 | |||
340 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
341 | movdqa \XMM0, \XMM2 | ||
342 | movdqa SHUF_MASK(%rip), %xmm14 | ||
343 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | ||
344 | |||
345 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
346 | movdqa \XMM0, \XMM3 | ||
347 | movdqa SHUF_MASK(%rip), %xmm14 | ||
348 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | ||
349 | |||
350 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
351 | movdqa \XMM0, \XMM4 | ||
352 | movdqa SHUF_MASK(%rip), %xmm14 | ||
353 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | ||
354 | |||
355 | pxor 16*0(%arg1), \XMM1 | ||
356 | pxor 16*0(%arg1), \XMM2 | ||
357 | pxor 16*0(%arg1), \XMM3 | ||
358 | pxor 16*0(%arg1), \XMM4 | ||
359 | movdqa \TMP3, \TMP5 | ||
360 | pshufd $78, \TMP3, \TMP1 | ||
361 | pxor \TMP3, \TMP1 | ||
362 | movdqa \TMP1, HashKey_k(%rsp) | ||
363 | GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 | ||
364 | # TMP5 = HashKey^2<<1 (mod poly) | ||
365 | movdqa \TMP5, HashKey_2(%rsp) | ||
366 | # HashKey_2 = HashKey^2<<1 (mod poly) | ||
367 | pshufd $78, \TMP5, \TMP1 | ||
368 | pxor \TMP5, \TMP1 | ||
369 | movdqa \TMP1, HashKey_2_k(%rsp) | ||
370 | .irpc index, 1234 # do 4 rounds | ||
371 | movaps 0x10*\index(%arg1), \TMP1 | ||
372 | AESENC \TMP1, \XMM1 | ||
373 | AESENC \TMP1, \XMM2 | ||
374 | AESENC \TMP1, \XMM3 | ||
375 | AESENC \TMP1, \XMM4 | ||
376 | .endr | ||
377 | GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 | ||
378 | # TMP5 = HashKey^3<<1 (mod poly) | ||
379 | movdqa \TMP5, HashKey_3(%rsp) | ||
380 | pshufd $78, \TMP5, \TMP1 | ||
381 | pxor \TMP5, \TMP1 | ||
382 | movdqa \TMP1, HashKey_3_k(%rsp) | ||
383 | .irpc index, 56789 # do next 5 rounds | ||
384 | movaps 0x10*\index(%arg1), \TMP1 | ||
385 | AESENC \TMP1, \XMM1 | ||
386 | AESENC \TMP1, \XMM2 | ||
387 | AESENC \TMP1, \XMM3 | ||
388 | AESENC \TMP1, \XMM4 | ||
389 | .endr | ||
390 | GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 | ||
391 | # TMP5 = HashKey^4<<1 (mod poly) | ||
392 | movdqa \TMP5, HashKey_4(%rsp) | ||
393 | pshufd $78, \TMP5, \TMP1 | ||
394 | pxor \TMP5, \TMP1 | ||
395 | movdqa \TMP1, HashKey_4_k(%rsp) | ||
396 | movaps 0xa0(%arg1), \TMP2 | ||
397 | AESENCLAST \TMP2, \XMM1 | ||
398 | AESENCLAST \TMP2, \XMM2 | ||
399 | AESENCLAST \TMP2, \XMM3 | ||
400 | AESENCLAST \TMP2, \XMM4 | ||
401 | movdqu 16*0(%arg3 , %r11 , 1), \TMP1 | ||
402 | pxor \TMP1, \XMM1 | ||
403 | movdqu \XMM1, 16*0(%arg2 , %r11 , 1) | ||
404 | movdqa \TMP1, \XMM1 | ||
405 | movdqu 16*1(%arg3 , %r11 , 1), \TMP1 | ||
406 | pxor \TMP1, \XMM2 | ||
407 | movdqu \XMM2, 16*1(%arg2 , %r11 , 1) | ||
408 | movdqa \TMP1, \XMM2 | ||
409 | movdqu 16*2(%arg3 , %r11 , 1), \TMP1 | ||
410 | pxor \TMP1, \XMM3 | ||
411 | movdqu \XMM3, 16*2(%arg2 , %r11 , 1) | ||
412 | movdqa \TMP1, \XMM3 | ||
413 | movdqu 16*3(%arg3 , %r11 , 1), \TMP1 | ||
414 | pxor \TMP1, \XMM4 | ||
415 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) | ||
416 | movdqa \TMP1, \XMM4 | ||
417 | add $64, %r11 | ||
418 | movdqa SHUF_MASK(%rip), %xmm14 | ||
419 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | ||
420 | pxor \XMMDst, \XMM1 | ||
421 | # combine GHASHed value with the corresponding ciphertext | ||
422 | movdqa SHUF_MASK(%rip), %xmm14 | ||
423 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | ||
424 | movdqa SHUF_MASK(%rip), %xmm14 | ||
425 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | ||
426 | movdqa SHUF_MASK(%rip), %xmm14 | ||
427 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | ||
428 | |||
429 | _initial_blocks_done\num_initial_blocks\operation: | ||
430 | |||
431 | .endm | ||
432 | |||
433 | |||
434 | /* | ||
435 | * if a = number of total plaintext bytes | ||
436 | * b = floor(a/16) | ||
437 | * num_initial_blocks = b mod 4 | ||
438 | * encrypt the initial num_initial_blocks blocks and apply ghash on | ||
439 | * the ciphertext | ||
440 | * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers | ||
441 | * are clobbered (%xmm14 is also overwritten with SHUF_MASK) | ||
442 | * %arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified | ||
443 | */ | ||
444 | |||
445 | |||
446 | .macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ | ||
447 | XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation | ||
448 | mov arg7, %r10 # %r10 = AAD | ||
449 | mov arg8, %r12 # %r12 = aadLen | ||
450 | mov %r12, %r11 # keep a copy of aadLen in %r11 | ||
451 | pxor %xmm\i, %xmm\i | ||
452 | _get_AAD_loop\num_initial_blocks\operation: | ||
453 | movd (%r10), \TMP1 # load the next 4 AAD bytes | ||
454 | pslldq $12, \TMP1 # move them to the top DW | ||
455 | psrldq $4, %xmm\i # make room by shifting the accumulator down 1 DW | ||
456 | pxor \TMP1, %xmm\i | ||
457 | add $4, %r10 | ||
458 | sub $4, %r12 # NOTE(review): the loop only exits when this reaches exactly 0, so aadLen must be a nonzero multiple of 4 | ||
459 | jne _get_AAD_loop\num_initial_blocks\operation | ||
460 | cmp $16, %r11 | ||
461 | je _get_AAD_loop2_done\num_initial_blocks\operation | ||
462 | mov $16, %r12 | ||
463 | _get_AAD_loop2\num_initial_blocks\operation: | ||
464 | psrldq $4, %xmm\i # shift down 1 DW per iteration until %r12 equals aadLen | ||
465 | sub $4, %r12 | ||
466 | cmp %r11, %r12 | ||
467 | jne _get_AAD_loop2\num_initial_blocks\operation | ||
468 | _get_AAD_loop2_done\num_initial_blocks\operation: | ||
469 | movdqa SHUF_MASK(%rip), %xmm14 | ||
470 | PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data | ||
471 | |||
472 | xor %r11, %r11 # initialise the data pointer offset as zero | ||
473 | |||
474 | # start AES for num_initial_blocks blocks | ||
475 | |||
476 | mov %arg5, %rax # %rax = pointer to Y0 (arg5) | ||
477 | movdqu (%rax), \XMM0 # XMM0 = Y0 | ||
478 | movdqa SHUF_MASK(%rip), %xmm14 | ||
479 | PSHUFB_XMM %xmm14, \XMM0 | ||
480 | |||
481 | .if (\i == 5) || (\i == 6) || (\i == 7) | ||
482 | .irpc index, \i_seq | ||
483 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
484 | movdqa \XMM0, %xmm\index | ||
485 | movdqa SHUF_MASK(%rip), %xmm14 | ||
486 | PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap | ||
487 | |||
488 | .endr | ||
489 | .irpc index, \i_seq | ||
490 | pxor 16*0(%arg1), %xmm\index | ||
491 | .endr | ||
492 | .irpc index, \i_seq | ||
493 | movaps 0x10(%rdi), \TMP1 | ||
494 | AESENC \TMP1, %xmm\index # Round 1 | ||
495 | .endr | ||
496 | .irpc index, \i_seq | ||
497 | movaps 0x20(%arg1), \TMP1 | ||
498 | AESENC \TMP1, %xmm\index # Round 2 | ||
499 | .endr | ||
500 | .irpc index, \i_seq | ||
501 | movaps 0x30(%arg1), \TMP1 | ||
502 | AESENC \TMP1, %xmm\index # Round 3 | ||
503 | .endr | ||
504 | .irpc index, \i_seq | ||
505 | movaps 0x40(%arg1), \TMP1 | ||
506 | AESENC \TMP1, %xmm\index # Round 4 | ||
507 | .endr | ||
508 | .irpc index, \i_seq | ||
509 | movaps 0x50(%arg1), \TMP1 | ||
510 | AESENC \TMP1, %xmm\index # Round 5 | ||
511 | .endr | ||
512 | .irpc index, \i_seq | ||
513 | movaps 0x60(%arg1), \TMP1 | ||
514 | AESENC \TMP1, %xmm\index # Round 6 | ||
515 | .endr | ||
516 | .irpc index, \i_seq | ||
517 | movaps 0x70(%arg1), \TMP1 | ||
518 | AESENC \TMP1, %xmm\index # Round 7 | ||
519 | .endr | ||
520 | .irpc index, \i_seq | ||
521 | movaps 0x80(%arg1), \TMP1 | ||
522 | AESENC \TMP1, %xmm\index # Round 8 | ||
523 | .endr | ||
524 | .irpc index, \i_seq | ||
525 | movaps 0x90(%arg1), \TMP1 | ||
526 | AESENC \TMP1, %xmm\index # Round 9 | ||
527 | .endr | ||
528 | .irpc index, \i_seq | ||
529 | movaps 0xa0(%arg1), \TMP1 | ||
530 | AESENCLAST \TMP1, %xmm\index # Round 10 | ||
531 | .endr | ||
532 | .irpc index, \i_seq | ||
533 | movdqu (%arg3 , %r11, 1), \TMP1 | ||
534 | pxor \TMP1, %xmm\index | ||
535 | movdqu %xmm\index, (%arg2 , %r11, 1) | ||
536 | # write back plaintext/ciphertext for num_initial_blocks | ||
537 | add $16, %r11 | ||
538 | |||
539 | movdqa SHUF_MASK(%rip), %xmm14 | ||
540 | PSHUFB_XMM %xmm14, %xmm\index # GHASH runs over the output block just written | ||
541 | |||
542 | # prepare plaintext/ciphertext for GHASH computation | ||
543 | .endr | ||
544 | .endif | ||
545 | GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
546 | # apply GHASH on num_initial_blocks blocks | ||
547 | |||
548 | .if \i == 5 | ||
549 | pxor %xmm5, %xmm6 | ||
550 | GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
551 | pxor %xmm6, %xmm7 | ||
552 | GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
553 | pxor %xmm7, %xmm8 | ||
554 | GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
555 | .elseif \i == 6 | ||
556 | pxor %xmm6, %xmm7 | ||
557 | GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
558 | pxor %xmm7, %xmm8 | ||
559 | GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
560 | .elseif \i == 7 | ||
561 | pxor %xmm7, %xmm8 | ||
562 | GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
563 | .endif | ||
564 | cmp $64, %r13 # skip the 4-block setup below when %r13 < 64 | ||
565 | jl _initial_blocks_done\num_initial_blocks\operation | ||
566 | # no need for precomputed values | ||
567 | /* | ||
568 | * | ||
569 | * Precomputations for HashKey parallel with encryption of first 4 blocks. | ||
570 | * HashKey_i_k holds XORed values of the low and high parts of the HashKey_i | ||
571 | */ | ||
572 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
573 | movdqa \XMM0, \XMM1 | ||
574 | movdqa SHUF_MASK(%rip), %xmm14 | ||
575 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | ||
576 | |||
577 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
578 | movdqa \XMM0, \XMM2 | ||
579 | movdqa SHUF_MASK(%rip), %xmm14 | ||
580 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | ||
581 | |||
582 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
583 | movdqa \XMM0, \XMM3 | ||
584 | movdqa SHUF_MASK(%rip), %xmm14 | ||
585 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | ||
586 | |||
587 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
588 | movdqa \XMM0, \XMM4 | ||
589 | movdqa SHUF_MASK(%rip), %xmm14 | ||
590 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | ||
591 | |||
592 | pxor 16*0(%arg1), \XMM1 | ||
593 | pxor 16*0(%arg1), \XMM2 | ||
594 | pxor 16*0(%arg1), \XMM3 | ||
595 | pxor 16*0(%arg1), \XMM4 | ||
596 | movdqa \TMP3, \TMP5 | ||
597 | pshufd $78, \TMP3, \TMP1 | ||
598 | pxor \TMP3, \TMP1 | ||
599 | movdqa \TMP1, HashKey_k(%rsp) | ||
600 | GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 | ||
601 | # TMP5 = HashKey^2<<1 (mod poly) | ||
602 | movdqa \TMP5, HashKey_2(%rsp) | ||
603 | # HashKey_2 = HashKey^2<<1 (mod poly) | ||
604 | pshufd $78, \TMP5, \TMP1 | ||
605 | pxor \TMP5, \TMP1 | ||
606 | movdqa \TMP1, HashKey_2_k(%rsp) | ||
607 | .irpc index, 1234 # do 4 rounds | ||
608 | movaps 0x10*\index(%arg1), \TMP1 | ||
609 | AESENC \TMP1, \XMM1 | ||
610 | AESENC \TMP1, \XMM2 | ||
611 | AESENC \TMP1, \XMM3 | ||
612 | AESENC \TMP1, \XMM4 | ||
613 | .endr | ||
614 | GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 | ||
615 | # TMP5 = HashKey^3<<1 (mod poly) | ||
616 | movdqa \TMP5, HashKey_3(%rsp) | ||
617 | pshufd $78, \TMP5, \TMP1 | ||
618 | pxor \TMP5, \TMP1 | ||
619 | movdqa \TMP1, HashKey_3_k(%rsp) | ||
620 | .irpc index, 56789 # do next 5 rounds | ||
621 | movaps 0x10*\index(%arg1), \TMP1 | ||
622 | AESENC \TMP1, \XMM1 | ||
623 | AESENC \TMP1, \XMM2 | ||
624 | AESENC \TMP1, \XMM3 | ||
625 | AESENC \TMP1, \XMM4 | ||
626 | .endr | ||
627 | GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 | ||
628 | # TMP5 = HashKey^4<<1 (mod poly) | ||
629 | movdqa \TMP5, HashKey_4(%rsp) | ||
630 | pshufd $78, \TMP5, \TMP1 | ||
631 | pxor \TMP5, \TMP1 | ||
632 | movdqa \TMP1, HashKey_4_k(%rsp) | ||
633 | movaps 0xa0(%arg1), \TMP2 | ||
634 | AESENCLAST \TMP2, \XMM1 | ||
635 | AESENCLAST \TMP2, \XMM2 | ||
636 | AESENCLAST \TMP2, \XMM3 | ||
637 | AESENCLAST \TMP2, \XMM4 | ||
638 | movdqu 16*0(%arg3 , %r11 , 1), \TMP1 | ||
639 | pxor \TMP1, \XMM1 | ||
640 | movdqu 16*1(%arg3 , %r11 , 1), \TMP1 | ||
641 | pxor \TMP1, \XMM2 | ||
642 | movdqu 16*2(%arg3 , %r11 , 1), \TMP1 | ||
643 | pxor \TMP1, \XMM3 | ||
644 | movdqu 16*3(%arg3 , %r11 , 1), \TMP1 | ||
645 | pxor \TMP1, \XMM4 | ||
646 | movdqu \XMM1, 16*0(%arg2 , %r11 , 1) | ||
647 | movdqu \XMM2, 16*1(%arg2 , %r11 , 1) | ||
648 | movdqu \XMM3, 16*2(%arg2 , %r11 , 1) | ||
649 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) | ||
650 | |||
651 | add $64, %r11 | ||
652 | movdqa SHUF_MASK(%rip), %xmm14 | ||
653 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | ||
654 | pxor \XMMDst, \XMM1 | ||
655 | # combine GHASHed value with the corresponding ciphertext | ||
656 | movdqa SHUF_MASK(%rip), %xmm14 | ||
657 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | ||
658 | movdqa SHUF_MASK(%rip), %xmm14 | ||
659 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | ||
660 | movdqa SHUF_MASK(%rip), %xmm14 | ||
661 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | ||
662 | |||
663 | _initial_blocks_done\num_initial_blocks\operation: | ||
664 | |||
665 | .endm | ||
666 | |||
667 | /* | ||
668 | * encrypt 4 blocks at a time | ||
669 | * ghash the 4 previously encrypted ciphertext blocks | ||
670 | * arg1, %arg2, %arg3 are used as pointers only, not modified | ||
671 | * %r11 is the data offset value | ||
672 | */ | ||
673 | .macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ | ||
674 | TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation | ||
675 | |||
676 | movdqa \XMM1, \XMM5 | ||
677 | movdqa \XMM2, \XMM6 | ||
678 | movdqa \XMM3, \XMM7 | ||
679 | movdqa \XMM4, \XMM8 | ||
680 | |||
681 | movdqa SHUF_MASK(%rip), %xmm15 | ||
682 | # multiply TMP5 * HashKey using karatsuba | ||
683 | |||
684 | movdqa \XMM5, \TMP4 | ||
685 | pshufd $78, \XMM5, \TMP6 | ||
686 | pxor \XMM5, \TMP6 | ||
687 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
688 | movdqa HashKey_4(%rsp), \TMP5 | ||
689 | PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 | ||
690 | movdqa \XMM0, \XMM1 | ||
691 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
692 | movdqa \XMM0, \XMM2 | ||
693 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
694 | movdqa \XMM0, \XMM3 | ||
695 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
696 | movdqa \XMM0, \XMM4 | ||
697 | PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap | ||
698 | PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 | ||
699 | PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap | ||
700 | PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap | ||
701 | PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap | ||
702 | |||
703 | pxor (%arg1), \XMM1 | ||
704 | pxor (%arg1), \XMM2 | ||
705 | pxor (%arg1), \XMM3 | ||
706 | pxor (%arg1), \XMM4 | ||
707 | movdqa HashKey_4_k(%rsp), \TMP5 | ||
708 | PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) | ||
709 | movaps 0x10(%arg1), \TMP1 | ||
710 | AESENC \TMP1, \XMM1 # Round 1 | ||
711 | AESENC \TMP1, \XMM2 | ||
712 | AESENC \TMP1, \XMM3 | ||
713 | AESENC \TMP1, \XMM4 | ||
714 | movaps 0x20(%arg1), \TMP1 | ||
715 | AESENC \TMP1, \XMM1 # Round 2 | ||
716 | AESENC \TMP1, \XMM2 | ||
717 | AESENC \TMP1, \XMM3 | ||
718 | AESENC \TMP1, \XMM4 | ||
719 | movdqa \XMM6, \TMP1 | ||
720 | pshufd $78, \XMM6, \TMP2 | ||
721 | pxor \XMM6, \TMP2 | ||
722 | movdqa HashKey_3(%rsp), \TMP5 | ||
723 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 | ||
724 | movaps 0x30(%arg1), \TMP3 | ||
725 | AESENC \TMP3, \XMM1 # Round 3 | ||
726 | AESENC \TMP3, \XMM2 | ||
727 | AESENC \TMP3, \XMM3 | ||
728 | AESENC \TMP3, \XMM4 | ||
729 | PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 | ||
730 | movaps 0x40(%arg1), \TMP3 | ||
731 | AESENC \TMP3, \XMM1 # Round 4 | ||
732 | AESENC \TMP3, \XMM2 | ||
733 | AESENC \TMP3, \XMM3 | ||
734 | AESENC \TMP3, \XMM4 | ||
735 | movdqa HashKey_3_k(%rsp), \TMP5 | ||
736 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
737 | movaps 0x50(%arg1), \TMP3 | ||
738 | AESENC \TMP3, \XMM1 # Round 5 | ||
739 | AESENC \TMP3, \XMM2 | ||
740 | AESENC \TMP3, \XMM3 | ||
741 | AESENC \TMP3, \XMM4 | ||
742 | pxor \TMP1, \TMP4 | ||
743 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part | ||
744 | pxor \XMM6, \XMM5 | ||
745 | pxor \TMP2, \TMP6 | ||
746 | movdqa \XMM7, \TMP1 | ||
747 | pshufd $78, \XMM7, \TMP2 | ||
748 | pxor \XMM7, \TMP2 | ||
749 | movdqa HashKey_2(%rsp ), \TMP5 | ||
750 | |||
751 | # Multiply TMP5 * HashKey using karatsuba | ||
752 | |||
753 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 | ||
754 | movaps 0x60(%arg1), \TMP3 | ||
755 | AESENC \TMP3, \XMM1 # Round 6 | ||
756 | AESENC \TMP3, \XMM2 | ||
757 | AESENC \TMP3, \XMM3 | ||
758 | AESENC \TMP3, \XMM4 | ||
759 | PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 | ||
760 | movaps 0x70(%arg1), \TMP3 | ||
761 | AESENC \TMP3, \XMM1 # Round 7 | ||
762 | AESENC \TMP3, \XMM2 | ||
763 | AESENC \TMP3, \XMM3 | ||
764 | AESENC \TMP3, \XMM4 | ||
765 | movdqa HashKey_2_k(%rsp), \TMP5 | ||
766 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
767 | movaps 0x80(%arg1), \TMP3 | ||
768 | AESENC \TMP3, \XMM1 # Round 8 | ||
769 | AESENC \TMP3, \XMM2 | ||
770 | AESENC \TMP3, \XMM3 | ||
771 | AESENC \TMP3, \XMM4 | ||
772 | pxor \TMP1, \TMP4 | ||
773 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part | ||
774 | pxor \XMM7, \XMM5 | ||
775 | pxor \TMP2, \TMP6 | ||
776 | |||
777 | # Multiply XMM8 * HashKey | ||
778 | # XMM8 and TMP5 hold the values for the two operands | ||
779 | |||
780 | movdqa \XMM8, \TMP1 | ||
781 | pshufd $78, \XMM8, \TMP2 | ||
782 | pxor \XMM8, \TMP2 | ||
783 | movdqa HashKey(%rsp), \TMP5 | ||
784 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 | ||
785 | movaps 0x90(%arg1), \TMP3 | ||
786 | AESENC \TMP3, \XMM1 # Round 9 | ||
787 | AESENC \TMP3, \XMM2 | ||
788 | AESENC \TMP3, \XMM3 | ||
789 | AESENC \TMP3, \XMM4 | ||
790 | PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 | ||
791 | movaps 0xa0(%arg1), \TMP3 | ||
792 | AESENCLAST \TMP3, \XMM1 # Round 10 | ||
793 | AESENCLAST \TMP3, \XMM2 | ||
794 | AESENCLAST \TMP3, \XMM3 | ||
795 | AESENCLAST \TMP3, \XMM4 | ||
796 | movdqa HashKey_k(%rsp), \TMP5 | ||
797 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
798 | movdqu (%arg3,%r11,1), \TMP3 | ||
799 | pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK | ||
800 | movdqu 16(%arg3,%r11,1), \TMP3 | ||
801 | pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK | ||
802 | movdqu 32(%arg3,%r11,1), \TMP3 | ||
803 | pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK | ||
804 | movdqu 48(%arg3,%r11,1), \TMP3 | ||
805 | pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK | ||
806 | movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer | ||
807 | movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer | ||
808 | movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer | ||
809 | movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer | ||
810 | PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap | ||
811 | PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap | ||
812 | PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap | ||
813 | PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap | ||
814 | |||
815 | pxor \TMP4, \TMP1 | ||
816 | pxor \XMM8, \XMM5 | ||
817 | pxor \TMP6, \TMP2 | ||
818 | pxor \TMP1, \TMP2 | ||
819 | pxor \XMM5, \TMP2 | ||
820 | movdqa \TMP2, \TMP3 | ||
821 | pslldq $8, \TMP3 # left shift TMP3 2 DWs | ||
822 | psrldq $8, \TMP2 # right shift TMP2 2 DWs | ||
823 | pxor \TMP3, \XMM5 | ||
824 | pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 | ||
825 | |||
826 | # first phase of reduction | ||
827 | |||
828 | movdqa \XMM5, \TMP2 | ||
829 | movdqa \XMM5, \TMP3 | ||
830 | movdqa \XMM5, \TMP4 | ||
831 | # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently | ||
832 | pslld $31, \TMP2 # packed right shift << 31 | ||
833 | pslld $30, \TMP3 # packed right shift << 30 | ||
834 | pslld $25, \TMP4 # packed right shift << 25 | ||
835 | pxor \TMP3, \TMP2 # xor the shifted versions | ||
836 | pxor \TMP4, \TMP2 | ||
837 | movdqa \TMP2, \TMP5 | ||
838 | psrldq $4, \TMP5 # right shift T5 1 DW | ||
839 | pslldq $12, \TMP2 # left shift T2 3 DWs | ||
840 | pxor \TMP2, \XMM5 | ||
841 | |||
842 | # second phase of reduction | ||
843 | |||
844 | movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 | ||
845 | movdqa \XMM5,\TMP3 | ||
846 | movdqa \XMM5,\TMP4 | ||
847 | psrld $1, \TMP2 # packed left shift >>1 | ||
848 | psrld $2, \TMP3 # packed left shift >>2 | ||
849 | psrld $7, \TMP4 # packed left shift >>7 | ||
850 | pxor \TMP3,\TMP2 # xor the shifted versions | ||
851 | pxor \TMP4,\TMP2 | ||
852 | pxor \TMP5, \TMP2 | ||
853 | pxor \TMP2, \XMM5 | ||
854 | pxor \TMP1, \XMM5 # result is in TMP1 | ||
855 | |||
856 | pxor \XMM5, \XMM1 | ||
857 | .endm | ||
858 | |||
859 | /* | ||
860 | * decrypt 4 blocks at a time (AES counter mode, counter kept in \XMM0) | ||
861 | * ghash the 4 previously decrypted ciphertext blocks (\XMM1-\XMM4 on entry) | ||
862 | * %arg1, %arg2, %arg3 are used as pointers only, not modified | ||
863 | * %r11 is the data offset value; %xmm15 is clobbered (loaded with SHUF_MASK) | ||
864 | */ | ||
865 | .macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ | ||
866 | TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation | ||
867 | |||
868 | movdqa \XMM1, \XMM5 | ||
869 | movdqa \XMM2, \XMM6 | ||
870 | movdqa \XMM3, \XMM7 | ||
871 | movdqa \XMM4, \XMM8 | ||
872 | |||
873 | movdqa SHUF_MASK(%rip), %xmm15 | ||
874 | # multiply XMM5 * HashKey_4 using karatsuba | ||
875 | |||
876 | movdqa \XMM5, \TMP4 | ||
877 | pshufd $78, \XMM5, \TMP6 | ||
878 | pxor \XMM5, \TMP6 | ||
879 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
880 | movdqa HashKey_4(%rsp), \TMP5 | ||
881 | PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 | ||
882 | movdqa \XMM0, \XMM1 | ||
883 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
884 | movdqa \XMM0, \XMM2 | ||
885 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
886 | movdqa \XMM0, \XMM3 | ||
887 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
888 | movdqa \XMM0, \XMM4 | ||
889 | PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap | ||
890 | PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 | ||
891 | PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap | ||
892 | PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap | ||
893 | PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap | ||
894 | |||
895 | pxor (%arg1), \XMM1 | ||
896 | pxor (%arg1), \XMM2 | ||
897 | pxor (%arg1), \XMM3 | ||
898 | pxor (%arg1), \XMM4 | ||
899 | movdqa HashKey_4_k(%rsp), \TMP5 | ||
900 | PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) | ||
901 | movaps 0x10(%arg1), \TMP1 | ||
902 | AESENC \TMP1, \XMM1 # Round 1 | ||
903 | AESENC \TMP1, \XMM2 | ||
904 | AESENC \TMP1, \XMM3 | ||
905 | AESENC \TMP1, \XMM4 | ||
906 | movaps 0x20(%arg1), \TMP1 | ||
907 | AESENC \TMP1, \XMM1 # Round 2 | ||
908 | AESENC \TMP1, \XMM2 | ||
909 | AESENC \TMP1, \XMM3 | ||
910 | AESENC \TMP1, \XMM4 | ||
911 | movdqa \XMM6, \TMP1 | ||
912 | pshufd $78, \XMM6, \TMP2 | ||
913 | pxor \XMM6, \TMP2 | ||
914 | movdqa HashKey_3(%rsp), \TMP5 | ||
915 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 | ||
916 | movaps 0x30(%arg1), \TMP3 | ||
917 | AESENC \TMP3, \XMM1 # Round 3 | ||
918 | AESENC \TMP3, \XMM2 | ||
919 | AESENC \TMP3, \XMM3 | ||
920 | AESENC \TMP3, \XMM4 | ||
921 | PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 | ||
922 | movaps 0x40(%arg1), \TMP3 | ||
923 | AESENC \TMP3, \XMM1 # Round 4 | ||
924 | AESENC \TMP3, \XMM2 | ||
925 | AESENC \TMP3, \XMM3 | ||
926 | AESENC \TMP3, \XMM4 | ||
927 | movdqa HashKey_3_k(%rsp), \TMP5 | ||
928 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
929 | movaps 0x50(%arg1), \TMP3 | ||
930 | AESENC \TMP3, \XMM1 # Round 5 | ||
931 | AESENC \TMP3, \XMM2 | ||
932 | AESENC \TMP3, \XMM3 | ||
933 | AESENC \TMP3, \XMM4 | ||
934 | pxor \TMP1, \TMP4 | ||
935 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part | ||
936 | pxor \XMM6, \XMM5 | ||
937 | pxor \TMP2, \TMP6 | ||
938 | movdqa \XMM7, \TMP1 | ||
939 | pshufd $78, \XMM7, \TMP2 | ||
940 | pxor \XMM7, \TMP2 | ||
941 | movdqa HashKey_2(%rsp ), \TMP5 | ||
942 | |||
943 | # Multiply XMM7 * HashKey_2 using karatsuba | ||
944 | |||
945 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 | ||
946 | movaps 0x60(%arg1), \TMP3 | ||
947 | AESENC \TMP3, \XMM1 # Round 6 | ||
948 | AESENC \TMP3, \XMM2 | ||
949 | AESENC \TMP3, \XMM3 | ||
950 | AESENC \TMP3, \XMM4 | ||
951 | PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 | ||
952 | movaps 0x70(%arg1), \TMP3 | ||
953 | AESENC \TMP3, \XMM1 # Round 7 | ||
954 | AESENC \TMP3, \XMM2 | ||
955 | AESENC \TMP3, \XMM3 | ||
956 | AESENC \TMP3, \XMM4 | ||
957 | movdqa HashKey_2_k(%rsp), \TMP5 | ||
958 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
959 | movaps 0x80(%arg1), \TMP3 | ||
960 | AESENC \TMP3, \XMM1 # Round 8 | ||
961 | AESENC \TMP3, \XMM2 | ||
962 | AESENC \TMP3, \XMM3 | ||
963 | AESENC \TMP3, \XMM4 | ||
964 | pxor \TMP1, \TMP4 | ||
965 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part | ||
966 | pxor \XMM7, \XMM5 | ||
967 | pxor \TMP2, \TMP6 | ||
968 | |||
969 | # Multiply XMM8 * HashKey | ||
970 | # XMM8 and TMP5 hold the values for the two operands | ||
971 | |||
972 | movdqa \XMM8, \TMP1 | ||
973 | pshufd $78, \XMM8, \TMP2 | ||
974 | pxor \XMM8, \TMP2 | ||
975 | movdqa HashKey(%rsp), \TMP5 | ||
976 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 | ||
977 | movaps 0x90(%arg1), \TMP3 | ||
978 | AESENC \TMP3, \XMM1 # Round 9 | ||
979 | AESENC \TMP3, \XMM2 | ||
980 | AESENC \TMP3, \XMM3 | ||
981 | AESENC \TMP3, \XMM4 | ||
982 | PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 | ||
983 | movaps 0xa0(%arg1), \TMP3 | ||
984 | AESENCLAST \TMP3, \XMM1 # Round 10 | ||
985 | AESENCLAST \TMP3, \XMM2 | ||
986 | AESENCLAST \TMP3, \XMM3 | ||
987 | AESENCLAST \TMP3, \XMM4 | ||
988 | movdqa HashKey_k(%rsp), \TMP5 | ||
989 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
990 | movdqu (%arg3,%r11,1), \TMP3 | ||
991 | pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK | ||
992 | movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer | ||
993 | movdqa \TMP3, \XMM1 | ||
994 | movdqu 16(%arg3,%r11,1), \TMP3 | ||
995 | pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK | ||
996 | movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer | ||
997 | movdqa \TMP3, \XMM2 | ||
998 | movdqu 32(%arg3,%r11,1), \TMP3 | ||
999 | pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK | ||
1000 | movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer | ||
1001 | movdqa \TMP3, \XMM3 | ||
1002 | movdqu 48(%arg3,%r11,1), \TMP3 | ||
1003 | pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK | ||
1004 | movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer | ||
1005 | movdqa \TMP3, \XMM4 | ||
1006 | PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap | ||
1007 | PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap | ||
1008 | PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap | ||
1009 | PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap | ||
1010 | |||
1011 | pxor \TMP4, \TMP1 | ||
1012 | pxor \XMM8, \XMM5 | ||
1013 | pxor \TMP6, \TMP2 | ||
1014 | pxor \TMP1, \TMP2 | ||
1015 | pxor \XMM5, \TMP2 | ||
1016 | movdqa \TMP2, \TMP3 | ||
1017 | pslldq $8, \TMP3 # left shift TMP3 2 DWs | ||
1018 | psrldq $8, \TMP2 # right shift TMP2 2 DWs | ||
1019 | pxor \TMP3, \XMM5 | ||
1020 | pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 | ||
1021 | |||
1022 | # first phase of reduction | ||
1023 | |||
1024 | movdqa \XMM5, \TMP2 | ||
1025 | movdqa \XMM5, \TMP3 | ||
1026 | movdqa \XMM5, \TMP4 | ||
1027 | # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently | ||
1028 | pslld $31, \TMP2 # packed left shift << 31 | ||
1029 | pslld $30, \TMP3 # packed left shift << 30 | ||
1030 | pslld $25, \TMP4 # packed left shift << 25 | ||
1031 | pxor \TMP3, \TMP2 # xor the shifted versions | ||
1032 | pxor \TMP4, \TMP2 | ||
1033 | movdqa \TMP2, \TMP5 | ||
1034 | psrldq $4, \TMP5 # right shift TMP5 1 DW | ||
1035 | pslldq $12, \TMP2 # left shift TMP2 3 DWs | ||
1036 | pxor \TMP2, \XMM5 | ||
1037 | |||
1038 | # second phase of reduction | ||
1039 | |||
1040 | movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 | ||
1041 | movdqa \XMM5,\TMP3 | ||
1042 | movdqa \XMM5,\TMP4 | ||
1043 | psrld $1, \TMP2 # packed right shift >>1 | ||
1044 | psrld $2, \TMP3 # packed right shift >>2 | ||
1045 | psrld $7, \TMP4 # packed right shift >>7 | ||
1046 | pxor \TMP3,\TMP2 # xor the shifted versions | ||
1047 | pxor \TMP4,\TMP2 | ||
1048 | pxor \TMP5, \TMP2 | ||
1049 | pxor \TMP2, \XMM5 | ||
1050 | pxor \TMP1, \XMM5 # reduced result is in XMM5 | ||
1051 | |||
1052 | pxor \XMM5, \XMM1 | ||
1053 | .endm | ||
1054 | |||
1055 | /* GHASH the last 4 ciphertext blocks (\XMM1-\XMM4); reduced result in \XMMDst. */ | ||
1056 | .macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \ | ||
1057 | TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst | ||
1058 | |||
1059 | # Multiply XMM1 * HashKey_4 (using Karatsuba) | ||
1060 | |||
1061 | movdqa \XMM1, \TMP6 | ||
1062 | pshufd $78, \XMM1, \TMP2 | ||
1063 | pxor \XMM1, \TMP2 | ||
1064 | movdqa HashKey_4(%rsp), \TMP5 | ||
1065 | PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 | ||
1066 | PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 | ||
1067 | movdqa HashKey_4_k(%rsp), \TMP4 | ||
1068 | PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
1069 | movdqa \XMM1, \XMMDst | ||
1070 | movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 | ||
1071 | |||
1072 | # Multiply XMM2 * HashKey_3 (using Karatsuba) | ||
1073 | |||
1074 | movdqa \XMM2, \TMP1 | ||
1075 | pshufd $78, \XMM2, \TMP2 | ||
1076 | pxor \XMM2, \TMP2 | ||
1077 | movdqa HashKey_3(%rsp), \TMP5 | ||
1078 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 | ||
1079 | PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 | ||
1080 | movdqa HashKey_3_k(%rsp), \TMP4 | ||
1081 | PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
1082 | pxor \TMP1, \TMP6 | ||
1083 | pxor \XMM2, \XMMDst | ||
1084 | pxor \TMP2, \XMM1 | ||
1085 | # results accumulated in TMP6, XMMDst, XMM1 | ||
1086 | |||
1087 | # Multiply XMM3 * HashKey_2 (using Karatsuba) | ||
1088 | |||
1089 | movdqa \XMM3, \TMP1 | ||
1090 | pshufd $78, \XMM3, \TMP2 | ||
1091 | pxor \XMM3, \TMP2 | ||
1092 | movdqa HashKey_2(%rsp), \TMP5 | ||
1093 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 | ||
1094 | PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 | ||
1095 | movdqa HashKey_2_k(%rsp), \TMP4 | ||
1096 | PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
1097 | pxor \TMP1, \TMP6 | ||
1098 | pxor \XMM3, \XMMDst | ||
1099 | pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 | ||
1100 | |||
1101 | # Multiply XMM4 * HashKey (using Karatsuba) | ||
1102 | movdqa \XMM4, \TMP1 | ||
1103 | pshufd $78, \XMM4, \TMP2 | ||
1104 | pxor \XMM4, \TMP2 | ||
1105 | movdqa HashKey(%rsp), \TMP5 | ||
1106 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 | ||
1107 | PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 | ||
1108 | movdqa HashKey_k(%rsp), \TMP4 | ||
1109 | PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
1110 | pxor \TMP1, \TMP6 | ||
1111 | pxor \XMM4, \XMMDst | ||
1112 | pxor \XMM1, \TMP2 | ||
1113 | pxor \TMP6, \TMP2 | ||
1114 | pxor \XMMDst, \TMP2 | ||
1115 | # middle section of the temp results combined as in karatsuba algorithm | ||
1116 | movdqa \TMP2, \TMP4 | ||
1117 | pslldq $8, \TMP4 # left shift TMP4 2 DWs | ||
1118 | psrldq $8, \TMP2 # right shift TMP2 2 DWs | ||
1119 | pxor \TMP4, \XMMDst | ||
1120 | pxor \TMP2, \TMP6 | ||
1121 | # TMP6:XMMDst holds the result of the accumulated carry-less multiplications | ||
1122 | # first phase of the reduction | ||
1123 | movdqa \XMMDst, \TMP2 | ||
1124 | movdqa \XMMDst, \TMP3 | ||
1125 | movdqa \XMMDst, \TMP4 | ||
1126 | # move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently | ||
1127 | pslld $31, \TMP2 # packed left shift << 31 | ||
1128 | pslld $30, \TMP3 # packed left shift << 30 | ||
1129 | pslld $25, \TMP4 # packed left shift << 25 | ||
1130 | pxor \TMP3, \TMP2 # xor the shifted versions | ||
1131 | pxor \TMP4, \TMP2 | ||
1132 | movdqa \TMP2, \TMP7 | ||
1133 | psrldq $4, \TMP7 # right shift TMP7 1 DW | ||
1134 | pslldq $12, \TMP2 # left shift TMP2 3 DWs | ||
1135 | pxor \TMP2, \XMMDst | ||
1136 | |||
1137 | # second phase of the reduction | ||
1138 | movdqa \XMMDst, \TMP2 | ||
1139 | # make 3 copies of XMMDst for doing 3 shift operations | ||
1140 | movdqa \XMMDst, \TMP3 | ||
1141 | movdqa \XMMDst, \TMP4 | ||
1142 | psrld $1, \TMP2 # packed right shift >> 1 | ||
1143 | psrld $2, \TMP3 # packed right shift >> 2 | ||
1144 | psrld $7, \TMP4 # packed right shift >> 7 | ||
1145 | pxor \TMP3, \TMP2 # xor the shifted versions | ||
1146 | pxor \TMP4, \TMP2 | ||
1147 | pxor \TMP7, \TMP2 | ||
1148 | pxor \TMP2, \XMMDst | ||
1149 | pxor \TMP6, \XMMDst # reduced result is in XMMDst | ||
1150 | .endm | ||
1151 | |||
1152 | /* Encrypt the single block in \XMM0 in place using the 11 round keys at 0..160(%arg1); \TMP1 is clobbered */ | ||
1153 | .macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 | ||
1154 | |||
1155 | pxor (%arg1), \XMM0 | ||
1156 | movaps 16(%arg1), \TMP1 | ||
1157 | AESENC \TMP1, \XMM0 | ||
1158 | movaps 32(%arg1), \TMP1 | ||
1159 | AESENC \TMP1, \XMM0 | ||
1160 | movaps 48(%arg1), \TMP1 | ||
1161 | AESENC \TMP1, \XMM0 | ||
1162 | movaps 64(%arg1), \TMP1 | ||
1163 | AESENC \TMP1, \XMM0 | ||
1164 | movaps 80(%arg1), \TMP1 | ||
1165 | AESENC \TMP1, \XMM0 | ||
1166 | movaps 96(%arg1), \TMP1 | ||
1167 | AESENC \TMP1, \XMM0 | ||
1168 | movaps 112(%arg1), \TMP1 | ||
1169 | AESENC \TMP1, \XMM0 | ||
1170 | movaps 128(%arg1), \TMP1 | ||
1171 | AESENC \TMP1, \XMM0 | ||
1172 | movaps 144(%arg1), \TMP1 | ||
1173 | AESENC \TMP1, \XMM0 | ||
1174 | movaps 160(%arg1), \TMP1 | ||
1175 | AESENCLAST \TMP1, \XMM0 | ||
1176 | .endm | ||
1177 | |||
1178 | |||
1179 | /***************************************************************************** | ||
1180 | * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. | ||
1181 | * u8 *out, // Plaintext output. Encrypt in-place is allowed. | ||
1182 | * const u8 *in, // Ciphertext input | ||
1183 | * u64 plaintext_len, // Length of data in bytes for decryption. | ||
1184 | * u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) | ||
1185 | * // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) | ||
1186 | * // concatenated with 0x00000001. 16-byte aligned pointer. | ||
1187 | * u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. | ||
1188 | * const u8 *aad, // Additional Authentication Data (AAD) | ||
1189 | * u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes | ||
1190 | * u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the | ||
1191 | * // given authentication tag and only return the plaintext if they match. | ||
1192 | * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 | ||
1193 | * // (most likely), 12 or 8. | ||
1194 | * | ||
1195 | * Assumptions: | ||
1196 | * | ||
1197 | * keys: | ||
1198 | * keys are pre-expanded and aligned to 16 bytes. we are using the first | ||
1199 | * set of 11 keys in the data structure void *aes_ctx | ||
1200 | * | ||
1201 | * iv: | ||
1202 | * 0 1 2 3 | ||
1203 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1204 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1205 | * | Salt (From the SA) | | ||
1206 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1207 | * | Initialization Vector | | ||
1208 | * | (This is the sequence number from IPSec header) | | ||
1209 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1210 | * | 0x1 | | ||
1211 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1212 | * | ||
1213 | * | ||
1214 | * | ||
1215 | * AAD: | ||
1216 | * AAD padded to 128 bits with 0 | ||
1217 | * for example, assume AAD is a u32 vector | ||
1218 | * | ||
1219 | * if AAD is 8 bytes: | ||
1220 | * AAD[3] = {A0, A1}; | ||
1221 | * padded AAD in xmm register = {A1 A0 0 0} | ||
1222 | * | ||
1223 | * 0 1 2 3 | ||
1224 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1225 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1226 | * | SPI (A1) | | ||
1227 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1228 | * | 32-bit Sequence Number (A0) | | ||
1229 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1230 | * | 0x0 | | ||
1231 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1232 | * | ||
1233 | * AAD Format with 32-bit Sequence Number | ||
1234 | * | ||
1235 | * if AAD is 12 bytes: | ||
1236 | * AAD[3] = {A0, A1, A2}; | ||
1237 | * padded AAD in xmm register = {A2 A1 A0 0} | ||
1238 | * | ||
1239 | * 0 1 2 3 | ||
1240 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1241 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1242 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1243 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1244 | * | SPI (A2) | | ||
1245 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1246 | * | 64-bit Extended Sequence Number {A1,A0} | | ||
1247 | * | | | ||
1248 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1249 | * | 0x0 | | ||
1250 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1251 | * | ||
1252 | * AAD Format with 64-bit Extended Sequence Number | ||
1253 | * | ||
1254 | * aadLen: | ||
1255 | * from the definition of the spec, aadLen can only be 8 or 12 bytes. | ||
1256 | * The code supports 16 too but for other sizes, the code will fail. | ||
1257 | * | ||
1258 | * TLen: | ||
1259 | * from the definition of the spec, TLen can only be 8, 12 or 16 bytes. | ||
1260 | * For other sizes, the code will fail. | ||
1261 | * | ||
1262 | * poly = x^128 + x^127 + x^126 + x^121 + 1 | ||
1263 | * | ||
1264 | *****************************************************************************/ | ||
1265 | |||
1266 | ENTRY(aesni_gcm_dec) | ||
1267 | push %r12 | ||
1268 | push %r13 | ||
1269 | push %r14 | ||
1270 | mov %rsp, %r14 | ||
1271 | /* | ||
1272 | * states of %xmm registers %xmm6:%xmm15 not saved | ||
1273 | * all %xmm registers are clobbered | ||
1274 | */ | ||
1275 | sub $VARIABLE_OFFSET, %rsp | ||
1276 | and $~63, %rsp # align rsp to 64 bytes | ||
1277 | mov %arg6, %r12 | ||
1278 | movdqu (%r12), %xmm13 # %xmm13 = HashKey | ||
1279 | movdqa SHUF_MASK(%rip), %xmm2 | ||
1280 | PSHUFB_XMM %xmm2, %xmm13 | ||
1281 | |||
1282 | |||
1283 | # Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) | ||
1284 | |||
1285 | movdqa %xmm13, %xmm2 | ||
1286 | psllq $1, %xmm13 | ||
1287 | psrlq $63, %xmm2 | ||
1288 | movdqa %xmm2, %xmm1 | ||
1289 | pslldq $8, %xmm2 | ||
1290 | psrldq $8, %xmm1 | ||
1291 | por %xmm2, %xmm13 | ||
1292 | |||
1293 | # Reduction | ||
1294 | |||
1295 | pshufd $0x24, %xmm1, %xmm2 | ||
1296 | pcmpeqd TWOONE(%rip), %xmm2 | ||
1297 | pand POLY(%rip), %xmm2 | ||
1298 | pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) | ||
1299 | |||
1300 | |||
1301 | # Decrypt first few blocks | ||
1302 | |||
1303 | movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) | ||
1304 | mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
1305 | and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) | ||
1306 | mov %r13, %r12 | ||
1307 | and $(3<<4), %r12 | ||
1308 | jz _initial_num_blocks_is_0_decrypt | ||
1309 | cmp $(2<<4), %r12 | ||
1310 | jb _initial_num_blocks_is_1_decrypt | ||
1311 | je _initial_num_blocks_is_2_decrypt | ||
1312 | _initial_num_blocks_is_3_decrypt: | ||
1313 | INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1314 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec | ||
1315 | sub $48, %r13 | ||
1316 | jmp _initial_blocks_decrypted | ||
1317 | _initial_num_blocks_is_2_decrypt: | ||
1318 | INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1319 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec | ||
1320 | sub $32, %r13 | ||
1321 | jmp _initial_blocks_decrypted | ||
1322 | _initial_num_blocks_is_1_decrypt: | ||
1323 | INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1324 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec | ||
1325 | sub $16, %r13 | ||
1326 | jmp _initial_blocks_decrypted | ||
1327 | _initial_num_blocks_is_0_decrypt: | ||
1328 | INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1329 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec | ||
1330 | _initial_blocks_decrypted: | ||
1331 | cmp $0, %r13 | ||
1332 | je _zero_cipher_left_decrypt | ||
1333 | sub $64, %r13 | ||
1334 | je _four_cipher_left_decrypt | ||
1335 | _decrypt_by_4: | ||
1336 | GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ | ||
1337 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec | ||
1338 | add $64, %r11 | ||
1339 | sub $64, %r13 | ||
1340 | jne _decrypt_by_4 | ||
1341 | _four_cipher_left_decrypt: | ||
1342 | GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ | ||
1343 | %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 | ||
1344 | _zero_cipher_left_decrypt: | ||
1345 | mov %arg4, %r13 | ||
1346 | and $15, %r13 # %r13 = arg4 (mod 16) | ||
1347 | je _multiple_of_16_bytes_decrypt | ||
1348 | |||
1349 | # Handle the last <16 byte block separately | ||
1350 | |||
1351 | paddd ONE(%rip), %xmm0 # increment CNT to get Yn | ||
1352 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1353 | PSHUFB_XMM %xmm10, %xmm0 | ||
1354 | |||
1355 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) | ||
1356 | sub $16, %r11 | ||
1357 | add %r13, %r11 | ||
1358 | movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block (16-byte load ending at the end of input; NOTE(review): looks like this reads before %arg3 when the total length is < 16 - confirm) | ||
1359 | lea SHIFT_MASK+16(%rip), %r12 | ||
1360 | sub %r13, %r12 | ||
1361 | # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes | ||
1362 | # (%r13 is the number of bytes in plaintext mod 16) | ||
1363 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1364 | PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 bytes | ||
1365 | |||
1366 | movdqa %xmm1, %xmm2 | ||
1367 | pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) | ||
1368 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 | ||
1369 | # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 | ||
1370 | pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 | ||
1371 | pand %xmm1, %xmm2 | ||
1372 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1373 | PSHUFB_XMM %xmm10 ,%xmm2 | ||
1374 | |||
1375 | pxor %xmm2, %xmm8 | ||
1376 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1377 | # GHASH computation for the last <16 byte block | ||
1378 | sub %r13, %r11 | ||
1379 | add $16, %r11 | ||
1380 | |||
1381 | # output %r13 bytes | ||
1382 | MOVQ_R64_XMM %xmm0, %rax | ||
1383 | cmp $8, %r13 | ||
1384 | jle _less_than_8_bytes_left_decrypt | ||
1385 | mov %rax, (%arg2 , %r11, 1) | ||
1386 | add $8, %r11 | ||
1387 | psrldq $8, %xmm0 | ||
1388 | MOVQ_R64_XMM %xmm0, %rax | ||
1389 | sub $8, %r13 | ||
1390 | _less_than_8_bytes_left_decrypt: | ||
1391 | mov %al, (%arg2, %r11, 1) | ||
1392 | add $1, %r11 | ||
1393 | shr $8, %rax | ||
1394 | sub $1, %r13 | ||
1395 | jne _less_than_8_bytes_left_decrypt | ||
1396 | _multiple_of_16_bytes_decrypt: | ||
1397 | mov arg8, %r12 # %r12 = aadLen (number of bytes) | ||
1398 | shl $3, %r12 # convert into number of bits | ||
1399 | movd %r12d, %xmm15 # len(A) in %xmm15 | ||
1400 | shl $3, %arg4 # len(C) converted from bytes to bits (*8), modifies %arg4 | ||
1401 | MOVQ_R64_XMM %arg4, %xmm1 | ||
1402 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 | ||
1403 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) | ||
1404 | pxor %xmm15, %xmm8 | ||
1405 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1406 | # final GHASH computation | ||
1407 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1408 | PSHUFB_XMM %xmm10, %xmm8 | ||
1409 | |||
1410 | mov %arg5, %rax # %rax = *Y0 | ||
1411 | movdqu (%rax), %xmm0 # %xmm0 = Y0 | ||
1412 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) | ||
1413 | pxor %xmm8, %xmm0 | ||
1414 | _return_T_decrypt: | ||
1415 | mov arg9, %r10 # %r10 = authTag | ||
1416 | mov arg10, %r11 # %r11 = auth_tag_len | ||
1417 | cmp $16, %r11 | ||
1418 | je _T_16_decrypt | ||
1419 | cmp $12, %r11 | ||
1420 | je _T_12_decrypt | ||
1421 | _T_8_decrypt: | ||
1422 | MOVQ_R64_XMM %xmm0, %rax | ||
1423 | mov %rax, (%r10) | ||
1424 | jmp _return_T_done_decrypt | ||
1425 | _T_12_decrypt: | ||
1426 | MOVQ_R64_XMM %xmm0, %rax | ||
1427 | mov %rax, (%r10) | ||
1428 | psrldq $8, %xmm0 | ||
1429 | movd %xmm0, %eax | ||
1430 | mov %eax, 8(%r10) | ||
1431 | jmp _return_T_done_decrypt | ||
1432 | _T_16_decrypt: | ||
1433 | movdqu %xmm0, (%r10) | ||
1434 | _return_T_done_decrypt: | ||
1435 | mov %r14, %rsp | ||
1436 | pop %r14 | ||
1437 | pop %r13 | ||
1438 | pop %r12 | ||
1439 | ret | ||
1440 | |||
1441 | |||
1442 | /***************************************************************************** | ||
1443 | * void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. | ||
1444 | * u8 *out, // Ciphertext output. Encrypt in-place is allowed. | ||
1445 | * const u8 *in, // Plaintext input | ||
1446 | * u64 plaintext_len, // Length of data in bytes for encryption. | ||
1447 | * u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) | ||
1448 | * // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) | ||
1449 | * // concatenated with 0x00000001. 16-byte aligned pointer. | ||
1450 | * u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. | ||
1451 | * const u8 *aad, // Additional Authentication Data (AAD) | ||
1452 | * u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes | ||
1453 | * u8 *auth_tag, // Authenticated Tag output. | ||
1454 | * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), | ||
1455 | * // 12 or 8. | ||
1456 | * | ||
1457 | * Assumptions: | ||
1458 | * | ||
1459 | * keys: | ||
1460 | * keys are pre-expanded and aligned to 16 bytes. we are using the | ||
1461 | * first set of 11 keys in the data structure void *aes_ctx | ||
1462 | * | ||
1463 | * | ||
1464 | * iv: | ||
1465 | * 0 1 2 3 | ||
1466 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1467 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1468 | * | Salt (From the SA) | | ||
1469 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1470 | * | Initialization Vector | | ||
1471 | * | (This is the sequence number from IPSec header) | | ||
1472 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1473 | * | 0x1 | | ||
1474 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1475 | * | ||
1476 | * | ||
1477 | * | ||
1478 | * AAD: | ||
1479 | * AAD padded to 128 bits with 0 | ||
1480 | * for example, assume AAD is a u32 vector | ||
1481 | * | ||
1482 | * if AAD is 8 bytes: | ||
1483 | * AAD[3] = {A0, A1}; | ||
1484 | * padded AAD in xmm register = {A1 A0 0 0} | ||
1485 | * | ||
1486 | * 0 1 2 3 | ||
1487 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1488 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1489 | * | SPI (A1) | | ||
1490 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1491 | * | 32-bit Sequence Number (A0) | | ||
1492 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1493 | * | 0x0 | | ||
1494 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1495 | * | ||
1496 | * AAD Format with 32-bit Sequence Number | ||
1497 | * | ||
1498 | * if AAD is 12 bytes: | ||
1499 | * AAD[3] = {A0, A1, A2}; | ||
1500 | * padded AAD in xmm register = {A2 A1 A0 0} | ||
1501 | * | ||
1502 | * 0 1 2 3 | ||
1503 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1504 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1505 | * | SPI (A2) | | ||
1506 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1507 | * | 64-bit Extended Sequence Number {A1,A0} | | ||
1508 | * | | | ||
1509 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1510 | * | 0x0 | | ||
1511 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1512 | * | ||
1513 | * AAD Format with 64-bit Extended Sequence Number | ||
1514 | * | ||
1515 | * aadLen: | ||
1516 | * from the definition of the spec, aadLen can only be 8 or 12 bytes. | ||
1517 | * The code supports 16 too but for other sizes, the code will fail. | ||
1518 | * | ||
1519 | * TLen: | ||
1520 | * from the definition of the spec, TLen can only be 8, 12 or 16 bytes. | ||
1521 | * For other sizes, the code will fail. | ||
1522 | * | ||
1523 | * poly = x^128 + x^127 + x^126 + x^121 + 1 | ||
1524 | ***************************************************************************/ | ||
1525 | ENTRY(aesni_gcm_enc) | ||
1526 | push %r12 | ||
1527 | push %r13 | ||
1528 | push %r14 | ||
1529 | mov %rsp, %r14 | ||
1530 | # | ||
1531 | # states of %xmm registers %xmm6:%xmm15 not saved | ||
1532 | # all %xmm registers are clobbered | ||
1533 | # | ||
1534 | sub $VARIABLE_OFFSET, %rsp | ||
1535 | and $~63, %rsp | ||
1536 | mov %arg6, %r12 | ||
1537 | movdqu (%r12), %xmm13 | ||
1538 | movdqa SHUF_MASK(%rip), %xmm2 | ||
1539 | PSHUFB_XMM %xmm2, %xmm13 | ||
1540 | |||
1541 | |||
1542 | # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) | ||
1543 | |||
1544 | movdqa %xmm13, %xmm2 | ||
1545 | psllq $1, %xmm13 | ||
1546 | psrlq $63, %xmm2 | ||
1547 | movdqa %xmm2, %xmm1 | ||
1548 | pslldq $8, %xmm2 | ||
1549 | psrldq $8, %xmm1 | ||
1550 | por %xmm2, %xmm13 | ||
1551 | |||
1552 | # reduce HashKey<<1 | ||
1553 | |||
1554 | pshufd $0x24, %xmm1, %xmm2 | ||
1555 | pcmpeqd TWOONE(%rip), %xmm2 | ||
1556 | pand POLY(%rip), %xmm2 | ||
1557 | pxor %xmm2, %xmm13 | ||
1558 | movdqa %xmm13, HashKey(%rsp) | ||
1559 | mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) | ||
1560 | and $-16, %r13 | ||
1561 | mov %r13, %r12 | ||
1562 | |||
1563 | # Encrypt first few blocks | ||
1564 | |||
1565 | and $(3<<4), %r12 | ||
1566 | jz _initial_num_blocks_is_0_encrypt | ||
1567 | cmp $(2<<4), %r12 | ||
1568 | jb _initial_num_blocks_is_1_encrypt | ||
1569 | je _initial_num_blocks_is_2_encrypt | ||
1570 | _initial_num_blocks_is_3_encrypt: | ||
1571 | INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1572 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc | ||
1573 | sub $48, %r13 | ||
1574 | jmp _initial_blocks_encrypted | ||
1575 | _initial_num_blocks_is_2_encrypt: | ||
1576 | INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1577 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc | ||
1578 | sub $32, %r13 | ||
1579 | jmp _initial_blocks_encrypted | ||
1580 | _initial_num_blocks_is_1_encrypt: | ||
1581 | INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1582 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc | ||
1583 | sub $16, %r13 | ||
1584 | jmp _initial_blocks_encrypted | ||
1585 | _initial_num_blocks_is_0_encrypt: | ||
1586 | INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1587 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc | ||
1588 | _initial_blocks_encrypted: | ||
1589 | |||
1590 | # Main loop - Encrypt remaining blocks | ||
1591 | |||
1592 | cmp $0, %r13 | ||
1593 | je _zero_cipher_left_encrypt | ||
1594 | sub $64, %r13 | ||
1595 | je _four_cipher_left_encrypt | ||
1596 | _encrypt_by_4_encrypt: | ||
1597 | GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ | ||
1598 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc | ||
1599 | add $64, %r11 | ||
1600 | sub $64, %r13 | ||
1601 | jne _encrypt_by_4_encrypt | ||
1602 | _four_cipher_left_encrypt: | ||
1603 | GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ | ||
1604 | %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 | ||
1605 | _zero_cipher_left_encrypt: | ||
1606 | mov %arg4, %r13 | ||
1607 | and $15, %r13 # %r13 = arg4 (mod 16) | ||
1608 | je _multiple_of_16_bytes_encrypt | ||
1609 | |||
1610 | # Handle the last <16 Byte block separately | ||
1611 | paddd ONE(%rip), %xmm0 # INCR CNT to get Yn | ||
1612 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1613 | PSHUFB_XMM %xmm10, %xmm0 | ||
1614 | |||
1615 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) | ||
1616 | sub $16, %r11 | ||
1617 | add %r13, %r11 | ||
1618 | movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks | ||
1619 | lea SHIFT_MASK+16(%rip), %r12 | ||
1620 | sub %r13, %r12 | ||
1621 | # adjust the shuffle mask pointer to be able to shift 16-r13 bytes | ||
1622 | # (%r13 is the number of bytes in plaintext mod 16) | ||
1623 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1624 | PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 bytes | ||
1625 | pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) | ||
1626 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 | ||
1627 | # get the appropriate mask to mask out top 16-r13 bytes of xmm0 | ||
1628 | pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 | ||
1629 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1630 | PSHUFB_XMM %xmm10,%xmm0 | ||
1631 | |||
1632 | pxor %xmm0, %xmm8 | ||
1633 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1634 | # GHASH computation for the last <16 byte block | ||
1635 | sub %r13, %r11 | ||
1636 | add $16, %r11 | ||
1637 | PSHUFB_XMM %xmm10, %xmm1 | ||
1638 | |||
1639 | # shuffle xmm0 back to output as ciphertext | ||
1640 | |||
1641 | # Output %r13 bytes | ||
1642 | MOVQ_R64_XMM %xmm0, %rax | ||
1643 | cmp $8, %r13 | ||
1644 | jle _less_than_8_bytes_left_encrypt | ||
1645 | mov %rax, (%arg2 , %r11, 1) | ||
1646 | add $8, %r11 | ||
1647 | psrldq $8, %xmm0 | ||
1648 | MOVQ_R64_XMM %xmm0, %rax | ||
1649 | sub $8, %r13 | ||
1650 | _less_than_8_bytes_left_encrypt: | ||
1651 | mov %al, (%arg2, %r11, 1) | ||
1652 | add $1, %r11 | ||
1653 | shr $8, %rax | ||
1654 | sub $1, %r13 | ||
1655 | jne _less_than_8_bytes_left_encrypt | ||
1656 | _multiple_of_16_bytes_encrypt: | ||
1657 | mov arg8, %r12 # %r12 = aadLen (number of bytes) | ||
1658 | shl $3, %r12 | ||
1659 | movd %r12d, %xmm15 # len(A) in %xmm15 | ||
1660 | shl $3, %arg4 # len(C) in bits (*8) | ||
1661 | MOVQ_R64_XMM %arg4, %xmm1 | ||
1662 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 | ||
1663 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) | ||
1664 | pxor %xmm15, %xmm8 | ||
1665 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1666 | # final GHASH computation | ||
1667 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1668 | PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap | ||
1669 | |||
1670 | mov %arg5, %rax # %rax = *Y0 | ||
1671 | movdqu (%rax), %xmm0 # %xmm0 = Y0 | ||
1672 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) | ||
1673 | pxor %xmm8, %xmm0 | ||
1674 | _return_T_encrypt: | ||
1675 | mov arg9, %r10 # %r10 = authTag | ||
1676 | mov arg10, %r11 # %r11 = auth_tag_len | ||
1677 | cmp $16, %r11 | ||
1678 | je _T_16_encrypt | ||
1679 | cmp $12, %r11 | ||
1680 | je _T_12_encrypt | ||
1681 | _T_8_encrypt: | ||
1682 | MOVQ_R64_XMM %xmm0, %rax | ||
1683 | mov %rax, (%r10) | ||
1684 | jmp _return_T_done_encrypt | ||
1685 | _T_12_encrypt: | ||
1686 | MOVQ_R64_XMM %xmm0, %rax | ||
1687 | mov %rax, (%r10) | ||
1688 | psrldq $8, %xmm0 | ||
1689 | movd %xmm0, %eax | ||
1690 | mov %eax, 8(%r10) | ||
1691 | jmp _return_T_done_encrypt | ||
1692 | _T_16_encrypt: | ||
1693 | movdqu %xmm0, (%r10) | ||
1694 | _return_T_done_encrypt: | ||
1695 | mov %r14, %rsp | ||
1696 | pop %r14 | ||
1697 | pop %r13 | ||
1698 | pop %r12 | ||
1699 | ret | ||
1700 | |||
1701 | #endif | ||
1702 | |||
49 | 1703 | ||
50 | _key_expansion_128: | 1704 | _key_expansion_128: |
51 | _key_expansion_256a: | 1705 | _key_expansion_256a: |
@@ -55,10 +1709,11 @@ _key_expansion_256a: | |||
55 | shufps $0b10001100, %xmm0, %xmm4 | 1709 | shufps $0b10001100, %xmm0, %xmm4 |
56 | pxor %xmm4, %xmm0 | 1710 | pxor %xmm4, %xmm0 |
57 | pxor %xmm1, %xmm0 | 1711 | pxor %xmm1, %xmm0 |
58 | movaps %xmm0, (%rcx) | 1712 | movaps %xmm0, (TKEYP) |
59 | add $0x10, %rcx | 1713 | add $0x10, TKEYP |
60 | ret | 1714 | ret |
61 | 1715 | ||
1716 | .align 4 | ||
62 | _key_expansion_192a: | 1717 | _key_expansion_192a: |
63 | pshufd $0b01010101, %xmm1, %xmm1 | 1718 | pshufd $0b01010101, %xmm1, %xmm1 |
64 | shufps $0b00010000, %xmm0, %xmm4 | 1719 | shufps $0b00010000, %xmm0, %xmm4 |
@@ -76,12 +1731,13 @@ _key_expansion_192a: | |||
76 | 1731 | ||
77 | movaps %xmm0, %xmm1 | 1732 | movaps %xmm0, %xmm1 |
78 | shufps $0b01000100, %xmm0, %xmm6 | 1733 | shufps $0b01000100, %xmm0, %xmm6 |
79 | movaps %xmm6, (%rcx) | 1734 | movaps %xmm6, (TKEYP) |
80 | shufps $0b01001110, %xmm2, %xmm1 | 1735 | shufps $0b01001110, %xmm2, %xmm1 |
81 | movaps %xmm1, 16(%rcx) | 1736 | movaps %xmm1, 0x10(TKEYP) |
82 | add $0x20, %rcx | 1737 | add $0x20, TKEYP |
83 | ret | 1738 | ret |
84 | 1739 | ||
1740 | .align 4 | ||
85 | _key_expansion_192b: | 1741 | _key_expansion_192b: |
86 | pshufd $0b01010101, %xmm1, %xmm1 | 1742 | pshufd $0b01010101, %xmm1, %xmm1 |
87 | shufps $0b00010000, %xmm0, %xmm4 | 1743 | shufps $0b00010000, %xmm0, %xmm4 |
@@ -96,10 +1752,11 @@ _key_expansion_192b: | |||
96 | pxor %xmm3, %xmm2 | 1752 | pxor %xmm3, %xmm2 |
97 | pxor %xmm5, %xmm2 | 1753 | pxor %xmm5, %xmm2 |
98 | 1754 | ||
99 | movaps %xmm0, (%rcx) | 1755 | movaps %xmm0, (TKEYP) |
100 | add $0x10, %rcx | 1756 | add $0x10, TKEYP |
101 | ret | 1757 | ret |
102 | 1758 | ||
1759 | .align 4 | ||
103 | _key_expansion_256b: | 1760 | _key_expansion_256b: |
104 | pshufd $0b10101010, %xmm1, %xmm1 | 1761 | pshufd $0b10101010, %xmm1, %xmm1 |
105 | shufps $0b00010000, %xmm2, %xmm4 | 1762 | shufps $0b00010000, %xmm2, %xmm4 |
@@ -107,8 +1764,8 @@ _key_expansion_256b: | |||
107 | shufps $0b10001100, %xmm2, %xmm4 | 1764 | shufps $0b10001100, %xmm2, %xmm4 |
108 | pxor %xmm4, %xmm2 | 1765 | pxor %xmm4, %xmm2 |
109 | pxor %xmm1, %xmm2 | 1766 | pxor %xmm1, %xmm2 |
110 | movaps %xmm2, (%rcx) | 1767 | movaps %xmm2, (TKEYP) |
111 | add $0x10, %rcx | 1768 | add $0x10, TKEYP |
112 | ret | 1769 | ret |
113 | 1770 | ||
114 | /* | 1771 | /* |
@@ -116,17 +1773,23 @@ _key_expansion_256b: | |||
116 | * unsigned int key_len) | 1773 | * unsigned int key_len) |
117 | */ | 1774 | */ |
118 | ENTRY(aesni_set_key) | 1775 | ENTRY(aesni_set_key) |
119 | movups (%rsi), %xmm0 # user key (first 16 bytes) | 1776 | #ifndef __x86_64__ |
120 | movaps %xmm0, (%rdi) | 1777 | pushl KEYP |
121 | lea 0x10(%rdi), %rcx # key addr | 1778 | movl 8(%esp), KEYP # ctx |
122 | movl %edx, 480(%rdi) | 1779 | movl 12(%esp), UKEYP # in_key |
1780 | movl 16(%esp), %edx # key_len | ||
1781 | #endif | ||
1782 | movups (UKEYP), %xmm0 # user key (first 16 bytes) | ||
1783 | movaps %xmm0, (KEYP) | ||
1784 | lea 0x10(KEYP), TKEYP # key addr | ||
1785 | movl %edx, 480(KEYP) | ||
123 | pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x | 1786 | pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x |
124 | cmp $24, %dl | 1787 | cmp $24, %dl |
125 | jb .Lenc_key128 | 1788 | jb .Lenc_key128 |
126 | je .Lenc_key192 | 1789 | je .Lenc_key192 |
127 | movups 0x10(%rsi), %xmm2 # other user key | 1790 | movups 0x10(UKEYP), %xmm2 # other user key |
128 | movaps %xmm2, (%rcx) | 1791 | movaps %xmm2, (TKEYP) |
129 | add $0x10, %rcx | 1792 | add $0x10, TKEYP |
130 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 | 1793 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
131 | call _key_expansion_256a | 1794 | call _key_expansion_256a |
132 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 | 1795 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 |
@@ -155,7 +1818,7 @@ ENTRY(aesni_set_key) | |||
155 | call _key_expansion_256a | 1818 | call _key_expansion_256a |
156 | jmp .Ldec_key | 1819 | jmp .Ldec_key |
157 | .Lenc_key192: | 1820 | .Lenc_key192: |
158 | movq 0x10(%rsi), %xmm2 # other user key | 1821 | movq 0x10(UKEYP), %xmm2 # other user key |
159 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 | 1822 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
160 | call _key_expansion_192a | 1823 | call _key_expansion_192a |
161 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 | 1824 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
@@ -195,33 +1858,47 @@ ENTRY(aesni_set_key) | |||
195 | AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 | 1858 | AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 |
196 | call _key_expansion_128 | 1859 | call _key_expansion_128 |
197 | .Ldec_key: | 1860 | .Ldec_key: |
198 | sub $0x10, %rcx | 1861 | sub $0x10, TKEYP |
199 | movaps (%rdi), %xmm0 | 1862 | movaps (KEYP), %xmm0 |
200 | movaps (%rcx), %xmm1 | 1863 | movaps (TKEYP), %xmm1 |
201 | movaps %xmm0, 240(%rcx) | 1864 | movaps %xmm0, 240(TKEYP) |
202 | movaps %xmm1, 240(%rdi) | 1865 | movaps %xmm1, 240(KEYP) |
203 | add $0x10, %rdi | 1866 | add $0x10, KEYP |
204 | lea 240-16(%rcx), %rsi | 1867 | lea 240-16(TKEYP), UKEYP |
205 | .align 4 | 1868 | .align 4 |
206 | .Ldec_key_loop: | 1869 | .Ldec_key_loop: |
207 | movaps (%rdi), %xmm0 | 1870 | movaps (KEYP), %xmm0 |
208 | AESIMC %xmm0 %xmm1 | 1871 | AESIMC %xmm0 %xmm1 |
209 | movaps %xmm1, (%rsi) | 1872 | movaps %xmm1, (UKEYP) |
210 | add $0x10, %rdi | 1873 | add $0x10, KEYP |
211 | sub $0x10, %rsi | 1874 | sub $0x10, UKEYP |
212 | cmp %rcx, %rdi | 1875 | cmp TKEYP, KEYP |
213 | jb .Ldec_key_loop | 1876 | jb .Ldec_key_loop |
214 | xor %rax, %rax | 1877 | xor AREG, AREG |
1878 | #ifndef __x86_64__ | ||
1879 | popl KEYP | ||
1880 | #endif | ||
215 | ret | 1881 | ret |
216 | 1882 | ||
217 | /* | 1883 | /* |
218 | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 1884 | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
219 | */ | 1885 | */ |
220 | ENTRY(aesni_enc) | 1886 | ENTRY(aesni_enc) |
1887 | #ifndef __x86_64__ | ||
1888 | pushl KEYP | ||
1889 | pushl KLEN | ||
1890 | movl 12(%esp), KEYP | ||
1891 | movl 16(%esp), OUTP | ||
1892 | movl 20(%esp), INP | ||
1893 | #endif | ||
221 | movl 480(KEYP), KLEN # key length | 1894 | movl 480(KEYP), KLEN # key length |
222 | movups (INP), STATE # input | 1895 | movups (INP), STATE # input |
223 | call _aesni_enc1 | 1896 | call _aesni_enc1 |
224 | movups STATE, (OUTP) # output | 1897 | movups STATE, (OUTP) # output |
1898 | #ifndef __x86_64__ | ||
1899 | popl KLEN | ||
1900 | popl KEYP | ||
1901 | #endif | ||
225 | ret | 1902 | ret |
226 | 1903 | ||
227 | /* | 1904 | /* |
@@ -236,6 +1913,7 @@ ENTRY(aesni_enc) | |||
236 | * KEY | 1913 | * KEY |
237 | * TKEYP (T1) | 1914 | * TKEYP (T1) |
238 | */ | 1915 | */ |
1916 | .align 4 | ||
239 | _aesni_enc1: | 1917 | _aesni_enc1: |
240 | movaps (KEYP), KEY # key | 1918 | movaps (KEYP), KEY # key |
241 | mov KEYP, TKEYP | 1919 | mov KEYP, TKEYP |
@@ -298,6 +1976,7 @@ _aesni_enc1: | |||
298 | * KEY | 1976 | * KEY |
299 | * TKEYP (T1) | 1977 | * TKEYP (T1) |
300 | */ | 1978 | */ |
1979 | .align 4 | ||
301 | _aesni_enc4: | 1980 | _aesni_enc4: |
302 | movaps (KEYP), KEY # key | 1981 | movaps (KEYP), KEY # key |
303 | mov KEYP, TKEYP | 1982 | mov KEYP, TKEYP |
@@ -391,11 +2070,22 @@ _aesni_enc4: | |||
391 | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 2070 | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
392 | */ | 2071 | */ |
393 | ENTRY(aesni_dec) | 2072 | ENTRY(aesni_dec) |
2073 | #ifndef __x86_64__ | ||
2074 | pushl KEYP | ||
2075 | pushl KLEN | ||
2076 | movl 12(%esp), KEYP | ||
2077 | movl 16(%esp), OUTP | ||
2078 | movl 20(%esp), INP | ||
2079 | #endif | ||
394 | mov 480(KEYP), KLEN # key length | 2080 | mov 480(KEYP), KLEN # key length |
395 | add $240, KEYP | 2081 | add $240, KEYP |
396 | movups (INP), STATE # input | 2082 | movups (INP), STATE # input |
397 | call _aesni_dec1 | 2083 | call _aesni_dec1 |
398 | movups STATE, (OUTP) #output | 2084 | movups STATE, (OUTP) #output |
2085 | #ifndef __x86_64__ | ||
2086 | popl KLEN | ||
2087 | popl KEYP | ||
2088 | #endif | ||
399 | ret | 2089 | ret |
400 | 2090 | ||
401 | /* | 2091 | /* |
@@ -410,6 +2100,7 @@ ENTRY(aesni_dec) | |||
410 | * KEY | 2100 | * KEY |
411 | * TKEYP (T1) | 2101 | * TKEYP (T1) |
412 | */ | 2102 | */ |
2103 | .align 4 | ||
413 | _aesni_dec1: | 2104 | _aesni_dec1: |
414 | movaps (KEYP), KEY # key | 2105 | movaps (KEYP), KEY # key |
415 | mov KEYP, TKEYP | 2106 | mov KEYP, TKEYP |
@@ -472,6 +2163,7 @@ _aesni_dec1: | |||
472 | * KEY | 2163 | * KEY |
473 | * TKEYP (T1) | 2164 | * TKEYP (T1) |
474 | */ | 2165 | */ |
2166 | .align 4 | ||
475 | _aesni_dec4: | 2167 | _aesni_dec4: |
476 | movaps (KEYP), KEY # key | 2168 | movaps (KEYP), KEY # key |
477 | mov KEYP, TKEYP | 2169 | mov KEYP, TKEYP |
@@ -566,6 +2258,15 @@ _aesni_dec4: | |||
566 | * size_t len) | 2258 | * size_t len) |
567 | */ | 2259 | */ |
568 | ENTRY(aesni_ecb_enc) | 2260 | ENTRY(aesni_ecb_enc) |
2261 | #ifndef __x86_64__ | ||
2262 | pushl LEN | ||
2263 | pushl KEYP | ||
2264 | pushl KLEN | ||
2265 | movl 16(%esp), KEYP | ||
2266 | movl 20(%esp), OUTP | ||
2267 | movl 24(%esp), INP | ||
2268 | movl 28(%esp), LEN | ||
2269 | #endif | ||
569 | test LEN, LEN # check length | 2270 | test LEN, LEN # check length |
570 | jz .Lecb_enc_ret | 2271 | jz .Lecb_enc_ret |
571 | mov 480(KEYP), KLEN | 2272 | mov 480(KEYP), KLEN |
@@ -602,6 +2303,11 @@ ENTRY(aesni_ecb_enc) | |||
602 | cmp $16, LEN | 2303 | cmp $16, LEN |
603 | jge .Lecb_enc_loop1 | 2304 | jge .Lecb_enc_loop1 |
604 | .Lecb_enc_ret: | 2305 | .Lecb_enc_ret: |
2306 | #ifndef __x86_64__ | ||
2307 | popl KLEN | ||
2308 | popl KEYP | ||
2309 | popl LEN | ||
2310 | #endif | ||
605 | ret | 2311 | ret |
606 | 2312 | ||
607 | /* | 2313 | /* |
@@ -609,6 +2315,15 @@ ENTRY(aesni_ecb_enc) | |||
609 | * size_t len); | 2315 | * size_t len); |
610 | */ | 2316 | */ |
611 | ENTRY(aesni_ecb_dec) | 2317 | ENTRY(aesni_ecb_dec) |
2318 | #ifndef __x86_64__ | ||
2319 | pushl LEN | ||
2320 | pushl KEYP | ||
2321 | pushl KLEN | ||
2322 | movl 16(%esp), KEYP | ||
2323 | movl 20(%esp), OUTP | ||
2324 | movl 24(%esp), INP | ||
2325 | movl 28(%esp), LEN | ||
2326 | #endif | ||
612 | test LEN, LEN | 2327 | test LEN, LEN |
613 | jz .Lecb_dec_ret | 2328 | jz .Lecb_dec_ret |
614 | mov 480(KEYP), KLEN | 2329 | mov 480(KEYP), KLEN |
@@ -646,6 +2361,11 @@ ENTRY(aesni_ecb_dec) | |||
646 | cmp $16, LEN | 2361 | cmp $16, LEN |
647 | jge .Lecb_dec_loop1 | 2362 | jge .Lecb_dec_loop1 |
648 | .Lecb_dec_ret: | 2363 | .Lecb_dec_ret: |
2364 | #ifndef __x86_64__ | ||
2365 | popl KLEN | ||
2366 | popl KEYP | ||
2367 | popl LEN | ||
2368 | #endif | ||
649 | ret | 2369 | ret |
650 | 2370 | ||
651 | /* | 2371 | /* |
@@ -653,6 +2373,17 @@ ENTRY(aesni_ecb_dec) | |||
653 | * size_t len, u8 *iv) | 2373 | * size_t len, u8 *iv) |
654 | */ | 2374 | */ |
655 | ENTRY(aesni_cbc_enc) | 2375 | ENTRY(aesni_cbc_enc) |
2376 | #ifndef __x86_64__ | ||
2377 | pushl IVP | ||
2378 | pushl LEN | ||
2379 | pushl KEYP | ||
2380 | pushl KLEN | ||
2381 | movl 20(%esp), KEYP | ||
2382 | movl 24(%esp), OUTP | ||
2383 | movl 28(%esp), INP | ||
2384 | movl 32(%esp), LEN | ||
2385 | movl 36(%esp), IVP | ||
2386 | #endif | ||
656 | cmp $16, LEN | 2387 | cmp $16, LEN |
657 | jb .Lcbc_enc_ret | 2388 | jb .Lcbc_enc_ret |
658 | mov 480(KEYP), KLEN | 2389 | mov 480(KEYP), KLEN |
@@ -670,6 +2401,12 @@ ENTRY(aesni_cbc_enc) | |||
670 | jge .Lcbc_enc_loop | 2401 | jge .Lcbc_enc_loop |
671 | movups STATE, (IVP) | 2402 | movups STATE, (IVP) |
672 | .Lcbc_enc_ret: | 2403 | .Lcbc_enc_ret: |
2404 | #ifndef __x86_64__ | ||
2405 | popl KLEN | ||
2406 | popl KEYP | ||
2407 | popl LEN | ||
2408 | popl IVP | ||
2409 | #endif | ||
673 | ret | 2410 | ret |
674 | 2411 | ||
675 | /* | 2412 | /* |
@@ -677,6 +2414,17 @@ ENTRY(aesni_cbc_enc) | |||
677 | * size_t len, u8 *iv) | 2414 | * size_t len, u8 *iv) |
678 | */ | 2415 | */ |
679 | ENTRY(aesni_cbc_dec) | 2416 | ENTRY(aesni_cbc_dec) |
2417 | #ifndef __x86_64__ | ||
2418 | pushl IVP | ||
2419 | pushl LEN | ||
2420 | pushl KEYP | ||
2421 | pushl KLEN | ||
2422 | movl 20(%esp), KEYP | ||
2423 | movl 24(%esp), OUTP | ||
2424 | movl 28(%esp), INP | ||
2425 | movl 32(%esp), LEN | ||
2426 | movl 36(%esp), IVP | ||
2427 | #endif | ||
680 | cmp $16, LEN | 2428 | cmp $16, LEN |
681 | jb .Lcbc_dec_just_ret | 2429 | jb .Lcbc_dec_just_ret |
682 | mov 480(KEYP), KLEN | 2430 | mov 480(KEYP), KLEN |
@@ -690,16 +2438,30 @@ ENTRY(aesni_cbc_dec) | |||
690 | movaps IN1, STATE1 | 2438 | movaps IN1, STATE1 |
691 | movups 0x10(INP), IN2 | 2439 | movups 0x10(INP), IN2 |
692 | movaps IN2, STATE2 | 2440 | movaps IN2, STATE2 |
2441 | #ifdef __x86_64__ | ||
693 | movups 0x20(INP), IN3 | 2442 | movups 0x20(INP), IN3 |
694 | movaps IN3, STATE3 | 2443 | movaps IN3, STATE3 |
695 | movups 0x30(INP), IN4 | 2444 | movups 0x30(INP), IN4 |
696 | movaps IN4, STATE4 | 2445 | movaps IN4, STATE4 |
2446 | #else | ||
2447 | movups 0x20(INP), IN1 | ||
2448 | movaps IN1, STATE3 | ||
2449 | movups 0x30(INP), IN2 | ||
2450 | movaps IN2, STATE4 | ||
2451 | #endif | ||
697 | call _aesni_dec4 | 2452 | call _aesni_dec4 |
698 | pxor IV, STATE1 | 2453 | pxor IV, STATE1 |
2454 | #ifdef __x86_64__ | ||
699 | pxor IN1, STATE2 | 2455 | pxor IN1, STATE2 |
700 | pxor IN2, STATE3 | 2456 | pxor IN2, STATE3 |
701 | pxor IN3, STATE4 | 2457 | pxor IN3, STATE4 |
702 | movaps IN4, IV | 2458 | movaps IN4, IV |
2459 | #else | ||
2460 | pxor (INP), STATE2 | ||
2461 | pxor 0x10(INP), STATE3 | ||
2462 | pxor IN1, STATE4 | ||
2463 | movaps IN2, IV | ||
2464 | #endif | ||
703 | movups STATE1, (OUTP) | 2465 | movups STATE1, (OUTP) |
704 | movups STATE2, 0x10(OUTP) | 2466 | movups STATE2, 0x10(OUTP) |
705 | movups STATE3, 0x20(OUTP) | 2467 | movups STATE3, 0x20(OUTP) |
@@ -727,8 +2489,15 @@ ENTRY(aesni_cbc_dec) | |||
727 | .Lcbc_dec_ret: | 2489 | .Lcbc_dec_ret: |
728 | movups IV, (IVP) | 2490 | movups IV, (IVP) |
729 | .Lcbc_dec_just_ret: | 2491 | .Lcbc_dec_just_ret: |
2492 | #ifndef __x86_64__ | ||
2493 | popl KLEN | ||
2494 | popl KEYP | ||
2495 | popl LEN | ||
2496 | popl IVP | ||
2497 | #endif | ||
730 | ret | 2498 | ret |
731 | 2499 | ||
2500 | #ifdef __x86_64__ | ||
732 | .align 16 | 2501 | .align 16 |
733 | .Lbswap_mask: | 2502 | .Lbswap_mask: |
734 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | 2503 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 |
@@ -744,6 +2513,7 @@ ENTRY(aesni_cbc_dec) | |||
744 | * INC: == 1, in little endian | 2513 | * INC: == 1, in little endian |
745 | * BSWAP_MASK == endian swapping mask | 2514 | * BSWAP_MASK == endian swapping mask |
746 | */ | 2515 | */ |
2516 | .align 4 | ||
747 | _aesni_inc_init: | 2517 | _aesni_inc_init: |
748 | movaps .Lbswap_mask, BSWAP_MASK | 2518 | movaps .Lbswap_mask, BSWAP_MASK |
749 | movaps IV, CTR | 2519 | movaps IV, CTR |
@@ -768,6 +2538,7 @@ _aesni_inc_init: | |||
768 | * CTR: == output IV, in little endian | 2538 | * CTR: == output IV, in little endian |
769 | * TCTR_LOW: == lower qword of CTR | 2539 | * TCTR_LOW: == lower qword of CTR |
770 | */ | 2540 | */ |
2541 | .align 4 | ||
771 | _aesni_inc: | 2542 | _aesni_inc: |
772 | paddq INC, CTR | 2543 | paddq INC, CTR |
773 | add $1, TCTR_LOW | 2544 | add $1, TCTR_LOW |
@@ -839,3 +2610,4 @@ ENTRY(aesni_ctr_enc) | |||
839 | movups IV, (IVP) | 2610 | movups IV, (IVP) |
840 | .Lctr_enc_just_ret: | 2611 | .Lctr_enc_just_ret: |
841 | ret | 2612 | ret |
2613 | #endif | ||
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2cb3dcc4490a..e1e60c7d5813 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -5,6 +5,14 @@ | |||
5 | * Copyright (C) 2008, Intel Corp. | 5 | * Copyright (C) 2008, Intel Corp. |
6 | * Author: Huang Ying <ying.huang@intel.com> | 6 | * Author: Huang Ying <ying.huang@intel.com> |
7 | * | 7 | * |
8 | * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD | ||
9 | * interface for 64-bit kernels. | ||
10 | * Authors: Adrian Hoban <adrian.hoban@intel.com> | ||
11 | * Gabriele Paoloni <gabriele.paoloni@intel.com> | ||
12 | * Tadeusz Struk (tadeusz.struk@intel.com) | ||
13 | * Aidan O'Mahony (aidan.o.mahony@intel.com) | ||
14 | * Copyright (c) 2010, Intel Corporation. | ||
15 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | 16 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by | 17 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or | 18 | * the Free Software Foundation; either version 2 of the License, or |
@@ -21,6 +29,10 @@ | |||
21 | #include <crypto/ctr.h> | 29 | #include <crypto/ctr.h> |
22 | #include <asm/i387.h> | 30 | #include <asm/i387.h> |
23 | #include <asm/aes.h> | 31 | #include <asm/aes.h> |
32 | #include <crypto/scatterwalk.h> | ||
33 | #include <crypto/internal/aead.h> | ||
34 | #include <linux/workqueue.h> | ||
35 | #include <linux/spinlock.h> | ||
24 | 36 | ||
25 | #if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) | 37 | #if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) |
26 | #define HAS_CTR | 38 | #define HAS_CTR |
@@ -42,8 +54,31 @@ struct async_aes_ctx { | |||
42 | struct cryptd_ablkcipher *cryptd_tfm; | 54 | struct cryptd_ablkcipher *cryptd_tfm; |
43 | }; | 55 | }; |
44 | 56 | ||
45 | #define AESNI_ALIGN 16 | 57 | /* This data is stored at the end of the crypto_tfm struct. |
58 | * It's a type of per "session" data storage location. | ||
59 | * This needs to be 16 byte aligned. | ||
60 | */ | ||
61 | struct aesni_rfc4106_gcm_ctx { | ||
62 | u8 hash_subkey[16]; | ||
63 | struct crypto_aes_ctx aes_key_expanded; | ||
64 | u8 nonce[4]; | ||
65 | struct cryptd_aead *cryptd_tfm; | ||
66 | }; | ||
67 | |||
68 | struct aesni_gcm_set_hash_subkey_result { | ||
69 | int err; | ||
70 | struct completion completion; | ||
71 | }; | ||
72 | |||
73 | struct aesni_hash_subkey_req_data { | ||
74 | u8 iv[16]; | ||
75 | struct aesni_gcm_set_hash_subkey_result result; | ||
76 | struct scatterlist sg; | ||
77 | }; | ||
78 | |||
79 | #define AESNI_ALIGN (16) | ||
46 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) | 80 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) |
81 | #define RFC4106_HASH_SUBKEY_SIZE 16 | ||
47 | 82 | ||
48 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, | 83 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, |
49 | unsigned int key_len); | 84 | unsigned int key_len); |
@@ -59,9 +94,62 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, | |||
59 | const u8 *in, unsigned int len, u8 *iv); | 94 | const u8 *in, unsigned int len, u8 *iv); |
60 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | 95 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, |
61 | const u8 *in, unsigned int len, u8 *iv); | 96 | const u8 *in, unsigned int len, u8 *iv); |
97 | #ifdef CONFIG_X86_64 | ||
62 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | 98 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, |
63 | const u8 *in, unsigned int len, u8 *iv); | 99 | const u8 *in, unsigned int len, u8 *iv); |
64 | 100 | ||
101 | /* asmlinkage void aesni_gcm_enc() | ||
102 | * void *ctx, AES Key schedule. Starts on a 16 byte boundary. | ||
103 | * u8 *out, Ciphertext output. Encrypt in-place is allowed. | ||
104 | * const u8 *in, Plaintext input | ||
105 | * unsigned long plaintext_len, Length of data in bytes for encryption. | ||
106 | * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) | ||
107 | * concatenated with 8 byte Initialisation Vector (from IPSec ESP | ||
108 | * Payload) concatenated with 0x00000001. 16-byte aligned pointer. | ||
109 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
110 | * const u8 *aad, Additional Authentication Data (AAD) | ||
111 | * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this | ||
112 | * is going to be 8 or 12 bytes | ||
113 | * u8 *auth_tag, Authenticated Tag output. | ||
114 | * unsigned long auth_tag_len), Authenticated Tag Length in bytes. | ||
115 | * Valid values are 16 (most likely), 12 or 8. | ||
116 | */ | ||
117 | asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, | ||
118 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
119 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
120 | u8 *auth_tag, unsigned long auth_tag_len); | ||
121 | |||
122 | /* asmlinkage void aesni_gcm_dec() | ||
123 | * void *ctx, AES Key schedule. Starts on a 16 byte boundary. | ||
124 | * u8 *out, Plaintext output. Decrypt in-place is allowed. | ||
125 | * const u8 *in, Ciphertext input | ||
126 | * unsigned long ciphertext_len, Length of data in bytes for decryption. | ||
127 | * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) | ||
128 | * concatenated with 8 byte Initialisation Vector (from IPSec ESP | ||
129 | * Payload) concatenated with 0x00000001. 16-byte aligned pointer. | ||
130 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
131 | * const u8 *aad, Additional Authentication Data (AAD) | ||
132 | * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going | ||
133 | * to be 8 or 12 bytes | ||
134 | * u8 *auth_tag, Authenticated Tag output. | ||
135 | * unsigned long auth_tag_len) Authenticated Tag Length in bytes. | ||
136 | * Valid values are 16 (most likely), 12 or 8. | ||
137 | */ | ||
138 | asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, | ||
139 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
140 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
141 | u8 *auth_tag, unsigned long auth_tag_len); | ||
142 | |||
143 | static inline struct | ||
144 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) | ||
145 | { | ||
146 | return | ||
147 | (struct aesni_rfc4106_gcm_ctx *) | ||
148 | PTR_ALIGN((u8 *) | ||
149 | crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN); | ||
150 | } | ||
151 | #endif | ||
152 | |||
65 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) | 153 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) |
66 | { | 154 | { |
67 | unsigned long addr = (unsigned long)raw_ctx; | 155 | unsigned long addr = (unsigned long)raw_ctx; |
@@ -324,6 +412,7 @@ static struct crypto_alg blk_cbc_alg = { | |||
324 | }, | 412 | }, |
325 | }; | 413 | }; |
326 | 414 | ||
415 | #ifdef CONFIG_X86_64 | ||
327 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, | 416 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, |
328 | struct blkcipher_walk *walk) | 417 | struct blkcipher_walk *walk) |
329 | { | 418 | { |
@@ -389,6 +478,7 @@ static struct crypto_alg blk_ctr_alg = { | |||
389 | }, | 478 | }, |
390 | }, | 479 | }, |
391 | }; | 480 | }; |
481 | #endif | ||
392 | 482 | ||
393 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | 483 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, |
394 | unsigned int key_len) | 484 | unsigned int key_len) |
@@ -536,6 +626,7 @@ static struct crypto_alg ablk_cbc_alg = { | |||
536 | }, | 626 | }, |
537 | }; | 627 | }; |
538 | 628 | ||
629 | #ifdef CONFIG_X86_64 | ||
539 | static int ablk_ctr_init(struct crypto_tfm *tfm) | 630 | static int ablk_ctr_init(struct crypto_tfm *tfm) |
540 | { | 631 | { |
541 | struct cryptd_ablkcipher *cryptd_tfm; | 632 | struct cryptd_ablkcipher *cryptd_tfm; |
@@ -612,6 +703,7 @@ static struct crypto_alg ablk_rfc3686_ctr_alg = { | |||
612 | }, | 703 | }, |
613 | }; | 704 | }; |
614 | #endif | 705 | #endif |
706 | #endif | ||
615 | 707 | ||
616 | #ifdef HAS_LRW | 708 | #ifdef HAS_LRW |
617 | static int ablk_lrw_init(struct crypto_tfm *tfm) | 709 | static int ablk_lrw_init(struct crypto_tfm *tfm) |
@@ -730,6 +822,424 @@ static struct crypto_alg ablk_xts_alg = { | |||
730 | }; | 822 | }; |
731 | #endif | 823 | #endif |
732 | 824 | ||
825 | #ifdef CONFIG_X86_64 | ||
826 | static int rfc4106_init(struct crypto_tfm *tfm) | ||
827 | { | ||
828 | struct cryptd_aead *cryptd_tfm; | ||
829 | struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *) | ||
830 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); | ||
831 | cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); | ||
832 | if (IS_ERR(cryptd_tfm)) | ||
833 | return PTR_ERR(cryptd_tfm); | ||
834 | ctx->cryptd_tfm = cryptd_tfm; | ||
835 | tfm->crt_aead.reqsize = sizeof(struct aead_request) | ||
836 | + crypto_aead_reqsize(&cryptd_tfm->base); | ||
837 | return 0; | ||
838 | } | ||
839 | |||
840 | static void rfc4106_exit(struct crypto_tfm *tfm) | ||
841 | { | ||
842 | struct aesni_rfc4106_gcm_ctx *ctx = | ||
843 | (struct aesni_rfc4106_gcm_ctx *) | ||
844 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); | ||
845 | if (!IS_ERR(ctx->cryptd_tfm)) | ||
846 | cryptd_free_aead(ctx->cryptd_tfm); | ||
847 | return; | ||
848 | } | ||
849 | |||
850 | static void | ||
851 | rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err) | ||
852 | { | ||
853 | struct aesni_gcm_set_hash_subkey_result *result = req->data; | ||
854 | |||
855 | if (err == -EINPROGRESS) | ||
856 | return; | ||
857 | result->err = err; | ||
858 | complete(&result->completion); | ||
859 | } | ||
860 | |||
861 | static int | ||
862 | rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) | ||
863 | { | ||
864 | struct crypto_ablkcipher *ctr_tfm; | ||
865 | struct ablkcipher_request *req; | ||
866 | int ret = -EINVAL; | ||
867 | struct aesni_hash_subkey_req_data *req_data; | ||
868 | |||
869 | ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0); | ||
870 | if (IS_ERR(ctr_tfm)) | ||
871 | return PTR_ERR(ctr_tfm); | ||
872 | |||
873 | crypto_ablkcipher_clear_flags(ctr_tfm, ~0); | ||
874 | |||
875 | ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); | ||
876 | if (ret) { | ||
877 | crypto_free_ablkcipher(ctr_tfm); | ||
878 | return ret; | ||
879 | } | ||
880 | |||
881 | req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); | ||
882 | if (!req) { | ||
883 | crypto_free_ablkcipher(ctr_tfm); | ||
884 | return -EINVAL; | ||
885 | } | ||
886 | |||
887 | req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); | ||
888 | if (!req_data) { | ||
889 | crypto_free_ablkcipher(ctr_tfm); | ||
890 | return -ENOMEM; | ||
891 | } | ||
892 | memset(req_data->iv, 0, sizeof(req_data->iv)); | ||
893 | |||
894 | /* Clear the data in the hash sub key container to zero.*/ | ||
895 | /* We want to cipher all zeros to create the hash sub key. */ | ||
896 | memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); | ||
897 | |||
898 | init_completion(&req_data->result.completion); | ||
899 | sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE); | ||
900 | ablkcipher_request_set_tfm(req, ctr_tfm); | ||
901 | ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | | ||
902 | CRYPTO_TFM_REQ_MAY_BACKLOG, | ||
903 | rfc4106_set_hash_subkey_done, | ||
904 | &req_data->result); | ||
905 | |||
906 | ablkcipher_request_set_crypt(req, &req_data->sg, | ||
907 | &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv); | ||
908 | |||
909 | ret = crypto_ablkcipher_encrypt(req); | ||
910 | if (ret == -EINPROGRESS || ret == -EBUSY) { | ||
911 | ret = wait_for_completion_interruptible | ||
912 | (&req_data->result.completion); | ||
913 | if (!ret) | ||
914 | ret = req_data->result.err; | ||
915 | } | ||
916 | ablkcipher_request_free(req); | ||
917 | kfree(req_data); | ||
918 | crypto_free_ablkcipher(ctr_tfm); | ||
919 | return ret; | ||
920 | } | ||
921 | |||
922 | static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | ||
923 | unsigned int key_len) | ||
924 | { | ||
925 | int ret = 0; | ||
926 | struct crypto_tfm *tfm = crypto_aead_tfm(parent); | ||
927 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | ||
928 | u8 *new_key_mem = NULL; | ||
929 | |||
930 | if (key_len < 4) { | ||
931 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
932 | return -EINVAL; | ||
933 | } | ||
934 | /*Account for 4 byte nonce at the end.*/ | ||
935 | key_len -= 4; | ||
936 | if (key_len != AES_KEYSIZE_128) { | ||
937 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
938 | return -EINVAL; | ||
939 | } | ||
940 | |||
941 | memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); | ||
942 | /*This must be on a 16 byte boundary!*/ | ||
943 | if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN) | ||
944 | return -EINVAL; | ||
945 | |||
946 | if ((unsigned long)key % AESNI_ALIGN) { | ||
947 | /*key is not aligned: use an auxuliar aligned pointer*/ | ||
948 | new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL); | ||
949 | if (!new_key_mem) | ||
950 | return -ENOMEM; | ||
951 | |||
952 | new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN); | ||
953 | memcpy(new_key_mem, key, key_len); | ||
954 | key = new_key_mem; | ||
955 | } | ||
956 | |||
957 | if (!irq_fpu_usable()) | ||
958 | ret = crypto_aes_expand_key(&(ctx->aes_key_expanded), | ||
959 | key, key_len); | ||
960 | else { | ||
961 | kernel_fpu_begin(); | ||
962 | ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len); | ||
963 | kernel_fpu_end(); | ||
964 | } | ||
965 | /*This must be on a 16 byte boundary!*/ | ||
966 | if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) { | ||
967 | ret = -EINVAL; | ||
968 | goto exit; | ||
969 | } | ||
970 | ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); | ||
971 | exit: | ||
972 | kfree(new_key_mem); | ||
973 | return ret; | ||
974 | } | ||
975 | |||
976 | /* This is the Integrity Check Value (aka the authentication tag length and can | ||
977 | * be 8, 12 or 16 bytes long. */ | ||
978 | static int rfc4106_set_authsize(struct crypto_aead *parent, | ||
979 | unsigned int authsize) | ||
980 | { | ||
981 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | ||
982 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
983 | |||
984 | switch (authsize) { | ||
985 | case 8: | ||
986 | case 12: | ||
987 | case 16: | ||
988 | break; | ||
989 | default: | ||
990 | return -EINVAL; | ||
991 | } | ||
992 | crypto_aead_crt(parent)->authsize = authsize; | ||
993 | crypto_aead_crt(cryptd_child)->authsize = authsize; | ||
994 | return 0; | ||
995 | } | ||
996 | |||
997 | static int rfc4106_encrypt(struct aead_request *req) | ||
998 | { | ||
999 | int ret; | ||
1000 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1001 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1002 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
1003 | |||
1004 | if (!irq_fpu_usable()) { | ||
1005 | struct aead_request *cryptd_req = | ||
1006 | (struct aead_request *) aead_request_ctx(req); | ||
1007 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1008 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1009 | return crypto_aead_encrypt(cryptd_req); | ||
1010 | } else { | ||
1011 | kernel_fpu_begin(); | ||
1012 | ret = cryptd_child->base.crt_aead.encrypt(req); | ||
1013 | kernel_fpu_end(); | ||
1014 | return ret; | ||
1015 | } | ||
1016 | } | ||
1017 | |||
1018 | static int rfc4106_decrypt(struct aead_request *req) | ||
1019 | { | ||
1020 | int ret; | ||
1021 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1022 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1023 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
1024 | |||
1025 | if (!irq_fpu_usable()) { | ||
1026 | struct aead_request *cryptd_req = | ||
1027 | (struct aead_request *) aead_request_ctx(req); | ||
1028 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1029 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1030 | return crypto_aead_decrypt(cryptd_req); | ||
1031 | } else { | ||
1032 | kernel_fpu_begin(); | ||
1033 | ret = cryptd_child->base.crt_aead.decrypt(req); | ||
1034 | kernel_fpu_end(); | ||
1035 | return ret; | ||
1036 | } | ||
1037 | } | ||
1038 | |||
1039 | static struct crypto_alg rfc4106_alg = { | ||
1040 | .cra_name = "rfc4106(gcm(aes))", | ||
1041 | .cra_driver_name = "rfc4106-gcm-aesni", | ||
1042 | .cra_priority = 400, | ||
1043 | .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, | ||
1044 | .cra_blocksize = 1, | ||
1045 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, | ||
1046 | .cra_alignmask = 0, | ||
1047 | .cra_type = &crypto_nivaead_type, | ||
1048 | .cra_module = THIS_MODULE, | ||
1049 | .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list), | ||
1050 | .cra_init = rfc4106_init, | ||
1051 | .cra_exit = rfc4106_exit, | ||
1052 | .cra_u = { | ||
1053 | .aead = { | ||
1054 | .setkey = rfc4106_set_key, | ||
1055 | .setauthsize = rfc4106_set_authsize, | ||
1056 | .encrypt = rfc4106_encrypt, | ||
1057 | .decrypt = rfc4106_decrypt, | ||
1058 | .geniv = "seqiv", | ||
1059 | .ivsize = 8, | ||
1060 | .maxauthsize = 16, | ||
1061 | }, | ||
1062 | }, | ||
1063 | }; | ||
1064 | |||
1065 | static int __driver_rfc4106_encrypt(struct aead_request *req) | ||
1066 | { | ||
1067 | u8 one_entry_in_sg = 0; | ||
1068 | u8 *src, *dst, *assoc; | ||
1069 | __be32 counter = cpu_to_be32(1); | ||
1070 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1071 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1072 | void *aes_ctx = &(ctx->aes_key_expanded); | ||
1073 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); | ||
1074 | u8 iv_tab[16+AESNI_ALIGN]; | ||
1075 | u8* iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN); | ||
1076 | struct scatter_walk src_sg_walk; | ||
1077 | struct scatter_walk assoc_sg_walk; | ||
1078 | struct scatter_walk dst_sg_walk; | ||
1079 | unsigned int i; | ||
1080 | |||
1081 | /* Assuming we are supporting rfc4106 64-bit extended */ | ||
1082 | /* sequence numbers We need to have the AAD length equal */ | ||
1083 | /* to 8 or 12 bytes */ | ||
1084 | if (unlikely(req->assoclen != 8 && req->assoclen != 12)) | ||
1085 | return -EINVAL; | ||
1086 | /* IV below built */ | ||
1087 | for (i = 0; i < 4; i++) | ||
1088 | *(iv+i) = ctx->nonce[i]; | ||
1089 | for (i = 0; i < 8; i++) | ||
1090 | *(iv+4+i) = req->iv[i]; | ||
1091 | *((__be32 *)(iv+12)) = counter; | ||
1092 | |||
1093 | if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { | ||
1094 | one_entry_in_sg = 1; | ||
1095 | scatterwalk_start(&src_sg_walk, req->src); | ||
1096 | scatterwalk_start(&assoc_sg_walk, req->assoc); | ||
1097 | src = scatterwalk_map(&src_sg_walk, 0); | ||
1098 | assoc = scatterwalk_map(&assoc_sg_walk, 0); | ||
1099 | dst = src; | ||
1100 | if (unlikely(req->src != req->dst)) { | ||
1101 | scatterwalk_start(&dst_sg_walk, req->dst); | ||
1102 | dst = scatterwalk_map(&dst_sg_walk, 0); | ||
1103 | } | ||
1104 | |||
1105 | } else { | ||
1106 | /* Allocate memory for src, dst, assoc */ | ||
1107 | src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen, | ||
1108 | GFP_ATOMIC); | ||
1109 | if (unlikely(!src)) | ||
1110 | return -ENOMEM; | ||
1111 | assoc = (src + req->cryptlen + auth_tag_len); | ||
1112 | scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); | ||
1113 | scatterwalk_map_and_copy(assoc, req->assoc, 0, | ||
1114 | req->assoclen, 0); | ||
1115 | dst = src; | ||
1116 | } | ||
1117 | |||
1118 | aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, | ||
1119 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst | ||
1120 | + ((unsigned long)req->cryptlen), auth_tag_len); | ||
1121 | |||
1122 | /* The authTag (aka the Integrity Check Value) needs to be written | ||
1123 | * back to the packet. */ | ||
1124 | if (one_entry_in_sg) { | ||
1125 | if (unlikely(req->src != req->dst)) { | ||
1126 | scatterwalk_unmap(dst, 0); | ||
1127 | scatterwalk_done(&dst_sg_walk, 0, 0); | ||
1128 | } | ||
1129 | scatterwalk_unmap(src, 0); | ||
1130 | scatterwalk_unmap(assoc, 0); | ||
1131 | scatterwalk_done(&src_sg_walk, 0, 0); | ||
1132 | scatterwalk_done(&assoc_sg_walk, 0, 0); | ||
1133 | } else { | ||
1134 | scatterwalk_map_and_copy(dst, req->dst, 0, | ||
1135 | req->cryptlen + auth_tag_len, 1); | ||
1136 | kfree(src); | ||
1137 | } | ||
1138 | return 0; | ||
1139 | } | ||
1140 | |||
1141 | static int __driver_rfc4106_decrypt(struct aead_request *req) | ||
1142 | { | ||
1143 | u8 one_entry_in_sg = 0; | ||
1144 | u8 *src, *dst, *assoc; | ||
1145 | unsigned long tempCipherLen = 0; | ||
1146 | __be32 counter = cpu_to_be32(1); | ||
1147 | int retval = 0; | ||
1148 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1149 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1150 | void *aes_ctx = &(ctx->aes_key_expanded); | ||
1151 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); | ||
1152 | u8 iv_and_authTag[32+AESNI_ALIGN]; | ||
1153 | u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN); | ||
1154 | u8 *authTag = iv + 16; | ||
1155 | struct scatter_walk src_sg_walk; | ||
1156 | struct scatter_walk assoc_sg_walk; | ||
1157 | struct scatter_walk dst_sg_walk; | ||
1158 | unsigned int i; | ||
1159 | |||
1160 | if (unlikely((req->cryptlen < auth_tag_len) || | ||
1161 | (req->assoclen != 8 && req->assoclen != 12))) | ||
1162 | return -EINVAL; | ||
1163 | /* Assuming we are supporting rfc4106 64-bit extended */ | ||
1164 | /* sequence numbers We need to have the AAD length */ | ||
1165 | /* equal to 8 or 12 bytes */ | ||
1166 | |||
1167 | tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); | ||
1168 | /* IV below built */ | ||
1169 | for (i = 0; i < 4; i++) | ||
1170 | *(iv+i) = ctx->nonce[i]; | ||
1171 | for (i = 0; i < 8; i++) | ||
1172 | *(iv+4+i) = req->iv[i]; | ||
1173 | *((__be32 *)(iv+12)) = counter; | ||
1174 | |||
1175 | if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { | ||
1176 | one_entry_in_sg = 1; | ||
1177 | scatterwalk_start(&src_sg_walk, req->src); | ||
1178 | scatterwalk_start(&assoc_sg_walk, req->assoc); | ||
1179 | src = scatterwalk_map(&src_sg_walk, 0); | ||
1180 | assoc = scatterwalk_map(&assoc_sg_walk, 0); | ||
1181 | dst = src; | ||
1182 | if (unlikely(req->src != req->dst)) { | ||
1183 | scatterwalk_start(&dst_sg_walk, req->dst); | ||
1184 | dst = scatterwalk_map(&dst_sg_walk, 0); | ||
1185 | } | ||
1186 | |||
1187 | } else { | ||
1188 | /* Allocate memory for src, dst, assoc */ | ||
1189 | src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); | ||
1190 | if (!src) | ||
1191 | return -ENOMEM; | ||
1192 | assoc = (src + req->cryptlen + auth_tag_len); | ||
1193 | scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); | ||
1194 | scatterwalk_map_and_copy(assoc, req->assoc, 0, | ||
1195 | req->assoclen, 0); | ||
1196 | dst = src; | ||
1197 | } | ||
1198 | |||
1199 | aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, | ||
1200 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, | ||
1201 | authTag, auth_tag_len); | ||
1202 | |||
1203 | /* Compare generated tag with passed in tag. */ | ||
1204 | retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? | ||
1205 | -EBADMSG : 0; | ||
1206 | |||
1207 | if (one_entry_in_sg) { | ||
1208 | if (unlikely(req->src != req->dst)) { | ||
1209 | scatterwalk_unmap(dst, 0); | ||
1210 | scatterwalk_done(&dst_sg_walk, 0, 0); | ||
1211 | } | ||
1212 | scatterwalk_unmap(src, 0); | ||
1213 | scatterwalk_unmap(assoc, 0); | ||
1214 | scatterwalk_done(&src_sg_walk, 0, 0); | ||
1215 | scatterwalk_done(&assoc_sg_walk, 0, 0); | ||
1216 | } else { | ||
1217 | scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); | ||
1218 | kfree(src); | ||
1219 | } | ||
1220 | return retval; | ||
1221 | } | ||
1222 | |||
1223 | static struct crypto_alg __rfc4106_alg = { | ||
1224 | .cra_name = "__gcm-aes-aesni", | ||
1225 | .cra_driver_name = "__driver-gcm-aes-aesni", | ||
1226 | .cra_priority = 0, | ||
1227 | .cra_flags = CRYPTO_ALG_TYPE_AEAD, | ||
1228 | .cra_blocksize = 1, | ||
1229 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, | ||
1230 | .cra_alignmask = 0, | ||
1231 | .cra_type = &crypto_aead_type, | ||
1232 | .cra_module = THIS_MODULE, | ||
1233 | .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list), | ||
1234 | .cra_u = { | ||
1235 | .aead = { | ||
1236 | .encrypt = __driver_rfc4106_encrypt, | ||
1237 | .decrypt = __driver_rfc4106_decrypt, | ||
1238 | }, | ||
1239 | }, | ||
1240 | }; | ||
1241 | #endif | ||
1242 | |||
733 | static int __init aesni_init(void) | 1243 | static int __init aesni_init(void) |
734 | { | 1244 | { |
735 | int err; | 1245 | int err; |
@@ -738,6 +1248,7 @@ static int __init aesni_init(void) | |||
738 | printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); | 1248 | printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); |
739 | return -ENODEV; | 1249 | return -ENODEV; |
740 | } | 1250 | } |
1251 | |||
741 | if ((err = crypto_register_alg(&aesni_alg))) | 1252 | if ((err = crypto_register_alg(&aesni_alg))) |
742 | goto aes_err; | 1253 | goto aes_err; |
743 | if ((err = crypto_register_alg(&__aesni_alg))) | 1254 | if ((err = crypto_register_alg(&__aesni_alg))) |
@@ -746,18 +1257,24 @@ static int __init aesni_init(void) | |||
746 | goto blk_ecb_err; | 1257 | goto blk_ecb_err; |
747 | if ((err = crypto_register_alg(&blk_cbc_alg))) | 1258 | if ((err = crypto_register_alg(&blk_cbc_alg))) |
748 | goto blk_cbc_err; | 1259 | goto blk_cbc_err; |
749 | if ((err = crypto_register_alg(&blk_ctr_alg))) | ||
750 | goto blk_ctr_err; | ||
751 | if ((err = crypto_register_alg(&ablk_ecb_alg))) | 1260 | if ((err = crypto_register_alg(&ablk_ecb_alg))) |
752 | goto ablk_ecb_err; | 1261 | goto ablk_ecb_err; |
753 | if ((err = crypto_register_alg(&ablk_cbc_alg))) | 1262 | if ((err = crypto_register_alg(&ablk_cbc_alg))) |
754 | goto ablk_cbc_err; | 1263 | goto ablk_cbc_err; |
1264 | #ifdef CONFIG_X86_64 | ||
1265 | if ((err = crypto_register_alg(&blk_ctr_alg))) | ||
1266 | goto blk_ctr_err; | ||
755 | if ((err = crypto_register_alg(&ablk_ctr_alg))) | 1267 | if ((err = crypto_register_alg(&ablk_ctr_alg))) |
756 | goto ablk_ctr_err; | 1268 | goto ablk_ctr_err; |
1269 | if ((err = crypto_register_alg(&__rfc4106_alg))) | ||
1270 | goto __aead_gcm_err; | ||
1271 | if ((err = crypto_register_alg(&rfc4106_alg))) | ||
1272 | goto aead_gcm_err; | ||
757 | #ifdef HAS_CTR | 1273 | #ifdef HAS_CTR |
758 | if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) | 1274 | if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) |
759 | goto ablk_rfc3686_ctr_err; | 1275 | goto ablk_rfc3686_ctr_err; |
760 | #endif | 1276 | #endif |
1277 | #endif | ||
761 | #ifdef HAS_LRW | 1278 | #ifdef HAS_LRW |
762 | if ((err = crypto_register_alg(&ablk_lrw_alg))) | 1279 | if ((err = crypto_register_alg(&ablk_lrw_alg))) |
763 | goto ablk_lrw_err; | 1280 | goto ablk_lrw_err; |
@@ -770,7 +1287,6 @@ static int __init aesni_init(void) | |||
770 | if ((err = crypto_register_alg(&ablk_xts_alg))) | 1287 | if ((err = crypto_register_alg(&ablk_xts_alg))) |
771 | goto ablk_xts_err; | 1288 | goto ablk_xts_err; |
772 | #endif | 1289 | #endif |
773 | |||
774 | return err; | 1290 | return err; |
775 | 1291 | ||
776 | #ifdef HAS_XTS | 1292 | #ifdef HAS_XTS |
@@ -784,18 +1300,24 @@ ablk_pcbc_err: | |||
784 | crypto_unregister_alg(&ablk_lrw_alg); | 1300 | crypto_unregister_alg(&ablk_lrw_alg); |
785 | ablk_lrw_err: | 1301 | ablk_lrw_err: |
786 | #endif | 1302 | #endif |
1303 | #ifdef CONFIG_X86_64 | ||
787 | #ifdef HAS_CTR | 1304 | #ifdef HAS_CTR |
788 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | 1305 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); |
789 | ablk_rfc3686_ctr_err: | 1306 | ablk_rfc3686_ctr_err: |
790 | #endif | 1307 | #endif |
1308 | crypto_unregister_alg(&rfc4106_alg); | ||
1309 | aead_gcm_err: | ||
1310 | crypto_unregister_alg(&__rfc4106_alg); | ||
1311 | __aead_gcm_err: | ||
791 | crypto_unregister_alg(&ablk_ctr_alg); | 1312 | crypto_unregister_alg(&ablk_ctr_alg); |
792 | ablk_ctr_err: | 1313 | ablk_ctr_err: |
1314 | crypto_unregister_alg(&blk_ctr_alg); | ||
1315 | blk_ctr_err: | ||
1316 | #endif | ||
793 | crypto_unregister_alg(&ablk_cbc_alg); | 1317 | crypto_unregister_alg(&ablk_cbc_alg); |
794 | ablk_cbc_err: | 1318 | ablk_cbc_err: |
795 | crypto_unregister_alg(&ablk_ecb_alg); | 1319 | crypto_unregister_alg(&ablk_ecb_alg); |
796 | ablk_ecb_err: | 1320 | ablk_ecb_err: |
797 | crypto_unregister_alg(&blk_ctr_alg); | ||
798 | blk_ctr_err: | ||
799 | crypto_unregister_alg(&blk_cbc_alg); | 1321 | crypto_unregister_alg(&blk_cbc_alg); |
800 | blk_cbc_err: | 1322 | blk_cbc_err: |
801 | crypto_unregister_alg(&blk_ecb_alg); | 1323 | crypto_unregister_alg(&blk_ecb_alg); |
@@ -818,13 +1340,17 @@ static void __exit aesni_exit(void) | |||
818 | #ifdef HAS_LRW | 1340 | #ifdef HAS_LRW |
819 | crypto_unregister_alg(&ablk_lrw_alg); | 1341 | crypto_unregister_alg(&ablk_lrw_alg); |
820 | #endif | 1342 | #endif |
1343 | #ifdef CONFIG_X86_64 | ||
821 | #ifdef HAS_CTR | 1344 | #ifdef HAS_CTR |
822 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | 1345 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); |
823 | #endif | 1346 | #endif |
1347 | crypto_unregister_alg(&rfc4106_alg); | ||
1348 | crypto_unregister_alg(&__rfc4106_alg); | ||
824 | crypto_unregister_alg(&ablk_ctr_alg); | 1349 | crypto_unregister_alg(&ablk_ctr_alg); |
1350 | crypto_unregister_alg(&blk_ctr_alg); | ||
1351 | #endif | ||
825 | crypto_unregister_alg(&ablk_cbc_alg); | 1352 | crypto_unregister_alg(&ablk_cbc_alg); |
826 | crypto_unregister_alg(&ablk_ecb_alg); | 1353 | crypto_unregister_alg(&ablk_ecb_alg); |
827 | crypto_unregister_alg(&blk_ctr_alg); | ||
828 | crypto_unregister_alg(&blk_cbc_alg); | 1354 | crypto_unregister_alg(&blk_cbc_alg); |
829 | crypto_unregister_alg(&blk_ecb_alg); | 1355 | crypto_unregister_alg(&blk_ecb_alg); |
830 | crypto_unregister_alg(&__aesni_alg); | 1356 | crypto_unregister_alg(&__aesni_alg); |