Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--  arch/x86/crypto/Makefile                   |    4
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S          | 1835
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c         |  557
-rw-r--r--  arch/x86/crypto/fpu.c                      |   10
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_glue.c |    1
5 files changed, 2359 insertions, 48 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 1a58ad89fdf7..c04f1b7a9139 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,8 +2,6 @@
 # Arch-specific CryptoAPI modules.
 #
 
-obj-$(CONFIG_CRYPTO_FPU) += fpu.o
-
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -24,6 +22,6 @@ aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index ff16756a51c1..be6d9e365a80 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -9,6 +9,20 @@
  * Vinodh Gopal <vinodh.gopal@intel.com>
  * Kahraman Akdemir
  *
+ * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
+ * interface for 64-bit kernels.
+ * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
+ *          Aidan O'Mahony (aidan.o.mahony@intel.com)
+ *          Adrian Hoban <adrian.hoban@intel.com>
+ *          James Guilford (james.guilford@intel.com)
+ *          Gabriele Paoloni <gabriele.paoloni@intel.com>
+ *          Tadeusz Struk (tadeusz.struk@intel.com)
+ *          Wajdi Feghali (wajdi.k.feghali@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * Ported x86_64 version to x86:
+ * Author: Mathias Krause <minipli@googlemail.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -18,8 +32,62 @@
 #include <linux/linkage.h>
 #include <asm/inst.h>
 
+#ifdef __x86_64__
+.data
+POLY:        .octa 0xC2000000000000000000000000000001
+TWOONE:      .octa 0x00000001000000000000000000000001
+
+# order of these constants should not change.
+# more specifically, ALL_F should follow SHIFT_MASK,
+# and ZERO should follow ALL_F
+
+SHUF_MASK:   .octa 0x000102030405060708090A0B0C0D0E0F
+MASK1:       .octa 0x0000000000000000ffffffffffffffff
+MASK2:       .octa 0xffffffffffffffff0000000000000000
+SHIFT_MASK:  .octa 0x0f0e0d0c0b0a09080706050403020100
+ALL_F:       .octa 0xffffffffffffffffffffffffffffffff
+ZERO:        .octa 0x00000000000000000000000000000000
+ONE:         .octa 0x00000000000000000000000000000001
+F_MIN_MASK:  .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
+dec:         .octa 0x1
+enc:         .octa 0x2
+
+
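
A minimal C model of the ordering constraint stated above (illustrative only,
not part of the patch): the partial-block code in the GCM routines below reads
16 bytes starting at SHIFT_MASK + (16 - r13) as a PSHUFB mask, then applies
the fixed displacement ALL_F-SHIFT_MASK to that same pointer, so the second
load must yield r13 0xff bytes followed by zeroes. That only works if ALL_F
follows SHIFT_MASK and ZERO follows ALL_F:

    /* layout model of the three adjacent constants */
    static const unsigned char tab[48] = {
            0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,    /* SHIFT_MASK */
            0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
            0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,    /* ALL_F */
            0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,    /* ZERO */
            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    };

    /* r13 = length of the final partial block, 1..15 */
    static const unsigned char *shuffle_mask(unsigned int r13)
    {
            return tab + (16 - r13);        /* lea SHIFT_MASK+16; sub %r13 */
    }

    static const unsigned char *byte_mask(unsigned int r13)
    {
            return tab + 16 + (16 - r13);   /* same pointer + (ALL_F-SHIFT_MASK) */
    }
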
 .text
 
+
+#define STACK_OFFSET    8*3
+#define HashKey         16*0    // store HashKey <<1 mod poly here
+#define HashKey_2       16*1    // store HashKey^2 <<1 mod poly here
+#define HashKey_3       16*2    // store HashKey^3 <<1 mod poly here
+#define HashKey_4       16*3    // store HashKey^4 <<1 mod poly here
+#define HashKey_k       16*4    // store XOR of High 64 bits and Low 64
+                                // bits of HashKey <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define HashKey_2_k     16*5    // store XOR of High 64 bits and Low 64
+                                // bits of HashKey^2 <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define HashKey_3_k     16*6    // store XOR of High 64 bits and Low 64
+                                // bits of HashKey^3 <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define HashKey_4_k     16*7    // store XOR of High 64 bits and Low 64
+                                // bits of HashKey^4 <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define VARIABLE_OFFSET 16*8
+
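
Read as a C struct, the scratch area these offsets describe looks like the
sketch below (under the assumption that %rsp points at HashKey after the
64-byte alignment done in the entry code; the struct and field names are
hypothetical, u8 is the kernel type):

    struct gcm_hash_scratch {           /* addressed from %rsp */
            u8 hash_key[16];            /* HashKey   << 1 mod poly */
            u8 hash_key_2[16];          /* HashKey^2 << 1 mod poly */
            u8 hash_key_3[16];          /* HashKey^3 << 1 mod poly */
            u8 hash_key_4[16];          /* HashKey^4 << 1 mod poly */
            u8 hash_key_k[16];          /* hi64 ^ lo64 of HashKey   (Karatsuba) */
            u8 hash_key_2_k[16];        /* hi64 ^ lo64 of HashKey^2 (Karatsuba) */
            u8 hash_key_3_k[16];        /* hi64 ^ lo64 of HashKey^3 (Karatsuba) */
            u8 hash_key_4_k[16];        /* hi64 ^ lo64 of HashKey^4 (Karatsuba) */
    };                                  /* sizeof == 16*8 == VARIABLE_OFFSET */
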
+#define arg1 rdi
+#define arg2 rsi
+#define arg3 rdx
+#define arg4 rcx
+#define arg5 r8
+#define arg6 r9
+#define arg7 STACK_OFFSET+8(%r14)
+#define arg8 STACK_OFFSET+16(%r14)
+#define arg9 STACK_OFFSET+24(%r14)
+#define arg10 STACK_OFFSET+32(%r14)
+#endif
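
The arg1-arg6 names follow the x86_64 SysV calling convention; arg7-arg10
live on the caller's stack and are reached through %r14, which the GCM entry
points load with the incoming %rsp after three pushes (hence
STACK_OFFSET = 8*3). Mapped onto the aesni_gcm_dec prototype documented
further down, a sketch:

    /* args 1-6 arrive in registers, args 7-10 on the caller's stack */
    void aesni_gcm_dec(void *aes_ctx,       /* arg1: %rdi  */
                       u8 *out,             /* arg2: %rsi  */
                       const u8 *in,        /* arg3: %rdx  */
                       u64 plaintext_len,   /* arg4: %rcx  */
                       u8 *iv,              /* arg5: %r8   */
                       u8 *hash_subkey,     /* arg6: %r9   */
                       const u8 *aad,       /* arg7:  STACK_OFFSET+8(%r14)  */
                       u64 aad_len,         /* arg8:  STACK_OFFSET+16(%r14) */
                       u8 *auth_tag,        /* arg9:  STACK_OFFSET+24(%r14) */
                       u64 auth_tag_len);   /* arg10: STACK_OFFSET+32(%r14) */
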
+
+
 #define STATE1 %xmm0
 #define STATE2 %xmm4
 #define STATE3 %xmm5
@@ -32,12 +100,16 @@
 #define IN IN1
 #define KEY %xmm2
 #define IV %xmm3
+
 #define BSWAP_MASK %xmm10
 #define CTR %xmm11
 #define INC %xmm12
 
+#ifdef __x86_64__
+#define AREG %rax
 #define KEYP %rdi
 #define OUTP %rsi
+#define UKEYP OUTP
 #define INP %rdx
 #define LEN %rcx
 #define IVP %r8
@@ -46,6 +118,1591 @@
 #define TKEYP T1
 #define T2 %r11
 #define TCTR_LOW T2
+#else
+#define AREG %eax
+#define KEYP %edi
+#define OUTP AREG
+#define UKEYP OUTP
+#define INP %edx
+#define LEN %esi
+#define IVP %ebp
+#define KLEN %ebx
+#define T1 %ecx
+#define TKEYP T1
+#endif
+
+
+
+#ifdef __x86_64__
+/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
+*
+*
+* Input: A and B (128-bits each, bit-reflected)
+* Output: C = A*B*x mod poly, (i.e. >>1)
+* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
+* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
+*
+*/
+.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
+        movdqa \GH, \TMP1
+        pshufd $78, \GH, \TMP2
+        pshufd $78, \HK, \TMP3
+        pxor \GH, \TMP2                 # TMP2 = a1+a0
+        pxor \HK, \TMP3                 # TMP3 = b1+b0
+        PCLMULQDQ 0x11, \HK, \TMP1      # TMP1 = a1*b1
+        PCLMULQDQ 0x00, \HK, \GH        # GH = a0*b0
+        PCLMULQDQ 0x00, \TMP3, \TMP2    # TMP2 = (a0+a1)*(b1+b0)
+        pxor \GH, \TMP2
+        pxor \TMP1, \TMP2               # TMP2 = a1*b0 + a0*b1 (middle terms)
+        movdqa \TMP2, \TMP3
+        pslldq $8, \TMP3                # left shift TMP3 2 DWs
+        psrldq $8, \TMP2                # right shift TMP2 2 DWs
+        pxor \TMP3, \GH
+        pxor \TMP2, \TMP1               # TMP1:GH holds the result of GH*HK
+
+        # first phase of the reduction
+
+        movdqa \GH, \TMP2
+        movdqa \GH, \TMP3
+        movdqa \GH, \TMP4               # copy GH into TMP2, TMP3 and TMP4
+                                        # in order to perform
+                                        # independent shifts
+        pslld $31, \TMP2                # packed left shift <<31
+        pslld $30, \TMP3                # packed left shift <<30
+        pslld $25, \TMP4                # packed left shift <<25
+        pxor \TMP3, \TMP2               # xor the shifted versions
+        pxor \TMP4, \TMP2
+        movdqa \TMP2, \TMP5
+        psrldq $4, \TMP5                # right shift TMP5 1 DW
+        pslldq $12, \TMP2               # left shift TMP2 3 DWs
+        pxor \TMP2, \GH
+
+        # second phase of the reduction
+
+        movdqa \GH, \TMP2               # copy GH into TMP2, TMP3 and TMP4
+                                        # in order to perform
+                                        # independent shifts
+        movdqa \GH, \TMP3
+        movdqa \GH, \TMP4
+        psrld $1, \TMP2                 # packed right shift >>1
+        psrld $2, \TMP3                 # packed right shift >>2
+        psrld $7, \TMP4                 # packed right shift >>7
+        pxor \TMP3, \TMP2               # xor the shifted versions
+        pxor \TMP4, \TMP2
+        pxor \TMP5, \TMP2
+        pxor \TMP2, \GH
+        pxor \TMP1, \GH                 # result is in GH
+.endm
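
For reference, the same field multiplication written out in C using the
shift-and-conditionally-reduce method from the GCM spec rather than the
PCLMULQDQ/Karatsuba path above. A sketch only: be128_t is a hand-rolled
type whose hi quadword holds bytes 0-7 of the GCM bit string, and the macro
reaches the same math by working on byte-reflected data with a pre-shifted
hash key.

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } be128_t;

    static be128_t ghash_mul(be128_t x, be128_t y)
    {
            be128_t z = { 0, 0 }, v = y;

            for (int i = 0; i < 128; i++) {
                    /* bit i of x, MSB-first across hi:lo */
                    uint64_t bit = (i < 64) ? (x.hi >> (63 - i)) & 1
                                            : (x.lo >> (127 - i)) & 1;
                    if (bit) {
                            z.hi ^= v.hi;
                            z.lo ^= v.lo;
                    }
                    /* v = v * x: shift right one bit, fold in the poly
                     * x^128 + x^127 + x^126 + x^121 + 1 on carry-out */
                    uint64_t carry = v.lo & 1;
                    v.lo = (v.lo >> 1) | (v.hi << 63);
                    v.hi >>= 1;
                    if (carry)
                            v.hi ^= 0xe100000000000000ULL; /* 11100001 || 0^120 */
            }
            return z;
    }
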
+
+/*
+* if a = number of total plaintext bytes
+* b = floor(a/16)
+* num_initial_blocks = b mod 4
+* decrypt the initial num_initial_blocks blocks and apply ghash on
+* the ciphertext
+* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* are clobbered
+* %arg1, %arg2, %arg3, %r14 are used as pointers only, not modified
+*/
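
The same block count in C, for orientation (a sketch; this mirrors the
"and $-16" / "and $(3<<4)" dispatch in the aesni_gcm_dec and aesni_gcm_enc
bodies further down):

    static unsigned int num_initial_blocks(unsigned long long plaintext_len)
    {
            unsigned long long full = plaintext_len & ~15ULL; /* whole 16-byte blocks */
            return (full >> 4) & 3;                           /* floor(len/16) mod 4  */
    }
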
+
+
+.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+        mov arg7, %r10                  # %r10 = AAD
+        mov arg8, %r12                  # %r12 = aadLen
+        mov %r12, %r11
+        pxor %xmm\i, %xmm\i
+_get_AAD_loop\num_initial_blocks\operation:
+        movd (%r10), \TMP1
+        pslldq $12, \TMP1
+        psrldq $4, %xmm\i
+        pxor \TMP1, %xmm\i
+        add $4, %r10
+        sub $4, %r12
+        jne _get_AAD_loop\num_initial_blocks\operation
+        cmp $16, %r11
+        je _get_AAD_loop2_done\num_initial_blocks\operation
+        mov $16, %r12
+_get_AAD_loop2\num_initial_blocks\operation:
+        psrldq $4, %xmm\i
+        sub $4, %r12
+        cmp %r11, %r12
+        jne _get_AAD_loop2\num_initial_blocks\operation
+_get_AAD_loop2_done\num_initial_blocks\operation:
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, %xmm\i       # byte-reflect the AAD data
+
+        xor %r11, %r11                  # initialise the data pointer offset as zero
+
+        # start AES for num_initial_blocks blocks
+
+        mov %arg5, %rax                 # %rax = *Y0
+        movdqu (%rax), \XMM0            # XMM0 = Y0
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM0
+
+.if (\i == 5) || (\i == 6) || (\i == 7)
+.irpc index, \i_seq
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, %xmm\index
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, %xmm\index   # perform a 16 byte swap
+
+.endr
+.irpc index, \i_seq
+        pxor 16*0(%arg1), %xmm\index
+.endr
+.irpc index, \i_seq
+        movaps 0x10(%rdi), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 1
+.endr
+.irpc index, \i_seq
+        movaps 0x20(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 2
+.endr
+.irpc index, \i_seq
+        movaps 0x30(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 3
+.endr
+.irpc index, \i_seq
+        movaps 0x40(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 4
+.endr
+.irpc index, \i_seq
+        movaps 0x50(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 5
+.endr
+.irpc index, \i_seq
+        movaps 0x60(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 6
+.endr
+.irpc index, \i_seq
+        movaps 0x70(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 7
+.endr
+.irpc index, \i_seq
+        movaps 0x80(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 8
+.endr
+.irpc index, \i_seq
+        movaps 0x90(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 9
+.endr
+.irpc index, \i_seq
+        movaps 0xa0(%arg1), \TMP1
+        AESENCLAST \TMP1, %xmm\index    # Round 10
+.endr
+.irpc index, \i_seq
+        movdqu (%arg3 , %r11, 1), \TMP1
+        pxor \TMP1, %xmm\index
+        movdqu %xmm\index, (%arg2 , %r11, 1)
+        # write back plaintext/ciphertext for num_initial_blocks
+        add $16, %r11
+
+        movdqa \TMP1, %xmm\index
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, %xmm\index
+
+        # prepare plaintext/ciphertext for GHASH computation
+.endr
+.endif
+        GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+        # apply GHASH on num_initial_blocks blocks
+
+.if \i == 5
+        pxor %xmm5, %xmm6
+        GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+        pxor %xmm6, %xmm7
+        GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+        pxor %xmm7, %xmm8
+        GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 6
+        pxor %xmm6, %xmm7
+        GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+        pxor %xmm7, %xmm8
+        GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 7
+        pxor %xmm7, %xmm8
+        GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.endif
+        cmp $64, %r13
+        jl _initial_blocks_done\num_initial_blocks\operation
+        # no need for precomputed values
+/*
+*
+* Precomputations for HashKey parallel with encryption of first 4 blocks.
+* HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
+*/
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, \XMM1
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM1        # perform a 16 byte swap
+
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, \XMM2
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM2        # perform a 16 byte swap
+
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, \XMM3
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM3        # perform a 16 byte swap
+
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, \XMM4
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM4        # perform a 16 byte swap
+
+        pxor 16*0(%arg1), \XMM1
+        pxor 16*0(%arg1), \XMM2
+        pxor 16*0(%arg1), \XMM3
+        pxor 16*0(%arg1), \XMM4
+        movdqa \TMP3, \TMP5
+        pshufd $78, \TMP3, \TMP1
+        pxor \TMP3, \TMP1
+        movdqa \TMP1, HashKey_k(%rsp)
+        GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+        # TMP5 = HashKey^2<<1 (mod poly)
+        movdqa \TMP5, HashKey_2(%rsp)
+        # HashKey_2 = HashKey^2<<1 (mod poly)
+        pshufd $78, \TMP5, \TMP1
+        pxor \TMP5, \TMP1
+        movdqa \TMP1, HashKey_2_k(%rsp)
+.irpc index, 1234 # do 4 rounds
+        movaps 0x10*\index(%arg1), \TMP1
+        AESENC \TMP1, \XMM1
+        AESENC \TMP1, \XMM2
+        AESENC \TMP1, \XMM3
+        AESENC \TMP1, \XMM4
+.endr
+        GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+        # TMP5 = HashKey^3<<1 (mod poly)
+        movdqa \TMP5, HashKey_3(%rsp)
+        pshufd $78, \TMP5, \TMP1
+        pxor \TMP5, \TMP1
+        movdqa \TMP1, HashKey_3_k(%rsp)
+.irpc index, 56789 # do next 5 rounds
+        movaps 0x10*\index(%arg1), \TMP1
+        AESENC \TMP1, \XMM1
+        AESENC \TMP1, \XMM2
+        AESENC \TMP1, \XMM3
+        AESENC \TMP1, \XMM4
+.endr
+        GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+        # TMP5 = HashKey^4<<1 (mod poly)
+        movdqa \TMP5, HashKey_4(%rsp)
+        pshufd $78, \TMP5, \TMP1
+        pxor \TMP5, \TMP1
+        movdqa \TMP1, HashKey_4_k(%rsp)
+        movaps 0xa0(%arg1), \TMP2
+        AESENCLAST \TMP2, \XMM1
+        AESENCLAST \TMP2, \XMM2
+        AESENCLAST \TMP2, \XMM3
+        AESENCLAST \TMP2, \XMM4
+        movdqu 16*0(%arg3 , %r11 , 1), \TMP1
+        pxor \TMP1, \XMM1
+        movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
+        movdqa \TMP1, \XMM1
+        movdqu 16*1(%arg3 , %r11 , 1), \TMP1
+        pxor \TMP1, \XMM2
+        movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
+        movdqa \TMP1, \XMM2
+        movdqu 16*2(%arg3 , %r11 , 1), \TMP1
+        pxor \TMP1, \XMM3
+        movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
+        movdqa \TMP1, \XMM3
+        movdqu 16*3(%arg3 , %r11 , 1), \TMP1
+        pxor \TMP1, \XMM4
+        movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
+        movdqa \TMP1, \XMM4
+        add $64, %r11
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM1        # perform a 16 byte swap
+        pxor \XMMDst, \XMM1
+        # combine GHASHed value with the corresponding ciphertext
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM2        # perform a 16 byte swap
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM3        # perform a 16 byte swap
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM4        # perform a 16 byte swap
+
+_initial_blocks_done\num_initial_blocks\operation:
+
+.endm
+
+
+/*
+* if a = number of total plaintext bytes
+* b = floor(a/16)
+* num_initial_blocks = b mod 4
+* encrypt the initial num_initial_blocks blocks and apply ghash on
+* the ciphertext
+* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* are clobbered
+* %arg1, %arg2, %arg3, %r14 are used as pointers only, not modified
+*/
+
+
+.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+        mov arg7, %r10                  # %r10 = AAD
+        mov arg8, %r12                  # %r12 = aadLen
+        mov %r12, %r11
+        pxor %xmm\i, %xmm\i
+_get_AAD_loop\num_initial_blocks\operation:
+        movd (%r10), \TMP1
+        pslldq $12, \TMP1
+        psrldq $4, %xmm\i
+        pxor \TMP1, %xmm\i
+        add $4, %r10
+        sub $4, %r12
+        jne _get_AAD_loop\num_initial_blocks\operation
+        cmp $16, %r11
+        je _get_AAD_loop2_done\num_initial_blocks\operation
+        mov $16, %r12
+_get_AAD_loop2\num_initial_blocks\operation:
+        psrldq $4, %xmm\i
+        sub $4, %r12
+        cmp %r11, %r12
+        jne _get_AAD_loop2\num_initial_blocks\operation
+_get_AAD_loop2_done\num_initial_blocks\operation:
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, %xmm\i       # byte-reflect the AAD data
+
+        xor %r11, %r11                  # initialise the data pointer offset as zero
+
+        # start AES for num_initial_blocks blocks
+
+        mov %arg5, %rax                 # %rax = *Y0
+        movdqu (%rax), \XMM0            # XMM0 = Y0
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM0
+
+.if (\i == 5) || (\i == 6) || (\i == 7)
+.irpc index, \i_seq
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, %xmm\index
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, %xmm\index   # perform a 16 byte swap
+
+.endr
+.irpc index, \i_seq
+        pxor 16*0(%arg1), %xmm\index
+.endr
+.irpc index, \i_seq
+        movaps 0x10(%rdi), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 1
+.endr
+.irpc index, \i_seq
+        movaps 0x20(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 2
+.endr
+.irpc index, \i_seq
+        movaps 0x30(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 3
+.endr
+.irpc index, \i_seq
+        movaps 0x40(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 4
+.endr
+.irpc index, \i_seq
+        movaps 0x50(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 5
+.endr
+.irpc index, \i_seq
+        movaps 0x60(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 6
+.endr
+.irpc index, \i_seq
+        movaps 0x70(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 7
+.endr
+.irpc index, \i_seq
+        movaps 0x80(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 8
+.endr
+.irpc index, \i_seq
+        movaps 0x90(%arg1), \TMP1
+        AESENC \TMP1, %xmm\index        # Round 9
+.endr
+.irpc index, \i_seq
+        movaps 0xa0(%arg1), \TMP1
+        AESENCLAST \TMP1, %xmm\index    # Round 10
+.endr
+.irpc index, \i_seq
+        movdqu (%arg3 , %r11, 1), \TMP1
+        pxor \TMP1, %xmm\index
+        movdqu %xmm\index, (%arg2 , %r11, 1)
+        # write back plaintext/ciphertext for num_initial_blocks
+        add $16, %r11
+
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, %xmm\index
+
+        # prepare plaintext/ciphertext for GHASH computation
+.endr
+.endif
+        GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+        # apply GHASH on num_initial_blocks blocks
+
+.if \i == 5
+        pxor %xmm5, %xmm6
+        GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+        pxor %xmm6, %xmm7
+        GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+        pxor %xmm7, %xmm8
+        GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 6
+        pxor %xmm6, %xmm7
+        GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+        pxor %xmm7, %xmm8
+        GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 7
+        pxor %xmm7, %xmm8
+        GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.endif
+        cmp $64, %r13
+        jl _initial_blocks_done\num_initial_blocks\operation
+        # no need for precomputed values
+/*
+*
+* Precomputations for HashKey parallel with encryption of first 4 blocks.
+* HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
+*/
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, \XMM1
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM1        # perform a 16 byte swap
+
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, \XMM2
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM2        # perform a 16 byte swap
+
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, \XMM3
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM3        # perform a 16 byte swap
+
+        paddd ONE(%rip), \XMM0          # INCR Y0
+        movdqa \XMM0, \XMM4
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM4        # perform a 16 byte swap
+
+        pxor 16*0(%arg1), \XMM1
+        pxor 16*0(%arg1), \XMM2
+        pxor 16*0(%arg1), \XMM3
+        pxor 16*0(%arg1), \XMM4
+        movdqa \TMP3, \TMP5
+        pshufd $78, \TMP3, \TMP1
+        pxor \TMP3, \TMP1
+        movdqa \TMP1, HashKey_k(%rsp)
+        GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+        # TMP5 = HashKey^2<<1 (mod poly)
+        movdqa \TMP5, HashKey_2(%rsp)
+        # HashKey_2 = HashKey^2<<1 (mod poly)
+        pshufd $78, \TMP5, \TMP1
+        pxor \TMP5, \TMP1
+        movdqa \TMP1, HashKey_2_k(%rsp)
+.irpc index, 1234 # do 4 rounds
+        movaps 0x10*\index(%arg1), \TMP1
+        AESENC \TMP1, \XMM1
+        AESENC \TMP1, \XMM2
+        AESENC \TMP1, \XMM3
+        AESENC \TMP1, \XMM4
+.endr
+        GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+        # TMP5 = HashKey^3<<1 (mod poly)
+        movdqa \TMP5, HashKey_3(%rsp)
+        pshufd $78, \TMP5, \TMP1
+        pxor \TMP5, \TMP1
+        movdqa \TMP1, HashKey_3_k(%rsp)
+.irpc index, 56789 # do next 5 rounds
+        movaps 0x10*\index(%arg1), \TMP1
+        AESENC \TMP1, \XMM1
+        AESENC \TMP1, \XMM2
+        AESENC \TMP1, \XMM3
+        AESENC \TMP1, \XMM4
+.endr
+        GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+        # TMP5 = HashKey^4<<1 (mod poly)
+        movdqa \TMP5, HashKey_4(%rsp)
+        pshufd $78, \TMP5, \TMP1
+        pxor \TMP5, \TMP1
+        movdqa \TMP1, HashKey_4_k(%rsp)
+        movaps 0xa0(%arg1), \TMP2
+        AESENCLAST \TMP2, \XMM1
+        AESENCLAST \TMP2, \XMM2
+        AESENCLAST \TMP2, \XMM3
+        AESENCLAST \TMP2, \XMM4
+        movdqu 16*0(%arg3 , %r11 , 1), \TMP1
+        pxor \TMP1, \XMM1
+        movdqu 16*1(%arg3 , %r11 , 1), \TMP1
+        pxor \TMP1, \XMM2
+        movdqu 16*2(%arg3 , %r11 , 1), \TMP1
+        pxor \TMP1, \XMM3
+        movdqu 16*3(%arg3 , %r11 , 1), \TMP1
+        pxor \TMP1, \XMM4
+        movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
+        movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
+        movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
+        movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
+
+        add $64, %r11
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM1        # perform a 16 byte swap
+        pxor \XMMDst, \XMM1
+        # combine GHASHed value with the corresponding ciphertext
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM2        # perform a 16 byte swap
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM3        # perform a 16 byte swap
+        movdqa SHUF_MASK(%rip), %xmm14
+        PSHUFB_XMM %xmm14, \XMM4        # perform a 16 byte swap
+
+_initial_blocks_done\num_initial_blocks\operation:
+
+.endm
+/*
+* encrypt 4 blocks at a time
+* ghash the 4 previously encrypted ciphertext blocks
+* %arg1, %arg2, %arg3 are used as pointers only, not modified
+* %r11 is the data offset value
+*/
+.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
+TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
+
+        movdqa \XMM1, \XMM5
+        movdqa \XMM2, \XMM6
+        movdqa \XMM3, \XMM7
+        movdqa \XMM4, \XMM8
+
+        movdqa SHUF_MASK(%rip), %xmm15
+        # multiply XMM5 * HashKey using Karatsuba
+
+        movdqa \XMM5, \TMP4
+        pshufd $78, \XMM5, \TMP6
+        pxor \XMM5, \TMP6
+        paddd ONE(%rip), \XMM0          # INCR CNT
+        movdqa HashKey_4(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP4    # TMP4 = a1*b1
+        movdqa \XMM0, \XMM1
+        paddd ONE(%rip), \XMM0          # INCR CNT
+        movdqa \XMM0, \XMM2
+        paddd ONE(%rip), \XMM0          # INCR CNT
+        movdqa \XMM0, \XMM3
+        paddd ONE(%rip), \XMM0          # INCR CNT
+        movdqa \XMM0, \XMM4
+        PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
+        PCLMULQDQ 0x00, \TMP5, \XMM5    # XMM5 = a0*b0
+        PSHUFB_XMM %xmm15, \XMM2        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM3        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM4        # perform a 16 byte swap
+
+        pxor (%arg1), \XMM1
+        pxor (%arg1), \XMM2
+        pxor (%arg1), \XMM3
+        pxor (%arg1), \XMM4
+        movdqa HashKey_4_k(%rsp), \TMP5
+        PCLMULQDQ 0x00, \TMP5, \TMP6    # TMP6 = (a1+a0)*(b1+b0)
+        movaps 0x10(%arg1), \TMP1
+        AESENC \TMP1, \XMM1             # Round 1
+        AESENC \TMP1, \XMM2
+        AESENC \TMP1, \XMM3
+        AESENC \TMP1, \XMM4
+        movaps 0x20(%arg1), \TMP1
+        AESENC \TMP1, \XMM1             # Round 2
+        AESENC \TMP1, \XMM2
+        AESENC \TMP1, \XMM3
+        AESENC \TMP1, \XMM4
+        movdqa \XMM6, \TMP1
+        pshufd $78, \XMM6, \TMP2
+        pxor \XMM6, \TMP2
+        movdqa HashKey_3(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1 * b1
+        movaps 0x30(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 3
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        PCLMULQDQ 0x00, \TMP5, \XMM6    # XMM6 = a0*b0
+        movaps 0x40(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 4
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        movdqa HashKey_3_k(%rsp), \TMP5
+        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        movaps 0x50(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 5
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        pxor \TMP1, \TMP4
+        # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+        pxor \XMM6, \XMM5
+        pxor \TMP2, \TMP6
+        movdqa \XMM7, \TMP1
+        pshufd $78, \XMM7, \TMP2
+        pxor \XMM7, \TMP2
+        movdqa HashKey_2(%rsp ), \TMP5
+
+        # Multiply TMP5 * HashKey using Karatsuba
+
+        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
+        movaps 0x60(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 6
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        PCLMULQDQ 0x00, \TMP5, \XMM7    # XMM7 = a0*b0
+        movaps 0x70(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 7
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        movdqa HashKey_2_k(%rsp), \TMP5
+        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        movaps 0x80(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 8
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        pxor \TMP1, \TMP4
+        # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+        pxor \XMM7, \XMM5
+        pxor \TMP2, \TMP6
+
+        # Multiply XMM8 * HashKey
+        # XMM8 and TMP5 hold the values for the two operands
+
+        movdqa \XMM8, \TMP1
+        pshufd $78, \XMM8, \TMP2
+        pxor \XMM8, \TMP2
+        movdqa HashKey(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
+        movaps 0x90(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 9
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        PCLMULQDQ 0x00, \TMP5, \XMM8    # XMM8 = a0*b0
+        movaps 0xa0(%arg1), \TMP3
+        AESENCLAST \TMP3, \XMM1         # Round 10
+        AESENCLAST \TMP3, \XMM2
+        AESENCLAST \TMP3, \XMM3
+        AESENCLAST \TMP3, \XMM4
+        movdqa HashKey_k(%rsp), \TMP5
+        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        movdqu (%arg3,%r11,1), \TMP3
+        pxor \TMP3, \XMM1               # Ciphertext/Plaintext XOR EK
+        movdqu 16(%arg3,%r11,1), \TMP3
+        pxor \TMP3, \XMM2               # Ciphertext/Plaintext XOR EK
+        movdqu 32(%arg3,%r11,1), \TMP3
+        pxor \TMP3, \XMM3               # Ciphertext/Plaintext XOR EK
+        movdqu 48(%arg3,%r11,1), \TMP3
+        pxor \TMP3, \XMM4               # Ciphertext/Plaintext XOR EK
+        movdqu \XMM1, (%arg2,%r11,1)    # Write to the ciphertext buffer
+        movdqu \XMM2, 16(%arg2,%r11,1)  # Write to the ciphertext buffer
+        movdqu \XMM3, 32(%arg2,%r11,1)  # Write to the ciphertext buffer
+        movdqu \XMM4, 48(%arg2,%r11,1)  # Write to the ciphertext buffer
+        PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM2        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM3        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM4        # perform a 16 byte swap
+
+        pxor \TMP4, \TMP1
+        pxor \XMM8, \XMM5
+        pxor \TMP6, \TMP2
+        pxor \TMP1, \TMP2
+        pxor \XMM5, \TMP2
+        movdqa \TMP2, \TMP3
+        pslldq $8, \TMP3                # left shift TMP3 2 DWs
+        psrldq $8, \TMP2                # right shift TMP2 2 DWs
+        pxor \TMP3, \XMM5
+        pxor \TMP2, \TMP1               # accumulate the results in TMP1:XMM5
+
+        # first phase of reduction
+
+        movdqa \XMM5, \TMP2
+        movdqa \XMM5, \TMP3
+        movdqa \XMM5, \TMP4
+        # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
+        pslld $31, \TMP2                # packed left shift << 31
+        pslld $30, \TMP3                # packed left shift << 30
+        pslld $25, \TMP4                # packed left shift << 25
+        pxor \TMP3, \TMP2               # xor the shifted versions
+        pxor \TMP4, \TMP2
+        movdqa \TMP2, \TMP5
+        psrldq $4, \TMP5                # right shift TMP5 1 DW
+        pslldq $12, \TMP2               # left shift TMP2 3 DWs
+        pxor \TMP2, \XMM5
+
+        # second phase of reduction
+
+        movdqa \XMM5,\TMP2              # make 3 copies of XMM5 into TMP2, TMP3, TMP4
+        movdqa \XMM5,\TMP3
+        movdqa \XMM5,\TMP4
+        psrld $1, \TMP2                 # packed right shift >>1
+        psrld $2, \TMP3                 # packed right shift >>2
+        psrld $7, \TMP4                 # packed right shift >>7
+        pxor \TMP3,\TMP2                # xor the shifted versions
+        pxor \TMP4,\TMP2
+        pxor \TMP5, \TMP2
+        pxor \TMP2, \XMM5
+        pxor \TMP1, \XMM5               # result is in XMM5
+
+        pxor \XMM5, \XMM1
+.endm
+
+/*
+* decrypt 4 blocks at a time
+* ghash the 4 previously decrypted ciphertext blocks
+* %arg1, %arg2, %arg3 are used as pointers only, not modified
+* %r11 is the data offset value
+*/
+.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
+TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
+
+        movdqa \XMM1, \XMM5
+        movdqa \XMM2, \XMM6
+        movdqa \XMM3, \XMM7
+        movdqa \XMM4, \XMM8
+
+        movdqa SHUF_MASK(%rip), %xmm15
+        # multiply XMM5 * HashKey using Karatsuba
+
+        movdqa \XMM5, \TMP4
+        pshufd $78, \XMM5, \TMP6
+        pxor \XMM5, \TMP6
+        paddd ONE(%rip), \XMM0          # INCR CNT
+        movdqa HashKey_4(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP4    # TMP4 = a1*b1
+        movdqa \XMM0, \XMM1
+        paddd ONE(%rip), \XMM0          # INCR CNT
+        movdqa \XMM0, \XMM2
+        paddd ONE(%rip), \XMM0          # INCR CNT
+        movdqa \XMM0, \XMM3
+        paddd ONE(%rip), \XMM0          # INCR CNT
+        movdqa \XMM0, \XMM4
+        PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
+        PCLMULQDQ 0x00, \TMP5, \XMM5    # XMM5 = a0*b0
+        PSHUFB_XMM %xmm15, \XMM2        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM3        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM4        # perform a 16 byte swap
+
+        pxor (%arg1), \XMM1
+        pxor (%arg1), \XMM2
+        pxor (%arg1), \XMM3
+        pxor (%arg1), \XMM4
+        movdqa HashKey_4_k(%rsp), \TMP5
+        PCLMULQDQ 0x00, \TMP5, \TMP6    # TMP6 = (a1+a0)*(b1+b0)
+        movaps 0x10(%arg1), \TMP1
+        AESENC \TMP1, \XMM1             # Round 1
+        AESENC \TMP1, \XMM2
+        AESENC \TMP1, \XMM3
+        AESENC \TMP1, \XMM4
+        movaps 0x20(%arg1), \TMP1
+        AESENC \TMP1, \XMM1             # Round 2
+        AESENC \TMP1, \XMM2
+        AESENC \TMP1, \XMM3
+        AESENC \TMP1, \XMM4
+        movdqa \XMM6, \TMP1
+        pshufd $78, \XMM6, \TMP2
+        pxor \XMM6, \TMP2
+        movdqa HashKey_3(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1 * b1
+        movaps 0x30(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 3
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        PCLMULQDQ 0x00, \TMP5, \XMM6    # XMM6 = a0*b0
+        movaps 0x40(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 4
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        movdqa HashKey_3_k(%rsp), \TMP5
+        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        movaps 0x50(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 5
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        pxor \TMP1, \TMP4
+        # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+        pxor \XMM6, \XMM5
+        pxor \TMP2, \TMP6
+        movdqa \XMM7, \TMP1
+        pshufd $78, \XMM7, \TMP2
+        pxor \XMM7, \TMP2
+        movdqa HashKey_2(%rsp ), \TMP5
+
+        # Multiply TMP5 * HashKey using Karatsuba
+
+        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
+        movaps 0x60(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 6
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        PCLMULQDQ 0x00, \TMP5, \XMM7    # XMM7 = a0*b0
+        movaps 0x70(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 7
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        movdqa HashKey_2_k(%rsp), \TMP5
+        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        movaps 0x80(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 8
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        pxor \TMP1, \TMP4
+        # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+        pxor \XMM7, \XMM5
+        pxor \TMP2, \TMP6
+
+        # Multiply XMM8 * HashKey
+        # XMM8 and TMP5 hold the values for the two operands
+
+        movdqa \XMM8, \TMP1
+        pshufd $78, \XMM8, \TMP2
+        pxor \XMM8, \TMP2
+        movdqa HashKey(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
+        movaps 0x90(%arg1), \TMP3
+        AESENC \TMP3, \XMM1             # Round 9
+        AESENC \TMP3, \XMM2
+        AESENC \TMP3, \XMM3
+        AESENC \TMP3, \XMM4
+        PCLMULQDQ 0x00, \TMP5, \XMM8    # XMM8 = a0*b0
+        movaps 0xa0(%arg1), \TMP3
+        AESENCLAST \TMP3, \XMM1         # Round 10
+        AESENCLAST \TMP3, \XMM2
+        AESENCLAST \TMP3, \XMM3
+        AESENCLAST \TMP3, \XMM4
+        movdqa HashKey_k(%rsp), \TMP5
+        PCLMULQDQ 0x00, \TMP5, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        movdqu (%arg3,%r11,1), \TMP3
+        pxor \TMP3, \XMM1               # Ciphertext/Plaintext XOR EK
+        movdqu \XMM1, (%arg2,%r11,1)    # Write to plaintext buffer
+        movdqa \TMP3, \XMM1
+        movdqu 16(%arg3,%r11,1), \TMP3
+        pxor \TMP3, \XMM2               # Ciphertext/Plaintext XOR EK
+        movdqu \XMM2, 16(%arg2,%r11,1)  # Write to plaintext buffer
+        movdqa \TMP3, \XMM2
+        movdqu 32(%arg3,%r11,1), \TMP3
+        pxor \TMP3, \XMM3               # Ciphertext/Plaintext XOR EK
+        movdqu \XMM3, 32(%arg2,%r11,1)  # Write to plaintext buffer
+        movdqa \TMP3, \XMM3
+        movdqu 48(%arg3,%r11,1), \TMP3
+        pxor \TMP3, \XMM4               # Ciphertext/Plaintext XOR EK
+        movdqu \XMM4, 48(%arg2,%r11,1)  # Write to plaintext buffer
+        movdqa \TMP3, \XMM4
+        PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM2        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM3        # perform a 16 byte swap
+        PSHUFB_XMM %xmm15, \XMM4        # perform a 16 byte swap
+
+        pxor \TMP4, \TMP1
+        pxor \XMM8, \XMM5
+        pxor \TMP6, \TMP2
+        pxor \TMP1, \TMP2
+        pxor \XMM5, \TMP2
+        movdqa \TMP2, \TMP3
+        pslldq $8, \TMP3                # left shift TMP3 2 DWs
+        psrldq $8, \TMP2                # right shift TMP2 2 DWs
+        pxor \TMP3, \XMM5
+        pxor \TMP2, \TMP1               # accumulate the results in TMP1:XMM5
+
+        # first phase of reduction
+
+        movdqa \XMM5, \TMP2
+        movdqa \XMM5, \TMP3
+        movdqa \XMM5, \TMP4
+        # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
+        pslld $31, \TMP2                # packed left shift << 31
+        pslld $30, \TMP3                # packed left shift << 30
+        pslld $25, \TMP4                # packed left shift << 25
+        pxor \TMP3, \TMP2               # xor the shifted versions
+        pxor \TMP4, \TMP2
+        movdqa \TMP2, \TMP5
+        psrldq $4, \TMP5                # right shift TMP5 1 DW
+        pslldq $12, \TMP2               # left shift TMP2 3 DWs
+        pxor \TMP2, \XMM5
+
+        # second phase of reduction
+
+        movdqa \XMM5,\TMP2              # make 3 copies of XMM5 into TMP2, TMP3, TMP4
+        movdqa \XMM5,\TMP3
+        movdqa \XMM5,\TMP4
+        psrld $1, \TMP2                 # packed right shift >>1
+        psrld $2, \TMP3                 # packed right shift >>2
+        psrld $7, \TMP4                 # packed right shift >>7
+        pxor \TMP3,\TMP2                # xor the shifted versions
+        pxor \TMP4,\TMP2
+        pxor \TMP5, \TMP2
+        pxor \TMP2, \XMM5
+        pxor \TMP1, \XMM5               # result is in XMM5
+
+        pxor \XMM5, \XMM1
+.endm
+
+/* GHASH the last 4 ciphertext blocks. */
+.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
+TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
+
+        # Multiply XMM1 * HashKey^4 (using Karatsuba)
+
+        movdqa \XMM1, \TMP6
+        pshufd $78, \XMM1, \TMP2
+        pxor \XMM1, \TMP2
+        movdqa HashKey_4(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP6    # TMP6 = a1*b1
+        PCLMULQDQ 0x00, \TMP5, \XMM1    # XMM1 = a0*b0
+        movdqa HashKey_4_k(%rsp), \TMP4
+        PCLMULQDQ 0x00, \TMP4, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        movdqa \XMM1, \XMMDst
+        movdqa \TMP2, \XMM1             # result in TMP6, XMMDst, XMM1
+
+        # Multiply XMM2 * HashKey^3 (using Karatsuba)
+
+        movdqa \XMM2, \TMP1
+        pshufd $78, \XMM2, \TMP2
+        pxor \XMM2, \TMP2
+        movdqa HashKey_3(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
+        PCLMULQDQ 0x00, \TMP5, \XMM2    # XMM2 = a0*b0
+        movdqa HashKey_3_k(%rsp), \TMP4
+        PCLMULQDQ 0x00, \TMP4, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        pxor \TMP1, \TMP6
+        pxor \XMM2, \XMMDst
+        pxor \TMP2, \XMM1
+        # results accumulated in TMP6, XMMDst, XMM1
+
+        # Multiply XMM3 * HashKey^2 (using Karatsuba)
+
+        movdqa \XMM3, \TMP1
+        pshufd $78, \XMM3, \TMP2
+        pxor \XMM3, \TMP2
+        movdqa HashKey_2(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
+        PCLMULQDQ 0x00, \TMP5, \XMM3    # XMM3 = a0*b0
+        movdqa HashKey_2_k(%rsp), \TMP4
+        PCLMULQDQ 0x00, \TMP4, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        pxor \TMP1, \TMP6
+        pxor \XMM3, \XMMDst
+        pxor \TMP2, \XMM1               # results accumulated in TMP6, XMMDst, XMM1
+
+        # Multiply XMM4 * HashKey (using Karatsuba)
+        movdqa \XMM4, \TMP1
+        pshufd $78, \XMM4, \TMP2
+        pxor \XMM4, \TMP2
+        movdqa HashKey(%rsp), \TMP5
+        PCLMULQDQ 0x11, \TMP5, \TMP1    # TMP1 = a1*b1
+        PCLMULQDQ 0x00, \TMP5, \XMM4    # XMM4 = a0*b0
+        movdqa HashKey_k(%rsp), \TMP4
+        PCLMULQDQ 0x00, \TMP4, \TMP2    # TMP2 = (a1+a0)*(b1+b0)
+        pxor \TMP1, \TMP6
+        pxor \XMM4, \XMMDst
+        pxor \XMM1, \TMP2
+        pxor \TMP6, \TMP2
+        pxor \XMMDst, \TMP2
+        # middle section of the temp results combined as in karatsuba algorithm
+        movdqa \TMP2, \TMP4
+        pslldq $8, \TMP4                # left shift TMP4 2 DWs
+        psrldq $8, \TMP2                # right shift TMP2 2 DWs
+        pxor \TMP4, \XMMDst
+        pxor \TMP2, \TMP6
+        # TMP6:XMMDst holds the result of the accumulated carry-less multiplications
+        # first phase of the reduction
+        movdqa \XMMDst, \TMP2
+        movdqa \XMMDst, \TMP3
+        movdqa \XMMDst, \TMP4
+        # move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
+        pslld $31, \TMP2                # packed left shifting << 31
+        pslld $30, \TMP3                # packed left shifting << 30
+        pslld $25, \TMP4                # packed left shifting << 25
+        pxor \TMP3, \TMP2               # xor the shifted versions
+        pxor \TMP4, \TMP2
+        movdqa \TMP2, \TMP7
+        psrldq $4, \TMP7                # right shift TMP7 1 DW
+        pslldq $12, \TMP2               # left shift TMP2 3 DWs
+        pxor \TMP2, \XMMDst
+
+        # second phase of the reduction
+        movdqa \XMMDst, \TMP2
+        # make 3 copies of XMMDst for doing 3 shift operations
+        movdqa \XMMDst, \TMP3
+        movdqa \XMMDst, \TMP4
+        psrld $1, \TMP2                 # packed right shift >> 1
+        psrld $2, \TMP3                 # packed right shift >> 2
+        psrld $7, \TMP4                 # packed right shift >> 7
+        pxor \TMP3, \TMP2               # xor the shifted versions
+        pxor \TMP4, \TMP2
+        pxor \TMP7, \TMP2
+        pxor \TMP2, \XMMDst
+        pxor \TMP6, \XMMDst             # reduced result is in XMMDst
+.endm
+
+/* Encryption of a single block done */
+.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
+
+        pxor (%arg1), \XMM0
+        movaps 16(%arg1), \TMP1
+        AESENC \TMP1, \XMM0
+        movaps 32(%arg1), \TMP1
+        AESENC \TMP1, \XMM0
+        movaps 48(%arg1), \TMP1
+        AESENC \TMP1, \XMM0
+        movaps 64(%arg1), \TMP1
+        AESENC \TMP1, \XMM0
+        movaps 80(%arg1), \TMP1
+        AESENC \TMP1, \XMM0
+        movaps 96(%arg1), \TMP1
+        AESENC \TMP1, \XMM0
+        movaps 112(%arg1), \TMP1
+        AESENC \TMP1, \XMM0
+        movaps 128(%arg1), \TMP1
+        AESENC \TMP1, \XMM0
+        movaps 144(%arg1), \TMP1
+        AESENC \TMP1, \XMM0
+        movaps 160(%arg1), \TMP1
+        AESENCLAST \TMP1, \XMM0
+.endm
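
The equivalent single-block flow with AES-NI intrinsics, for readers more
used to C. Illustrative only, and hard-wired to the 11-round-key AES-128
schedule this file assumes:

    #include <wmmintrin.h>

    static __m128i encrypt_single_block(const __m128i rk[11], __m128i block)
    {
            block = _mm_xor_si128(block, rk[0]);            /* whitening: pxor (%arg1) */
            for (int r = 1; r <= 9; r++)
                    block = _mm_aesenc_si128(block, rk[r]); /* rounds 1..9 */
            return _mm_aesenclast_si128(block, rk[10]);     /* round 10 */
    }
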
+
+
+/*****************************************************************************
+* void aesni_gcm_dec(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    u8 *out,            // Plaintext output. Decrypt in-place is allowed.
+*                    const u8 *in,       // Ciphertext input
+*                    u64 plaintext_len,  // Length of data in bytes for decryption.
+*                    u8 *iv,             // Pre-counter block j0: 4 byte salt (from Security Association)
+*                                        // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
+*                                        // concatenated with 0x00000001. 16-byte aligned pointer.
+*                    u8 *hash_subkey,    // H, the Hash sub key input. Data starts on a 16-byte boundary.
+*                    const u8 *aad,      // Additional Authentication Data (AAD)
+*                    u64 aad_len,        // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
+*                    u8 *auth_tag,       // Authenticated Tag output. The driver will compare this to the
+*                                        // given authentication tag and only return the plaintext if they match.
+*                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16
+*                                        // (most likely), 12 or 8.
+*
+* Assumptions:
+*
+* keys:
+*       keys are pre-expanded and aligned to 16 bytes. we are using the first
+*       set of 11 keys in the data structure void *aes_ctx
+*
+* iv:
+*       0                   1                   2                   3
+*       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*       |                       Salt  (From the SA)                     |
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*       |                     Initialization Vector                     |
+*       |         (This is the sequence number from IPSec header)       |
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*       |                              0x1                              |
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*
+*
+*
+* AAD:
+*       AAD padded to 128 bits with 0
+*       for example, assume AAD is a u32 vector
+*
+*       if AAD is 8 bytes:
+*       AAD[3] = {A0, A1};
+*       padded AAD in xmm register = {A1 A0 0 0}
+*
+*       0                   1                   2                   3
+*       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*       |                           SPI (A1)                            |
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*       |                 32-bit Sequence Number (A0)                   |
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*       |                              0x0                              |
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*
+*       AAD Format with 32-bit Sequence Number
+*
+*       if AAD is 12 bytes:
+*       AAD[3] = {A0, A1, A2};
+*       padded AAD in xmm register = {A2 A1 A0 0}
+*
+*       0                   1                   2                   3
+*       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*       |                           SPI (A2)                            |
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*       |             64-bit Extended Sequence Number {A1,A0}           |
+*       |                                                               |
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*       |                              0x0                              |
+*       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*
+*       AAD Format with 64-bit Extended Sequence Number
+*
+* aadLen:
+*       from the definition of the spec, aadLen can only be 8 or 12 bytes.
+*       The code also supports 16; for any other size the code will fail.
+*
+* TLen:
+*       from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
+*       For other sizes, the code will fail.
+*
+* poly = x^128 + x^127 + x^126 + x^121 + 1
+*
+*****************************************************************************/
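
A caller-side sketch of assembling the pre-counter block the iv parameter
must point at, following the salt/IV/0x00000001 layout above (salt and
esp_iv are hypothetical names for the caller's buffers):

    #include <string.h>

    void build_rfc4106_j0(u8 iv_block[16], const u8 salt[4], const u8 esp_iv[8])
    {
            memcpy(iv_block, salt, 4);          /* 4-byte salt from the SA        */
            memcpy(iv_block + 4, esp_iv, 8);    /* 8-byte IV from the ESP payload */
            iv_block[12] = 0;                   /* trailing 32-bit counter = 1,   */
            iv_block[13] = 0;                   /* stored big endian: 0x00000001  */
            iv_block[14] = 0;
            iv_block[15] = 1;
    }

The routine additionally expects this block at a 16-byte aligned address,
as the prototype comment notes.
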
1265 | |||
1266 | ENTRY(aesni_gcm_dec) | ||
1267 | push %r12 | ||
1268 | push %r13 | ||
1269 | push %r14 | ||
1270 | mov %rsp, %r14 | ||
1271 | /* | ||
1272 | * states of %xmm registers %xmm6:%xmm15 not saved | ||
1273 | * all %xmm registers are clobbered | ||
1274 | */ | ||
1275 | sub $VARIABLE_OFFSET, %rsp | ||
1276 | and $~63, %rsp # align rsp to 64 bytes | ||
1277 | mov %arg6, %r12 | ||
1278 | movdqu (%r12), %xmm13 # %xmm13 = HashKey | ||
1279 | movdqa SHUF_MASK(%rip), %xmm2 | ||
1280 | PSHUFB_XMM %xmm2, %xmm13 | ||
1281 | |||
1282 | |||
1283 | # Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) | ||
1284 | |||
1285 | movdqa %xmm13, %xmm2 | ||
1286 | psllq $1, %xmm13 | ||
1287 | psrlq $63, %xmm2 | ||
1288 | movdqa %xmm2, %xmm1 | ||
1289 | pslldq $8, %xmm2 | ||
1290 | psrldq $8, %xmm1 | ||
1291 | por %xmm2, %xmm13 | ||
1292 | |||
1293 | # Reduction | ||
1294 | |||
1295 | pshufd $0x24, %xmm1, %xmm2 | ||
1296 | pcmpeqd TWOONE(%rip), %xmm2 | ||
1297 | pand POLY(%rip), %xmm2 | ||
1298 | pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) | ||
1299 | |||
1300 | |||
1301 | # Decrypt first few blocks | ||
1302 | |||
1303 | movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) | ||
1304 | mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
1305 | and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) | ||
1306 | mov %r13, %r12 | ||
1307 | and $(3<<4), %r12 | ||
1308 | jz _initial_num_blocks_is_0_decrypt | ||
1309 | cmp $(2<<4), %r12 | ||
1310 | jb _initial_num_blocks_is_1_decrypt | ||
1311 | je _initial_num_blocks_is_2_decrypt | ||
1312 | _initial_num_blocks_is_3_decrypt: | ||
1313 | INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1314 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec | ||
1315 | sub $48, %r13 | ||
1316 | jmp _initial_blocks_decrypted | ||
1317 | _initial_num_blocks_is_2_decrypt: | ||
1318 | INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1319 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec | ||
1320 | sub $32, %r13 | ||
1321 | jmp _initial_blocks_decrypted | ||
1322 | _initial_num_blocks_is_1_decrypt: | ||
1323 | INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1324 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec | ||
1325 | sub $16, %r13 | ||
1326 | jmp _initial_blocks_decrypted | ||
1327 | _initial_num_blocks_is_0_decrypt: | ||
1328 | INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1329 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec | ||
1330 | _initial_blocks_decrypted: | ||
1331 | cmp $0, %r13 | ||
1332 | je _zero_cipher_left_decrypt | ||
1333 | sub $64, %r13 | ||
1334 | je _four_cipher_left_decrypt | ||
1335 | _decrypt_by_4: | ||
1336 | GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ | ||
1337 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec | ||
1338 | add $64, %r11 | ||
1339 | sub $64, %r13 | ||
1340 | jne _decrypt_by_4 | ||
1341 | _four_cipher_left_decrypt: | ||
1342 | GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ | ||
1343 | %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 | ||
1344 | _zero_cipher_left_decrypt: | ||
1345 | mov %arg4, %r13 | ||
1346 | and $15, %r13 # %r13 = arg4 (mod 16) | ||
1347 | je _multiple_of_16_bytes_decrypt | ||
1348 | |||
1349 | # Handle the last <16 byte block separately | ||
1350 | |||
1351 | paddd ONE(%rip), %xmm0 # increment CNT to get Yn | ||
1352 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1353 | PSHUFB_XMM %xmm10, %xmm0 | ||
1354 | |||
1355 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) | ||
1356 | sub $16, %r11 | ||
1357 | add %r13, %r11 | ||
1358 | movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block | ||
1359 | lea SHIFT_MASK+16(%rip), %r12 | ||
1360 | sub %r13, %r12 | ||
1361 | # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes | ||
1362 | # (%r13 is the number of bytes in plaintext mod 16) | ||
1363 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1364 | PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes | ||
1365 | |||
1366 | movdqa %xmm1, %xmm2 | ||
1367 | pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) | ||
1368 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 | ||
1369 | # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 | ||
1370 | pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 | ||
1371 | pand %xmm1, %xmm2 | ||
1372 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1373 | PSHUFB_XMM %xmm10 ,%xmm2 | ||
1374 | |||
1375 | pxor %xmm2, %xmm8 | ||
1376 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1377 | # GHASH computation for the last <16 byte block | ||
1378 | sub %r13, %r11 | ||
1379 | add $16, %r11 | ||
1380 | |||
1381 | # output %r13 bytes | ||
1382 | MOVQ_R64_XMM %xmm0, %rax | ||
1383 | cmp $8, %r13 | ||
1384 | jle _less_than_8_bytes_left_decrypt | ||
1385 | mov %rax, (%arg2, %r11, 1) | ||
1386 | add $8, %r11 | ||
1387 | psrldq $8, %xmm0 | ||
1388 | MOVQ_R64_XMM %xmm0, %rax | ||
1389 | sub $8, %r13 | ||
1390 | _less_than_8_bytes_left_decrypt: | ||
1391 | mov %al, (%arg2, %r11, 1) | ||
1392 | add $1, %r11 | ||
1393 | shr $8, %rax | ||
1394 | sub $1, %r13 | ||
1395 | jne _less_than_8_bytes_left_decrypt | ||
1396 | _multiple_of_16_bytes_decrypt: | ||
1397 | mov arg8, %r12 # %r12 = aadLen (number of bytes) | ||
1398 | shl $3, %r12 # convert into number of bits | ||
1399 | movd %r12d, %xmm15 # len(A) in %xmm15 | ||
1400 | shl $3, %arg4 # len(C) in bits (*8) | ||
1401 | MOVQ_R64_XMM %arg4, %xmm1 | ||
1402 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 | ||
1403 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) | ||
1404 | pxor %xmm15, %xmm8 | ||
1405 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1406 | # final GHASH computation | ||
1407 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1408 | PSHUFB_XMM %xmm10, %xmm8 | ||
1409 | |||
1410 | mov %arg5, %rax # %rax = *Y0 | ||
1411 | movdqu (%rax), %xmm0 # %xmm0 = Y0 | ||
1412 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) | ||
1413 | pxor %xmm8, %xmm0 | ||
1414 | _return_T_decrypt: | ||
1415 | mov arg9, %r10 # %r10 = authTag | ||
1416 | mov arg10, %r11 # %r11 = auth_tag_len | ||
1417 | cmp $16, %r11 | ||
1418 | je _T_16_decrypt | ||
1419 | cmp $12, %r11 | ||
1420 | je _T_12_decrypt | ||
1421 | _T_8_decrypt: | ||
1422 | MOVQ_R64_XMM %xmm0, %rax | ||
1423 | mov %rax, (%r10) | ||
1424 | jmp _return_T_done_decrypt | ||
1425 | _T_12_decrypt: | ||
1426 | MOVQ_R64_XMM %xmm0, %rax | ||
1427 | mov %rax, (%r10) | ||
1428 | psrldq $8, %xmm0 | ||
1429 | movd %xmm0, %eax | ||
1430 | mov %eax, 8(%r10) | ||
1431 | jmp _return_T_done_decrypt | ||
1432 | _T_16_decrypt: | ||
1433 | movdqu %xmm0, (%r10) | ||
1434 | _return_T_done_decrypt: | ||
1435 | mov %r14, %rsp | ||
1436 | pop %r14 | ||
1437 | pop %r13 | ||
1438 | pop %r12 | ||
1439 | ret | ||
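At the spec level (NIST SP 800-38D), the block folded into the hash just before the final GHASH_MUL above is len(A) || len(C): two 64-bit big-endian bit counts, which is why both lengths are shifted left by 3 (bytes to bits). A minimal C sketch of that length block, illustrative only; the asm assembles the same value in its byte-reflected working order and byte-swaps with PSHUFB afterwards:

    #include <stdint.h>

    /* Build the final 16-byte GHASH block len(A) || len(C).  The << 3
     * converts byte counts to bit counts, mirroring "shl $3" above. */
    static void gcm_len_block(uint8_t out[16],
                              uint64_t aad_bytes, uint64_t text_bytes)
    {
            uint64_t abits = aad_bytes << 3;
            uint64_t cbits = text_bytes << 3;
            int i;

            for (i = 0; i < 8; i++) {
                    out[7 - i]  = (uint8_t)(abits >> (8 * i)); /* BE len(A) */
                    out[15 - i] = (uint8_t)(cbits >> (8 * i)); /* BE len(C) */
            }
    }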
1440 | |||
1441 | |||
1442 | /***************************************************************************** | ||
1443 | * void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. | ||
1444 | * u8 *out, // Ciphertext output. Encrypt in-place is allowed. | ||
1445 | * const u8 *in, // Plaintext input | ||
1446 | * u64 plaintext_len, // Length of data in bytes for encryption. | ||
1447 | * u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) | ||
1448 | * // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) | ||
1449 | * // concatenated with 0x00000001. 16-byte aligned pointer. | ||
1450 | * u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. | ||
1451 | * const u8 *aad, // Additional Authentication Data (AAD) | ||
1452 | * u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes | ||
1453 | * u8 *auth_tag, // Authenticated Tag output. | ||
1454 | * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), | ||
1455 | * // 12 or 8. | ||
1456 | * | ||
1457 | * Assumptions: | ||
1458 | * | ||
1459 | * keys: | ||
1460 | * keys are pre-expanded and aligned to 16 bytes. We are using the | ||
1461 | * first set of 11 round keys in the data structure void *aes_ctx | ||
1462 | * | ||
1463 | * | ||
1464 | * iv: | ||
1465 | * 0 1 2 3 | ||
1466 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1467 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1468 | * | Salt (From the SA) | | ||
1469 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1470 | * | Initialization Vector | | ||
1471 | * | (This is the sequence number from IPSec header) | | ||
1472 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1473 | * | 0x1 | | ||
1474 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1475 | * | ||
1476 | * | ||
1477 | * | ||
1478 | * AAD: | ||
1479 | * AAD is padded to 128 bits with 0 | ||
1480 | * for example, assume AAD is a u32 vector: | ||
1481 | * | ||
1482 | * if AAD is 8 bytes: | ||
1483 | * AAD[2] = {A0, A1}; | ||
1484 | * padded AAD in xmm register = {A1 A0 0 0} | ||
1485 | * | ||
1486 | * 0 1 2 3 | ||
1487 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1488 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1489 | * | SPI (A1) | | ||
1490 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1491 | * | 32-bit Sequence Number (A0) | | ||
1492 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1493 | * | 0x0 | | ||
1494 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1495 | * | ||
1496 | * AAD Format with 32-bit Sequence Number | ||
1497 | * | ||
1498 | * if AAD is 12 bytes: | ||
1499 | * AAD[3] = {A0, A1, A2}; | ||
1500 | * padded AAD in xmm register = {A2 A1 A0 0} | ||
1501 | * | ||
1502 | * 0 1 2 3 | ||
1503 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1504 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1505 | * | SPI (A2) | | ||
1506 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1507 | * | 64-bit Extended Sequence Number {A1,A0} | | ||
1508 | * | | | ||
1509 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1510 | * | 0x0 | | ||
1511 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1512 | * | ||
1513 | * AAD Format with 64-bit Extended Sequence Number | ||
1514 | * | ||
1515 | * aadLen: | ||
1516 | * from the definition of the spec, aadLen can only be 8 or 12 bytes. | ||
1517 | * The code also supports an aadLen of 16, but it will fail for any other size. | ||
1518 | * | ||
1519 | * TLen: | ||
1520 | * from the definition of the spec, TLen can only be 8, 12 or 16 bytes. | ||
1521 | * For other sizes, the code will fail. | ||
1522 | * | ||
1523 | * poly = x^128 + x^127 + x^126 + x^121 + 1 | ||
1524 | ***************************************************************************/ | ||
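Before the routine itself, a caller-side sketch (plain C, not part of this patch) of the 16-byte pre-counter block that the iv diagram above describes; salt and seq_iv are hypothetical names for the SA salt and the 8-byte ESP IV:

    #include <stdint.h>
    #include <string.h>

    static void rfc4106_build_j0(uint8_t j0[16], const uint8_t salt[4],
                                 const uint8_t seq_iv[8])
    {
            memcpy(j0, salt, 4);        /* bytes  0..3 : salt from the SA */
            memcpy(j0 + 4, seq_iv, 8);  /* bytes  4..11: explicit ESP IV  */
            j0[12] = 0;                 /* bytes 12..15: 0x00000001,      */
            j0[13] = 0;                 /* big-endian, per the diagram    */
            j0[14] = 0;
            j0[15] = 1;
    }

The glue code builds exactly this block from ctx->nonce and req->iv before every call.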
1525 | ENTRY(aesni_gcm_enc) | ||
1526 | push %r12 | ||
1527 | push %r13 | ||
1528 | push %r14 | ||
1529 | mov %rsp, %r14 | ||
1530 | # | ||
1531 | # states of %xmm registers %xmm6:%xmm15 are not saved | ||
1532 | # all %xmm registers are clobbered | ||
1533 | # | ||
1534 | sub $VARIABLE_OFFSET, %rsp | ||
1535 | and $~63, %rsp | ||
1536 | mov %arg6, %r12 | ||
1537 | movdqu (%r12), %xmm13 | ||
1538 | movdqa SHUF_MASK(%rip), %xmm2 | ||
1539 | PSHUFB_XMM %xmm2, %xmm13 | ||
1540 | |||
1541 | |||
1542 | # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) | ||
1543 | |||
1544 | movdqa %xmm13, %xmm2 | ||
1545 | psllq $1, %xmm13 | ||
1546 | psrlq $63, %xmm2 | ||
1547 | movdqa %xmm2, %xmm1 | ||
1548 | pslldq $8, %xmm2 | ||
1549 | psrldq $8, %xmm1 | ||
1550 | por %xmm2, %xmm13 | ||
1551 | |||
1552 | # reduce HashKey<<1 | ||
1553 | |||
1554 | pshufd $0x24, %xmm1, %xmm2 | ||
1555 | pcmpeqd TWOONE(%rip), %xmm2 | ||
1556 | pand POLY(%rip), %xmm2 | ||
1557 | pxor %xmm2, %xmm13 | ||
1558 | movdqa %xmm13, HashKey(%rsp) # %xmm13 holds HashKey<<1 (mod poly) | ||
1559 | mov %arg4, %r13 # %r13 = plaintext length in bytes | ||
1560 | and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) | ||
1561 | mov %r13, %r12 | ||
1562 | |||
1563 | # Encrypt first few blocks | ||
1564 | |||
1565 | and $(3<<4), %r12 | ||
1566 | jz _initial_num_blocks_is_0_encrypt | ||
1567 | cmp $(2<<4), %r12 | ||
1568 | jb _initial_num_blocks_is_1_encrypt | ||
1569 | je _initial_num_blocks_is_2_encrypt | ||
1570 | _initial_num_blocks_is_3_encrypt: | ||
1571 | INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1572 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc | ||
1573 | sub $48, %r13 | ||
1574 | jmp _initial_blocks_encrypted | ||
1575 | _initial_num_blocks_is_2_encrypt: | ||
1576 | INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1577 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc | ||
1578 | sub $32, %r13 | ||
1579 | jmp _initial_blocks_encrypted | ||
1580 | _initial_num_blocks_is_1_encrypt: | ||
1581 | INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1582 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc | ||
1583 | sub $16, %r13 | ||
1584 | jmp _initial_blocks_encrypted | ||
1585 | _initial_num_blocks_is_0_encrypt: | ||
1586 | INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1587 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc | ||
1588 | _initial_blocks_encrypted: | ||
1589 | |||
1590 | # Main loop - Encrypt remaining blocks | ||
1591 | |||
1592 | cmp $0, %r13 | ||
1593 | je _zero_cipher_left_encrypt | ||
1594 | sub $64, %r13 | ||
1595 | je _four_cipher_left_encrypt | ||
1596 | _encrypt_by_4_encrypt: | ||
1597 | GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ | ||
1598 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc | ||
1599 | add $64, %r11 | ||
1600 | sub $64, %r13 | ||
1601 | jne _encrypt_by_4_encrypt | ||
1602 | _four_cipher_left_encrypt: | ||
1603 | GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ | ||
1604 | %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 | ||
1605 | _zero_cipher_left_encrypt: | ||
1606 | mov %arg4, %r13 | ||
1607 | and $15, %r13 # %r13 = arg4 (mod 16) | ||
1608 | je _multiple_of_16_bytes_encrypt | ||
1609 | |||
1610 | # Handle the last <16 Byte block separately | ||
1611 | paddd ONE(%rip), %xmm0 # INCR CNT to get Yn | ||
1612 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1613 | PSHUFB_XMM %xmm10, %xmm0 | ||
1614 | |||
1615 | |||
1616 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) | ||
1617 | sub $16, %r11 | ||
1618 | add %r13, %r11 | ||
1619 | movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block | ||
1620 | lea SHIFT_MASK+16(%rip), %r12 | ||
1621 | sub %r13, %r12 | ||
1622 | # adjust the shuffle mask pointer to be able to shift 16-r13 bytes | ||
1623 | # (%r13 is the number of bytes in plaintext mod 16) | ||
1624 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1625 | PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 bytes | ||
1626 | pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) | ||
1627 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 | ||
1628 | # get the appropriate mask to mask out top 16-r13 bytes of xmm0 | ||
1629 | pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 | ||
1630 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1631 | PSHUFB_XMM %xmm10, %xmm0 | ||
1632 | |||
1633 | pxor %xmm0, %xmm8 | ||
1634 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1635 | # GHASH computation for the last <16 byte block | ||
1636 | sub %r13, %r11 | ||
1637 | add $16, %r11 | ||
1638 | |||
1639 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1640 | PSHUFB_XMM %xmm10, %xmm0 | ||
1641 | |||
1642 | # shuffle xmm0 back to output as ciphertext | ||
1643 | |||
1644 | # Output %r13 bytes | ||
1645 | MOVQ_R64_XMM %xmm0, %rax | ||
1646 | cmp $8, %r13 | ||
1647 | jle _less_than_8_bytes_left_encrypt | ||
1648 | mov %rax, (%arg2, %r11, 1) | ||
1649 | add $8, %r11 | ||
1650 | psrldq $8, %xmm0 | ||
1651 | MOVQ_R64_XMM %xmm0, %rax | ||
1652 | sub $8, %r13 | ||
1653 | _less_than_8_bytes_left_encrypt: | ||
1654 | mov %al, (%arg2, %r11, 1) | ||
1655 | add $1, %r11 | ||
1656 | shr $8, %rax | ||
1657 | sub $1, %r13 | ||
1658 | jne _less_than_8_bytes_left_encrypt | ||
1659 | _multiple_of_16_bytes_encrypt: | ||
1660 | mov arg8, %r12 # %r12 = aadLen (number of bytes) | ||
1661 | shl $3, %r12 # convert into number of bits | ||
1662 | movd %r12d, %xmm15 # len(A) in %xmm15 | ||
1663 | shl $3, %arg4 # len(C) in bits (*8) | ||
1664 | MOVQ_R64_XMM %arg4, %xmm1 | ||
1665 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 | ||
1666 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) | ||
1667 | pxor %xmm15, %xmm8 | ||
1668 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1669 | # final GHASH computation | ||
1670 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1671 | PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap | ||
1672 | |||
1673 | mov %arg5, %rax # %rax = *Y0 | ||
1674 | movdqu (%rax), %xmm0 # %xmm0 = Y0 | ||
1675 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) | ||
1676 | pxor %xmm8, %xmm0 | ||
1677 | _return_T_encrypt: | ||
1678 | mov arg9, %r10 # %r10 = authTag | ||
1679 | mov arg10, %r11 # %r11 = auth_tag_len | ||
1680 | cmp $16, %r11 | ||
1681 | je _T_16_encrypt | ||
1682 | cmp $12, %r11 | ||
1683 | je _T_12_encrypt | ||
1684 | _T_8_encrypt: | ||
1685 | MOVQ_R64_XMM %xmm0, %rax | ||
1686 | mov %rax, (%r10) | ||
1687 | jmp _return_T_done_encrypt | ||
1688 | _T_12_encrypt: | ||
1689 | MOVQ_R64_XMM %xmm0, %rax | ||
1690 | mov %rax, (%r10) | ||
1691 | psrldq $8, %xmm0 | ||
1692 | movd %xmm0, %eax | ||
1693 | mov %eax, 8(%r10) | ||
1694 | jmp _return_T_done_encrypt | ||
1695 | _T_16_encrypt: | ||
1696 | movdqu %xmm0, (%r10) | ||
1697 | _return_T_done_encrypt: | ||
1698 | mov %r14, %rsp | ||
1699 | pop %r14 | ||
1700 | pop %r13 | ||
1701 | pop %r12 | ||
1702 | ret | ||
1703 | |||
1704 | #endif | ||
1705 | |||
49 | 1706 | ||
50 | _key_expansion_128: | 1707 | _key_expansion_128: |
51 | _key_expansion_256a: | 1708 | _key_expansion_256a: |
@@ -55,10 +1712,11 @@ _key_expansion_256a: | |||
55 | shufps $0b10001100, %xmm0, %xmm4 | 1712 | shufps $0b10001100, %xmm0, %xmm4 |
56 | pxor %xmm4, %xmm0 | 1713 | pxor %xmm4, %xmm0 |
57 | pxor %xmm1, %xmm0 | 1714 | pxor %xmm1, %xmm0 |
58 | movaps %xmm0, (%rcx) | 1715 | movaps %xmm0, (TKEYP) |
59 | add $0x10, %rcx | 1716 | add $0x10, TKEYP |
60 | ret | 1717 | ret |
61 | 1718 | ||
1719 | .align 4 | ||
62 | _key_expansion_192a: | 1720 | _key_expansion_192a: |
63 | pshufd $0b01010101, %xmm1, %xmm1 | 1721 | pshufd $0b01010101, %xmm1, %xmm1 |
64 | shufps $0b00010000, %xmm0, %xmm4 | 1722 | shufps $0b00010000, %xmm0, %xmm4 |
@@ -76,12 +1734,13 @@ _key_expansion_192a: | |||
76 | 1734 | ||
77 | movaps %xmm0, %xmm1 | 1735 | movaps %xmm0, %xmm1 |
78 | shufps $0b01000100, %xmm0, %xmm6 | 1736 | shufps $0b01000100, %xmm0, %xmm6 |
79 | movaps %xmm6, (%rcx) | 1737 | movaps %xmm6, (TKEYP) |
80 | shufps $0b01001110, %xmm2, %xmm1 | 1738 | shufps $0b01001110, %xmm2, %xmm1 |
81 | movaps %xmm1, 16(%rcx) | 1739 | movaps %xmm1, 0x10(TKEYP) |
82 | add $0x20, %rcx | 1740 | add $0x20, TKEYP |
83 | ret | 1741 | ret |
84 | 1742 | ||
1743 | .align 4 | ||
85 | _key_expansion_192b: | 1744 | _key_expansion_192b: |
86 | pshufd $0b01010101, %xmm1, %xmm1 | 1745 | pshufd $0b01010101, %xmm1, %xmm1 |
87 | shufps $0b00010000, %xmm0, %xmm4 | 1746 | shufps $0b00010000, %xmm0, %xmm4 |
@@ -96,10 +1755,11 @@ _key_expansion_192b: | |||
96 | pxor %xmm3, %xmm2 | 1755 | pxor %xmm3, %xmm2 |
97 | pxor %xmm5, %xmm2 | 1756 | pxor %xmm5, %xmm2 |
98 | 1757 | ||
99 | movaps %xmm0, (%rcx) | 1758 | movaps %xmm0, (TKEYP) |
100 | add $0x10, %rcx | 1759 | add $0x10, TKEYP |
101 | ret | 1760 | ret |
102 | 1761 | ||
1762 | .align 4 | ||
103 | _key_expansion_256b: | 1763 | _key_expansion_256b: |
104 | pshufd $0b10101010, %xmm1, %xmm1 | 1764 | pshufd $0b10101010, %xmm1, %xmm1 |
105 | shufps $0b00010000, %xmm2, %xmm4 | 1765 | shufps $0b00010000, %xmm2, %xmm4 |
@@ -107,8 +1767,8 @@ _key_expansion_256b: | |||
107 | shufps $0b10001100, %xmm2, %xmm4 | 1767 | shufps $0b10001100, %xmm2, %xmm4 |
108 | pxor %xmm4, %xmm2 | 1768 | pxor %xmm4, %xmm2 |
109 | pxor %xmm1, %xmm2 | 1769 | pxor %xmm1, %xmm2 |
110 | movaps %xmm2, (%rcx) | 1770 | movaps %xmm2, (TKEYP) |
111 | add $0x10, %rcx | 1771 | add $0x10, TKEYP |
112 | ret | 1772 | ret |
113 | 1773 | ||
114 | /* | 1774 | /* |
@@ -116,17 +1776,23 @@ _key_expansion_256b: | |||
116 | * unsigned int key_len) | 1776 | * unsigned int key_len) |
117 | */ | 1777 | */ |
118 | ENTRY(aesni_set_key) | 1778 | ENTRY(aesni_set_key) |
119 | movups (%rsi), %xmm0 # user key (first 16 bytes) | 1779 | #ifndef __x86_64__ |
120 | movaps %xmm0, (%rdi) | 1780 | pushl KEYP |
121 | lea 0x10(%rdi), %rcx # key addr | 1781 | movl 8(%esp), KEYP # ctx |
122 | movl %edx, 480(%rdi) | 1782 | movl 12(%esp), UKEYP # in_key |
1783 | movl 16(%esp), %edx # key_len | ||
1784 | #endif | ||
1785 | movups (UKEYP), %xmm0 # user key (first 16 bytes) | ||
1786 | movaps %xmm0, (KEYP) | ||
1787 | lea 0x10(KEYP), TKEYP # key addr | ||
1788 | movl %edx, 480(KEYP) | ||
123 | pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x | 1789 | pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x |
124 | cmp $24, %dl | 1790 | cmp $24, %dl |
125 | jb .Lenc_key128 | 1791 | jb .Lenc_key128 |
126 | je .Lenc_key192 | 1792 | je .Lenc_key192 |
127 | movups 0x10(%rsi), %xmm2 # other user key | 1793 | movups 0x10(UKEYP), %xmm2 # other user key |
128 | movaps %xmm2, (%rcx) | 1794 | movaps %xmm2, (TKEYP) |
129 | add $0x10, %rcx | 1795 | add $0x10, TKEYP |
130 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 | 1796 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
131 | call _key_expansion_256a | 1797 | call _key_expansion_256a |
132 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 | 1798 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 |
@@ -155,7 +1821,7 @@ ENTRY(aesni_set_key) | |||
155 | call _key_expansion_256a | 1821 | call _key_expansion_256a |
156 | jmp .Ldec_key | 1822 | jmp .Ldec_key |
157 | .Lenc_key192: | 1823 | .Lenc_key192: |
158 | movq 0x10(%rsi), %xmm2 # other user key | 1824 | movq 0x10(UKEYP), %xmm2 # other user key |
159 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 | 1825 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
160 | call _key_expansion_192a | 1826 | call _key_expansion_192a |
161 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 | 1827 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
@@ -195,33 +1861,47 @@ ENTRY(aesni_set_key) | |||
195 | AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 | 1861 | AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 |
196 | call _key_expansion_128 | 1862 | call _key_expansion_128 |
197 | .Ldec_key: | 1863 | .Ldec_key: |
198 | sub $0x10, %rcx | 1864 | sub $0x10, TKEYP |
199 | movaps (%rdi), %xmm0 | 1865 | movaps (KEYP), %xmm0 |
200 | movaps (%rcx), %xmm1 | 1866 | movaps (TKEYP), %xmm1 |
201 | movaps %xmm0, 240(%rcx) | 1867 | movaps %xmm0, 240(TKEYP) |
202 | movaps %xmm1, 240(%rdi) | 1868 | movaps %xmm1, 240(KEYP) |
203 | add $0x10, %rdi | 1869 | add $0x10, KEYP |
204 | lea 240-16(%rcx), %rsi | 1870 | lea 240-16(TKEYP), UKEYP |
205 | .align 4 | 1871 | .align 4 |
206 | .Ldec_key_loop: | 1872 | .Ldec_key_loop: |
207 | movaps (%rdi), %xmm0 | 1873 | movaps (KEYP), %xmm0 |
208 | AESIMC %xmm0 %xmm1 | 1874 | AESIMC %xmm0 %xmm1 |
209 | movaps %xmm1, (%rsi) | 1875 | movaps %xmm1, (UKEYP) |
210 | add $0x10, %rdi | 1876 | add $0x10, KEYP |
211 | sub $0x10, %rsi | 1877 | sub $0x10, UKEYP |
212 | cmp %rcx, %rdi | 1878 | cmp TKEYP, KEYP |
213 | jb .Ldec_key_loop | 1879 | jb .Ldec_key_loop |
214 | xor %rax, %rax | 1880 | xor AREG, AREG |
1881 | #ifndef __x86_64__ | ||
1882 | popl KEYP | ||
1883 | #endif | ||
215 | ret | 1884 | ret |
216 | 1885 | ||
217 | /* | 1886 | /* |
218 | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 1887 | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
219 | */ | 1888 | */ |
220 | ENTRY(aesni_enc) | 1889 | ENTRY(aesni_enc) |
1890 | #ifndef __x86_64__ | ||
1891 | pushl KEYP | ||
1892 | pushl KLEN | ||
1893 | movl 12(%esp), KEYP | ||
1894 | movl 16(%esp), OUTP | ||
1895 | movl 20(%esp), INP | ||
1896 | #endif | ||
221 | movl 480(KEYP), KLEN # key length | 1897 | movl 480(KEYP), KLEN # key length |
222 | movups (INP), STATE # input | 1898 | movups (INP), STATE # input |
223 | call _aesni_enc1 | 1899 | call _aesni_enc1 |
224 | movups STATE, (OUTP) # output | 1900 | movups STATE, (OUTP) # output |
1901 | #ifndef __x86_64__ | ||
1902 | popl KLEN | ||
1903 | popl KEYP | ||
1904 | #endif | ||
225 | ret | 1905 | ret |
226 | 1906 | ||
227 | /* | 1907 | /* |
@@ -236,6 +1916,7 @@ ENTRY(aesni_enc) | |||
236 | * KEY | 1916 | * KEY |
237 | * TKEYP (T1) | 1917 | * TKEYP (T1) |
238 | */ | 1918 | */ |
1919 | .align 4 | ||
239 | _aesni_enc1: | 1920 | _aesni_enc1: |
240 | movaps (KEYP), KEY # key | 1921 | movaps (KEYP), KEY # key |
241 | mov KEYP, TKEYP | 1922 | mov KEYP, TKEYP |
@@ -298,6 +1979,7 @@ _aesni_enc1: | |||
298 | * KEY | 1979 | * KEY |
299 | * TKEYP (T1) | 1980 | * TKEYP (T1) |
300 | */ | 1981 | */ |
1982 | .align 4 | ||
301 | _aesni_enc4: | 1983 | _aesni_enc4: |
302 | movaps (KEYP), KEY # key | 1984 | movaps (KEYP), KEY # key |
303 | mov KEYP, TKEYP | 1985 | mov KEYP, TKEYP |
@@ -391,11 +2073,22 @@ _aesni_enc4: | |||
391 | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 2073 | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
392 | */ | 2074 | */ |
393 | ENTRY(aesni_dec) | 2075 | ENTRY(aesni_dec) |
2076 | #ifndef __x86_64__ | ||
2077 | pushl KEYP | ||
2078 | pushl KLEN | ||
2079 | movl 12(%esp), KEYP | ||
2080 | movl 16(%esp), OUTP | ||
2081 | movl 20(%esp), INP | ||
2082 | #endif | ||
394 | mov 480(KEYP), KLEN # key length | 2083 | mov 480(KEYP), KLEN # key length |
395 | add $240, KEYP | 2084 | add $240, KEYP |
396 | movups (INP), STATE # input | 2085 | movups (INP), STATE # input |
397 | call _aesni_dec1 | 2086 | call _aesni_dec1 |
398 | movups STATE, (OUTP) #output | 2087 | movups STATE, (OUTP) #output |
2088 | #ifndef __x86_64__ | ||
2089 | popl KLEN | ||
2090 | popl KEYP | ||
2091 | #endif | ||
399 | ret | 2092 | ret |
400 | 2093 | ||
401 | /* | 2094 | /* |
@@ -410,6 +2103,7 @@ ENTRY(aesni_dec) | |||
410 | * KEY | 2103 | * KEY |
411 | * TKEYP (T1) | 2104 | * TKEYP (T1) |
412 | */ | 2105 | */ |
2106 | .align 4 | ||
413 | _aesni_dec1: | 2107 | _aesni_dec1: |
414 | movaps (KEYP), KEY # key | 2108 | movaps (KEYP), KEY # key |
415 | mov KEYP, TKEYP | 2109 | mov KEYP, TKEYP |
@@ -472,6 +2166,7 @@ _aesni_dec1: | |||
472 | * KEY | 2166 | * KEY |
473 | * TKEYP (T1) | 2167 | * TKEYP (T1) |
474 | */ | 2168 | */ |
2169 | .align 4 | ||
475 | _aesni_dec4: | 2170 | _aesni_dec4: |
476 | movaps (KEYP), KEY # key | 2171 | movaps (KEYP), KEY # key |
477 | mov KEYP, TKEYP | 2172 | mov KEYP, TKEYP |
@@ -566,6 +2261,15 @@ _aesni_dec4: | |||
566 | * size_t len) | 2261 | * size_t len) |
567 | */ | 2262 | */ |
568 | ENTRY(aesni_ecb_enc) | 2263 | ENTRY(aesni_ecb_enc) |
2264 | #ifndef __x86_64__ | ||
2265 | pushl LEN | ||
2266 | pushl KEYP | ||
2267 | pushl KLEN | ||
2268 | movl 16(%esp), KEYP | ||
2269 | movl 20(%esp), OUTP | ||
2270 | movl 24(%esp), INP | ||
2271 | movl 28(%esp), LEN | ||
2272 | #endif | ||
569 | test LEN, LEN # check length | 2273 | test LEN, LEN # check length |
570 | jz .Lecb_enc_ret | 2274 | jz .Lecb_enc_ret |
571 | mov 480(KEYP), KLEN | 2275 | mov 480(KEYP), KLEN |
@@ -602,6 +2306,11 @@ ENTRY(aesni_ecb_enc) | |||
602 | cmp $16, LEN | 2306 | cmp $16, LEN |
603 | jge .Lecb_enc_loop1 | 2307 | jge .Lecb_enc_loop1 |
604 | .Lecb_enc_ret: | 2308 | .Lecb_enc_ret: |
2309 | #ifndef __x86_64__ | ||
2310 | popl KLEN | ||
2311 | popl KEYP | ||
2312 | popl LEN | ||
2313 | #endif | ||
605 | ret | 2314 | ret |
606 | 2315 | ||
607 | /* | 2316 | /* |
@@ -609,6 +2318,15 @@ ENTRY(aesni_ecb_enc) | |||
609 | * size_t len); | 2318 | * size_t len); |
610 | */ | 2319 | */ |
611 | ENTRY(aesni_ecb_dec) | 2320 | ENTRY(aesni_ecb_dec) |
2321 | #ifndef __x86_64__ | ||
2322 | pushl LEN | ||
2323 | pushl KEYP | ||
2324 | pushl KLEN | ||
2325 | movl 16(%esp), KEYP | ||
2326 | movl 20(%esp), OUTP | ||
2327 | movl 24(%esp), INP | ||
2328 | movl 28(%esp), LEN | ||
2329 | #endif | ||
612 | test LEN, LEN | 2330 | test LEN, LEN |
613 | jz .Lecb_dec_ret | 2331 | jz .Lecb_dec_ret |
614 | mov 480(KEYP), KLEN | 2332 | mov 480(KEYP), KLEN |
@@ -646,6 +2364,11 @@ ENTRY(aesni_ecb_dec) | |||
646 | cmp $16, LEN | 2364 | cmp $16, LEN |
647 | jge .Lecb_dec_loop1 | 2365 | jge .Lecb_dec_loop1 |
648 | .Lecb_dec_ret: | 2366 | .Lecb_dec_ret: |
2367 | #ifndef __x86_64__ | ||
2368 | popl KLEN | ||
2369 | popl KEYP | ||
2370 | popl LEN | ||
2371 | #endif | ||
649 | ret | 2372 | ret |
650 | 2373 | ||
651 | /* | 2374 | /* |
@@ -653,6 +2376,17 @@ ENTRY(aesni_ecb_dec) | |||
653 | * size_t len, u8 *iv) | 2376 | * size_t len, u8 *iv) |
654 | */ | 2377 | */ |
655 | ENTRY(aesni_cbc_enc) | 2378 | ENTRY(aesni_cbc_enc) |
2379 | #ifndef __x86_64__ | ||
2380 | pushl IVP | ||
2381 | pushl LEN | ||
2382 | pushl KEYP | ||
2383 | pushl KLEN | ||
2384 | movl 20(%esp), KEYP | ||
2385 | movl 24(%esp), OUTP | ||
2386 | movl 28(%esp), INP | ||
2387 | movl 32(%esp), LEN | ||
2388 | movl 36(%esp), IVP | ||
2389 | #endif | ||
656 | cmp $16, LEN | 2390 | cmp $16, LEN |
657 | jb .Lcbc_enc_ret | 2391 | jb .Lcbc_enc_ret |
658 | mov 480(KEYP), KLEN | 2392 | mov 480(KEYP), KLEN |
@@ -670,6 +2404,12 @@ ENTRY(aesni_cbc_enc) | |||
670 | jge .Lcbc_enc_loop | 2404 | jge .Lcbc_enc_loop |
671 | movups STATE, (IVP) | 2405 | movups STATE, (IVP) |
672 | .Lcbc_enc_ret: | 2406 | .Lcbc_enc_ret: |
2407 | #ifndef __x86_64__ | ||
2408 | popl KLEN | ||
2409 | popl KEYP | ||
2410 | popl LEN | ||
2411 | popl IVP | ||
2412 | #endif | ||
673 | ret | 2413 | ret |
674 | 2414 | ||
675 | /* | 2415 | /* |
@@ -677,6 +2417,17 @@ ENTRY(aesni_cbc_enc) | |||
677 | * size_t len, u8 *iv) | 2417 | * size_t len, u8 *iv) |
678 | */ | 2418 | */ |
679 | ENTRY(aesni_cbc_dec) | 2419 | ENTRY(aesni_cbc_dec) |
2420 | #ifndef __x86_64__ | ||
2421 | pushl IVP | ||
2422 | pushl LEN | ||
2423 | pushl KEYP | ||
2424 | pushl KLEN | ||
2425 | movl 20(%esp), KEYP | ||
2426 | movl 24(%esp), OUTP | ||
2427 | movl 28(%esp), INP | ||
2428 | movl 32(%esp), LEN | ||
2429 | movl 36(%esp), IVP | ||
2430 | #endif | ||
680 | cmp $16, LEN | 2431 | cmp $16, LEN |
681 | jb .Lcbc_dec_just_ret | 2432 | jb .Lcbc_dec_just_ret |
682 | mov 480(KEYP), KLEN | 2433 | mov 480(KEYP), KLEN |
@@ -690,16 +2441,30 @@ ENTRY(aesni_cbc_dec) | |||
690 | movaps IN1, STATE1 | 2441 | movaps IN1, STATE1 |
691 | movups 0x10(INP), IN2 | 2442 | movups 0x10(INP), IN2 |
692 | movaps IN2, STATE2 | 2443 | movaps IN2, STATE2 |
2444 | #ifdef __x86_64__ | ||
693 | movups 0x20(INP), IN3 | 2445 | movups 0x20(INP), IN3 |
694 | movaps IN3, STATE3 | 2446 | movaps IN3, STATE3 |
695 | movups 0x30(INP), IN4 | 2447 | movups 0x30(INP), IN4 |
696 | movaps IN4, STATE4 | 2448 | movaps IN4, STATE4 |
2449 | #else | ||
2450 | movups 0x20(INP), IN1 | ||
2451 | movaps IN1, STATE3 | ||
2452 | movups 0x30(INP), IN2 | ||
2453 | movaps IN2, STATE4 | ||
2454 | #endif | ||
697 | call _aesni_dec4 | 2455 | call _aesni_dec4 |
698 | pxor IV, STATE1 | 2456 | pxor IV, STATE1 |
2457 | #ifdef __x86_64__ | ||
699 | pxor IN1, STATE2 | 2458 | pxor IN1, STATE2 |
700 | pxor IN2, STATE3 | 2459 | pxor IN2, STATE3 |
701 | pxor IN3, STATE4 | 2460 | pxor IN3, STATE4 |
702 | movaps IN4, IV | 2461 | movaps IN4, IV |
2462 | #else | ||
2463 | pxor (INP), STATE2 | ||
2464 | pxor 0x10(INP), STATE3 | ||
2465 | pxor IN1, STATE4 | ||
2466 | movaps IN2, IV | ||
2467 | #endif | ||
703 | movups STATE1, (OUTP) | 2468 | movups STATE1, (OUTP) |
704 | movups STATE2, 0x10(OUTP) | 2469 | movups STATE2, 0x10(OUTP) |
705 | movups STATE3, 0x20(OUTP) | 2470 | movups STATE3, 0x20(OUTP) |
@@ -727,8 +2492,15 @@ ENTRY(aesni_cbc_dec) | |||
727 | .Lcbc_dec_ret: | 2492 | .Lcbc_dec_ret: |
728 | movups IV, (IVP) | 2493 | movups IV, (IVP) |
729 | .Lcbc_dec_just_ret: | 2494 | .Lcbc_dec_just_ret: |
2495 | #ifndef __x86_64__ | ||
2496 | popl KLEN | ||
2497 | popl KEYP | ||
2498 | popl LEN | ||
2499 | popl IVP | ||
2500 | #endif | ||
730 | ret | 2501 | ret |
731 | 2502 | ||
2503 | #ifdef __x86_64__ | ||
732 | .align 16 | 2504 | .align 16 |
733 | .Lbswap_mask: | 2505 | .Lbswap_mask: |
734 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | 2506 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 |
@@ -744,6 +2516,7 @@ ENTRY(aesni_cbc_dec) | |||
744 | * INC: == 1, in little endian | 2516 | * INC: == 1, in little endian |
745 | * BSWAP_MASK == endian swapping mask | 2517 | * BSWAP_MASK == endian swapping mask |
746 | */ | 2518 | */ |
2519 | .align 4 | ||
747 | _aesni_inc_init: | 2520 | _aesni_inc_init: |
748 | movaps .Lbswap_mask, BSWAP_MASK | 2521 | movaps .Lbswap_mask, BSWAP_MASK |
749 | movaps IV, CTR | 2522 | movaps IV, CTR |
@@ -768,6 +2541,7 @@ _aesni_inc_init: | |||
768 | * CTR: == output IV, in little endian | 2541 | * CTR: == output IV, in little endian |
769 | * TCTR_LOW: == lower qword of CTR | 2542 | * TCTR_LOW: == lower qword of CTR |
770 | */ | 2543 | */ |
2544 | .align 4 | ||
771 | _aesni_inc: | 2545 | _aesni_inc: |
772 | paddq INC, CTR | 2546 | paddq INC, CTR |
773 | add $1, TCTR_LOW | 2547 | add $1, TCTR_LOW |
@@ -839,3 +2613,4 @@ ENTRY(aesni_ctr_enc) | |||
839 | movups IV, (IVP) | 2613 | movups IV, (IVP) |
840 | .Lctr_enc_just_ret: | 2614 | .Lctr_enc_just_ret: |
841 | ret | 2615 | ret |
2616 | #endif | ||
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2cb3dcc4490a..feee8ff1d05e 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -5,6 +5,14 @@ | |||
5 | * Copyright (C) 2008, Intel Corp. | 5 | * Copyright (C) 2008, Intel Corp. |
6 | * Author: Huang Ying <ying.huang@intel.com> | 6 | * Author: Huang Ying <ying.huang@intel.com> |
7 | * | 7 | * |
8 | * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD | ||
9 | * interface for 64-bit kernels. | ||
10 | * Authors: Adrian Hoban <adrian.hoban@intel.com> | ||
11 | * Gabriele Paoloni <gabriele.paoloni@intel.com> | ||
12 | * Tadeusz Struk (tadeusz.struk@intel.com) | ||
13 | * Aidan O'Mahony (aidan.o.mahony@intel.com) | ||
14 | * Copyright (c) 2010, Intel Corporation. | ||
15 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | 16 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by | 17 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or | 18 | * the Free Software Foundation; either version 2 of the License, or |
@@ -21,6 +29,10 @@ | |||
21 | #include <crypto/ctr.h> | 29 | #include <crypto/ctr.h> |
22 | #include <asm/i387.h> | 30 | #include <asm/i387.h> |
23 | #include <asm/aes.h> | 31 | #include <asm/aes.h> |
32 | #include <crypto/scatterwalk.h> | ||
33 | #include <crypto/internal/aead.h> | ||
34 | #include <linux/workqueue.h> | ||
35 | #include <linux/spinlock.h> | ||
24 | 36 | ||
25 | #if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) | 37 | #if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) |
26 | #define HAS_CTR | 38 | #define HAS_CTR |
@@ -42,8 +54,31 @@ struct async_aes_ctx { | |||
42 | struct cryptd_ablkcipher *cryptd_tfm; | 54 | struct cryptd_ablkcipher *cryptd_tfm; |
43 | }; | 55 | }; |
44 | 56 | ||
45 | #define AESNI_ALIGN 16 | 57 | /* This data is stored at the end of the crypto_tfm struct. |
58 | * It is a per-"session" data storage location. | ||
59 | * It needs to be 16 byte aligned. | ||
60 | */ | ||
61 | struct aesni_rfc4106_gcm_ctx { | ||
62 | u8 hash_subkey[16]; | ||
63 | struct crypto_aes_ctx aes_key_expanded; | ||
64 | u8 nonce[4]; | ||
65 | struct cryptd_aead *cryptd_tfm; | ||
66 | }; | ||
67 | |||
68 | struct aesni_gcm_set_hash_subkey_result { | ||
69 | int err; | ||
70 | struct completion completion; | ||
71 | }; | ||
72 | |||
73 | struct aesni_hash_subkey_req_data { | ||
74 | u8 iv[16]; | ||
75 | struct aesni_gcm_set_hash_subkey_result result; | ||
76 | struct scatterlist sg; | ||
77 | }; | ||
78 | |||
79 | #define AESNI_ALIGN (16) | ||
46 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) | 80 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) |
81 | #define RFC4106_HASH_SUBKEY_SIZE 16 | ||
47 | 82 | ||
48 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, | 83 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, |
49 | unsigned int key_len); | 84 | unsigned int key_len); |
@@ -59,9 +94,66 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, | |||
59 | const u8 *in, unsigned int len, u8 *iv); | 94 | const u8 *in, unsigned int len, u8 *iv); |
60 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | 95 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, |
61 | const u8 *in, unsigned int len, u8 *iv); | 96 | const u8 *in, unsigned int len, u8 *iv); |
97 | |||
98 | int crypto_fpu_init(void); | ||
99 | void crypto_fpu_exit(void); | ||
100 | |||
101 | #ifdef CONFIG_X86_64 | ||
62 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | 102 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, |
63 | const u8 *in, unsigned int len, u8 *iv); | 103 | const u8 *in, unsigned int len, u8 *iv); |
64 | 104 | ||
105 | /* asmlinkage void aesni_gcm_enc() | ||
106 | * void *ctx, AES Key schedule. Starts on a 16 byte boundary. | ||
107 | * u8 *out, Ciphertext output. Encrypt in-place is allowed. | ||
108 | * const u8 *in, Plaintext input | ||
109 | * unsigned long plaintext_len, Length of data in bytes for encryption. | ||
110 | * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) | ||
111 | * concatenated with 8 byte Initialisation Vector (from IPSec ESP | ||
112 | * Payload) concatenated with 0x00000001. 16-byte aligned pointer. | ||
113 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
114 | * const u8 *aad, Additional Authentication Data (AAD) | ||
115 | * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this | ||
116 | * is going to be 8 or 12 bytes | ||
117 | * u8 *auth_tag, Authenticated Tag output. | ||
118 | * unsigned long auth_tag_len), Authenticated Tag Length in bytes. | ||
119 | * Valid values are 16 (most likely), 12 or 8. | ||
120 | */ | ||
121 | asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, | ||
122 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
123 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
124 | u8 *auth_tag, unsigned long auth_tag_len); | ||
125 | |||
126 | /* asmlinkage void aesni_gcm_dec() | ||
127 | * void *ctx, AES Key schedule. Starts on a 16 byte boundary. | ||
128 | * u8 *out, Plaintext output. Decrypt in-place is allowed. | ||
129 | * const u8 *in, Ciphertext input | ||
130 | * unsigned long ciphertext_len, Length of data in bytes for decryption. | ||
131 | * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) | ||
132 | * concatenated with 8 byte Initialisation Vector (from IPSec ESP | ||
133 | * Payload) concatenated with 0x00000001. 16-byte aligned pointer. | ||
134 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
135 | * const u8 *aad, Additional Authentication Data (AAD) | ||
136 | * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going | ||
137 | * to be 8 or 12 bytes | ||
138 | * u8 *auth_tag, Authenticated Tag output. | ||
139 | * unsigned long auth_tag_len) Authenticated Tag Length in bytes. | ||
140 | * Valid values are 16 (most likely), 12 or 8. | ||
141 | */ | ||
142 | asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, | ||
143 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
144 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
145 | u8 *auth_tag, unsigned long auth_tag_len); | ||
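A minimal usage sketch (example_gcm_encrypt is a hypothetical helper, not in this patch): both routines run on SSE state, so every call must sit between kernel_fpu_begin() and kernel_fpu_end(), with aes_ctx, iv and hash_subkey on 16-byte boundaries as documented above.

    static void example_gcm_encrypt(void *aes_ctx, u8 *dst, const u8 *src,
                                    unsigned long len, u8 *iv,
                                    u8 *hash_subkey, const u8 *aad,
                                    unsigned long aad_len, u8 *tag)
    {
            kernel_fpu_begin();
            aesni_gcm_enc(aes_ctx, dst, src, len, iv, hash_subkey,
                          aad, aad_len, tag, 16);  /* full 16 byte tag */
            kernel_fpu_end();
    }

The rfc4106_encrypt()/rfc4106_decrypt() entry points below apply the same bracketing, deferring to cryptd whenever the FPU is not usable in the current context.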
146 | |||
147 | static inline struct | ||
148 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) | ||
149 | { | ||
150 | return | ||
151 | (struct aesni_rfc4106_gcm_ctx *) | ||
152 | PTR_ALIGN((u8 *) | ||
153 | crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN); | ||
154 | } | ||
155 | #endif | ||
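aesni_rfc4106_gcm_ctx_get() deserves a note: crypto_tfm_ctx() only guarantees the crypto API's default alignment, so cra_ctxsize for these algorithms reserves sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN bytes and the context pointer is rounded up to the next 16-byte boundary. The same arithmetic as an annotated sketch (ctx_of is a hypothetical name; PTR_ALIGN comes from <linux/kernel.h>):

    static struct aesni_rfc4106_gcm_ctx *ctx_of(struct crypto_tfm *tfm)
    {
            u8 *raw = crypto_tfm_ctx(tfm);  /* may sit at, say, ...0x08 */

            /* ...0x08 rounds up to ...0x10; the extra AESNI_ALIGN bytes
             * reserved in cra_ctxsize keep the aligned ctx in bounds. */
            return PTR_ALIGN((struct aesni_rfc4106_gcm_ctx *)raw,
                             AESNI_ALIGN);
    }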
156 | |||
65 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) | 157 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) |
66 | { | 158 | { |
67 | unsigned long addr = (unsigned long)raw_ctx; | 159 | unsigned long addr = (unsigned long)raw_ctx; |
@@ -324,6 +416,7 @@ static struct crypto_alg blk_cbc_alg = { | |||
324 | }, | 416 | }, |
325 | }; | 417 | }; |
326 | 418 | ||
419 | #ifdef CONFIG_X86_64 | ||
327 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, | 420 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, |
328 | struct blkcipher_walk *walk) | 421 | struct blkcipher_walk *walk) |
329 | { | 422 | { |
@@ -389,6 +482,7 @@ static struct crypto_alg blk_ctr_alg = { | |||
389 | }, | 482 | }, |
390 | }, | 483 | }, |
391 | }; | 484 | }; |
485 | #endif | ||
392 | 486 | ||
393 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | 487 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, |
394 | unsigned int key_len) | 488 | unsigned int key_len) |
@@ -536,6 +630,7 @@ static struct crypto_alg ablk_cbc_alg = { | |||
536 | }, | 630 | }, |
537 | }; | 631 | }; |
538 | 632 | ||
633 | #ifdef CONFIG_X86_64 | ||
539 | static int ablk_ctr_init(struct crypto_tfm *tfm) | 634 | static int ablk_ctr_init(struct crypto_tfm *tfm) |
540 | { | 635 | { |
541 | struct cryptd_ablkcipher *cryptd_tfm; | 636 | struct cryptd_ablkcipher *cryptd_tfm; |
@@ -612,6 +707,7 @@ static struct crypto_alg ablk_rfc3686_ctr_alg = { | |||
612 | }, | 707 | }, |
613 | }; | 708 | }; |
614 | #endif | 709 | #endif |
710 | #endif | ||
615 | 711 | ||
616 | #ifdef HAS_LRW | 712 | #ifdef HAS_LRW |
617 | static int ablk_lrw_init(struct crypto_tfm *tfm) | 713 | static int ablk_lrw_init(struct crypto_tfm *tfm) |
@@ -730,6 +826,432 @@ static struct crypto_alg ablk_xts_alg = { | |||
730 | }; | 826 | }; |
731 | #endif | 827 | #endif |
732 | 828 | ||
829 | #ifdef CONFIG_X86_64 | ||
830 | static int rfc4106_init(struct crypto_tfm *tfm) | ||
831 | { | ||
832 | struct cryptd_aead *cryptd_tfm; | ||
833 | struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *) | ||
834 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); | ||
835 | struct crypto_aead *cryptd_child; | ||
836 | struct aesni_rfc4106_gcm_ctx *child_ctx; | ||
837 | cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); | ||
838 | if (IS_ERR(cryptd_tfm)) | ||
839 | return PTR_ERR(cryptd_tfm); | ||
840 | |||
841 | cryptd_child = cryptd_aead_child(cryptd_tfm); | ||
842 | child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child); | ||
843 | memcpy(child_ctx, ctx, sizeof(*ctx)); | ||
844 | ctx->cryptd_tfm = cryptd_tfm; | ||
845 | tfm->crt_aead.reqsize = sizeof(struct aead_request) | ||
846 | + crypto_aead_reqsize(&cryptd_tfm->base); | ||
847 | return 0; | ||
848 | } | ||
849 | |||
850 | static void rfc4106_exit(struct crypto_tfm *tfm) | ||
851 | { | ||
852 | struct aesni_rfc4106_gcm_ctx *ctx = | ||
853 | (struct aesni_rfc4106_gcm_ctx *) | ||
854 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); | ||
855 | if (!IS_ERR(ctx->cryptd_tfm)) | ||
856 | cryptd_free_aead(ctx->cryptd_tfm); | ||
857 | return; | ||
858 | } | ||
859 | |||
860 | static void | ||
861 | rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err) | ||
862 | { | ||
863 | struct aesni_gcm_set_hash_subkey_result *result = req->data; | ||
864 | |||
865 | if (err == -EINPROGRESS) | ||
866 | return; | ||
867 | result->err = err; | ||
868 | complete(&result->completion); | ||
869 | } | ||
870 | |||
871 | static int | ||
872 | rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) | ||
873 | { | ||
874 | struct crypto_ablkcipher *ctr_tfm; | ||
875 | struct ablkcipher_request *req; | ||
876 | int ret = -EINVAL; | ||
877 | struct aesni_hash_subkey_req_data *req_data; | ||
878 | |||
879 | ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0); | ||
880 | if (IS_ERR(ctr_tfm)) | ||
881 | return PTR_ERR(ctr_tfm); | ||
882 | |||
883 | crypto_ablkcipher_clear_flags(ctr_tfm, ~0); | ||
884 | |||
885 | ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); | ||
886 | if (ret) | ||
887 | goto out_free_ablkcipher; | ||
888 | |||
889 | ret = -ENOMEM; | ||
890 | req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); | ||
891 | if (!req) | ||
892 | goto out_free_ablkcipher; | ||
893 | |||
894 | req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); | ||
895 | if (!req_data) | ||
896 | goto out_free_request; | ||
897 | |||
898 | memset(req_data->iv, 0, sizeof(req_data->iv)); | ||
899 | |||
900 | /* Clear the data in the hash sub key container to zero. */ | ||
901 | /* We want to cipher all zeros to create the hash sub key. */ | ||
902 | memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); | ||
903 | |||
904 | init_completion(&req_data->result.completion); | ||
905 | sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE); | ||
906 | ablkcipher_request_set_tfm(req, ctr_tfm); | ||
907 | ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | | ||
908 | CRYPTO_TFM_REQ_MAY_BACKLOG, | ||
909 | rfc4106_set_hash_subkey_done, | ||
910 | &req_data->result); | ||
911 | |||
912 | ablkcipher_request_set_crypt(req, &req_data->sg, | ||
913 | &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv); | ||
914 | |||
915 | ret = crypto_ablkcipher_encrypt(req); | ||
916 | if (ret == -EINPROGRESS || ret == -EBUSY) { | ||
917 | ret = wait_for_completion_interruptible | ||
918 | (&req_data->result.completion); | ||
919 | if (!ret) | ||
920 | ret = req_data->result.err; | ||
921 | } | ||
922 | kfree(req_data); | ||
923 | out_free_request: | ||
924 | ablkcipher_request_free(req); | ||
925 | out_free_ablkcipher: | ||
926 | crypto_free_ablkcipher(ctr_tfm); | ||
927 | return ret; | ||
928 | } | ||
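Stated directly, the subkey this routine produces is H = E_K(0^128), a single AES encryption of the all-zero block (NIST SP 800-38D): ctr(aes) over one zero block with a zero counter degenerates to 0 XOR E_K(0) = E_K(0). A sketch under that reading, where aes_encrypt_block() is a hypothetical single-block primitive:

    static void example_hash_subkey(const void *key_sched, u8 h[16])
    {
            u8 zeroes[16] = { 0 };

            /* H = E_K(0^128): one AES encryption of the zero block */
            aes_encrypt_block(key_sched, h, zeroes);
    }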
929 | |||
930 | static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | ||
931 | unsigned int key_len) | ||
932 | { | ||
933 | int ret = 0; | ||
934 | struct crypto_tfm *tfm = crypto_aead_tfm(parent); | ||
935 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | ||
936 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
937 | struct aesni_rfc4106_gcm_ctx *child_ctx = | ||
938 | aesni_rfc4106_gcm_ctx_get(cryptd_child); | ||
939 | u8 *new_key_mem = NULL; | ||
940 | |||
941 | if (key_len < 4) { | ||
942 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
943 | return -EINVAL; | ||
944 | } | ||
945 | /* Account for 4 byte nonce at the end. */ | ||
946 | key_len -= 4; | ||
947 | if (key_len != AES_KEYSIZE_128) { | ||
948 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
949 | return -EINVAL; | ||
950 | } | ||
951 | |||
952 | memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); | ||
953 | /* This must be on a 16 byte boundary! */ | ||
954 | if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN) | ||
955 | return -EINVAL; | ||
956 | |||
957 | if ((unsigned long)key % AESNI_ALIGN) { | ||
958 | /* key is not aligned: use an auxiliary aligned pointer */ | ||
959 | new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL); | ||
960 | if (!new_key_mem) | ||
961 | return -ENOMEM; | ||
962 | |||
963 | new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN); | ||
964 | memcpy(new_key_mem, key, key_len); | ||
965 | key = new_key_mem; | ||
966 | } | ||
967 | |||
968 | if (!irq_fpu_usable()) | ||
969 | ret = crypto_aes_expand_key(&(ctx->aes_key_expanded), | ||
970 | key, key_len); | ||
971 | else { | ||
972 | kernel_fpu_begin(); | ||
973 | ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len); | ||
974 | kernel_fpu_end(); | ||
975 | } | ||
976 | /* This must be on a 16 byte boundary! */ | ||
977 | if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) { | ||
978 | ret = -EINVAL; | ||
979 | goto exit; | ||
980 | } | ||
981 | ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); | ||
982 | memcpy(child_ctx, ctx, sizeof(*ctx)); | ||
983 | exit: | ||
984 | kfree(new_key_mem); | ||
985 | return ret; | ||
986 | } | ||
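The key blob this setkey consumes is, per RFC 4106, the AES key with the 4-byte salt appended, so the only key_len accepted here is 20 bytes (a 16-byte AES-128 key plus 4). An illustrative layout, with placeholder values:

    static const u8 example_rfc4106_key[20] = {
            /* bytes 0..15: AES-128 cipher key (key_len after the -= 4) */
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
            /* bytes 16..19: salt, copied into ctx->nonce */
            0xca, 0xfe, 0xba, 0xbe,
    };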
987 | |||
988 | /* This is the Integrity Check Value (aka the authentication tag) length and can | ||
989 | * be 8, 12 or 16 bytes long. */ | ||
990 | static int rfc4106_set_authsize(struct crypto_aead *parent, | ||
991 | unsigned int authsize) | ||
992 | { | ||
993 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | ||
994 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
995 | |||
996 | switch (authsize) { | ||
997 | case 8: | ||
998 | case 12: | ||
999 | case 16: | ||
1000 | break; | ||
1001 | default: | ||
1002 | return -EINVAL; | ||
1003 | } | ||
1004 | crypto_aead_crt(parent)->authsize = authsize; | ||
1005 | crypto_aead_crt(cryptd_child)->authsize = authsize; | ||
1006 | return 0; | ||
1007 | } | ||
1008 | |||
1009 | static int rfc4106_encrypt(struct aead_request *req) | ||
1010 | { | ||
1011 | int ret; | ||
1012 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1013 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1014 | |||
1015 | if (!irq_fpu_usable()) { | ||
1016 | struct aead_request *cryptd_req = | ||
1017 | (struct aead_request *) aead_request_ctx(req); | ||
1018 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1019 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1020 | return crypto_aead_encrypt(cryptd_req); | ||
1021 | } else { | ||
1022 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
1023 | kernel_fpu_begin(); | ||
1024 | ret = cryptd_child->base.crt_aead.encrypt(req); | ||
1025 | kernel_fpu_end(); | ||
1026 | return ret; | ||
1027 | } | ||
1028 | } | ||
1029 | |||
1030 | static int rfc4106_decrypt(struct aead_request *req) | ||
1031 | { | ||
1032 | int ret; | ||
1033 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1034 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1035 | |||
1036 | if (!irq_fpu_usable()) { | ||
1037 | struct aead_request *cryptd_req = | ||
1038 | (struct aead_request *) aead_request_ctx(req); | ||
1039 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1040 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1041 | return crypto_aead_decrypt(cryptd_req); | ||
1042 | } else { | ||
1043 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
1044 | kernel_fpu_begin(); | ||
1045 | ret = cryptd_child->base.crt_aead.decrypt(req); | ||
1046 | kernel_fpu_end(); | ||
1047 | return ret; | ||
1048 | } | ||
1049 | } | ||
1050 | |||
1051 | static struct crypto_alg rfc4106_alg = { | ||
1052 | .cra_name = "rfc4106(gcm(aes))", | ||
1053 | .cra_driver_name = "rfc4106-gcm-aesni", | ||
1054 | .cra_priority = 400, | ||
1055 | .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, | ||
1056 | .cra_blocksize = 1, | ||
1057 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, | ||
1058 | .cra_alignmask = 0, | ||
1059 | .cra_type = &crypto_nivaead_type, | ||
1060 | .cra_module = THIS_MODULE, | ||
1061 | .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list), | ||
1062 | .cra_init = rfc4106_init, | ||
1063 | .cra_exit = rfc4106_exit, | ||
1064 | .cra_u = { | ||
1065 | .aead = { | ||
1066 | .setkey = rfc4106_set_key, | ||
1067 | .setauthsize = rfc4106_set_authsize, | ||
1068 | .encrypt = rfc4106_encrypt, | ||
1069 | .decrypt = rfc4106_decrypt, | ||
1070 | .geniv = "seqiv", | ||
1071 | .ivsize = 8, | ||
1072 | .maxauthsize = 16, | ||
1073 | }, | ||
1074 | }, | ||
1075 | }; | ||
1076 | |||
1077 | static int __driver_rfc4106_encrypt(struct aead_request *req) | ||
1078 | { | ||
1079 | u8 one_entry_in_sg = 0; | ||
1080 | u8 *src, *dst, *assoc; | ||
1081 | __be32 counter = cpu_to_be32(1); | ||
1082 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1083 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1084 | void *aes_ctx = &(ctx->aes_key_expanded); | ||
1085 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); | ||
1086 | u8 iv_tab[16+AESNI_ALIGN]; | ||
1087 | u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN); | ||
1088 | struct scatter_walk src_sg_walk; | ||
1089 | struct scatter_walk assoc_sg_walk; | ||
1090 | struct scatter_walk dst_sg_walk; | ||
1091 | unsigned int i; | ||
1092 | |||
1093 | /* Assuming we are supporting rfc4106 64-bit extended */ | ||
1094 | /* sequence numbers, we need the AAD length to be */ | ||
1095 | /* equal to 8 or 12 bytes. */ | ||
1096 | if (unlikely(req->assoclen != 8 && req->assoclen != 12)) | ||
1097 | return -EINVAL; | ||
1098 | /* Build the IV */ | ||
1099 | for (i = 0; i < 4; i++) | ||
1100 | *(iv+i) = ctx->nonce[i]; | ||
1101 | for (i = 0; i < 8; i++) | ||
1102 | *(iv+4+i) = req->iv[i]; | ||
1103 | *((__be32 *)(iv+12)) = counter; | ||
1104 | |||
1105 | if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { | ||
1106 | one_entry_in_sg = 1; | ||
1107 | scatterwalk_start(&src_sg_walk, req->src); | ||
1108 | scatterwalk_start(&assoc_sg_walk, req->assoc); | ||
1109 | src = scatterwalk_map(&src_sg_walk, 0); | ||
1110 | assoc = scatterwalk_map(&assoc_sg_walk, 0); | ||
1111 | dst = src; | ||
1112 | if (unlikely(req->src != req->dst)) { | ||
1113 | scatterwalk_start(&dst_sg_walk, req->dst); | ||
1114 | dst = scatterwalk_map(&dst_sg_walk, 0); | ||
1115 | } | ||
1116 | |||
1117 | } else { | ||
1118 | /* Allocate memory for src, dst, assoc */ | ||
1119 | src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen, | ||
1120 | GFP_ATOMIC); | ||
1121 | if (unlikely(!src)) | ||
1122 | return -ENOMEM; | ||
1123 | assoc = (src + req->cryptlen + auth_tag_len); | ||
1124 | scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); | ||
1125 | scatterwalk_map_and_copy(assoc, req->assoc, 0, | ||
1126 | req->assoclen, 0); | ||
1127 | dst = src; | ||
1128 | } | ||
1129 | |||
1130 | aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, | ||
1131 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst | ||
1132 | + ((unsigned long)req->cryptlen), auth_tag_len); | ||
1133 | |||
1134 | /* The authTag (aka the Integrity Check Value) needs to be written | ||
1135 | * back to the packet. */ | ||
1136 | if (one_entry_in_sg) { | ||
1137 | if (unlikely(req->src != req->dst)) { | ||
1138 | scatterwalk_unmap(dst, 0); | ||
1139 | scatterwalk_done(&dst_sg_walk, 0, 0); | ||
1140 | } | ||
1141 | scatterwalk_unmap(src, 0); | ||
1142 | scatterwalk_unmap(assoc, 0); | ||
1143 | scatterwalk_done(&src_sg_walk, 0, 0); | ||
1144 | scatterwalk_done(&assoc_sg_walk, 0, 0); | ||
1145 | } else { | ||
1146 | scatterwalk_map_and_copy(dst, req->dst, 0, | ||
1147 | req->cryptlen + auth_tag_len, 1); | ||
1148 | kfree(src); | ||
1149 | } | ||
1150 | return 0; | ||
1151 | } | ||
1152 | |||
1153 | static int __driver_rfc4106_decrypt(struct aead_request *req) | ||
1154 | { | ||
1155 | u8 one_entry_in_sg = 0; | ||
1156 | u8 *src, *dst, *assoc; | ||
1157 | unsigned long tempCipherLen = 0; | ||
1158 | __be32 counter = cpu_to_be32(1); | ||
1159 | int retval = 0; | ||
1160 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1161 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1162 | void *aes_ctx = &(ctx->aes_key_expanded); | ||
1163 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); | ||
1164 | u8 iv_and_authTag[32+AESNI_ALIGN]; | ||
1165 | u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN); | ||
1166 | u8 *authTag = iv + 16; | ||
1167 | struct scatter_walk src_sg_walk; | ||
1168 | struct scatter_walk assoc_sg_walk; | ||
1169 | struct scatter_walk dst_sg_walk; | ||
1170 | unsigned int i; | ||
1171 | |||
1172 | if (unlikely((req->cryptlen < auth_tag_len) || | ||
1173 | (req->assoclen != 8 && req->assoclen != 12))) | ||
1174 | return -EINVAL; | ||
1175 | /* Assuming we are supporting rfc4106 64-bit extended */ | ||
1176 | /* sequence numbers, we need the AAD length to be */ | ||
1177 | /* equal to 8 or 12 bytes. */ | ||
1178 | |||
1179 | tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); | ||
1180 | /* Build the IV */ | ||
1181 | for (i = 0; i < 4; i++) | ||
1182 | *(iv+i) = ctx->nonce[i]; | ||
1183 | for (i = 0; i < 8; i++) | ||
1184 | *(iv+4+i) = req->iv[i]; | ||
1185 | *((__be32 *)(iv+12)) = counter; | ||
1186 | |||
1187 | if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { | ||
1188 | one_entry_in_sg = 1; | ||
1189 | scatterwalk_start(&src_sg_walk, req->src); | ||
1190 | scatterwalk_start(&assoc_sg_walk, req->assoc); | ||
1191 | src = scatterwalk_map(&src_sg_walk, 0); | ||
1192 | assoc = scatterwalk_map(&assoc_sg_walk, 0); | ||
1193 | dst = src; | ||
1194 | if (unlikely(req->src != req->dst)) { | ||
1195 | scatterwalk_start(&dst_sg_walk, req->dst); | ||
1196 | dst = scatterwalk_map(&dst_sg_walk, 0); | ||
1197 | } | ||
1198 | |||
1199 | } else { | ||
1200 | /* Allocate memory for src, dst, assoc */ | ||
1201 | src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); | ||
1202 | if (!src) | ||
1203 | return -ENOMEM; | ||
1204 | assoc = (src + req->cryptlen); /* AAD sits right after the data */ | ||
1205 | scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); | ||
1206 | scatterwalk_map_and_copy(assoc, req->assoc, 0, | ||
1207 | req->assoclen, 0); | ||
1208 | dst = src; | ||
1209 | } | ||
1210 | |||
1211 | aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, | ||
1212 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, | ||
1213 | authTag, auth_tag_len); | ||
1214 | |||
1215 | /* Compare generated tag with passed in tag. */ | ||
1216 | retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? | ||
1217 | -EBADMSG : 0; | ||
1218 | |||
1219 | if (one_entry_in_sg) { | ||
1220 | if (unlikely(req->src != req->dst)) { | ||
1221 | scatterwalk_unmap(dst, 0); | ||
1222 | scatterwalk_done(&dst_sg_walk, 0, 0); | ||
1223 | } | ||
1224 | scatterwalk_unmap(src, 0); | ||
1225 | scatterwalk_unmap(assoc, 0); | ||
1226 | scatterwalk_done(&src_sg_walk, 0, 0); | ||
1227 | scatterwalk_done(&assoc_sg_walk, 0, 0); | ||
1228 | } else { | ||
1229 | scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); | ||
1230 | kfree(src); | ||
1231 | } | ||
1232 | return retval; | ||
1233 | } | ||
1234 | |||
1235 | static struct crypto_alg __rfc4106_alg = { | ||
1236 | .cra_name = "__gcm-aes-aesni", | ||
1237 | .cra_driver_name = "__driver-gcm-aes-aesni", | ||
1238 | .cra_priority = 0, | ||
1239 | .cra_flags = CRYPTO_ALG_TYPE_AEAD, | ||
1240 | .cra_blocksize = 1, | ||
1241 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, | ||
1242 | .cra_alignmask = 0, | ||
1243 | .cra_type = &crypto_aead_type, | ||
1244 | .cra_module = THIS_MODULE, | ||
1245 | .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list), | ||
1246 | .cra_u = { | ||
1247 | .aead = { | ||
1248 | .encrypt = __driver_rfc4106_encrypt, | ||
1249 | .decrypt = __driver_rfc4106_decrypt, | ||
1250 | }, | ||
1251 | }, | ||
1252 | }; | ||
1253 | #endif | ||
1254 | |||
733 | static int __init aesni_init(void) | 1255 | static int __init aesni_init(void) |
734 | { | 1256 | { |
735 | int err; | 1257 | int err; |
@@ -738,6 +1260,9 @@ static int __init aesni_init(void) | |||
738 | printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); | 1260 | printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); |
739 | return -ENODEV; | 1261 | return -ENODEV; |
740 | } | 1262 | } |
1263 | |||
1264 | if ((err = crypto_fpu_init())) | ||
1265 | goto fpu_err; | ||
741 | if ((err = crypto_register_alg(&aesni_alg))) | 1266 | if ((err = crypto_register_alg(&aesni_alg))) |
742 | goto aes_err; | 1267 | goto aes_err; |
743 | if ((err = crypto_register_alg(&__aesni_alg))) | 1268 | if ((err = crypto_register_alg(&__aesni_alg))) |
@@ -746,18 +1271,24 @@ static int __init aesni_init(void) | |||
746 | goto blk_ecb_err; | 1271 | goto blk_ecb_err; |
747 | if ((err = crypto_register_alg(&blk_cbc_alg))) | 1272 | if ((err = crypto_register_alg(&blk_cbc_alg))) |
748 | goto blk_cbc_err; | 1273 | goto blk_cbc_err; |
749 | if ((err = crypto_register_alg(&blk_ctr_alg))) | ||
750 | goto blk_ctr_err; | ||
751 | if ((err = crypto_register_alg(&ablk_ecb_alg))) | 1274 | if ((err = crypto_register_alg(&ablk_ecb_alg))) |
752 | goto ablk_ecb_err; | 1275 | goto ablk_ecb_err; |
753 | if ((err = crypto_register_alg(&ablk_cbc_alg))) | 1276 | if ((err = crypto_register_alg(&ablk_cbc_alg))) |
754 | goto ablk_cbc_err; | 1277 | goto ablk_cbc_err; |
1278 | #ifdef CONFIG_X86_64 | ||
1279 | if ((err = crypto_register_alg(&blk_ctr_alg))) | ||
1280 | goto blk_ctr_err; | ||
755 | if ((err = crypto_register_alg(&ablk_ctr_alg))) | 1281 | if ((err = crypto_register_alg(&ablk_ctr_alg))) |
756 | goto ablk_ctr_err; | 1282 | goto ablk_ctr_err; |
1283 | if ((err = crypto_register_alg(&__rfc4106_alg))) | ||
1284 | goto __aead_gcm_err; | ||
1285 | if ((err = crypto_register_alg(&rfc4106_alg))) | ||
1286 | goto aead_gcm_err; | ||
757 | #ifdef HAS_CTR | 1287 | #ifdef HAS_CTR |
758 | if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) | 1288 | if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) |
759 | goto ablk_rfc3686_ctr_err; | 1289 | goto ablk_rfc3686_ctr_err; |
760 | #endif | 1290 | #endif |
1291 | #endif | ||
761 | #ifdef HAS_LRW | 1292 | #ifdef HAS_LRW |
762 | if ((err = crypto_register_alg(&ablk_lrw_alg))) | 1293 | if ((err = crypto_register_alg(&ablk_lrw_alg))) |
763 | goto ablk_lrw_err; | 1294 | goto ablk_lrw_err; |
@@ -770,7 +1301,6 @@ static int __init aesni_init(void) | |||
770 | if ((err = crypto_register_alg(&ablk_xts_alg))) | 1301 | if ((err = crypto_register_alg(&ablk_xts_alg))) |
771 | goto ablk_xts_err; | 1302 | goto ablk_xts_err; |
772 | #endif | 1303 | #endif |
773 | |||
774 | return err; | 1304 | return err; |
775 | 1305 | ||
776 | #ifdef HAS_XTS | 1306 | #ifdef HAS_XTS |
@@ -784,18 +1314,24 @@ ablk_pcbc_err: | |||
784 | crypto_unregister_alg(&ablk_lrw_alg); | 1314 | crypto_unregister_alg(&ablk_lrw_alg); |
785 | ablk_lrw_err: | 1315 | ablk_lrw_err: |
786 | #endif | 1316 | #endif |
1317 | #ifdef CONFIG_X86_64 | ||
787 | #ifdef HAS_CTR | 1318 | #ifdef HAS_CTR |
788 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | 1319 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); |
789 | ablk_rfc3686_ctr_err: | 1320 | ablk_rfc3686_ctr_err: |
790 | #endif | 1321 | #endif |
1322 | crypto_unregister_alg(&rfc4106_alg); | ||
1323 | aead_gcm_err: | ||
1324 | crypto_unregister_alg(&__rfc4106_alg); | ||
1325 | __aead_gcm_err: | ||
791 | crypto_unregister_alg(&ablk_ctr_alg); | 1326 | crypto_unregister_alg(&ablk_ctr_alg); |
792 | ablk_ctr_err: | 1327 | ablk_ctr_err: |
1328 | crypto_unregister_alg(&blk_ctr_alg); | ||
1329 | blk_ctr_err: | ||
1330 | #endif | ||
793 | crypto_unregister_alg(&ablk_cbc_alg); | 1331 | crypto_unregister_alg(&ablk_cbc_alg); |
794 | ablk_cbc_err: | 1332 | ablk_cbc_err: |
795 | crypto_unregister_alg(&ablk_ecb_alg); | 1333 | crypto_unregister_alg(&ablk_ecb_alg); |
796 | ablk_ecb_err: | 1334 | ablk_ecb_err: |
797 | crypto_unregister_alg(&blk_ctr_alg); | ||
798 | blk_ctr_err: | ||
799 | crypto_unregister_alg(&blk_cbc_alg); | 1335 | crypto_unregister_alg(&blk_cbc_alg); |
800 | blk_cbc_err: | 1336 | blk_cbc_err: |
801 | crypto_unregister_alg(&blk_ecb_alg); | 1337 | crypto_unregister_alg(&blk_ecb_alg); |
@@ -804,6 +1340,7 @@ blk_ecb_err: | |||
804 | __aes_err: | 1340 | __aes_err: |
805 | crypto_unregister_alg(&aesni_alg); | 1341 | crypto_unregister_alg(&aesni_alg); |
806 | aes_err: | 1342 | aes_err: |
1343 | fpu_err: | ||
807 | return err; | 1344 | return err; |
808 | } | 1345 | } |
809 | 1346 | ||
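The registration ladder above follows the usual kernel unwind idiom: each crypto_register_alg() that succeeds gains one label in the error path, and the labels run in reverse order, so a failure at step N unregisters exactly steps N-1 through 1 and nothing else. A stripped-down sketch with two placeholder algorithms (alg_a and alg_b are hypothetical stand-ins for the instances above):

#include <linux/crypto.h>

static struct crypto_alg alg_a, alg_b;	/* placeholders, not real algs */

static int __init ladder_sketch(void)
{
	int err;

	err = crypto_register_alg(&alg_a);
	if (err)
		goto a_err;
	err = crypto_register_alg(&alg_b);
	if (err)
		goto b_err;
	return 0;

b_err:
	crypto_unregister_alg(&alg_a);	/* undo step 1 if step 2 failed */
a_err:
	return err;
}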
@@ -818,17 +1355,23 @@ static void __exit aesni_exit(void) | |||
818 | #ifdef HAS_LRW | 1355 | #ifdef HAS_LRW |
819 | crypto_unregister_alg(&ablk_lrw_alg); | 1356 | crypto_unregister_alg(&ablk_lrw_alg); |
820 | #endif | 1357 | #endif |
1358 | #ifdef CONFIG_X86_64 | ||
821 | #ifdef HAS_CTR | 1359 | #ifdef HAS_CTR |
822 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | 1360 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); |
823 | #endif | 1361 | #endif |
1362 | crypto_unregister_alg(&rfc4106_alg); | ||
1363 | crypto_unregister_alg(&__rfc4106_alg); | ||
824 | crypto_unregister_alg(&ablk_ctr_alg); | 1364 | crypto_unregister_alg(&ablk_ctr_alg); |
1365 | crypto_unregister_alg(&blk_ctr_alg); | ||
1366 | #endif | ||
825 | crypto_unregister_alg(&ablk_cbc_alg); | 1367 | crypto_unregister_alg(&ablk_cbc_alg); |
826 | crypto_unregister_alg(&ablk_ecb_alg); | 1368 | crypto_unregister_alg(&ablk_ecb_alg); |
827 | crypto_unregister_alg(&blk_ctr_alg); | ||
828 | crypto_unregister_alg(&blk_cbc_alg); | 1369 | crypto_unregister_alg(&blk_cbc_alg); |
829 | crypto_unregister_alg(&blk_ecb_alg); | 1370 | crypto_unregister_alg(&blk_ecb_alg); |
830 | crypto_unregister_alg(&__aesni_alg); | 1371 | crypto_unregister_alg(&__aesni_alg); |
831 | crypto_unregister_alg(&aesni_alg); | 1372 | crypto_unregister_alg(&aesni_alg); |
1373 | |||
1374 | crypto_fpu_exit(); | ||
832 | } | 1375 | } |
833 | 1376 | ||
834 | module_init(aesni_init); | 1377 | module_init(aesni_init); |
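Since aesni_init() and aesni_exit() now call crypto_fpu_init() and crypto_fpu_exit() directly (see the fpu.c hunk below), the glue code needs their prototypes. A sketch of the declarations it relies on; where they actually live (a shared header or the glue file itself) is not shown in this hunk:

int crypto_fpu_init(void);
void crypto_fpu_exit(void);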
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index 1a8f8649c035..98d7a188f46b 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c | |||
@@ -150,18 +150,12 @@ static struct crypto_template crypto_fpu_tmpl = { | |||
150 | .module = THIS_MODULE, | 150 | .module = THIS_MODULE, |
151 | }; | 151 | }; |
152 | 152 | ||
153 | static int __init crypto_fpu_module_init(void) | 153 | int __init crypto_fpu_init(void) |
154 | { | 154 | { |
155 | return crypto_register_template(&crypto_fpu_tmpl); | 155 | return crypto_register_template(&crypto_fpu_tmpl); |
156 | } | 156 | } |
157 | 157 | ||
158 | static void __exit crypto_fpu_module_exit(void) | 158 | void __exit crypto_fpu_exit(void) |
159 | { | 159 | { |
160 | crypto_unregister_template(&crypto_fpu_tmpl); | 160 | crypto_unregister_template(&crypto_fpu_tmpl); |
161 | } | 161 | } |
162 | |||
163 | module_init(crypto_fpu_module_init); | ||
164 | module_exit(crypto_fpu_module_exit); | ||
165 | |||
166 | MODULE_LICENSE("GPL"); | ||
167 | MODULE_DESCRIPTION("FPU block cipher wrapper"); | ||
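With the module_init/module_exit boilerplate gone, fpu.c no longer stands alone: the "fpu" template is registered from aesni_init() and the async helpers in the same module instantiate it by name through cryptd, wrapping a whole walk of the inner mode in a single FPU save/restore. A sketch of that instantiation, assuming cryptd_alloc_ablkcipher() from &lt;crypto/cryptd.h&gt;; treat the exact algorithm string as illustrative of this driver's naming:

#include <linux/err.h>
#include <linux/crypto.h>
#include <crypto/cryptd.h>

static int ablk_ecb_init_sketch(struct crypto_tfm *tfm)
{
	struct cryptd_ablkcipher *cryptd_tfm;

	/* "fpu(...)" wraps the inner ECB mode in one FPU save/restore. */
	cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ecb(__driver-aes-aesni))",
					     0, 0);
	if (IS_ERR(cryptd_tfm))
		return PTR_ERR(cryptd_tfm);
	/* ... store cryptd_tfm in the tfm context (omitted) ... */
	return 0;
}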
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index cbcc8d8ea93a..7a6e68e4f748 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * by the Free Software Foundation. | 10 | * by the Free Software Foundation. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/err.h> | ||
13 | #include <linux/module.h> | 14 | #include <linux/module.h> |
14 | #include <linux/init.h> | 15 | #include <linux/init.h> |
15 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
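The new &lt;linux/err.h&gt; include covers the IS_ERR()/PTR_ERR() checks the glue performs when allocating its backing cryptd transform. A sketch of that pattern, with the internal hash name shown as this driver uses it elsewhere but best treated as illustrative here:

#include <linux/err.h>
#include <linux/crypto.h>
#include <crypto/cryptd.h>

static int ghash_async_init_sketch(struct crypto_tfm *tfm)
{
	struct cryptd_ahash *cryptd_tfm;

	cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
	if (IS_ERR(cryptd_tfm))
		return PTR_ERR(cryptd_tfm);
	/* ... stash cryptd_tfm in the tfm context (omitted) ... */
	return 0;
}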