diff options
48 files changed, 2541 insertions, 2563 deletions
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 195451bf7706..6a8acb01b1d3 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi | |||
@@ -736,7 +736,7 @@ | |||
736 | dcp@80028000 { | 736 | dcp@80028000 { |
737 | reg = <0x80028000 0x2000>; | 737 | reg = <0x80028000 0x2000>; |
738 | interrupts = <52 53 54>; | 738 | interrupts = <52 53 54>; |
739 | status = "disabled"; | 739 | compatible = "fsl-dcp"; |
740 | }; | 740 | }; |
741 | 741 | ||
742 | pxp@8002a000 { | 742 | pxp@8002a000 { |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index a3a0ed80f17c..7d6ba9db1be9 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -3,8 +3,6 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) | 5 | avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) |
6 | avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ | ||
7 | $(comma)4)$(comma)%ymm2,yes,no) | ||
8 | 6 | ||
9 | obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o | 7 | obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o |
10 | obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o | 8 | obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o |
@@ -29,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o | |||
29 | obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o | 27 | obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o |
30 | obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o | 28 | obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o |
31 | obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o | 29 | obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o |
30 | obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o | ||
32 | 31 | ||
33 | # These modules require assembler to support AVX. | 32 | # These modules require assembler to support AVX. |
34 | ifeq ($(avx_supported),yes) | 33 | ifeq ($(avx_supported),yes) |
@@ -42,10 +41,8 @@ endif | |||
42 | 41 | ||
43 | # These modules require assembler to support AVX2. | 42 | # These modules require assembler to support AVX2. |
44 | ifeq ($(avx2_supported),yes) | 43 | ifeq ($(avx2_supported),yes) |
45 | obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o | ||
46 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o | 44 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o |
47 | obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o | 45 | obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o |
48 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o | ||
49 | endif | 46 | endif |
50 | 47 | ||
51 | aes-i586-y := aes-i586-asm_32.o aes_glue.o | 48 | aes-i586-y := aes-i586-asm_32.o aes_glue.o |
@@ -73,10 +70,8 @@ ifeq ($(avx_supported),yes) | |||
73 | endif | 70 | endif |
74 | 71 | ||
75 | ifeq ($(avx2_supported),yes) | 72 | ifeq ($(avx2_supported),yes) |
76 | blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o | ||
77 | camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o | 73 | camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o |
78 | serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o | 74 | serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o |
79 | twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o | ||
80 | endif | 75 | endif |
81 | 76 | ||
82 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 77 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
@@ -87,3 +82,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o | |||
87 | crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o | 82 | crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o |
88 | sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o | 83 | sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o |
89 | sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o | 84 | sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o |
85 | crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o | ||
diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S deleted file mode 100644 index 784452e0d05d..000000000000 --- a/arch/x86/crypto/blowfish-avx2-asm_64.S +++ /dev/null | |||
@@ -1,449 +0,0 @@ | |||
1 | /* | ||
2 | * x86_64/AVX2 assembler optimized version of Blowfish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | |||
15 | .file "blowfish-avx2-asm_64.S" | ||
16 | |||
17 | .data | ||
18 | .align 32 | ||
19 | |||
20 | .Lprefetch_mask: | ||
21 | .long 0*64 | ||
22 | .long 1*64 | ||
23 | .long 2*64 | ||
24 | .long 3*64 | ||
25 | .long 4*64 | ||
26 | .long 5*64 | ||
27 | .long 6*64 | ||
28 | .long 7*64 | ||
29 | |||
30 | .Lbswap32_mask: | ||
31 | .long 0x00010203 | ||
32 | .long 0x04050607 | ||
33 | .long 0x08090a0b | ||
34 | .long 0x0c0d0e0f | ||
35 | |||
36 | .Lbswap128_mask: | ||
37 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
38 | .Lbswap_iv_mask: | ||
39 | .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 | ||
40 | |||
41 | .text | ||
42 | /* structure of crypto context */ | ||
43 | #define p 0 | ||
44 | #define s0 ((16 + 2) * 4) | ||
45 | #define s1 ((16 + 2 + (1 * 256)) * 4) | ||
46 | #define s2 ((16 + 2 + (2 * 256)) * 4) | ||
47 | #define s3 ((16 + 2 + (3 * 256)) * 4) | ||
48 | |||
49 | /* register macros */ | ||
50 | #define CTX %rdi | ||
51 | #define RIO %rdx | ||
52 | |||
53 | #define RS0 %rax | ||
54 | #define RS1 %r8 | ||
55 | #define RS2 %r9 | ||
56 | #define RS3 %r10 | ||
57 | |||
58 | #define RLOOP %r11 | ||
59 | #define RLOOPd %r11d | ||
60 | |||
61 | #define RXr0 %ymm8 | ||
62 | #define RXr1 %ymm9 | ||
63 | #define RXr2 %ymm10 | ||
64 | #define RXr3 %ymm11 | ||
65 | #define RXl0 %ymm12 | ||
66 | #define RXl1 %ymm13 | ||
67 | #define RXl2 %ymm14 | ||
68 | #define RXl3 %ymm15 | ||
69 | |||
70 | /* temp regs */ | ||
71 | #define RT0 %ymm0 | ||
72 | #define RT0x %xmm0 | ||
73 | #define RT1 %ymm1 | ||
74 | #define RT1x %xmm1 | ||
75 | #define RIDX0 %ymm2 | ||
76 | #define RIDX1 %ymm3 | ||
77 | #define RIDX1x %xmm3 | ||
78 | #define RIDX2 %ymm4 | ||
79 | #define RIDX3 %ymm5 | ||
80 | |||
81 | /* vpgatherdd mask and '-1' */ | ||
82 | #define RNOT %ymm6 | ||
83 | |||
84 | /* byte mask, (-1 >> 24) */ | ||
85 | #define RBYTE %ymm7 | ||
86 | |||
87 | /*********************************************************************** | ||
88 | * 32-way AVX2 blowfish | ||
89 | ***********************************************************************/ | ||
90 | #define F(xl, xr) \ | ||
91 | vpsrld $24, xl, RIDX0; \ | ||
92 | vpsrld $16, xl, RIDX1; \ | ||
93 | vpsrld $8, xl, RIDX2; \ | ||
94 | vpand RBYTE, RIDX1, RIDX1; \ | ||
95 | vpand RBYTE, RIDX2, RIDX2; \ | ||
96 | vpand RBYTE, xl, RIDX3; \ | ||
97 | \ | ||
98 | vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \ | ||
99 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
100 | vpcmpeqd RIDX0, RIDX0, RIDX0; \ | ||
101 | \ | ||
102 | vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \ | ||
103 | vpcmpeqd RIDX1, RIDX1, RIDX1; \ | ||
104 | vpaddd RT0, RT1, RT0; \ | ||
105 | \ | ||
106 | vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \ | ||
107 | vpxor RT0, RT1, RT0; \ | ||
108 | \ | ||
109 | vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \ | ||
110 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
111 | vpaddd RT0, RT1, RT0; \ | ||
112 | \ | ||
113 | vpxor RT0, xr, xr; | ||
114 | |||
115 | #define add_roundkey(xl, nmem) \ | ||
116 | vpbroadcastd nmem, RT0; \ | ||
117 | vpxor RT0, xl ## 0, xl ## 0; \ | ||
118 | vpxor RT0, xl ## 1, xl ## 1; \ | ||
119 | vpxor RT0, xl ## 2, xl ## 2; \ | ||
120 | vpxor RT0, xl ## 3, xl ## 3; | ||
121 | |||
122 | #define round_enc() \ | ||
123 | add_roundkey(RXr, p(CTX,RLOOP,4)); \ | ||
124 | F(RXl0, RXr0); \ | ||
125 | F(RXl1, RXr1); \ | ||
126 | F(RXl2, RXr2); \ | ||
127 | F(RXl3, RXr3); \ | ||
128 | \ | ||
129 | add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ | ||
130 | F(RXr0, RXl0); \ | ||
131 | F(RXr1, RXl1); \ | ||
132 | F(RXr2, RXl2); \ | ||
133 | F(RXr3, RXl3); | ||
134 | |||
135 | #define round_dec() \ | ||
136 | add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \ | ||
137 | F(RXl0, RXr0); \ | ||
138 | F(RXl1, RXr1); \ | ||
139 | F(RXl2, RXr2); \ | ||
140 | F(RXl3, RXr3); \ | ||
141 | \ | ||
142 | add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ | ||
143 | F(RXr0, RXl0); \ | ||
144 | F(RXr1, RXl1); \ | ||
145 | F(RXr2, RXl2); \ | ||
146 | F(RXr3, RXl3); | ||
147 | |||
148 | #define init_round_constants() \ | ||
149 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
150 | leaq s0(CTX), RS0; \ | ||
151 | leaq s1(CTX), RS1; \ | ||
152 | leaq s2(CTX), RS2; \ | ||
153 | leaq s3(CTX), RS3; \ | ||
154 | vpsrld $24, RNOT, RBYTE; | ||
155 | |||
156 | #define transpose_2x2(x0, x1, t0) \ | ||
157 | vpunpckldq x0, x1, t0; \ | ||
158 | vpunpckhdq x0, x1, x1; \ | ||
159 | \ | ||
160 | vpunpcklqdq t0, x1, x0; \ | ||
161 | vpunpckhqdq t0, x1, x1; | ||
162 | |||
163 | #define read_block(xl, xr) \ | ||
164 | vbroadcasti128 .Lbswap32_mask, RT1; \ | ||
165 | \ | ||
166 | vpshufb RT1, xl ## 0, xl ## 0; \ | ||
167 | vpshufb RT1, xr ## 0, xr ## 0; \ | ||
168 | vpshufb RT1, xl ## 1, xl ## 1; \ | ||
169 | vpshufb RT1, xr ## 1, xr ## 1; \ | ||
170 | vpshufb RT1, xl ## 2, xl ## 2; \ | ||
171 | vpshufb RT1, xr ## 2, xr ## 2; \ | ||
172 | vpshufb RT1, xl ## 3, xl ## 3; \ | ||
173 | vpshufb RT1, xr ## 3, xr ## 3; \ | ||
174 | \ | ||
175 | transpose_2x2(xl ## 0, xr ## 0, RT0); \ | ||
176 | transpose_2x2(xl ## 1, xr ## 1, RT0); \ | ||
177 | transpose_2x2(xl ## 2, xr ## 2, RT0); \ | ||
178 | transpose_2x2(xl ## 3, xr ## 3, RT0); | ||
179 | |||
180 | #define write_block(xl, xr) \ | ||
181 | vbroadcasti128 .Lbswap32_mask, RT1; \ | ||
182 | \ | ||
183 | transpose_2x2(xl ## 0, xr ## 0, RT0); \ | ||
184 | transpose_2x2(xl ## 1, xr ## 1, RT0); \ | ||
185 | transpose_2x2(xl ## 2, xr ## 2, RT0); \ | ||
186 | transpose_2x2(xl ## 3, xr ## 3, RT0); \ | ||
187 | \ | ||
188 | vpshufb RT1, xl ## 0, xl ## 0; \ | ||
189 | vpshufb RT1, xr ## 0, xr ## 0; \ | ||
190 | vpshufb RT1, xl ## 1, xl ## 1; \ | ||
191 | vpshufb RT1, xr ## 1, xr ## 1; \ | ||
192 | vpshufb RT1, xl ## 2, xl ## 2; \ | ||
193 | vpshufb RT1, xr ## 2, xr ## 2; \ | ||
194 | vpshufb RT1, xl ## 3, xl ## 3; \ | ||
195 | vpshufb RT1, xr ## 3, xr ## 3; | ||
196 | |||
197 | .align 8 | ||
198 | __blowfish_enc_blk32: | ||
199 | /* input: | ||
200 | * %rdi: ctx, CTX | ||
201 | * RXl0..4, RXr0..4: plaintext | ||
202 | * output: | ||
203 | * RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped) | ||
204 | */ | ||
205 | init_round_constants(); | ||
206 | |||
207 | read_block(RXl, RXr); | ||
208 | |||
209 | movl $1, RLOOPd; | ||
210 | add_roundkey(RXl, p+4*(0)(CTX)); | ||
211 | |||
212 | .align 4 | ||
213 | .L__enc_loop: | ||
214 | round_enc(); | ||
215 | |||
216 | leal 2(RLOOPd), RLOOPd; | ||
217 | cmpl $17, RLOOPd; | ||
218 | jne .L__enc_loop; | ||
219 | |||
220 | add_roundkey(RXr, p+4*(17)(CTX)); | ||
221 | |||
222 | write_block(RXl, RXr); | ||
223 | |||
224 | ret; | ||
225 | ENDPROC(__blowfish_enc_blk32) | ||
226 | |||
227 | .align 8 | ||
228 | __blowfish_dec_blk32: | ||
229 | /* input: | ||
230 | * %rdi: ctx, CTX | ||
231 | * RXl0..4, RXr0..4: ciphertext | ||
232 | * output: | ||
233 | * RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped) | ||
234 | */ | ||
235 | init_round_constants(); | ||
236 | |||
237 | read_block(RXl, RXr); | ||
238 | |||
239 | movl $14, RLOOPd; | ||
240 | add_roundkey(RXl, p+4*(17)(CTX)); | ||
241 | |||
242 | .align 4 | ||
243 | .L__dec_loop: | ||
244 | round_dec(); | ||
245 | |||
246 | addl $-2, RLOOPd; | ||
247 | jns .L__dec_loop; | ||
248 | |||
249 | add_roundkey(RXr, p+4*(0)(CTX)); | ||
250 | |||
251 | write_block(RXl, RXr); | ||
252 | |||
253 | ret; | ||
254 | ENDPROC(__blowfish_dec_blk32) | ||
255 | |||
256 | ENTRY(blowfish_ecb_enc_32way) | ||
257 | /* input: | ||
258 | * %rdi: ctx, CTX | ||
259 | * %rsi: dst | ||
260 | * %rdx: src | ||
261 | */ | ||
262 | |||
263 | vzeroupper; | ||
264 | |||
265 | vmovdqu 0*32(%rdx), RXl0; | ||
266 | vmovdqu 1*32(%rdx), RXr0; | ||
267 | vmovdqu 2*32(%rdx), RXl1; | ||
268 | vmovdqu 3*32(%rdx), RXr1; | ||
269 | vmovdqu 4*32(%rdx), RXl2; | ||
270 | vmovdqu 5*32(%rdx), RXr2; | ||
271 | vmovdqu 6*32(%rdx), RXl3; | ||
272 | vmovdqu 7*32(%rdx), RXr3; | ||
273 | |||
274 | call __blowfish_enc_blk32; | ||
275 | |||
276 | vmovdqu RXr0, 0*32(%rsi); | ||
277 | vmovdqu RXl0, 1*32(%rsi); | ||
278 | vmovdqu RXr1, 2*32(%rsi); | ||
279 | vmovdqu RXl1, 3*32(%rsi); | ||
280 | vmovdqu RXr2, 4*32(%rsi); | ||
281 | vmovdqu RXl2, 5*32(%rsi); | ||
282 | vmovdqu RXr3, 6*32(%rsi); | ||
283 | vmovdqu RXl3, 7*32(%rsi); | ||
284 | |||
285 | vzeroupper; | ||
286 | |||
287 | ret; | ||
288 | ENDPROC(blowfish_ecb_enc_32way) | ||
289 | |||
290 | ENTRY(blowfish_ecb_dec_32way) | ||
291 | /* input: | ||
292 | * %rdi: ctx, CTX | ||
293 | * %rsi: dst | ||
294 | * %rdx: src | ||
295 | */ | ||
296 | |||
297 | vzeroupper; | ||
298 | |||
299 | vmovdqu 0*32(%rdx), RXl0; | ||
300 | vmovdqu 1*32(%rdx), RXr0; | ||
301 | vmovdqu 2*32(%rdx), RXl1; | ||
302 | vmovdqu 3*32(%rdx), RXr1; | ||
303 | vmovdqu 4*32(%rdx), RXl2; | ||
304 | vmovdqu 5*32(%rdx), RXr2; | ||
305 | vmovdqu 6*32(%rdx), RXl3; | ||
306 | vmovdqu 7*32(%rdx), RXr3; | ||
307 | |||
308 | call __blowfish_dec_blk32; | ||
309 | |||
310 | vmovdqu RXr0, 0*32(%rsi); | ||
311 | vmovdqu RXl0, 1*32(%rsi); | ||
312 | vmovdqu RXr1, 2*32(%rsi); | ||
313 | vmovdqu RXl1, 3*32(%rsi); | ||
314 | vmovdqu RXr2, 4*32(%rsi); | ||
315 | vmovdqu RXl2, 5*32(%rsi); | ||
316 | vmovdqu RXr3, 6*32(%rsi); | ||
317 | vmovdqu RXl3, 7*32(%rsi); | ||
318 | |||
319 | vzeroupper; | ||
320 | |||
321 | ret; | ||
322 | ENDPROC(blowfish_ecb_dec_32way) | ||
323 | |||
324 | ENTRY(blowfish_cbc_dec_32way) | ||
325 | /* input: | ||
326 | * %rdi: ctx, CTX | ||
327 | * %rsi: dst | ||
328 | * %rdx: src | ||
329 | */ | ||
330 | |||
331 | vzeroupper; | ||
332 | |||
333 | vmovdqu 0*32(%rdx), RXl0; | ||
334 | vmovdqu 1*32(%rdx), RXr0; | ||
335 | vmovdqu 2*32(%rdx), RXl1; | ||
336 | vmovdqu 3*32(%rdx), RXr1; | ||
337 | vmovdqu 4*32(%rdx), RXl2; | ||
338 | vmovdqu 5*32(%rdx), RXr2; | ||
339 | vmovdqu 6*32(%rdx), RXl3; | ||
340 | vmovdqu 7*32(%rdx), RXr3; | ||
341 | |||
342 | call __blowfish_dec_blk32; | ||
343 | |||
344 | /* xor with src */ | ||
345 | vmovq (%rdx), RT0x; | ||
346 | vpshufd $0x4f, RT0x, RT0x; | ||
347 | vinserti128 $1, 8(%rdx), RT0, RT0; | ||
348 | vpxor RT0, RXr0, RXr0; | ||
349 | vpxor 0*32+24(%rdx), RXl0, RXl0; | ||
350 | vpxor 1*32+24(%rdx), RXr1, RXr1; | ||
351 | vpxor 2*32+24(%rdx), RXl1, RXl1; | ||
352 | vpxor 3*32+24(%rdx), RXr2, RXr2; | ||
353 | vpxor 4*32+24(%rdx), RXl2, RXl2; | ||
354 | vpxor 5*32+24(%rdx), RXr3, RXr3; | ||
355 | vpxor 6*32+24(%rdx), RXl3, RXl3; | ||
356 | |||
357 | vmovdqu RXr0, (0*32)(%rsi); | ||
358 | vmovdqu RXl0, (1*32)(%rsi); | ||
359 | vmovdqu RXr1, (2*32)(%rsi); | ||
360 | vmovdqu RXl1, (3*32)(%rsi); | ||
361 | vmovdqu RXr2, (4*32)(%rsi); | ||
362 | vmovdqu RXl2, (5*32)(%rsi); | ||
363 | vmovdqu RXr3, (6*32)(%rsi); | ||
364 | vmovdqu RXl3, (7*32)(%rsi); | ||
365 | |||
366 | vzeroupper; | ||
367 | |||
368 | ret; | ||
369 | ENDPROC(blowfish_cbc_dec_32way) | ||
370 | |||
371 | ENTRY(blowfish_ctr_32way) | ||
372 | /* input: | ||
373 | * %rdi: ctx, CTX | ||
374 | * %rsi: dst | ||
375 | * %rdx: src | ||
376 | * %rcx: iv (big endian, 64bit) | ||
377 | */ | ||
378 | |||
379 | vzeroupper; | ||
380 | |||
381 | vpcmpeqd RT0, RT0, RT0; | ||
382 | vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */ | ||
383 | |||
384 | vpcmpeqd RT1x, RT1x, RT1x; | ||
385 | vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */ | ||
386 | vpxor RIDX0, RIDX0, RIDX0; | ||
387 | vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */ | ||
388 | |||
389 | vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */ | ||
390 | |||
391 | vpcmpeqd RT1, RT1, RT1; | ||
392 | vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */ | ||
393 | vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */ | ||
394 | |||
395 | vbroadcasti128 .Lbswap_iv_mask, RIDX0; | ||
396 | vbroadcasti128 .Lbswap128_mask, RIDX1; | ||
397 | |||
398 | /* load IV and byteswap */ | ||
399 | vmovq (%rcx), RT1x; | ||
400 | vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */ | ||
401 | vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */ | ||
402 | |||
403 | /* construct IVs */ | ||
404 | vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */ | ||
405 | vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */ | ||
406 | vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */ | ||
407 | vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */ | ||
408 | vpsubq RIDX2, RT1, RT1; | ||
409 | vpshufb RIDX1, RT1, RXl1; | ||
410 | vpsubq RIDX2, RT1, RT1; | ||
411 | vpshufb RIDX1, RT1, RXr1; | ||
412 | vpsubq RIDX2, RT1, RT1; | ||
413 | vpshufb RIDX1, RT1, RXl2; | ||
414 | vpsubq RIDX2, RT1, RT1; | ||
415 | vpshufb RIDX1, RT1, RXr2; | ||
416 | vpsubq RIDX2, RT1, RT1; | ||
417 | vpshufb RIDX1, RT1, RXl3; | ||
418 | vpsubq RIDX2, RT1, RT1; | ||
419 | vpshufb RIDX1, RT1, RXr3; | ||
420 | |||
421 | /* store last IV */ | ||
422 | vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */ | ||
423 | vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */ | ||
424 | vmovq RT1x, (%rcx); | ||
425 | |||
426 | call __blowfish_enc_blk32; | ||
427 | |||
428 | /* dst = src ^ iv */ | ||
429 | vpxor 0*32(%rdx), RXr0, RXr0; | ||
430 | vpxor 1*32(%rdx), RXl0, RXl0; | ||
431 | vpxor 2*32(%rdx), RXr1, RXr1; | ||
432 | vpxor 3*32(%rdx), RXl1, RXl1; | ||
433 | vpxor 4*32(%rdx), RXr2, RXr2; | ||
434 | vpxor 5*32(%rdx), RXl2, RXl2; | ||
435 | vpxor 6*32(%rdx), RXr3, RXr3; | ||
436 | vpxor 7*32(%rdx), RXl3, RXl3; | ||
437 | vmovdqu RXr0, (0*32)(%rsi); | ||
438 | vmovdqu RXl0, (1*32)(%rsi); | ||
439 | vmovdqu RXr1, (2*32)(%rsi); | ||
440 | vmovdqu RXl1, (3*32)(%rsi); | ||
441 | vmovdqu RXr2, (4*32)(%rsi); | ||
442 | vmovdqu RXl2, (5*32)(%rsi); | ||
443 | vmovdqu RXr3, (6*32)(%rsi); | ||
444 | vmovdqu RXl3, (7*32)(%rsi); | ||
445 | |||
446 | vzeroupper; | ||
447 | |||
448 | ret; | ||
449 | ENDPROC(blowfish_ctr_32way) | ||
diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c deleted file mode 100644 index 4417e9aea78d..000000000000 --- a/arch/x86/crypto/blowfish_avx2_glue.c +++ /dev/null | |||
@@ -1,585 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/module.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/crypto.h> | ||
26 | #include <linux/err.h> | ||
27 | #include <crypto/algapi.h> | ||
28 | #include <crypto/blowfish.h> | ||
29 | #include <crypto/cryptd.h> | ||
30 | #include <crypto/ctr.h> | ||
31 | #include <asm/i387.h> | ||
32 | #include <asm/xcr.h> | ||
33 | #include <asm/xsave.h> | ||
34 | #include <asm/crypto/blowfish.h> | ||
35 | #include <asm/crypto/ablk_helper.h> | ||
36 | #include <crypto/scatterwalk.h> | ||
37 | |||
38 | #define BF_AVX2_PARALLEL_BLOCKS 32 | ||
39 | |||
40 | /* 32-way AVX2 parallel cipher functions */ | ||
41 | asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst, | ||
42 | const u8 *src); | ||
43 | asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst, | ||
44 | const u8 *src); | ||
45 | asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst, | ||
46 | const u8 *src); | ||
47 | asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src, | ||
48 | __be64 *iv); | ||
49 | |||
50 | static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
51 | { | ||
52 | if (fpu_enabled) | ||
53 | return true; | ||
54 | |||
55 | /* FPU is only used when chunk to be processed is large enough, so | ||
56 | * do not enable FPU until it is necessary. | ||
57 | */ | ||
58 | if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS) | ||
59 | return false; | ||
60 | |||
61 | kernel_fpu_begin(); | ||
62 | return true; | ||
63 | } | ||
64 | |||
65 | static inline void bf_fpu_end(bool fpu_enabled) | ||
66 | { | ||
67 | if (fpu_enabled) | ||
68 | kernel_fpu_end(); | ||
69 | } | ||
70 | |||
71 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
72 | bool enc) | ||
73 | { | ||
74 | bool fpu_enabled = false; | ||
75 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
76 | const unsigned int bsize = BF_BLOCK_SIZE; | ||
77 | unsigned int nbytes; | ||
78 | int err; | ||
79 | |||
80 | err = blkcipher_walk_virt(desc, walk); | ||
81 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
82 | |||
83 | while ((nbytes = walk->nbytes)) { | ||
84 | u8 *wsrc = walk->src.virt.addr; | ||
85 | u8 *wdst = walk->dst.virt.addr; | ||
86 | |||
87 | fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); | ||
88 | |||
89 | /* Process multi-block AVX2 batch */ | ||
90 | if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { | ||
91 | do { | ||
92 | if (enc) | ||
93 | blowfish_ecb_enc_32way(ctx, wdst, wsrc); | ||
94 | else | ||
95 | blowfish_ecb_dec_32way(ctx, wdst, wsrc); | ||
96 | |||
97 | wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
98 | wdst += bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
99 | nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
100 | } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); | ||
101 | |||
102 | if (nbytes < bsize) | ||
103 | goto done; | ||
104 | } | ||
105 | |||
106 | /* Process multi-block batch */ | ||
107 | if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { | ||
108 | do { | ||
109 | if (enc) | ||
110 | blowfish_enc_blk_4way(ctx, wdst, wsrc); | ||
111 | else | ||
112 | blowfish_dec_blk_4way(ctx, wdst, wsrc); | ||
113 | |||
114 | wsrc += bsize * BF_PARALLEL_BLOCKS; | ||
115 | wdst += bsize * BF_PARALLEL_BLOCKS; | ||
116 | nbytes -= bsize * BF_PARALLEL_BLOCKS; | ||
117 | } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); | ||
118 | |||
119 | if (nbytes < bsize) | ||
120 | goto done; | ||
121 | } | ||
122 | |||
123 | /* Handle leftovers */ | ||
124 | do { | ||
125 | if (enc) | ||
126 | blowfish_enc_blk(ctx, wdst, wsrc); | ||
127 | else | ||
128 | blowfish_dec_blk(ctx, wdst, wsrc); | ||
129 | |||
130 | wsrc += bsize; | ||
131 | wdst += bsize; | ||
132 | nbytes -= bsize; | ||
133 | } while (nbytes >= bsize); | ||
134 | |||
135 | done: | ||
136 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
137 | } | ||
138 | |||
139 | bf_fpu_end(fpu_enabled); | ||
140 | return err; | ||
141 | } | ||
142 | |||
143 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
144 | struct scatterlist *src, unsigned int nbytes) | ||
145 | { | ||
146 | struct blkcipher_walk walk; | ||
147 | |||
148 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
149 | return ecb_crypt(desc, &walk, true); | ||
150 | } | ||
151 | |||
152 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
153 | struct scatterlist *src, unsigned int nbytes) | ||
154 | { | ||
155 | struct blkcipher_walk walk; | ||
156 | |||
157 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
158 | return ecb_crypt(desc, &walk, false); | ||
159 | } | ||
160 | |||
161 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | ||
162 | struct blkcipher_walk *walk) | ||
163 | { | ||
164 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
165 | unsigned int bsize = BF_BLOCK_SIZE; | ||
166 | unsigned int nbytes = walk->nbytes; | ||
167 | u64 *src = (u64 *)walk->src.virt.addr; | ||
168 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
169 | u64 *iv = (u64 *)walk->iv; | ||
170 | |||
171 | do { | ||
172 | *dst = *src ^ *iv; | ||
173 | blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
174 | iv = dst; | ||
175 | |||
176 | src += 1; | ||
177 | dst += 1; | ||
178 | nbytes -= bsize; | ||
179 | } while (nbytes >= bsize); | ||
180 | |||
181 | *(u64 *)walk->iv = *iv; | ||
182 | return nbytes; | ||
183 | } | ||
184 | |||
185 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
186 | struct scatterlist *src, unsigned int nbytes) | ||
187 | { | ||
188 | struct blkcipher_walk walk; | ||
189 | int err; | ||
190 | |||
191 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
192 | err = blkcipher_walk_virt(desc, &walk); | ||
193 | |||
194 | while ((nbytes = walk.nbytes)) { | ||
195 | nbytes = __cbc_encrypt(desc, &walk); | ||
196 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
197 | } | ||
198 | |||
199 | return err; | ||
200 | } | ||
201 | |||
202 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | ||
203 | struct blkcipher_walk *walk) | ||
204 | { | ||
205 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
206 | const unsigned int bsize = BF_BLOCK_SIZE; | ||
207 | unsigned int nbytes = walk->nbytes; | ||
208 | u64 *src = (u64 *)walk->src.virt.addr; | ||
209 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
210 | u64 last_iv; | ||
211 | int i; | ||
212 | |||
213 | /* Start of the last block. */ | ||
214 | src += nbytes / bsize - 1; | ||
215 | dst += nbytes / bsize - 1; | ||
216 | |||
217 | last_iv = *src; | ||
218 | |||
219 | /* Process multi-block AVX2 batch */ | ||
220 | if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { | ||
221 | do { | ||
222 | nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1); | ||
223 | src -= BF_AVX2_PARALLEL_BLOCKS - 1; | ||
224 | dst -= BF_AVX2_PARALLEL_BLOCKS - 1; | ||
225 | |||
226 | blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src); | ||
227 | |||
228 | nbytes -= bsize; | ||
229 | if (nbytes < bsize) | ||
230 | goto done; | ||
231 | |||
232 | *dst ^= *(src - 1); | ||
233 | src -= 1; | ||
234 | dst -= 1; | ||
235 | } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); | ||
236 | |||
237 | if (nbytes < bsize) | ||
238 | goto done; | ||
239 | } | ||
240 | |||
241 | /* Process multi-block batch */ | ||
242 | if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { | ||
243 | u64 ivs[BF_PARALLEL_BLOCKS - 1]; | ||
244 | |||
245 | do { | ||
246 | nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1); | ||
247 | src -= BF_PARALLEL_BLOCKS - 1; | ||
248 | dst -= BF_PARALLEL_BLOCKS - 1; | ||
249 | |||
250 | for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) | ||
251 | ivs[i] = src[i]; | ||
252 | |||
253 | blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); | ||
254 | |||
255 | for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) | ||
256 | dst[i + 1] ^= ivs[i]; | ||
257 | |||
258 | nbytes -= bsize; | ||
259 | if (nbytes < bsize) | ||
260 | goto done; | ||
261 | |||
262 | *dst ^= *(src - 1); | ||
263 | src -= 1; | ||
264 | dst -= 1; | ||
265 | } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); | ||
266 | |||
267 | if (nbytes < bsize) | ||
268 | goto done; | ||
269 | } | ||
270 | |||
271 | /* Handle leftovers */ | ||
272 | for (;;) { | ||
273 | blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); | ||
274 | |||
275 | nbytes -= bsize; | ||
276 | if (nbytes < bsize) | ||
277 | break; | ||
278 | |||
279 | *dst ^= *(src - 1); | ||
280 | src -= 1; | ||
281 | dst -= 1; | ||
282 | } | ||
283 | |||
284 | done: | ||
285 | *dst ^= *(u64 *)walk->iv; | ||
286 | *(u64 *)walk->iv = last_iv; | ||
287 | |||
288 | return nbytes; | ||
289 | } | ||
290 | |||
291 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
292 | struct scatterlist *src, unsigned int nbytes) | ||
293 | { | ||
294 | bool fpu_enabled = false; | ||
295 | struct blkcipher_walk walk; | ||
296 | int err; | ||
297 | |||
298 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
299 | err = blkcipher_walk_virt(desc, &walk); | ||
300 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
301 | |||
302 | while ((nbytes = walk.nbytes)) { | ||
303 | fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); | ||
304 | nbytes = __cbc_decrypt(desc, &walk); | ||
305 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
306 | } | ||
307 | |||
308 | bf_fpu_end(fpu_enabled); | ||
309 | return err; | ||
310 | } | ||
311 | |||
312 | static void ctr_crypt_final(struct blkcipher_desc *desc, | ||
313 | struct blkcipher_walk *walk) | ||
314 | { | ||
315 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
316 | u8 *ctrblk = walk->iv; | ||
317 | u8 keystream[BF_BLOCK_SIZE]; | ||
318 | u8 *src = walk->src.virt.addr; | ||
319 | u8 *dst = walk->dst.virt.addr; | ||
320 | unsigned int nbytes = walk->nbytes; | ||
321 | |||
322 | blowfish_enc_blk(ctx, keystream, ctrblk); | ||
323 | crypto_xor(keystream, src, nbytes); | ||
324 | memcpy(dst, keystream, nbytes); | ||
325 | |||
326 | crypto_inc(ctrblk, BF_BLOCK_SIZE); | ||
327 | } | ||
328 | |||
329 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
330 | struct blkcipher_walk *walk) | ||
331 | { | ||
332 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
333 | unsigned int bsize = BF_BLOCK_SIZE; | ||
334 | unsigned int nbytes = walk->nbytes; | ||
335 | u64 *src = (u64 *)walk->src.virt.addr; | ||
336 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
337 | int i; | ||
338 | |||
339 | /* Process multi-block AVX2 batch */ | ||
340 | if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { | ||
341 | do { | ||
342 | blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src, | ||
343 | (__be64 *)walk->iv); | ||
344 | |||
345 | src += BF_AVX2_PARALLEL_BLOCKS; | ||
346 | dst += BF_AVX2_PARALLEL_BLOCKS; | ||
347 | nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
348 | } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); | ||
349 | |||
350 | if (nbytes < bsize) | ||
351 | goto done; | ||
352 | } | ||
353 | |||
354 | /* Process four block batch */ | ||
355 | if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { | ||
356 | __be64 ctrblocks[BF_PARALLEL_BLOCKS]; | ||
357 | u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); | ||
358 | |||
359 | do { | ||
360 | /* create ctrblks for parallel encrypt */ | ||
361 | for (i = 0; i < BF_PARALLEL_BLOCKS; i++) { | ||
362 | if (dst != src) | ||
363 | dst[i] = src[i]; | ||
364 | |||
365 | ctrblocks[i] = cpu_to_be64(ctrblk++); | ||
366 | } | ||
367 | |||
368 | blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, | ||
369 | (u8 *)ctrblocks); | ||
370 | |||
371 | src += BF_PARALLEL_BLOCKS; | ||
372 | dst += BF_PARALLEL_BLOCKS; | ||
373 | nbytes -= bsize * BF_PARALLEL_BLOCKS; | ||
374 | } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); | ||
375 | |||
376 | *(__be64 *)walk->iv = cpu_to_be64(ctrblk); | ||
377 | |||
378 | if (nbytes < bsize) | ||
379 | goto done; | ||
380 | } | ||
381 | |||
382 | /* Handle leftovers */ | ||
383 | do { | ||
384 | u64 ctrblk; | ||
385 | |||
386 | if (dst != src) | ||
387 | *dst = *src; | ||
388 | |||
389 | ctrblk = *(u64 *)walk->iv; | ||
390 | be64_add_cpu((__be64 *)walk->iv, 1); | ||
391 | |||
392 | blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); | ||
393 | |||
394 | src += 1; | ||
395 | dst += 1; | ||
396 | } while ((nbytes -= bsize) >= bsize); | ||
397 | |||
398 | done: | ||
399 | return nbytes; | ||
400 | } | ||
401 | |||
402 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
403 | struct scatterlist *src, unsigned int nbytes) | ||
404 | { | ||
405 | bool fpu_enabled = false; | ||
406 | struct blkcipher_walk walk; | ||
407 | int err; | ||
408 | |||
409 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
410 | err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); | ||
411 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
412 | |||
413 | while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { | ||
414 | fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); | ||
415 | nbytes = __ctr_crypt(desc, &walk); | ||
416 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
417 | } | ||
418 | |||
419 | bf_fpu_end(fpu_enabled); | ||
420 | |||
421 | if (walk.nbytes) { | ||
422 | ctr_crypt_final(desc, &walk); | ||
423 | err = blkcipher_walk_done(desc, &walk, 0); | ||
424 | } | ||
425 | |||
426 | return err; | ||
427 | } | ||
428 | |||
429 | static struct crypto_alg bf_algs[6] = { { | ||
430 | .cra_name = "__ecb-blowfish-avx2", | ||
431 | .cra_driver_name = "__driver-ecb-blowfish-avx2", | ||
432 | .cra_priority = 0, | ||
433 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
434 | .cra_blocksize = BF_BLOCK_SIZE, | ||
435 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
436 | .cra_alignmask = 0, | ||
437 | .cra_type = &crypto_blkcipher_type, | ||
438 | .cra_module = THIS_MODULE, | ||
439 | .cra_u = { | ||
440 | .blkcipher = { | ||
441 | .min_keysize = BF_MIN_KEY_SIZE, | ||
442 | .max_keysize = BF_MAX_KEY_SIZE, | ||
443 | .setkey = blowfish_setkey, | ||
444 | .encrypt = ecb_encrypt, | ||
445 | .decrypt = ecb_decrypt, | ||
446 | }, | ||
447 | }, | ||
448 | }, { | ||
449 | .cra_name = "__cbc-blowfish-avx2", | ||
450 | .cra_driver_name = "__driver-cbc-blowfish-avx2", | ||
451 | .cra_priority = 0, | ||
452 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
453 | .cra_blocksize = BF_BLOCK_SIZE, | ||
454 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
455 | .cra_alignmask = 0, | ||
456 | .cra_type = &crypto_blkcipher_type, | ||
457 | .cra_module = THIS_MODULE, | ||
458 | .cra_u = { | ||
459 | .blkcipher = { | ||
460 | .min_keysize = BF_MIN_KEY_SIZE, | ||
461 | .max_keysize = BF_MAX_KEY_SIZE, | ||
462 | .setkey = blowfish_setkey, | ||
463 | .encrypt = cbc_encrypt, | ||
464 | .decrypt = cbc_decrypt, | ||
465 | }, | ||
466 | }, | ||
467 | }, { | ||
468 | .cra_name = "__ctr-blowfish-avx2", | ||
469 | .cra_driver_name = "__driver-ctr-blowfish-avx2", | ||
470 | .cra_priority = 0, | ||
471 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
472 | .cra_blocksize = 1, | ||
473 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
474 | .cra_alignmask = 0, | ||
475 | .cra_type = &crypto_blkcipher_type, | ||
476 | .cra_module = THIS_MODULE, | ||
477 | .cra_u = { | ||
478 | .blkcipher = { | ||
479 | .min_keysize = BF_MIN_KEY_SIZE, | ||
480 | .max_keysize = BF_MAX_KEY_SIZE, | ||
481 | .ivsize = BF_BLOCK_SIZE, | ||
482 | .setkey = blowfish_setkey, | ||
483 | .encrypt = ctr_crypt, | ||
484 | .decrypt = ctr_crypt, | ||
485 | }, | ||
486 | }, | ||
487 | }, { | ||
488 | .cra_name = "ecb(blowfish)", | ||
489 | .cra_driver_name = "ecb-blowfish-avx2", | ||
490 | .cra_priority = 400, | ||
491 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
492 | .cra_blocksize = BF_BLOCK_SIZE, | ||
493 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
494 | .cra_alignmask = 0, | ||
495 | .cra_type = &crypto_ablkcipher_type, | ||
496 | .cra_module = THIS_MODULE, | ||
497 | .cra_init = ablk_init, | ||
498 | .cra_exit = ablk_exit, | ||
499 | .cra_u = { | ||
500 | .ablkcipher = { | ||
501 | .min_keysize = BF_MIN_KEY_SIZE, | ||
502 | .max_keysize = BF_MAX_KEY_SIZE, | ||
503 | .setkey = ablk_set_key, | ||
504 | .encrypt = ablk_encrypt, | ||
505 | .decrypt = ablk_decrypt, | ||
506 | }, | ||
507 | }, | ||
508 | }, { | ||
509 | .cra_name = "cbc(blowfish)", | ||
510 | .cra_driver_name = "cbc-blowfish-avx2", | ||
511 | .cra_priority = 400, | ||
512 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
513 | .cra_blocksize = BF_BLOCK_SIZE, | ||
514 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
515 | .cra_alignmask = 0, | ||
516 | .cra_type = &crypto_ablkcipher_type, | ||
517 | .cra_module = THIS_MODULE, | ||
518 | .cra_init = ablk_init, | ||
519 | .cra_exit = ablk_exit, | ||
520 | .cra_u = { | ||
521 | .ablkcipher = { | ||
522 | .min_keysize = BF_MIN_KEY_SIZE, | ||
523 | .max_keysize = BF_MAX_KEY_SIZE, | ||
524 | .ivsize = BF_BLOCK_SIZE, | ||
525 | .setkey = ablk_set_key, | ||
526 | .encrypt = __ablk_encrypt, | ||
527 | .decrypt = ablk_decrypt, | ||
528 | }, | ||
529 | }, | ||
530 | }, { | ||
531 | .cra_name = "ctr(blowfish)", | ||
532 | .cra_driver_name = "ctr-blowfish-avx2", | ||
533 | .cra_priority = 400, | ||
534 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
535 | .cra_blocksize = 1, | ||
536 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
537 | .cra_alignmask = 0, | ||
538 | .cra_type = &crypto_ablkcipher_type, | ||
539 | .cra_module = THIS_MODULE, | ||
540 | .cra_init = ablk_init, | ||
541 | .cra_exit = ablk_exit, | ||
542 | .cra_u = { | ||
543 | .ablkcipher = { | ||
544 | .min_keysize = BF_MIN_KEY_SIZE, | ||
545 | .max_keysize = BF_MAX_KEY_SIZE, | ||
546 | .ivsize = BF_BLOCK_SIZE, | ||
547 | .setkey = ablk_set_key, | ||
548 | .encrypt = ablk_encrypt, | ||
549 | .decrypt = ablk_encrypt, | ||
550 | .geniv = "chainiv", | ||
551 | }, | ||
552 | }, | ||
553 | } }; | ||
554 | |||
555 | |||
556 | static int __init init(void) | ||
557 | { | ||
558 | u64 xcr0; | ||
559 | |||
560 | if (!cpu_has_avx2 || !cpu_has_osxsave) { | ||
561 | pr_info("AVX2 instructions are not detected.\n"); | ||
562 | return -ENODEV; | ||
563 | } | ||
564 | |||
565 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
566 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
567 | pr_info("AVX detected but unusable.\n"); | ||
568 | return -ENODEV; | ||
569 | } | ||
570 | |||
571 | return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); | ||
572 | } | ||
573 | |||
574 | static void __exit fini(void) | ||
575 | { | ||
576 | crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); | ||
577 | } | ||
578 | |||
579 | module_init(init); | ||
580 | module_exit(fini); | ||
581 | |||
582 | MODULE_LICENSE("GPL"); | ||
583 | MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized"); | ||
584 | MODULE_ALIAS("blowfish"); | ||
585 | MODULE_ALIAS("blowfish-asm"); | ||
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 3548d76dbaa9..50ec333b70e6 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Glue Code for assembler optimized version of Blowfish | 2 | * Glue Code for assembler optimized version of Blowfish |
3 | * | 3 | * |
4 | * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | 4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> |
5 | * | 5 | * |
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | 6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: |
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | 7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> |
@@ -32,24 +32,40 @@ | |||
32 | #include <linux/module.h> | 32 | #include <linux/module.h> |
33 | #include <linux/types.h> | 33 | #include <linux/types.h> |
34 | #include <crypto/algapi.h> | 34 | #include <crypto/algapi.h> |
35 | #include <asm/crypto/blowfish.h> | ||
36 | 35 | ||
37 | /* regular block cipher functions */ | 36 | /* regular block cipher functions */ |
38 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, | 37 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, |
39 | bool xor); | 38 | bool xor); |
40 | EXPORT_SYMBOL_GPL(__blowfish_enc_blk); | ||
41 | |||
42 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); | 39 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); |
43 | EXPORT_SYMBOL_GPL(blowfish_dec_blk); | ||
44 | 40 | ||
45 | /* 4-way parallel cipher functions */ | 41 | /* 4-way parallel cipher functions */ |
46 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | 42 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, |
47 | const u8 *src, bool xor); | 43 | const u8 *src, bool xor); |
48 | EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way); | ||
49 | |||
50 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, | 44 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, |
51 | const u8 *src); | 45 | const u8 *src); |
52 | EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way); | 46 | |
47 | static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) | ||
48 | { | ||
49 | __blowfish_enc_blk(ctx, dst, src, false); | ||
50 | } | ||
51 | |||
52 | static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, | ||
53 | const u8 *src) | ||
54 | { | ||
55 | __blowfish_enc_blk(ctx, dst, src, true); | ||
56 | } | ||
57 | |||
58 | static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
59 | const u8 *src) | ||
60 | { | ||
61 | __blowfish_enc_blk_4way(ctx, dst, src, false); | ||
62 | } | ||
63 | |||
64 | static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, | ||
65 | const u8 *src) | ||
66 | { | ||
67 | __blowfish_enc_blk_4way(ctx, dst, src, true); | ||
68 | } | ||
53 | 69 | ||
54 | static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | 70 | static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) |
55 | { | 71 | { |
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 91a1878fcc3e..0e0b8863a34b 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S | |||
@@ -51,16 +51,6 @@ | |||
51 | #define ymm14_x xmm14 | 51 | #define ymm14_x xmm14 |
52 | #define ymm15_x xmm15 | 52 | #define ymm15_x xmm15 |
53 | 53 | ||
54 | /* | ||
55 | * AES-NI instructions do not support ymmX registers, so we need splitting and | ||
56 | * merging. | ||
57 | */ | ||
58 | #define vaesenclast256(zero, yreg, tmp) \ | ||
59 | vextracti128 $1, yreg, tmp##_x; \ | ||
60 | vaesenclast zero##_x, yreg##_x, yreg##_x; \ | ||
61 | vaesenclast zero##_x, tmp##_x, tmp##_x; \ | ||
62 | vinserti128 $1, tmp##_x, yreg, yreg; | ||
63 | |||
64 | /********************************************************************** | 54 | /********************************************************************** |
65 | 32-way camellia | 55 | 32-way camellia |
66 | **********************************************************************/ | 56 | **********************************************************************/ |
@@ -79,46 +69,70 @@ | |||
79 | * S-function with AES subbytes \ | 69 | * S-function with AES subbytes \ |
80 | */ \ | 70 | */ \ |
81 | vbroadcasti128 .Linv_shift_row, t4; \ | 71 | vbroadcasti128 .Linv_shift_row, t4; \ |
82 | vpbroadcastb .L0f0f0f0f, t7; \ | 72 | vpbroadcastd .L0f0f0f0f, t7; \ |
83 | vbroadcasti128 .Lpre_tf_lo_s1, t0; \ | 73 | vbroadcasti128 .Lpre_tf_lo_s1, t5; \ |
84 | vbroadcasti128 .Lpre_tf_hi_s1, t1; \ | 74 | vbroadcasti128 .Lpre_tf_hi_s1, t6; \ |
75 | vbroadcasti128 .Lpre_tf_lo_s4, t2; \ | ||
76 | vbroadcasti128 .Lpre_tf_hi_s4, t3; \ | ||
85 | \ | 77 | \ |
86 | /* AES inverse shift rows */ \ | 78 | /* AES inverse shift rows */ \ |
87 | vpshufb t4, x0, x0; \ | 79 | vpshufb t4, x0, x0; \ |
88 | vpshufb t4, x7, x7; \ | 80 | vpshufb t4, x7, x7; \ |
89 | vpshufb t4, x1, x1; \ | ||
90 | vpshufb t4, x4, x4; \ | ||
91 | vpshufb t4, x2, x2; \ | ||
92 | vpshufb t4, x5, x5; \ | ||
93 | vpshufb t4, x3, x3; \ | 81 | vpshufb t4, x3, x3; \ |
94 | vpshufb t4, x6, x6; \ | 82 | vpshufb t4, x6, x6; \ |
83 | vpshufb t4, x2, x2; \ | ||
84 | vpshufb t4, x5, x5; \ | ||
85 | vpshufb t4, x1, x1; \ | ||
86 | vpshufb t4, x4, x4; \ | ||
95 | \ | 87 | \ |
96 | /* prefilter sboxes 1, 2 and 3 */ \ | 88 | /* prefilter sboxes 1, 2 and 3 */ \ |
97 | vbroadcasti128 .Lpre_tf_lo_s4, t2; \ | ||
98 | vbroadcasti128 .Lpre_tf_hi_s4, t3; \ | ||
99 | filter_8bit(x0, t0, t1, t7, t6); \ | ||
100 | filter_8bit(x7, t0, t1, t7, t6); \ | ||
101 | filter_8bit(x1, t0, t1, t7, t6); \ | ||
102 | filter_8bit(x4, t0, t1, t7, t6); \ | ||
103 | filter_8bit(x2, t0, t1, t7, t6); \ | ||
104 | filter_8bit(x5, t0, t1, t7, t6); \ | ||
105 | \ | ||
106 | /* prefilter sbox 4 */ \ | 89 | /* prefilter sbox 4 */ \ |
90 | filter_8bit(x0, t5, t6, t7, t4); \ | ||
91 | filter_8bit(x7, t5, t6, t7, t4); \ | ||
92 | vextracti128 $1, x0, t0##_x; \ | ||
93 | vextracti128 $1, x7, t1##_x; \ | ||
94 | filter_8bit(x3, t2, t3, t7, t4); \ | ||
95 | filter_8bit(x6, t2, t3, t7, t4); \ | ||
96 | vextracti128 $1, x3, t3##_x; \ | ||
97 | vextracti128 $1, x6, t2##_x; \ | ||
98 | filter_8bit(x2, t5, t6, t7, t4); \ | ||
99 | filter_8bit(x5, t5, t6, t7, t4); \ | ||
100 | filter_8bit(x1, t5, t6, t7, t4); \ | ||
101 | filter_8bit(x4, t5, t6, t7, t4); \ | ||
102 | \ | ||
107 | vpxor t4##_x, t4##_x, t4##_x; \ | 103 | vpxor t4##_x, t4##_x, t4##_x; \ |
108 | filter_8bit(x3, t2, t3, t7, t6); \ | ||
109 | filter_8bit(x6, t2, t3, t7, t6); \ | ||
110 | \ | 104 | \ |
111 | /* AES subbytes + AES shift rows */ \ | 105 | /* AES subbytes + AES shift rows */ \ |
106 | vextracti128 $1, x2, t6##_x; \ | ||
107 | vextracti128 $1, x5, t5##_x; \ | ||
108 | vaesenclast t4##_x, x0##_x, x0##_x; \ | ||
109 | vaesenclast t4##_x, t0##_x, t0##_x; \ | ||
110 | vinserti128 $1, t0##_x, x0, x0; \ | ||
111 | vaesenclast t4##_x, x7##_x, x7##_x; \ | ||
112 | vaesenclast t4##_x, t1##_x, t1##_x; \ | ||
113 | vinserti128 $1, t1##_x, x7, x7; \ | ||
114 | vaesenclast t4##_x, x3##_x, x3##_x; \ | ||
115 | vaesenclast t4##_x, t3##_x, t3##_x; \ | ||
116 | vinserti128 $1, t3##_x, x3, x3; \ | ||
117 | vaesenclast t4##_x, x6##_x, x6##_x; \ | ||
118 | vaesenclast t4##_x, t2##_x, t2##_x; \ | ||
119 | vinserti128 $1, t2##_x, x6, x6; \ | ||
120 | vextracti128 $1, x1, t3##_x; \ | ||
121 | vextracti128 $1, x4, t2##_x; \ | ||
112 | vbroadcasti128 .Lpost_tf_lo_s1, t0; \ | 122 | vbroadcasti128 .Lpost_tf_lo_s1, t0; \ |
113 | vbroadcasti128 .Lpost_tf_hi_s1, t1; \ | 123 | vbroadcasti128 .Lpost_tf_hi_s1, t1; \ |
114 | vaesenclast256(t4, x0, t5); \ | 124 | vaesenclast t4##_x, x2##_x, x2##_x; \ |
115 | vaesenclast256(t4, x7, t5); \ | 125 | vaesenclast t4##_x, t6##_x, t6##_x; \ |
116 | vaesenclast256(t4, x1, t5); \ | 126 | vinserti128 $1, t6##_x, x2, x2; \ |
117 | vaesenclast256(t4, x4, t5); \ | 127 | vaesenclast t4##_x, x5##_x, x5##_x; \ |
118 | vaesenclast256(t4, x2, t5); \ | 128 | vaesenclast t4##_x, t5##_x, t5##_x; \ |
119 | vaesenclast256(t4, x5, t5); \ | 129 | vinserti128 $1, t5##_x, x5, x5; \ |
120 | vaesenclast256(t4, x3, t5); \ | 130 | vaesenclast t4##_x, x1##_x, x1##_x; \ |
121 | vaesenclast256(t4, x6, t5); \ | 131 | vaesenclast t4##_x, t3##_x, t3##_x; \ |
132 | vinserti128 $1, t3##_x, x1, x1; \ | ||
133 | vaesenclast t4##_x, x4##_x, x4##_x; \ | ||
134 | vaesenclast t4##_x, t2##_x, t2##_x; \ | ||
135 | vinserti128 $1, t2##_x, x4, x4; \ | ||
122 | \ | 136 | \ |
123 | /* postfilter sboxes 1 and 4 */ \ | 137 | /* postfilter sboxes 1 and 4 */ \ |
124 | vbroadcasti128 .Lpost_tf_lo_s3, t2; \ | 138 | vbroadcasti128 .Lpost_tf_lo_s3, t2; \ |
@@ -139,22 +153,12 @@ | |||
139 | /* postfilter sbox 2 */ \ | 153 | /* postfilter sbox 2 */ \ |
140 | filter_8bit(x1, t4, t5, t7, t2); \ | 154 | filter_8bit(x1, t4, t5, t7, t2); \ |
141 | filter_8bit(x4, t4, t5, t7, t2); \ | 155 | filter_8bit(x4, t4, t5, t7, t2); \ |
156 | vpxor t7, t7, t7; \ | ||
142 | \ | 157 | \ |
143 | vpsrldq $1, t0, t1; \ | 158 | vpsrldq $1, t0, t1; \ |
144 | vpsrldq $2, t0, t2; \ | 159 | vpsrldq $2, t0, t2; \ |
160 | vpshufb t7, t1, t1; \ | ||
145 | vpsrldq $3, t0, t3; \ | 161 | vpsrldq $3, t0, t3; \ |
146 | vpsrldq $4, t0, t4; \ | ||
147 | vpsrldq $5, t0, t5; \ | ||
148 | vpsrldq $6, t0, t6; \ | ||
149 | vpsrldq $7, t0, t7; \ | ||
150 | vpbroadcastb t0##_x, t0; \ | ||
151 | vpbroadcastb t1##_x, t1; \ | ||
152 | vpbroadcastb t2##_x, t2; \ | ||
153 | vpbroadcastb t3##_x, t3; \ | ||
154 | vpbroadcastb t4##_x, t4; \ | ||
155 | vpbroadcastb t6##_x, t6; \ | ||
156 | vpbroadcastb t5##_x, t5; \ | ||
157 | vpbroadcastb t7##_x, t7; \ | ||
158 | \ | 162 | \ |
159 | /* P-function */ \ | 163 | /* P-function */ \ |
160 | vpxor x5, x0, x0; \ | 164 | vpxor x5, x0, x0; \ |
@@ -162,11 +166,21 @@ | |||
162 | vpxor x7, x2, x2; \ | 166 | vpxor x7, x2, x2; \ |
163 | vpxor x4, x3, x3; \ | 167 | vpxor x4, x3, x3; \ |
164 | \ | 168 | \ |
169 | vpshufb t7, t2, t2; \ | ||
170 | vpsrldq $4, t0, t4; \ | ||
171 | vpshufb t7, t3, t3; \ | ||
172 | vpsrldq $5, t0, t5; \ | ||
173 | vpshufb t7, t4, t4; \ | ||
174 | \ | ||
165 | vpxor x2, x4, x4; \ | 175 | vpxor x2, x4, x4; \ |
166 | vpxor x3, x5, x5; \ | 176 | vpxor x3, x5, x5; \ |
167 | vpxor x0, x6, x6; \ | 177 | vpxor x0, x6, x6; \ |
168 | vpxor x1, x7, x7; \ | 178 | vpxor x1, x7, x7; \ |
169 | \ | 179 | \ |
180 | vpsrldq $6, t0, t6; \ | ||
181 | vpshufb t7, t5, t5; \ | ||
182 | vpshufb t7, t6, t6; \ | ||
183 | \ | ||
170 | vpxor x7, x0, x0; \ | 184 | vpxor x7, x0, x0; \ |
171 | vpxor x4, x1, x1; \ | 185 | vpxor x4, x1, x1; \ |
172 | vpxor x5, x2, x2; \ | 186 | vpxor x5, x2, x2; \ |
@@ -179,12 +193,16 @@ | |||
179 | \ | 193 | \ |
180 | /* Add key material and result to CD (x becomes new CD) */ \ | 194 | /* Add key material and result to CD (x becomes new CD) */ \ |
181 | \ | 195 | \ |
182 | vpxor t7, x0, x0; \ | ||
183 | vpxor 4 * 32(mem_cd), x0, x0; \ | ||
184 | \ | ||
185 | vpxor t6, x1, x1; \ | 196 | vpxor t6, x1, x1; \ |
186 | vpxor 5 * 32(mem_cd), x1, x1; \ | 197 | vpxor 5 * 32(mem_cd), x1, x1; \ |
187 | \ | 198 | \ |
199 | vpsrldq $7, t0, t6; \ | ||
200 | vpshufb t7, t0, t0; \ | ||
201 | vpshufb t7, t6, t7; \ | ||
202 | \ | ||
203 | vpxor t7, x0, x0; \ | ||
204 | vpxor 4 * 32(mem_cd), x0, x0; \ | ||
205 | \ | ||
188 | vpxor t5, x2, x2; \ | 206 | vpxor t5, x2, x2; \ |
189 | vpxor 6 * 32(mem_cd), x2, x2; \ | 207 | vpxor 6 * 32(mem_cd), x2, x2; \ |
190 | \ | 208 | \ |
@@ -204,7 +222,7 @@ | |||
204 | vpxor 3 * 32(mem_cd), x7, x7; | 222 | vpxor 3 * 32(mem_cd), x7, x7; |
205 | 223 | ||
206 | /* | 224 | /* |
207 | * Size optimization... with inlined roundsm16 binary would be over 5 times | 225 | * Size optimization... with inlined roundsm32 binary would be over 5 times |
208 | * larger and would only marginally faster. | 226 | * larger and would only marginally faster. |
209 | */ | 227 | */ |
210 | .align 8 | 228 | .align 8 |
@@ -324,13 +342,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | |||
324 | */ \ | 342 | */ \ |
325 | vpbroadcastd kll, t0; /* only lowest 32-bit used */ \ | 343 | vpbroadcastd kll, t0; /* only lowest 32-bit used */ \ |
326 | vpxor tt0, tt0, tt0; \ | 344 | vpxor tt0, tt0, tt0; \ |
327 | vpbroadcastb t0##_x, t3; \ | 345 | vpshufb tt0, t0, t3; \ |
328 | vpsrldq $1, t0, t0; \ | 346 | vpsrldq $1, t0, t0; \ |
329 | vpbroadcastb t0##_x, t2; \ | 347 | vpshufb tt0, t0, t2; \ |
330 | vpsrldq $1, t0, t0; \ | 348 | vpsrldq $1, t0, t0; \ |
331 | vpbroadcastb t0##_x, t1; \ | 349 | vpshufb tt0, t0, t1; \ |
332 | vpsrldq $1, t0, t0; \ | 350 | vpsrldq $1, t0, t0; \ |
333 | vpbroadcastb t0##_x, t0; \ | 351 | vpshufb tt0, t0, t0; \ |
334 | \ | 352 | \ |
335 | vpand l0, t0, t0; \ | 353 | vpand l0, t0, t0; \ |
336 | vpand l1, t1, t1; \ | 354 | vpand l1, t1, t1; \ |
@@ -340,6 +358,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | |||
340 | rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ | 358 | rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ |
341 | \ | 359 | \ |
342 | vpxor l4, t0, l4; \ | 360 | vpxor l4, t0, l4; \ |
361 | vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ | ||
343 | vmovdqu l4, 4 * 32(l); \ | 362 | vmovdqu l4, 4 * 32(l); \ |
344 | vpxor l5, t1, l5; \ | 363 | vpxor l5, t1, l5; \ |
345 | vmovdqu l5, 5 * 32(l); \ | 364 | vmovdqu l5, 5 * 32(l); \ |
@@ -354,14 +373,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | |||
354 | * rl ^= t2; \ | 373 | * rl ^= t2; \ |
355 | */ \ | 374 | */ \ |
356 | \ | 375 | \ |
357 | vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ | 376 | vpshufb tt0, t0, t3; \ |
358 | vpbroadcastb t0##_x, t3; \ | ||
359 | vpsrldq $1, t0, t0; \ | 377 | vpsrldq $1, t0, t0; \ |
360 | vpbroadcastb t0##_x, t2; \ | 378 | vpshufb tt0, t0, t2; \ |
361 | vpsrldq $1, t0, t0; \ | 379 | vpsrldq $1, t0, t0; \ |
362 | vpbroadcastb t0##_x, t1; \ | 380 | vpshufb tt0, t0, t1; \ |
363 | vpsrldq $1, t0, t0; \ | 381 | vpsrldq $1, t0, t0; \ |
364 | vpbroadcastb t0##_x, t0; \ | 382 | vpshufb tt0, t0, t0; \ |
365 | \ | 383 | \ |
366 | vpor 4 * 32(r), t0, t0; \ | 384 | vpor 4 * 32(r), t0, t0; \ |
367 | vpor 5 * 32(r), t1, t1; \ | 385 | vpor 5 * 32(r), t1, t1; \ |
@@ -373,6 +391,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | |||
373 | vpxor 2 * 32(r), t2, t2; \ | 391 | vpxor 2 * 32(r), t2, t2; \ |
374 | vpxor 3 * 32(r), t3, t3; \ | 392 | vpxor 3 * 32(r), t3, t3; \ |
375 | vmovdqu t0, 0 * 32(r); \ | 393 | vmovdqu t0, 0 * 32(r); \ |
394 | vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ | ||
376 | vmovdqu t1, 1 * 32(r); \ | 395 | vmovdqu t1, 1 * 32(r); \ |
377 | vmovdqu t2, 2 * 32(r); \ | 396 | vmovdqu t2, 2 * 32(r); \ |
378 | vmovdqu t3, 3 * 32(r); \ | 397 | vmovdqu t3, 3 * 32(r); \ |
@@ -382,14 +401,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | |||
382 | * t2 &= rl; \ | 401 | * t2 &= rl; \ |
383 | * rr ^= rol32(t2, 1); \ | 402 | * rr ^= rol32(t2, 1); \ |
384 | */ \ | 403 | */ \ |
385 | vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ | 404 | vpshufb tt0, t0, t3; \ |
386 | vpbroadcastb t0##_x, t3; \ | ||
387 | vpsrldq $1, t0, t0; \ | 405 | vpsrldq $1, t0, t0; \ |
388 | vpbroadcastb t0##_x, t2; \ | 406 | vpshufb tt0, t0, t2; \ |
389 | vpsrldq $1, t0, t0; \ | 407 | vpsrldq $1, t0, t0; \ |
390 | vpbroadcastb t0##_x, t1; \ | 408 | vpshufb tt0, t0, t1; \ |
391 | vpsrldq $1, t0, t0; \ | 409 | vpsrldq $1, t0, t0; \ |
392 | vpbroadcastb t0##_x, t0; \ | 410 | vpshufb tt0, t0, t0; \ |
393 | \ | 411 | \ |
394 | vpand 0 * 32(r), t0, t0; \ | 412 | vpand 0 * 32(r), t0, t0; \ |
395 | vpand 1 * 32(r), t1, t1; \ | 413 | vpand 1 * 32(r), t1, t1; \ |
@@ -403,6 +421,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | |||
403 | vpxor 6 * 32(r), t2, t2; \ | 421 | vpxor 6 * 32(r), t2, t2; \ |
404 | vpxor 7 * 32(r), t3, t3; \ | 422 | vpxor 7 * 32(r), t3, t3; \ |
405 | vmovdqu t0, 4 * 32(r); \ | 423 | vmovdqu t0, 4 * 32(r); \ |
424 | vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ | ||
406 | vmovdqu t1, 5 * 32(r); \ | 425 | vmovdqu t1, 5 * 32(r); \ |
407 | vmovdqu t2, 6 * 32(r); \ | 426 | vmovdqu t2, 6 * 32(r); \ |
408 | vmovdqu t3, 7 * 32(r); \ | 427 | vmovdqu t3, 7 * 32(r); \ |
@@ -413,14 +432,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | |||
413 | * ll ^= t0; \ | 432 | * ll ^= t0; \ |
414 | */ \ | 433 | */ \ |
415 | \ | 434 | \ |
416 | vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ | 435 | vpshufb tt0, t0, t3; \ |
417 | vpbroadcastb t0##_x, t3; \ | ||
418 | vpsrldq $1, t0, t0; \ | 436 | vpsrldq $1, t0, t0; \ |
419 | vpbroadcastb t0##_x, t2; \ | 437 | vpshufb tt0, t0, t2; \ |
420 | vpsrldq $1, t0, t0; \ | 438 | vpsrldq $1, t0, t0; \ |
421 | vpbroadcastb t0##_x, t1; \ | 439 | vpshufb tt0, t0, t1; \ |
422 | vpsrldq $1, t0, t0; \ | 440 | vpsrldq $1, t0, t0; \ |
423 | vpbroadcastb t0##_x, t0; \ | 441 | vpshufb tt0, t0, t0; \ |
424 | \ | 442 | \ |
425 | vpor l4, t0, t0; \ | 443 | vpor l4, t0, t0; \ |
426 | vpor l5, t1, t1; \ | 444 | vpor l5, t1, t1; \ |
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S new file mode 100644 index 000000000000..35e97569d05f --- /dev/null +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S | |||
@@ -0,0 +1,643 @@ | |||
1 | ######################################################################## | ||
2 | # Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions | ||
3 | # | ||
4 | # Copyright (c) 2013, Intel Corporation | ||
5 | # | ||
6 | # Authors: | ||
7 | # Erdinc Ozturk <erdinc.ozturk@intel.com> | ||
8 | # Vinodh Gopal <vinodh.gopal@intel.com> | ||
9 | # James Guilford <james.guilford@intel.com> | ||
10 | # Tim Chen <tim.c.chen@linux.intel.com> | ||
11 | # | ||
12 | # This software is available to you under a choice of one of two | ||
13 | # licenses. You may choose to be licensed under the terms of the GNU | ||
14 | # General Public License (GPL) Version 2, available from the file | ||
15 | # COPYING in the main directory of this source tree, or the | ||
16 | # OpenIB.org BSD license below: | ||
17 | # | ||
18 | # Redistribution and use in source and binary forms, with or without | ||
19 | # modification, are permitted provided that the following conditions are | ||
20 | # met: | ||
21 | # | ||
22 | # * Redistributions of source code must retain the above copyright | ||
23 | # notice, this list of conditions and the following disclaimer. | ||
24 | # | ||
25 | # * Redistributions in binary form must reproduce the above copyright | ||
26 | # notice, this list of conditions and the following disclaimer in the | ||
27 | # documentation and/or other materials provided with the | ||
28 | # distribution. | ||
29 | # | ||
30 | # * Neither the name of the Intel Corporation nor the names of its | ||
31 | # contributors may be used to endorse or promote products derived from | ||
32 | # this software without specific prior written permission. | ||
33 | # | ||
34 | # | ||
35 | # THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY | ||
36 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
37 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
38 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR | ||
39 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
40 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
41 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
42 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
43 | # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
44 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
45 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
46 | ######################################################################## | ||
47 | # Function API: | ||
48 | # UINT16 crc_t10dif_pcl( | ||
49 | # UINT16 init_crc, //initial CRC value, 16 bits | ||
50 | # const unsigned char *buf, //buffer pointer to calculate CRC on | ||
51 | # UINT64 len //buffer length in bytes (64-bit data) | ||
52 | # ); | ||
53 | # | ||
54 | # Reference paper titled "Fast CRC Computation for Generic | ||
55 | # Polynomials Using PCLMULQDQ Instruction" | ||
56 | # URL: http://www.intel.com/content/dam/www/public/us/en/documents | ||
57 | # /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf | ||
58 | # | ||
59 | # | ||
60 | |||
61 | #include <linux/linkage.h> | ||
62 | |||
63 | .text | ||
64 | |||
65 | #define arg1 %rdi | ||
66 | #define arg2 %rsi | ||
67 | #define arg3 %rdx | ||
68 | |||
69 | #define arg1_low32 %edi | ||
70 | |||
71 | ENTRY(crc_t10dif_pcl) | ||
72 | .align 16 | ||
73 | |||
74 | # adjust the 16-bit initial_crc value, scale it to 32 bits | ||
75 | shl $16, arg1_low32 | ||
76 | |||
77 | # Allocate Stack Space | ||
78 | mov %rsp, %rcx | ||
79 | sub $16*2, %rsp | ||
80 | # align stack to 16 byte boundary | ||
81 | and $~(0x10 - 1), %rsp | ||
82 | |||
83 | # check if smaller than 256 | ||
84 | cmp $256, arg3 | ||
85 | |||
86 | # for sizes less than 128, we can't fold 64B at a time... | ||
87 | jl _less_than_128 | ||
88 | |||
89 | |||
90 | # load the initial crc value | ||
91 | movd arg1_low32, %xmm10 # initial crc | ||
92 | |||
93 | # crc value does not need to be byte-reflected, but it needs | ||
94 | # to be moved to the high part of the register. | ||
95 | # because data will be byte-reflected and will align with | ||
96 | # initial crc at correct place. | ||
97 | pslldq $12, %xmm10 | ||
98 | |||
99 | movdqa SHUF_MASK(%rip), %xmm11 | ||
100 | # receive the initial 64B data, xor the initial crc value | ||
101 | movdqu 16*0(arg2), %xmm0 | ||
102 | movdqu 16*1(arg2), %xmm1 | ||
103 | movdqu 16*2(arg2), %xmm2 | ||
104 | movdqu 16*3(arg2), %xmm3 | ||
105 | movdqu 16*4(arg2), %xmm4 | ||
106 | movdqu 16*5(arg2), %xmm5 | ||
107 | movdqu 16*6(arg2), %xmm6 | ||
108 | movdqu 16*7(arg2), %xmm7 | ||
109 | |||
110 | pshufb %xmm11, %xmm0 | ||
111 | # XOR the initial_crc value | ||
112 | pxor %xmm10, %xmm0 | ||
113 | pshufb %xmm11, %xmm1 | ||
114 | pshufb %xmm11, %xmm2 | ||
115 | pshufb %xmm11, %xmm3 | ||
116 | pshufb %xmm11, %xmm4 | ||
117 | pshufb %xmm11, %xmm5 | ||
118 | pshufb %xmm11, %xmm6 | ||
119 | pshufb %xmm11, %xmm7 | ||
120 | |||
121 | movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4 | ||
122 | #imm value of pclmulqdq instruction | ||
123 | #will determine which constant to use | ||
124 | |||
125 | ################################################################# | ||
126 | # we subtract 256 instead of 128 to save one instruction from the loop | ||
127 | sub $256, arg3 | ||
128 | |||
129 | # at this section of the code, there is 64*x+y (0<=y<64) bytes of | ||
130 | # buffer. The _fold_64_B_loop will fold 64B at a time | ||
131 | # until we have 64+y Bytes of buffer | ||
132 | |||
133 | |||
134 | # fold 64B at a time. This section of the code folds 4 xmm | ||
135 | # registers in parallel | ||
136 | _fold_64_B_loop: | ||
137 | |||
138 | # update the buffer pointer | ||
139 | add $128, arg2 # buf += 64# | ||
140 | |||
141 | movdqu 16*0(arg2), %xmm9 | ||
142 | movdqu 16*1(arg2), %xmm12 | ||
143 | pshufb %xmm11, %xmm9 | ||
144 | pshufb %xmm11, %xmm12 | ||
145 | movdqa %xmm0, %xmm8 | ||
146 | movdqa %xmm1, %xmm13 | ||
147 | pclmulqdq $0x0 , %xmm10, %xmm0 | ||
148 | pclmulqdq $0x11, %xmm10, %xmm8 | ||
149 | pclmulqdq $0x0 , %xmm10, %xmm1 | ||
150 | pclmulqdq $0x11, %xmm10, %xmm13 | ||
151 | pxor %xmm9 , %xmm0 | ||
152 | xorps %xmm8 , %xmm0 | ||
153 | pxor %xmm12, %xmm1 | ||
154 | xorps %xmm13, %xmm1 | ||
155 | |||
156 | movdqu 16*2(arg2), %xmm9 | ||
157 | movdqu 16*3(arg2), %xmm12 | ||
158 | pshufb %xmm11, %xmm9 | ||
159 | pshufb %xmm11, %xmm12 | ||
160 | movdqa %xmm2, %xmm8 | ||
161 | movdqa %xmm3, %xmm13 | ||
162 | pclmulqdq $0x0, %xmm10, %xmm2 | ||
163 | pclmulqdq $0x11, %xmm10, %xmm8 | ||
164 | pclmulqdq $0x0, %xmm10, %xmm3 | ||
165 | pclmulqdq $0x11, %xmm10, %xmm13 | ||
166 | pxor %xmm9 , %xmm2 | ||
167 | xorps %xmm8 , %xmm2 | ||
168 | pxor %xmm12, %xmm3 | ||
169 | xorps %xmm13, %xmm3 | ||
170 | |||
171 | movdqu 16*4(arg2), %xmm9 | ||
172 | movdqu 16*5(arg2), %xmm12 | ||
173 | pshufb %xmm11, %xmm9 | ||
174 | pshufb %xmm11, %xmm12 | ||
175 | movdqa %xmm4, %xmm8 | ||
176 | movdqa %xmm5, %xmm13 | ||
177 | pclmulqdq $0x0, %xmm10, %xmm4 | ||
178 | pclmulqdq $0x11, %xmm10, %xmm8 | ||
179 | pclmulqdq $0x0, %xmm10, %xmm5 | ||
180 | pclmulqdq $0x11, %xmm10, %xmm13 | ||
181 | pxor %xmm9 , %xmm4 | ||
182 | xorps %xmm8 , %xmm4 | ||
183 | pxor %xmm12, %xmm5 | ||
184 | xorps %xmm13, %xmm5 | ||
185 | |||
186 | movdqu 16*6(arg2), %xmm9 | ||
187 | movdqu 16*7(arg2), %xmm12 | ||
188 | pshufb %xmm11, %xmm9 | ||
189 | pshufb %xmm11, %xmm12 | ||
190 | movdqa %xmm6 , %xmm8 | ||
191 | movdqa %xmm7 , %xmm13 | ||
192 | pclmulqdq $0x0 , %xmm10, %xmm6 | ||
193 | pclmulqdq $0x11, %xmm10, %xmm8 | ||
194 | pclmulqdq $0x0 , %xmm10, %xmm7 | ||
195 | pclmulqdq $0x11, %xmm10, %xmm13 | ||
196 | pxor %xmm9 , %xmm6 | ||
197 | xorps %xmm8 , %xmm6 | ||
198 | pxor %xmm12, %xmm7 | ||
199 | xorps %xmm13, %xmm7 | ||
200 | |||
201 | sub $128, arg3 | ||
202 | |||
203 | # check if there is another 64B in the buffer to be able to fold | ||
204 | jge _fold_64_B_loop | ||
205 | ################################################################## | ||
206 | |||
207 | |||
208 | add $128, arg2 | ||
209 | # at this point, the buffer pointer is pointing at the last y Bytes | ||
210 | # of the buffer, and the 128B of folded data is in 8 of the xmm | ||
211 | # registers: xmm0 through xmm7 | ||
212 | |||
213 | |||
214 | # fold the 8 xmm registers to 1 xmm register with different constants | ||
215 | |||
216 | movdqa rk9(%rip), %xmm10 | ||
217 | movdqa %xmm0, %xmm8 | ||
218 | pclmulqdq $0x11, %xmm10, %xmm0 | ||
219 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
220 | pxor %xmm8, %xmm7 | ||
221 | xorps %xmm0, %xmm7 | ||
222 | |||
223 | movdqa rk11(%rip), %xmm10 | ||
224 | movdqa %xmm1, %xmm8 | ||
225 | pclmulqdq $0x11, %xmm10, %xmm1 | ||
226 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
227 | pxor %xmm8, %xmm7 | ||
228 | xorps %xmm1, %xmm7 | ||
229 | |||
230 | movdqa rk13(%rip), %xmm10 | ||
231 | movdqa %xmm2, %xmm8 | ||
232 | pclmulqdq $0x11, %xmm10, %xmm2 | ||
233 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
234 | pxor %xmm8, %xmm7 | ||
235 | pxor %xmm2, %xmm7 | ||
236 | |||
237 | movdqa rk15(%rip), %xmm10 | ||
238 | movdqa %xmm3, %xmm8 | ||
239 | pclmulqdq $0x11, %xmm10, %xmm3 | ||
240 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
241 | pxor %xmm8, %xmm7 | ||
242 | xorps %xmm3, %xmm7 | ||
243 | |||
244 | movdqa rk17(%rip), %xmm10 | ||
245 | movdqa %xmm4, %xmm8 | ||
246 | pclmulqdq $0x11, %xmm10, %xmm4 | ||
247 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
248 | pxor %xmm8, %xmm7 | ||
249 | pxor %xmm4, %xmm7 | ||
250 | |||
251 | movdqa rk19(%rip), %xmm10 | ||
252 | movdqa %xmm5, %xmm8 | ||
253 | pclmulqdq $0x11, %xmm10, %xmm5 | ||
254 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
255 | pxor %xmm8, %xmm7 | ||
256 | xorps %xmm5, %xmm7 | ||
257 | |||
258 | movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2 | ||
259 | #imm value of pclmulqdq instruction | ||
260 | #will determine which constant to use | ||
261 | movdqa %xmm6, %xmm8 | ||
262 | pclmulqdq $0x11, %xmm10, %xmm6 | ||
263 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
264 | pxor %xmm8, %xmm7 | ||
265 | pxor %xmm6, %xmm7 | ||
266 | |||
267 | |||
268 | # instead of 128, we add 112 to the loop counter to save 1 instruction | ||
269 | # from the loop; instead of a cmp instruction, we use the negative | ||
270 | # flag with the jl instruction | ||
271 | add $128-16, arg3 | ||
272 | jl _final_reduction_for_128 | ||
273 | |||
274 | # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 | ||
275 | # and the rest is in memory. We can fold 16 bytes at a time if y>=16 | ||
276 | # continue folding 16B at a time | ||
277 | |||
278 | _16B_reduction_loop: | ||
279 | movdqa %xmm7, %xmm8 | ||
280 | pclmulqdq $0x11, %xmm10, %xmm7 | ||
281 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
282 | pxor %xmm8, %xmm7 | ||
283 | movdqu (arg2), %xmm0 | ||
284 | pshufb %xmm11, %xmm0 | ||
285 | pxor %xmm0 , %xmm7 | ||
286 | add $16, arg2 | ||
287 | sub $16, arg3 | ||
288 | # instead of a cmp instruction, we utilize the flags with the | ||
289 | # jge instruction equivalent of: cmp arg3, 16-16 | ||
290 | # check if there is any more 16B in the buffer to be able to fold | ||
291 | jge _16B_reduction_loop | ||
292 | |||
293 | #now we have 16+z bytes left to reduce, where 0<= z < 16. | ||
294 | #first, we reduce the data in the xmm7 register | ||
295 | |||
296 | |||
297 | _final_reduction_for_128: | ||
298 | # check if any more data to fold. If not, compute the CRC of | ||
299 | # the final 128 bits | ||
300 | add $16, arg3 | ||
301 | je _128_done | ||
302 | |||
303 | # here we are getting data that is less than 16 bytes. | ||
304 | # since we know that there was data before the pointer, we can | ||
305 | # offset the input pointer before the actual point, to receive | ||
306 | # exactly 16 bytes. after that the registers need to be adjusted. | ||
307 | _get_last_two_xmms: | ||
308 | movdqa %xmm7, %xmm2 | ||
309 | |||
310 | movdqu -16(arg2, arg3), %xmm1 | ||
311 | pshufb %xmm11, %xmm1 | ||
312 | |||
313 | # get rid of the extra data that was loaded before | ||
314 | # load the shift constant | ||
315 | lea pshufb_shf_table+16(%rip), %rax | ||
316 | sub arg3, %rax | ||
317 | movdqu (%rax), %xmm0 | ||
318 | |||
319 | # shift xmm2 to the left by arg3 bytes | ||
320 | pshufb %xmm0, %xmm2 | ||
321 | |||
322 | # shift xmm7 to the right by 16-arg3 bytes | ||
323 | pxor mask1(%rip), %xmm0 | ||
324 | pshufb %xmm0, %xmm7 | ||
325 | pblendvb %xmm2, %xmm1 #xmm0 is implicit | ||
326 | |||
327 | # fold 16 Bytes | ||
328 | movdqa %xmm1, %xmm2 | ||
329 | movdqa %xmm7, %xmm8 | ||
330 | pclmulqdq $0x11, %xmm10, %xmm7 | ||
331 | pclmulqdq $0x0 , %xmm10, %xmm8 | ||
332 | pxor %xmm8, %xmm7 | ||
333 | pxor %xmm2, %xmm7 | ||
334 | |||
335 | _128_done: | ||
336 | # compute crc of a 128-bit value | ||
337 | movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10 | ||
338 | movdqa %xmm7, %xmm0 | ||
339 | |||
340 | #64b fold | ||
341 | pclmulqdq $0x1, %xmm10, %xmm7 | ||
342 | pslldq $8 , %xmm0 | ||
343 | pxor %xmm0, %xmm7 | ||
344 | |||
345 | #32b fold | ||
346 | movdqa %xmm7, %xmm0 | ||
347 | |||
348 | pand mask2(%rip), %xmm0 | ||
349 | |||
350 | psrldq $12, %xmm7 | ||
351 | pclmulqdq $0x10, %xmm10, %xmm7 | ||
352 | pxor %xmm0, %xmm7 | ||
353 | |||
354 | #barrett reduction | ||
355 | _barrett: | ||
356 | movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10 | ||
357 | movdqa %xmm7, %xmm0 | ||
358 | pclmulqdq $0x01, %xmm10, %xmm7 | ||
359 | pslldq $4, %xmm7 | ||
360 | pclmulqdq $0x11, %xmm10, %xmm7 | ||
361 | |||
362 | pslldq $4, %xmm7 | ||
363 | pxor %xmm0, %xmm7 | ||
364 | pextrd $1, %xmm7, %eax | ||
365 | |||
366 | _cleanup: | ||
367 | # scale the result back to 16 bits | ||
368 | shr $16, %eax | ||
369 | mov %rcx, %rsp | ||
370 | ret | ||
371 | |||
372 | ######################################################################## | ||
373 | |||
374 | .align 16 | ||
375 | _less_than_128: | ||
376 | |||
377 | # check if there is enough buffer to be able to fold 16B at a time | ||
378 | cmp $32, arg3 | ||
379 | jl _less_than_32 | ||
380 | movdqa SHUF_MASK(%rip), %xmm11 | ||
381 | |||
382 | # now if there is, load the constants | ||
383 | movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 | ||
384 | |||
385 | movd arg1_low32, %xmm0 # get the initial crc value | ||
386 | pslldq $12, %xmm0 # align it to its correct place | ||
387 | movdqu (arg2), %xmm7 # load the plaintext | ||
388 | pshufb %xmm11, %xmm7 # byte-reflect the plaintext | ||
389 | pxor %xmm0, %xmm7 | ||
390 | |||
391 | |||
392 | # update the buffer pointer | ||
393 | add $16, arg2 | ||
394 | |||
395 | # update the counter. subtract 32 instead of 16 to save one | ||
396 | # instruction from the loop | ||
397 | sub $32, arg3 | ||
398 | |||
399 | jmp _16B_reduction_loop | ||
400 | |||
401 | |||
402 | .align 16 | ||
403 | _less_than_32: | ||
404 | # mov initial crc to the return value. this is necessary for | ||
405 | # zero-length buffers. | ||
406 | mov arg1_low32, %eax | ||
407 | test arg3, arg3 | ||
408 | je _cleanup | ||
409 | |||
410 | movdqa SHUF_MASK(%rip), %xmm11 | ||
411 | |||
412 | movd arg1_low32, %xmm0 # get the initial crc value | ||
413 | pslldq $12, %xmm0 # align it to its correct place | ||
414 | |||
415 | cmp $16, arg3 | ||
416 | je _exact_16_left | ||
417 | jl _less_than_16_left | ||
418 | |||
419 | movdqu (arg2), %xmm7 # load the plaintext | ||
420 | pshufb %xmm11, %xmm7 # byte-reflect the plaintext | ||
421 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
422 | add $16, arg2 | ||
423 | sub $16, arg3 | ||
424 | movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10 | ||
425 | jmp _get_last_two_xmms | ||
426 | |||
427 | |||
428 | .align 16 | ||
429 | _less_than_16_left: | ||
430 | # use stack space to load data less than 16 bytes, zero-out | ||
431 | # the 16B in memory first. | ||
432 | |||
433 | pxor %xmm1, %xmm1 | ||
434 | mov %rsp, %r11 | ||
435 | movdqa %xmm1, (%r11) | ||
436 | |||
437 | cmp $4, arg3 | ||
438 | jl _only_less_than_4 | ||
439 | |||
440 | # backup the counter value | ||
441 | mov arg3, %r9 | ||
442 | cmp $8, arg3 | ||
443 | jl _less_than_8_left | ||
444 | |||
445 | # load 8 Bytes | ||
446 | mov (arg2), %rax | ||
447 | mov %rax, (%r11) | ||
448 | add $8, %r11 | ||
449 | sub $8, arg3 | ||
450 | add $8, arg2 | ||
451 | _less_than_8_left: | ||
452 | |||
453 | cmp $4, arg3 | ||
454 | jl _less_than_4_left | ||
455 | |||
456 | # load 4 Bytes | ||
457 | mov (arg2), %eax | ||
458 | mov %eax, (%r11) | ||
459 | add $4, %r11 | ||
460 | sub $4, arg3 | ||
461 | add $4, arg2 | ||
462 | _less_than_4_left: | ||
463 | |||
464 | cmp $2, arg3 | ||
465 | jl _less_than_2_left | ||
466 | |||
467 | # load 2 Bytes | ||
468 | mov (arg2), %ax | ||
469 | mov %ax, (%r11) | ||
470 | add $2, %r11 | ||
471 | sub $2, arg3 | ||
472 | add $2, arg2 | ||
473 | _less_than_2_left: | ||
474 | cmp $1, arg3 | ||
475 | jl _zero_left | ||
476 | |||
477 | # load 1 Byte | ||
478 | mov (arg2), %al | ||
479 | mov %al, (%r11) | ||
480 | _zero_left: | ||
481 | movdqa (%rsp), %xmm7 | ||
482 | pshufb %xmm11, %xmm7 | ||
483 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
484 | |||
485 | # shl r9, 4 | ||
486 | lea pshufb_shf_table+16(%rip), %rax | ||
487 | sub %r9, %rax | ||
488 | movdqu (%rax), %xmm0 | ||
489 | pxor mask1(%rip), %xmm0 | ||
490 | |||
491 | pshufb %xmm0, %xmm7 | ||
492 | jmp _128_done | ||
493 | |||
494 | .align 16 | ||
495 | _exact_16_left: | ||
496 | movdqu (arg2), %xmm7 | ||
497 | pshufb %xmm11, %xmm7 | ||
498 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
499 | |||
500 | jmp _128_done | ||
501 | |||
502 | _only_less_than_4: | ||
503 | cmp $3, arg3 | ||
504 | jl _only_less_than_3 | ||
505 | |||
506 | # load 3 Bytes | ||
507 | mov (arg2), %al | ||
508 | mov %al, (%r11) | ||
509 | |||
510 | mov 1(arg2), %al | ||
511 | mov %al, 1(%r11) | ||
512 | |||
513 | mov 2(arg2), %al | ||
514 | mov %al, 2(%r11) | ||
515 | |||
516 | movdqa (%rsp), %xmm7 | ||
517 | pshufb %xmm11, %xmm7 | ||
518 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
519 | |||
520 | psrldq $5, %xmm7 | ||
521 | |||
522 | jmp _barrett | ||
523 | _only_less_than_3: | ||
524 | cmp $2, arg3 | ||
525 | jl _only_less_than_2 | ||
526 | |||
527 | # load 2 Bytes | ||
528 | mov (arg2), %al | ||
529 | mov %al, (%r11) | ||
530 | |||
531 | mov 1(arg2), %al | ||
532 | mov %al, 1(%r11) | ||
533 | |||
534 | movdqa (%rsp), %xmm7 | ||
535 | pshufb %xmm11, %xmm7 | ||
536 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
537 | |||
538 | psrldq $6, %xmm7 | ||
539 | |||
540 | jmp _barrett | ||
541 | _only_less_than_2: | ||
542 | |||
543 | # load 1 Byte | ||
544 | mov (arg2), %al | ||
545 | mov %al, (%r11) | ||
546 | |||
547 | movdqa (%rsp), %xmm7 | ||
548 | pshufb %xmm11, %xmm7 | ||
549 | pxor %xmm0 , %xmm7 # xor the initial crc value | ||
550 | |||
551 | psrldq $7, %xmm7 | ||
552 | |||
553 | jmp _barrett | ||
554 | |||
555 | ENDPROC(crc_t10dif_pcl) | ||
556 | |||
557 | .data | ||
558 | |||
559 | # precomputed constants | ||
560 | # these constants are precomputed from the poly: | ||
561 | # 0x8bb70000 (0x8bb7 scaled to 32 bits) | ||
562 | .align 16 | ||
563 | # Q = 0x18BB70000 | ||
564 | # rk1 = 2^(32*3) mod Q << 32 | ||
565 | # rk2 = 2^(32*5) mod Q << 32 | ||
566 | # rk3 = 2^(32*15) mod Q << 32 | ||
567 | # rk4 = 2^(32*17) mod Q << 32 | ||
568 | # rk5 = 2^(32*3) mod Q << 32 | ||
569 | # rk6 = 2^(32*2) mod Q << 32 | ||
570 | # rk7 = floor(2^64/Q) | ||
571 | # rk8 = Q | ||
572 | rk1: | ||
573 | .quad 0x2d56000000000000 | ||
574 | rk2: | ||
575 | .quad 0x06df000000000000 | ||
576 | rk3: | ||
577 | .quad 0x9d9d000000000000 | ||
578 | rk4: | ||
579 | .quad 0x7cf5000000000000 | ||
580 | rk5: | ||
581 | .quad 0x2d56000000000000 | ||
582 | rk6: | ||
583 | .quad 0x1368000000000000 | ||
584 | rk7: | ||
585 | .quad 0x00000001f65a57f8 | ||
586 | rk8: | ||
587 | .quad 0x000000018bb70000 | ||
588 | |||
589 | rk9: | ||
590 | .quad 0xceae000000000000 | ||
591 | rk10: | ||
592 | .quad 0xbfd6000000000000 | ||
593 | rk11: | ||
594 | .quad 0x1e16000000000000 | ||
595 | rk12: | ||
596 | .quad 0x713c000000000000 | ||
597 | rk13: | ||
598 | .quad 0xf7f9000000000000 | ||
599 | rk14: | ||
600 | .quad 0x80a6000000000000 | ||
601 | rk15: | ||
602 | .quad 0x044c000000000000 | ||
603 | rk16: | ||
604 | .quad 0xe658000000000000 | ||
605 | rk17: | ||
606 | .quad 0xad18000000000000 | ||
607 | rk18: | ||
608 | .quad 0xa497000000000000 | ||
609 | rk19: | ||
610 | .quad 0x6ee3000000000000 | ||
611 | rk20: | ||
612 | .quad 0xe7b5000000000000 | ||
613 | |||
614 | |||
615 | |||
616 | mask1: | ||
617 | .octa 0x80808080808080808080808080808080 | ||
618 | mask2: | ||
619 | .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF | ||
620 | |||
621 | SHUF_MASK: | ||
622 | .octa 0x000102030405060708090A0B0C0D0E0F | ||
623 | |||
624 | pshufb_shf_table: | ||
625 | # use these values for shift constants for the pshufb instruction | ||
626 | # different alignments result in values as shown: | ||
627 | # DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 | ||
628 | # DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2 | ||
629 | # DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3 | ||
630 | # DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 | ||
631 | # DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 | ||
632 | # DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 | ||
633 | # DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 | ||
634 | # DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 | ||
635 | # DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 | ||
636 | # DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 | ||
637 | # DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 | ||
638 | # DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 | ||
639 | # DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 | ||
640 | # DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 | ||
641 | # DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 | ||
642 | .octa 0x8f8e8d8c8b8a89888786858483828100 | ||
643 | .octa 0x000e0d0c0b0a09080706050403020100 | ||
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c new file mode 100644 index 000000000000..7845d7fd54c0 --- /dev/null +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c | |||
@@ -0,0 +1,151 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions | ||
5 | * | ||
6 | * Copyright (C) 2013 Intel Corporation | ||
7 | * Author: Tim Chen <tim.c.chen@linux.intel.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License as published by the Free | ||
11 | * Software Foundation; either version 2 of the License, or (at your option) | ||
12 | * any later version. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
15 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
16 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
17 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
18 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
19 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
20 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
21 | * SOFTWARE. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/types.h> | ||
26 | #include <linux/module.h> | ||
27 | #include <linux/crc-t10dif.h> | ||
28 | #include <crypto/internal/hash.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/string.h> | ||
31 | #include <linux/kernel.h> | ||
32 | #include <asm/i387.h> | ||
33 | #include <asm/cpufeature.h> | ||
34 | #include <asm/cpu_device_id.h> | ||
35 | |||
36 | asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf, | ||
37 | size_t len); | ||
38 | |||
39 | struct chksum_desc_ctx { | ||
40 | __u16 crc; | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * Steps through buffer one byte at a time, calculates reflected | ||
45 | * crc using table. | ||
46 | */ | ||
47 | |||
48 | static int chksum_init(struct shash_desc *desc) | ||
49 | { | ||
50 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
51 | |||
52 | ctx->crc = 0; | ||
53 | |||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static int chksum_update(struct shash_desc *desc, const u8 *data, | ||
58 | unsigned int length) | ||
59 | { | ||
60 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
61 | |||
62 | if (irq_fpu_usable()) { | ||
63 | kernel_fpu_begin(); | ||
64 | ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); | ||
65 | kernel_fpu_end(); | ||
66 | } else | ||
67 | ctx->crc = crc_t10dif_generic(ctx->crc, data, length); | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static int chksum_final(struct shash_desc *desc, u8 *out) | ||
72 | { | ||
73 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
74 | |||
75 | *(__u16 *)out = ctx->crc; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, | ||
80 | u8 *out) | ||
81 | { | ||
82 | if (irq_fpu_usable()) { | ||
83 | kernel_fpu_begin(); | ||
84 | *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); | ||
85 | kernel_fpu_end(); | ||
86 | } else | ||
87 | *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static int chksum_finup(struct shash_desc *desc, const u8 *data, | ||
92 | unsigned int len, u8 *out) | ||
93 | { | ||
94 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
95 | |||
96 | return __chksum_finup(&ctx->crc, data, len, out); | ||
97 | } | ||
98 | |||
99 | static int chksum_digest(struct shash_desc *desc, const u8 *data, | ||
100 | unsigned int length, u8 *out) | ||
101 | { | ||
102 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
103 | |||
104 | return __chksum_finup(&ctx->crc, data, length, out); | ||
105 | } | ||
106 | |||
107 | static struct shash_alg alg = { | ||
108 | .digestsize = CRC_T10DIF_DIGEST_SIZE, | ||
109 | .init = chksum_init, | ||
110 | .update = chksum_update, | ||
111 | .final = chksum_final, | ||
112 | .finup = chksum_finup, | ||
113 | .digest = chksum_digest, | ||
114 | .descsize = sizeof(struct chksum_desc_ctx), | ||
115 | .base = { | ||
116 | .cra_name = "crct10dif", | ||
117 | .cra_driver_name = "crct10dif-pclmul", | ||
118 | .cra_priority = 200, | ||
119 | .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, | ||
120 | .cra_module = THIS_MODULE, | ||
121 | } | ||
122 | }; | ||
123 | |||
124 | static const struct x86_cpu_id crct10dif_cpu_id[] = { | ||
125 | X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), | ||
126 | {} | ||
127 | }; | ||
128 | MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); | ||
129 | |||
130 | static int __init crct10dif_intel_mod_init(void) | ||
131 | { | ||
132 | if (!x86_match_cpu(crct10dif_cpu_id)) | ||
133 | return -ENODEV; | ||
134 | |||
135 | return crypto_register_shash(&alg); | ||
136 | } | ||
137 | |||
138 | static void __exit crct10dif_intel_mod_fini(void) | ||
139 | { | ||
140 | crypto_unregister_shash(&alg); | ||
141 | } | ||
142 | |||
143 | module_init(crct10dif_intel_mod_init); | ||
144 | module_exit(crct10dif_intel_mod_fini); | ||
145 | |||
146 | MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); | ||
147 | MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); | ||
148 | MODULE_LICENSE("GPL"); | ||
149 | |||
150 | MODULE_ALIAS("crct10dif"); | ||
151 | MODULE_ALIAS("crct10dif-pclmul"); | ||
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 597d4da69656..50226c4b86ed 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c | |||
@@ -187,7 +187,36 @@ static int sha256_ssse3_import(struct shash_desc *desc, const void *in) | |||
187 | return 0; | 187 | return 0; |
188 | } | 188 | } |
189 | 189 | ||
190 | static struct shash_alg alg = { | 190 | static int sha224_ssse3_init(struct shash_desc *desc) |
191 | { | ||
192 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
193 | |||
194 | sctx->state[0] = SHA224_H0; | ||
195 | sctx->state[1] = SHA224_H1; | ||
196 | sctx->state[2] = SHA224_H2; | ||
197 | sctx->state[3] = SHA224_H3; | ||
198 | sctx->state[4] = SHA224_H4; | ||
199 | sctx->state[5] = SHA224_H5; | ||
200 | sctx->state[6] = SHA224_H6; | ||
201 | sctx->state[7] = SHA224_H7; | ||
202 | sctx->count = 0; | ||
203 | |||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash) | ||
208 | { | ||
209 | u8 D[SHA256_DIGEST_SIZE]; | ||
210 | |||
211 | sha256_ssse3_final(desc, D); | ||
212 | |||
213 | memcpy(hash, D, SHA224_DIGEST_SIZE); | ||
214 | memset(D, 0, SHA256_DIGEST_SIZE); | ||
215 | |||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | static struct shash_alg algs[] = { { | ||
191 | .digestsize = SHA256_DIGEST_SIZE, | 220 | .digestsize = SHA256_DIGEST_SIZE, |
192 | .init = sha256_ssse3_init, | 221 | .init = sha256_ssse3_init, |
193 | .update = sha256_ssse3_update, | 222 | .update = sha256_ssse3_update, |
@@ -204,7 +233,24 @@ static struct shash_alg alg = { | |||
204 | .cra_blocksize = SHA256_BLOCK_SIZE, | 233 | .cra_blocksize = SHA256_BLOCK_SIZE, |
205 | .cra_module = THIS_MODULE, | 234 | .cra_module = THIS_MODULE, |
206 | } | 235 | } |
207 | }; | 236 | }, { |
237 | .digestsize = SHA224_DIGEST_SIZE, | ||
238 | .init = sha224_ssse3_init, | ||
239 | .update = sha256_ssse3_update, | ||
240 | .final = sha224_ssse3_final, | ||
241 | .export = sha256_ssse3_export, | ||
242 | .import = sha256_ssse3_import, | ||
243 | .descsize = sizeof(struct sha256_state), | ||
244 | .statesize = sizeof(struct sha256_state), | ||
245 | .base = { | ||
246 | .cra_name = "sha224", | ||
247 | .cra_driver_name = "sha224-ssse3", | ||
248 | .cra_priority = 150, | ||
249 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
250 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
251 | .cra_module = THIS_MODULE, | ||
252 | } | ||
253 | } }; | ||
208 | 254 | ||
209 | #ifdef CONFIG_AS_AVX | 255 | #ifdef CONFIG_AS_AVX |
210 | static bool __init avx_usable(void) | 256 | static bool __init avx_usable(void) |
@@ -227,7 +273,7 @@ static bool __init avx_usable(void) | |||
227 | 273 | ||
228 | static int __init sha256_ssse3_mod_init(void) | 274 | static int __init sha256_ssse3_mod_init(void) |
229 | { | 275 | { |
230 | /* test for SSE3 first */ | 276 | /* test for SSSE3 first */ |
231 | if (cpu_has_ssse3) | 277 | if (cpu_has_ssse3) |
232 | sha256_transform_asm = sha256_transform_ssse3; | 278 | sha256_transform_asm = sha256_transform_ssse3; |
233 | 279 | ||
@@ -254,7 +300,7 @@ static int __init sha256_ssse3_mod_init(void) | |||
254 | else | 300 | else |
255 | #endif | 301 | #endif |
256 | pr_info("Using SSSE3 optimized SHA-256 implementation\n"); | 302 | pr_info("Using SSSE3 optimized SHA-256 implementation\n"); |
257 | return crypto_register_shash(&alg); | 303 | return crypto_register_shashes(algs, ARRAY_SIZE(algs)); |
258 | } | 304 | } |
259 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); | 305 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); |
260 | 306 | ||
@@ -263,7 +309,7 @@ static int __init sha256_ssse3_mod_init(void) | |||
263 | 309 | ||
264 | static void __exit sha256_ssse3_mod_fini(void) | 310 | static void __exit sha256_ssse3_mod_fini(void) |
265 | { | 311 | { |
266 | crypto_unregister_shash(&alg); | 312 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); |
267 | } | 313 | } |
268 | 314 | ||
269 | module_init(sha256_ssse3_mod_init); | 315 | module_init(sha256_ssse3_mod_init); |
@@ -273,3 +319,4 @@ MODULE_LICENSE("GPL"); | |||
273 | MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); | 319 | MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); |
274 | 320 | ||
275 | MODULE_ALIAS("sha256"); | 321 | MODULE_ALIAS("sha256"); |
322 | MODULE_ALIAS("sha384"); | ||
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 6cbd8df348d2..f30cd10293f0 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c | |||
@@ -194,7 +194,37 @@ static int sha512_ssse3_import(struct shash_desc *desc, const void *in) | |||
194 | return 0; | 194 | return 0; |
195 | } | 195 | } |
196 | 196 | ||
197 | static struct shash_alg alg = { | 197 | static int sha384_ssse3_init(struct shash_desc *desc) |
198 | { | ||
199 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
200 | |||
201 | sctx->state[0] = SHA384_H0; | ||
202 | sctx->state[1] = SHA384_H1; | ||
203 | sctx->state[2] = SHA384_H2; | ||
204 | sctx->state[3] = SHA384_H3; | ||
205 | sctx->state[4] = SHA384_H4; | ||
206 | sctx->state[5] = SHA384_H5; | ||
207 | sctx->state[6] = SHA384_H6; | ||
208 | sctx->state[7] = SHA384_H7; | ||
209 | |||
210 | sctx->count[0] = sctx->count[1] = 0; | ||
211 | |||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash) | ||
216 | { | ||
217 | u8 D[SHA512_DIGEST_SIZE]; | ||
218 | |||
219 | sha512_ssse3_final(desc, D); | ||
220 | |||
221 | memcpy(hash, D, SHA384_DIGEST_SIZE); | ||
222 | memset(D, 0, SHA512_DIGEST_SIZE); | ||
223 | |||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | static struct shash_alg algs[] = { { | ||
198 | .digestsize = SHA512_DIGEST_SIZE, | 228 | .digestsize = SHA512_DIGEST_SIZE, |
199 | .init = sha512_ssse3_init, | 229 | .init = sha512_ssse3_init, |
200 | .update = sha512_ssse3_update, | 230 | .update = sha512_ssse3_update, |
@@ -211,7 +241,24 @@ static struct shash_alg alg = { | |||
211 | .cra_blocksize = SHA512_BLOCK_SIZE, | 241 | .cra_blocksize = SHA512_BLOCK_SIZE, |
212 | .cra_module = THIS_MODULE, | 242 | .cra_module = THIS_MODULE, |
213 | } | 243 | } |
214 | }; | 244 | }, { |
245 | .digestsize = SHA384_DIGEST_SIZE, | ||
246 | .init = sha384_ssse3_init, | ||
247 | .update = sha512_ssse3_update, | ||
248 | .final = sha384_ssse3_final, | ||
249 | .export = sha512_ssse3_export, | ||
250 | .import = sha512_ssse3_import, | ||
251 | .descsize = sizeof(struct sha512_state), | ||
252 | .statesize = sizeof(struct sha512_state), | ||
253 | .base = { | ||
254 | .cra_name = "sha384", | ||
255 | .cra_driver_name = "sha384-ssse3", | ||
256 | .cra_priority = 150, | ||
257 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
258 | .cra_blocksize = SHA384_BLOCK_SIZE, | ||
259 | .cra_module = THIS_MODULE, | ||
260 | } | ||
261 | } }; | ||
215 | 262 | ||
216 | #ifdef CONFIG_AS_AVX | 263 | #ifdef CONFIG_AS_AVX |
217 | static bool __init avx_usable(void) | 264 | static bool __init avx_usable(void) |
@@ -234,7 +281,7 @@ static bool __init avx_usable(void) | |||
234 | 281 | ||
235 | static int __init sha512_ssse3_mod_init(void) | 282 | static int __init sha512_ssse3_mod_init(void) |
236 | { | 283 | { |
237 | /* test for SSE3 first */ | 284 | /* test for SSSE3 first */ |
238 | if (cpu_has_ssse3) | 285 | if (cpu_has_ssse3) |
239 | sha512_transform_asm = sha512_transform_ssse3; | 286 | sha512_transform_asm = sha512_transform_ssse3; |
240 | 287 | ||
@@ -261,7 +308,7 @@ static int __init sha512_ssse3_mod_init(void) | |||
261 | else | 308 | else |
262 | #endif | 309 | #endif |
263 | pr_info("Using SSSE3 optimized SHA-512 implementation\n"); | 310 | pr_info("Using SSSE3 optimized SHA-512 implementation\n"); |
264 | return crypto_register_shash(&alg); | 311 | return crypto_register_shashes(algs, ARRAY_SIZE(algs)); |
265 | } | 312 | } |
266 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); | 313 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); |
267 | 314 | ||
@@ -270,7 +317,7 @@ static int __init sha512_ssse3_mod_init(void) | |||
270 | 317 | ||
271 | static void __exit sha512_ssse3_mod_fini(void) | 318 | static void __exit sha512_ssse3_mod_fini(void) |
272 | { | 319 | { |
273 | crypto_unregister_shash(&alg); | 320 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); |
274 | } | 321 | } |
275 | 322 | ||
276 | module_init(sha512_ssse3_mod_init); | 323 | module_init(sha512_ssse3_mod_init); |
@@ -280,3 +327,4 @@ MODULE_LICENSE("GPL"); | |||
280 | MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); | 327 | MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); |
281 | 328 | ||
282 | MODULE_ALIAS("sha512"); | 329 | MODULE_ALIAS("sha512"); |
330 | MODULE_ALIAS("sha384"); | ||
diff --git a/arch/x86/crypto/twofish-avx2-asm_64.S b/arch/x86/crypto/twofish-avx2-asm_64.S deleted file mode 100644 index e1a83b9cd389..000000000000 --- a/arch/x86/crypto/twofish-avx2-asm_64.S +++ /dev/null | |||
@@ -1,600 +0,0 @@ | |||
1 | /* | ||
2 | * x86_64/AVX2 assembler optimized version of Twofish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | #include "glue_helper-asm-avx2.S" | ||
15 | |||
16 | .file "twofish-avx2-asm_64.S" | ||
17 | |||
18 | .data | ||
19 | .align 16 | ||
20 | |||
21 | .Lvpshufb_mask0: | ||
22 | .long 0x80808000 | ||
23 | .long 0x80808004 | ||
24 | .long 0x80808008 | ||
25 | .long 0x8080800c | ||
26 | |||
27 | .Lbswap128_mask: | ||
28 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
29 | .Lxts_gf128mul_and_shl1_mask_0: | ||
30 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
31 | .Lxts_gf128mul_and_shl1_mask_1: | ||
32 | .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 | ||
33 | |||
34 | .text | ||
35 | |||
36 | /* structure of crypto context */ | ||
37 | #define s0 0 | ||
38 | #define s1 1024 | ||
39 | #define s2 2048 | ||
40 | #define s3 3072 | ||
41 | #define w 4096 | ||
42 | #define k 4128 | ||
43 | |||
44 | /* register macros */ | ||
45 | #define CTX %rdi | ||
46 | |||
47 | #define RS0 CTX | ||
48 | #define RS1 %r8 | ||
49 | #define RS2 %r9 | ||
50 | #define RS3 %r10 | ||
51 | #define RK %r11 | ||
52 | #define RW %rax | ||
53 | #define RROUND %r12 | ||
54 | #define RROUNDd %r12d | ||
55 | |||
56 | #define RA0 %ymm8 | ||
57 | #define RB0 %ymm9 | ||
58 | #define RC0 %ymm10 | ||
59 | #define RD0 %ymm11 | ||
60 | #define RA1 %ymm12 | ||
61 | #define RB1 %ymm13 | ||
62 | #define RC1 %ymm14 | ||
63 | #define RD1 %ymm15 | ||
64 | |||
65 | /* temp regs */ | ||
66 | #define RX0 %ymm0 | ||
67 | #define RY0 %ymm1 | ||
68 | #define RX1 %ymm2 | ||
69 | #define RY1 %ymm3 | ||
70 | #define RT0 %ymm4 | ||
71 | #define RIDX %ymm5 | ||
72 | |||
73 | #define RX0x %xmm0 | ||
74 | #define RY0x %xmm1 | ||
75 | #define RX1x %xmm2 | ||
76 | #define RY1x %xmm3 | ||
77 | #define RT0x %xmm4 | ||
78 | |||
79 | /* vpgatherdd mask and '-1' */ | ||
80 | #define RNOT %ymm6 | ||
81 | |||
82 | /* byte mask, (-1 >> 24) */ | ||
83 | #define RBYTE %ymm7 | ||
84 | |||
85 | /********************************************************************** | ||
86 | 16-way AVX2 twofish | ||
87 | **********************************************************************/ | ||
88 | #define init_round_constants() \ | ||
89 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
90 | vpsrld $24, RNOT, RBYTE; \ | ||
91 | leaq k(CTX), RK; \ | ||
92 | leaq w(CTX), RW; \ | ||
93 | leaq s1(CTX), RS1; \ | ||
94 | leaq s2(CTX), RS2; \ | ||
95 | leaq s3(CTX), RS3; \ | ||
96 | |||
97 | #define g16(ab, rs0, rs1, rs2, rs3, xy) \ | ||
98 | vpand RBYTE, ab ## 0, RIDX; \ | ||
99 | vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \ | ||
100 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
101 | \ | ||
102 | vpand RBYTE, ab ## 1, RIDX; \ | ||
103 | vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \ | ||
104 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
105 | \ | ||
106 | vpsrld $8, ab ## 0, RIDX; \ | ||
107 | vpand RBYTE, RIDX, RIDX; \ | ||
108 | vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ | ||
109 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
110 | vpxor RT0, xy ## 0, xy ## 0; \ | ||
111 | \ | ||
112 | vpsrld $8, ab ## 1, RIDX; \ | ||
113 | vpand RBYTE, RIDX, RIDX; \ | ||
114 | vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ | ||
115 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
116 | vpxor RT0, xy ## 1, xy ## 1; \ | ||
117 | \ | ||
118 | vpsrld $16, ab ## 0, RIDX; \ | ||
119 | vpand RBYTE, RIDX, RIDX; \ | ||
120 | vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ | ||
121 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
122 | vpxor RT0, xy ## 0, xy ## 0; \ | ||
123 | \ | ||
124 | vpsrld $16, ab ## 1, RIDX; \ | ||
125 | vpand RBYTE, RIDX, RIDX; \ | ||
126 | vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ | ||
127 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
128 | vpxor RT0, xy ## 1, xy ## 1; \ | ||
129 | \ | ||
130 | vpsrld $24, ab ## 0, RIDX; \ | ||
131 | vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ | ||
132 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
133 | vpxor RT0, xy ## 0, xy ## 0; \ | ||
134 | \ | ||
135 | vpsrld $24, ab ## 1, RIDX; \ | ||
136 | vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ | ||
137 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
138 | vpxor RT0, xy ## 1, xy ## 1; | ||
139 | |||
140 | #define g1_16(a, x) \ | ||
141 | g16(a, RS0, RS1, RS2, RS3, x); | ||
142 | |||
143 | #define g2_16(b, y) \ | ||
144 | g16(b, RS1, RS2, RS3, RS0, y); | ||
145 | |||
146 | #define encrypt_round_end16(a, b, c, d, nk) \ | ||
147 | vpaddd RY0, RX0, RX0; \ | ||
148 | vpaddd RX0, RY0, RY0; \ | ||
149 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
150 | vpaddd RT0, RX0, RX0; \ | ||
151 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
152 | vpaddd RT0, RY0, RY0; \ | ||
153 | \ | ||
154 | vpxor RY0, d ## 0, d ## 0; \ | ||
155 | \ | ||
156 | vpxor RX0, c ## 0, c ## 0; \ | ||
157 | vpsrld $1, c ## 0, RT0; \ | ||
158 | vpslld $31, c ## 0, c ## 0; \ | ||
159 | vpor RT0, c ## 0, c ## 0; \ | ||
160 | \ | ||
161 | vpaddd RY1, RX1, RX1; \ | ||
162 | vpaddd RX1, RY1, RY1; \ | ||
163 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
164 | vpaddd RT0, RX1, RX1; \ | ||
165 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
166 | vpaddd RT0, RY1, RY1; \ | ||
167 | \ | ||
168 | vpxor RY1, d ## 1, d ## 1; \ | ||
169 | \ | ||
170 | vpxor RX1, c ## 1, c ## 1; \ | ||
171 | vpsrld $1, c ## 1, RT0; \ | ||
172 | vpslld $31, c ## 1, c ## 1; \ | ||
173 | vpor RT0, c ## 1, c ## 1; \ | ||
174 | |||
175 | #define encrypt_round16(a, b, c, d, nk) \ | ||
176 | g2_16(b, RY); \ | ||
177 | \ | ||
178 | vpslld $1, b ## 0, RT0; \ | ||
179 | vpsrld $31, b ## 0, b ## 0; \ | ||
180 | vpor RT0, b ## 0, b ## 0; \ | ||
181 | \ | ||
182 | vpslld $1, b ## 1, RT0; \ | ||
183 | vpsrld $31, b ## 1, b ## 1; \ | ||
184 | vpor RT0, b ## 1, b ## 1; \ | ||
185 | \ | ||
186 | g1_16(a, RX); \ | ||
187 | \ | ||
188 | encrypt_round_end16(a, b, c, d, nk); | ||
189 | |||
190 | #define encrypt_round_first16(a, b, c, d, nk) \ | ||
191 | vpslld $1, d ## 0, RT0; \ | ||
192 | vpsrld $31, d ## 0, d ## 0; \ | ||
193 | vpor RT0, d ## 0, d ## 0; \ | ||
194 | \ | ||
195 | vpslld $1, d ## 1, RT0; \ | ||
196 | vpsrld $31, d ## 1, d ## 1; \ | ||
197 | vpor RT0, d ## 1, d ## 1; \ | ||
198 | \ | ||
199 | encrypt_round16(a, b, c, d, nk); | ||
200 | |||
201 | #define encrypt_round_last16(a, b, c, d, nk) \ | ||
202 | g2_16(b, RY); \ | ||
203 | \ | ||
204 | g1_16(a, RX); \ | ||
205 | \ | ||
206 | encrypt_round_end16(a, b, c, d, nk); | ||
207 | |||
208 | #define decrypt_round_end16(a, b, c, d, nk) \ | ||
209 | vpaddd RY0, RX0, RX0; \ | ||
210 | vpaddd RX0, RY0, RY0; \ | ||
211 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
212 | vpaddd RT0, RX0, RX0; \ | ||
213 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
214 | vpaddd RT0, RY0, RY0; \ | ||
215 | \ | ||
216 | vpxor RX0, c ## 0, c ## 0; \ | ||
217 | \ | ||
218 | vpxor RY0, d ## 0, d ## 0; \ | ||
219 | vpsrld $1, d ## 0, RT0; \ | ||
220 | vpslld $31, d ## 0, d ## 0; \ | ||
221 | vpor RT0, d ## 0, d ## 0; \ | ||
222 | \ | ||
223 | vpaddd RY1, RX1, RX1; \ | ||
224 | vpaddd RX1, RY1, RY1; \ | ||
225 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
226 | vpaddd RT0, RX1, RX1; \ | ||
227 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
228 | vpaddd RT0, RY1, RY1; \ | ||
229 | \ | ||
230 | vpxor RX1, c ## 1, c ## 1; \ | ||
231 | \ | ||
232 | vpxor RY1, d ## 1, d ## 1; \ | ||
233 | vpsrld $1, d ## 1, RT0; \ | ||
234 | vpslld $31, d ## 1, d ## 1; \ | ||
235 | vpor RT0, d ## 1, d ## 1; | ||
236 | |||
237 | #define decrypt_round16(a, b, c, d, nk) \ | ||
238 | g1_16(a, RX); \ | ||
239 | \ | ||
240 | vpslld $1, a ## 0, RT0; \ | ||
241 | vpsrld $31, a ## 0, a ## 0; \ | ||
242 | vpor RT0, a ## 0, a ## 0; \ | ||
243 | \ | ||
244 | vpslld $1, a ## 1, RT0; \ | ||
245 | vpsrld $31, a ## 1, a ## 1; \ | ||
246 | vpor RT0, a ## 1, a ## 1; \ | ||
247 | \ | ||
248 | g2_16(b, RY); \ | ||
249 | \ | ||
250 | decrypt_round_end16(a, b, c, d, nk); | ||
251 | |||
252 | #define decrypt_round_first16(a, b, c, d, nk) \ | ||
253 | vpslld $1, c ## 0, RT0; \ | ||
254 | vpsrld $31, c ## 0, c ## 0; \ | ||
255 | vpor RT0, c ## 0, c ## 0; \ | ||
256 | \ | ||
257 | vpslld $1, c ## 1, RT0; \ | ||
258 | vpsrld $31, c ## 1, c ## 1; \ | ||
259 | vpor RT0, c ## 1, c ## 1; \ | ||
260 | \ | ||
261 | decrypt_round16(a, b, c, d, nk) | ||
262 | |||
263 | #define decrypt_round_last16(a, b, c, d, nk) \ | ||
264 | g1_16(a, RX); \ | ||
265 | \ | ||
266 | g2_16(b, RY); \ | ||
267 | \ | ||
268 | decrypt_round_end16(a, b, c, d, nk); | ||
269 | |||
270 | #define encrypt_cycle16() \ | ||
271 | encrypt_round16(RA, RB, RC, RD, 0); \ | ||
272 | encrypt_round16(RC, RD, RA, RB, 8); | ||
273 | |||
274 | #define encrypt_cycle_first16() \ | ||
275 | encrypt_round_first16(RA, RB, RC, RD, 0); \ | ||
276 | encrypt_round16(RC, RD, RA, RB, 8); | ||
277 | |||
278 | #define encrypt_cycle_last16() \ | ||
279 | encrypt_round16(RA, RB, RC, RD, 0); \ | ||
280 | encrypt_round_last16(RC, RD, RA, RB, 8); | ||
281 | |||
282 | #define decrypt_cycle16(n) \ | ||
283 | decrypt_round16(RC, RD, RA, RB, 8); \ | ||
284 | decrypt_round16(RA, RB, RC, RD, 0); | ||
285 | |||
286 | #define decrypt_cycle_first16(n) \ | ||
287 | decrypt_round_first16(RC, RD, RA, RB, 8); \ | ||
288 | decrypt_round16(RA, RB, RC, RD, 0); | ||
289 | |||
290 | #define decrypt_cycle_last16(n) \ | ||
291 | decrypt_round16(RC, RD, RA, RB, 8); \ | ||
292 | decrypt_round_last16(RA, RB, RC, RD, 0); | ||
293 | |||
294 | #define transpose_4x4(x0,x1,x2,x3,t1,t2) \ | ||
295 | vpunpckhdq x1, x0, t2; \ | ||
296 | vpunpckldq x1, x0, x0; \ | ||
297 | \ | ||
298 | vpunpckldq x3, x2, t1; \ | ||
299 | vpunpckhdq x3, x2, x2; \ | ||
300 | \ | ||
301 | vpunpckhqdq t1, x0, x1; \ | ||
302 | vpunpcklqdq t1, x0, x0; \ | ||
303 | \ | ||
304 | vpunpckhqdq x2, t2, x3; \ | ||
305 | vpunpcklqdq x2, t2, x2; | ||
306 | |||
307 | #define read_blocks8(offs,a,b,c,d) \ | ||
308 | transpose_4x4(a, b, c, d, RX0, RY0); | ||
309 | |||
310 | #define write_blocks8(offs,a,b,c,d) \ | ||
311 | transpose_4x4(a, b, c, d, RX0, RY0); | ||
312 | |||
313 | #define inpack_enc8(a,b,c,d) \ | ||
314 | vpbroadcastd 4*0(RW), RT0; \ | ||
315 | vpxor RT0, a, a; \ | ||
316 | \ | ||
317 | vpbroadcastd 4*1(RW), RT0; \ | ||
318 | vpxor RT0, b, b; \ | ||
319 | \ | ||
320 | vpbroadcastd 4*2(RW), RT0; \ | ||
321 | vpxor RT0, c, c; \ | ||
322 | \ | ||
323 | vpbroadcastd 4*3(RW), RT0; \ | ||
324 | vpxor RT0, d, d; | ||
325 | |||
326 | #define outunpack_enc8(a,b,c,d) \ | ||
327 | vpbroadcastd 4*4(RW), RX0; \ | ||
328 | vpbroadcastd 4*5(RW), RY0; \ | ||
329 | vpxor RX0, c, RX0; \ | ||
330 | vpxor RY0, d, RY0; \ | ||
331 | \ | ||
332 | vpbroadcastd 4*6(RW), RT0; \ | ||
333 | vpxor RT0, a, c; \ | ||
334 | vpbroadcastd 4*7(RW), RT0; \ | ||
335 | vpxor RT0, b, d; \ | ||
336 | \ | ||
337 | vmovdqa RX0, a; \ | ||
338 | vmovdqa RY0, b; | ||
339 | |||
340 | #define inpack_dec8(a,b,c,d) \ | ||
341 | vpbroadcastd 4*4(RW), RX0; \ | ||
342 | vpbroadcastd 4*5(RW), RY0; \ | ||
343 | vpxor RX0, a, RX0; \ | ||
344 | vpxor RY0, b, RY0; \ | ||
345 | \ | ||
346 | vpbroadcastd 4*6(RW), RT0; \ | ||
347 | vpxor RT0, c, a; \ | ||
348 | vpbroadcastd 4*7(RW), RT0; \ | ||
349 | vpxor RT0, d, b; \ | ||
350 | \ | ||
351 | vmovdqa RX0, c; \ | ||
352 | vmovdqa RY0, d; | ||
353 | |||
354 | #define outunpack_dec8(a,b,c,d) \ | ||
355 | vpbroadcastd 4*0(RW), RT0; \ | ||
356 | vpxor RT0, a, a; \ | ||
357 | \ | ||
358 | vpbroadcastd 4*1(RW), RT0; \ | ||
359 | vpxor RT0, b, b; \ | ||
360 | \ | ||
361 | vpbroadcastd 4*2(RW), RT0; \ | ||
362 | vpxor RT0, c, c; \ | ||
363 | \ | ||
364 | vpbroadcastd 4*3(RW), RT0; \ | ||
365 | vpxor RT0, d, d; | ||
366 | |||
367 | #define read_blocks16(a,b,c,d) \ | ||
368 | read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
369 | read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); | ||
370 | |||
371 | #define write_blocks16(a,b,c,d) \ | ||
372 | write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
373 | write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); | ||
374 | |||
375 | #define xor_blocks16(a,b,c,d) \ | ||
376 | xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
377 | xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); | ||
378 | |||
379 | #define inpack_enc16(a,b,c,d) \ | ||
380 | inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
381 | inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); | ||
382 | |||
383 | #define outunpack_enc16(a,b,c,d) \ | ||
384 | outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
385 | outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); | ||
386 | |||
387 | #define inpack_dec16(a,b,c,d) \ | ||
388 | inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
389 | inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); | ||
390 | |||
391 | #define outunpack_dec16(a,b,c,d) \ | ||
392 | outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
393 | outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); | ||
394 | |||
395 | .align 8 | ||
396 | __twofish_enc_blk16: | ||
397 | /* input: | ||
398 | * %rdi: ctx, CTX | ||
399 | * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext | ||
400 | * output: | ||
401 | * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext | ||
402 | */ | ||
403 | init_round_constants(); | ||
404 | |||
405 | read_blocks16(RA, RB, RC, RD); | ||
406 | inpack_enc16(RA, RB, RC, RD); | ||
407 | |||
408 | xorl RROUNDd, RROUNDd; | ||
409 | encrypt_cycle_first16(); | ||
410 | movl $2, RROUNDd; | ||
411 | |||
412 | .align 4 | ||
413 | .L__enc_loop: | ||
414 | encrypt_cycle16(); | ||
415 | |||
416 | addl $2, RROUNDd; | ||
417 | cmpl $14, RROUNDd; | ||
418 | jne .L__enc_loop; | ||
419 | |||
420 | encrypt_cycle_last16(); | ||
421 | |||
422 | outunpack_enc16(RA, RB, RC, RD); | ||
423 | write_blocks16(RA, RB, RC, RD); | ||
424 | |||
425 | ret; | ||
426 | ENDPROC(__twofish_enc_blk16) | ||
427 | |||
428 | .align 8 | ||
429 | __twofish_dec_blk16: | ||
430 | /* input: | ||
431 | * %rdi: ctx, CTX | ||
432 | * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext | ||
433 | * output: | ||
434 | * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext | ||
435 | */ | ||
436 | init_round_constants(); | ||
437 | |||
438 | read_blocks16(RA, RB, RC, RD); | ||
439 | inpack_dec16(RA, RB, RC, RD); | ||
440 | |||
441 | movl $14, RROUNDd; | ||
442 | decrypt_cycle_first16(); | ||
443 | movl $12, RROUNDd; | ||
444 | |||
445 | .align 4 | ||
446 | .L__dec_loop: | ||
447 | decrypt_cycle16(); | ||
448 | |||
449 | addl $-2, RROUNDd; | ||
450 | jnz .L__dec_loop; | ||
451 | |||
452 | decrypt_cycle_last16(); | ||
453 | |||
454 | outunpack_dec16(RA, RB, RC, RD); | ||
455 | write_blocks16(RA, RB, RC, RD); | ||
456 | |||
457 | ret; | ||
458 | ENDPROC(__twofish_dec_blk16) | ||
459 | |||
460 | ENTRY(twofish_ecb_enc_16way) | ||
461 | /* input: | ||
462 | * %rdi: ctx, CTX | ||
463 | * %rsi: dst | ||
464 | * %rdx: src | ||
465 | */ | ||
466 | |||
467 | vzeroupper; | ||
468 | pushq %r12; | ||
469 | |||
470 | load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
471 | |||
472 | call __twofish_enc_blk16; | ||
473 | |||
474 | store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
475 | |||
476 | popq %r12; | ||
477 | vzeroupper; | ||
478 | |||
479 | ret; | ||
480 | ENDPROC(twofish_ecb_enc_16way) | ||
481 | |||
482 | ENTRY(twofish_ecb_dec_16way) | ||
483 | /* input: | ||
484 | * %rdi: ctx, CTX | ||
485 | * %rsi: dst | ||
486 | * %rdx: src | ||
487 | */ | ||
488 | |||
489 | vzeroupper; | ||
490 | pushq %r12; | ||
491 | |||
492 | load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
493 | |||
494 | call __twofish_dec_blk16; | ||
495 | |||
496 | store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
497 | |||
498 | popq %r12; | ||
499 | vzeroupper; | ||
500 | |||
501 | ret; | ||
502 | ENDPROC(twofish_ecb_dec_16way) | ||
503 | |||
504 | ENTRY(twofish_cbc_dec_16way) | ||
505 | /* input: | ||
506 | * %rdi: ctx, CTX | ||
507 | * %rsi: dst | ||
508 | * %rdx: src | ||
509 | */ | ||
510 | |||
511 | vzeroupper; | ||
512 | pushq %r12; | ||
513 | |||
514 | load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
515 | |||
516 | call __twofish_dec_blk16; | ||
517 | |||
518 | store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1, | ||
519 | RX0); | ||
520 | |||
521 | popq %r12; | ||
522 | vzeroupper; | ||
523 | |||
524 | ret; | ||
525 | ENDPROC(twofish_cbc_dec_16way) | ||
526 | |||
527 | ENTRY(twofish_ctr_16way) | ||
528 | /* input: | ||
529 | * %rdi: ctx, CTX | ||
530 | * %rsi: dst (16 blocks) | ||
531 | * %rdx: src (16 blocks) | ||
532 | * %rcx: iv (little endian, 128bit) | ||
533 | */ | ||
534 | |||
535 | vzeroupper; | ||
536 | pushq %r12; | ||
537 | |||
538 | load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1, | ||
539 | RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT, | ||
540 | RBYTE); | ||
541 | |||
542 | call __twofish_enc_blk16; | ||
543 | |||
544 | store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
545 | |||
546 | popq %r12; | ||
547 | vzeroupper; | ||
548 | |||
549 | ret; | ||
550 | ENDPROC(twofish_ctr_16way) | ||
551 | |||
552 | .align 8 | ||
553 | twofish_xts_crypt_16way: | ||
554 | /* input: | ||
555 | * %rdi: ctx, CTX | ||
556 | * %rsi: dst (16 blocks) | ||
557 | * %rdx: src (16 blocks) | ||
558 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
559 | * %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16 | ||
560 | */ | ||
561 | |||
562 | vzeroupper; | ||
563 | pushq %r12; | ||
564 | |||
565 | load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, | ||
566 | RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT, | ||
567 | .Lxts_gf128mul_and_shl1_mask_0, | ||
568 | .Lxts_gf128mul_and_shl1_mask_1); | ||
569 | |||
570 | call *%r8; | ||
571 | |||
572 | store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
573 | |||
574 | popq %r12; | ||
575 | vzeroupper; | ||
576 | |||
577 | ret; | ||
578 | ENDPROC(twofish_xts_crypt_16way) | ||
579 | |||
580 | ENTRY(twofish_xts_enc_16way) | ||
581 | /* input: | ||
582 | * %rdi: ctx, CTX | ||
583 | * %rsi: dst (16 blocks) | ||
584 | * %rdx: src (16 blocks) | ||
585 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
586 | */ | ||
587 | leaq __twofish_enc_blk16, %r8; | ||
588 | jmp twofish_xts_crypt_16way; | ||
589 | ENDPROC(twofish_xts_enc_16way) | ||
590 | |||
591 | ENTRY(twofish_xts_dec_16way) | ||
592 | /* input: | ||
593 | * %rdi: ctx, CTX | ||
594 | * %rsi: dst (16 blocks) | ||
595 | * %rdx: src (16 blocks) | ||
596 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
597 | */ | ||
598 | leaq __twofish_dec_blk16, %r8; | ||
599 | jmp twofish_xts_crypt_16way; | ||
600 | ENDPROC(twofish_xts_dec_16way) | ||
diff --git a/arch/x86/crypto/twofish_avx2_glue.c b/arch/x86/crypto/twofish_avx2_glue.c deleted file mode 100644 index ce33b5be64ee..000000000000 --- a/arch/x86/crypto/twofish_avx2_glue.c +++ /dev/null | |||
@@ -1,584 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for x86_64/AVX2 assembler optimized version of Twofish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/crypto.h> | ||
16 | #include <linux/err.h> | ||
17 | #include <crypto/algapi.h> | ||
18 | #include <crypto/ctr.h> | ||
19 | #include <crypto/twofish.h> | ||
20 | #include <crypto/lrw.h> | ||
21 | #include <crypto/xts.h> | ||
22 | #include <asm/xcr.h> | ||
23 | #include <asm/xsave.h> | ||
24 | #include <asm/crypto/twofish.h> | ||
25 | #include <asm/crypto/ablk_helper.h> | ||
26 | #include <asm/crypto/glue_helper.h> | ||
27 | #include <crypto/scatterwalk.h> | ||
28 | |||
29 | #define TF_AVX2_PARALLEL_BLOCKS 16 | ||
30 | |||
31 | /* 16-way AVX2 parallel cipher functions */ | ||
32 | asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst, | ||
33 | const u8 *src); | ||
34 | asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst, | ||
35 | const u8 *src); | ||
36 | asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src); | ||
37 | |||
38 | asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src, | ||
39 | le128 *iv); | ||
40 | |||
41 | asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst, | ||
42 | const u8 *src, le128 *iv); | ||
43 | asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst, | ||
44 | const u8 *src, le128 *iv); | ||
45 | |||
46 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
47 | const u8 *src) | ||
48 | { | ||
49 | __twofish_enc_blk_3way(ctx, dst, src, false); | ||
50 | } | ||
51 | |||
52 | static const struct common_glue_ctx twofish_enc = { | ||
53 | .num_funcs = 4, | ||
54 | .fpu_blocks_limit = 8, | ||
55 | |||
56 | .funcs = { { | ||
57 | .num_blocks = 16, | ||
58 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) } | ||
59 | }, { | ||
60 | .num_blocks = 8, | ||
61 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) } | ||
62 | }, { | ||
63 | .num_blocks = 3, | ||
64 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
65 | }, { | ||
66 | .num_blocks = 1, | ||
67 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
68 | } } | ||
69 | }; | ||
70 | |||
71 | static const struct common_glue_ctx twofish_ctr = { | ||
72 | .num_funcs = 4, | ||
73 | .fpu_blocks_limit = 8, | ||
74 | |||
75 | .funcs = { { | ||
76 | .num_blocks = 16, | ||
77 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) } | ||
78 | }, { | ||
79 | .num_blocks = 8, | ||
80 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) } | ||
81 | }, { | ||
82 | .num_blocks = 3, | ||
83 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } | ||
84 | }, { | ||
85 | .num_blocks = 1, | ||
86 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } | ||
87 | } } | ||
88 | }; | ||
89 | |||
90 | static const struct common_glue_ctx twofish_enc_xts = { | ||
91 | .num_funcs = 3, | ||
92 | .fpu_blocks_limit = 8, | ||
93 | |||
94 | .funcs = { { | ||
95 | .num_blocks = 16, | ||
96 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) } | ||
97 | }, { | ||
98 | .num_blocks = 8, | ||
99 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) } | ||
100 | }, { | ||
101 | .num_blocks = 1, | ||
102 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) } | ||
103 | } } | ||
104 | }; | ||
105 | |||
106 | static const struct common_glue_ctx twofish_dec = { | ||
107 | .num_funcs = 4, | ||
108 | .fpu_blocks_limit = 8, | ||
109 | |||
110 | .funcs = { { | ||
111 | .num_blocks = 16, | ||
112 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) } | ||
113 | }, { | ||
114 | .num_blocks = 8, | ||
115 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) } | ||
116 | }, { | ||
117 | .num_blocks = 3, | ||
118 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
119 | }, { | ||
120 | .num_blocks = 1, | ||
121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
122 | } } | ||
123 | }; | ||
124 | |||
125 | static const struct common_glue_ctx twofish_dec_cbc = { | ||
126 | .num_funcs = 4, | ||
127 | .fpu_blocks_limit = 8, | ||
128 | |||
129 | .funcs = { { | ||
130 | .num_blocks = 16, | ||
131 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) } | ||
132 | }, { | ||
133 | .num_blocks = 8, | ||
134 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) } | ||
135 | }, { | ||
136 | .num_blocks = 3, | ||
137 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
138 | }, { | ||
139 | .num_blocks = 1, | ||
140 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
141 | } } | ||
142 | }; | ||
143 | |||
144 | static const struct common_glue_ctx twofish_dec_xts = { | ||
145 | .num_funcs = 3, | ||
146 | .fpu_blocks_limit = 8, | ||
147 | |||
148 | .funcs = { { | ||
149 | .num_blocks = 16, | ||
150 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) } | ||
151 | }, { | ||
152 | .num_blocks = 8, | ||
153 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) } | ||
154 | }, { | ||
155 | .num_blocks = 1, | ||
156 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) } | ||
157 | } } | ||
158 | }; | ||
159 | |||
160 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
161 | struct scatterlist *src, unsigned int nbytes) | ||
162 | { | ||
163 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); | ||
164 | } | ||
165 | |||
166 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
167 | struct scatterlist *src, unsigned int nbytes) | ||
168 | { | ||
169 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); | ||
170 | } | ||
171 | |||
172 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
173 | struct scatterlist *src, unsigned int nbytes) | ||
174 | { | ||
175 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, | ||
176 | dst, src, nbytes); | ||
177 | } | ||
178 | |||
179 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
180 | struct scatterlist *src, unsigned int nbytes) | ||
181 | { | ||
182 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, | ||
183 | nbytes); | ||
184 | } | ||
185 | |||
186 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
187 | struct scatterlist *src, unsigned int nbytes) | ||
188 | { | ||
189 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); | ||
190 | } | ||
191 | |||
192 | static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
193 | { | ||
194 | /* since reusing AVX functions, starts using FPU at 8 parallel blocks */ | ||
195 | return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes); | ||
196 | } | ||
197 | |||
198 | static inline void twofish_fpu_end(bool fpu_enabled) | ||
199 | { | ||
200 | glue_fpu_end(fpu_enabled); | ||
201 | } | ||
202 | |||
203 | struct crypt_priv { | ||
204 | struct twofish_ctx *ctx; | ||
205 | bool fpu_enabled; | ||
206 | }; | ||
207 | |||
208 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
209 | { | ||
210 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
211 | struct crypt_priv *ctx = priv; | ||
212 | int i; | ||
213 | |||
214 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
215 | |||
216 | while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { | ||
217 | twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst); | ||
218 | srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
219 | nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
220 | } | ||
221 | |||
222 | while (nbytes >= 8 * bsize) { | ||
223 | twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); | ||
224 | srcdst += bsize * 8; | ||
225 | nbytes -= bsize * 8; | ||
226 | } | ||
227 | |||
228 | while (nbytes >= 3 * bsize) { | ||
229 | twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); | ||
230 | srcdst += bsize * 3; | ||
231 | nbytes -= bsize * 3; | ||
232 | } | ||
233 | |||
234 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
235 | twofish_enc_blk(ctx->ctx, srcdst, srcdst); | ||
236 | } | ||
237 | |||
238 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
239 | { | ||
240 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
241 | struct crypt_priv *ctx = priv; | ||
242 | int i; | ||
243 | |||
244 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
245 | |||
246 | while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { | ||
247 | twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst); | ||
248 | srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
249 | nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
250 | } | ||
251 | |||
252 | while (nbytes >= 8 * bsize) { | ||
253 | twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); | ||
254 | srcdst += bsize * 8; | ||
255 | nbytes -= bsize * 8; | ||
256 | } | ||
257 | |||
258 | while (nbytes >= 3 * bsize) { | ||
259 | twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); | ||
260 | srcdst += bsize * 3; | ||
261 | nbytes -= bsize * 3; | ||
262 | } | ||
263 | |||
264 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
265 | twofish_dec_blk(ctx->ctx, srcdst, srcdst); | ||
266 | } | ||
267 | |||
268 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
269 | struct scatterlist *src, unsigned int nbytes) | ||
270 | { | ||
271 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
272 | be128 buf[TF_AVX2_PARALLEL_BLOCKS]; | ||
273 | struct crypt_priv crypt_ctx = { | ||
274 | .ctx = &ctx->twofish_ctx, | ||
275 | .fpu_enabled = false, | ||
276 | }; | ||
277 | struct lrw_crypt_req req = { | ||
278 | .tbuf = buf, | ||
279 | .tbuflen = sizeof(buf), | ||
280 | |||
281 | .table_ctx = &ctx->lrw_table, | ||
282 | .crypt_ctx = &crypt_ctx, | ||
283 | .crypt_fn = encrypt_callback, | ||
284 | }; | ||
285 | int ret; | ||
286 | |||
287 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
288 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
289 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
290 | |||
291 | return ret; | ||
292 | } | ||
293 | |||
294 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
295 | struct scatterlist *src, unsigned int nbytes) | ||
296 | { | ||
297 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
298 | be128 buf[TF_AVX2_PARALLEL_BLOCKS]; | ||
299 | struct crypt_priv crypt_ctx = { | ||
300 | .ctx = &ctx->twofish_ctx, | ||
301 | .fpu_enabled = false, | ||
302 | }; | ||
303 | struct lrw_crypt_req req = { | ||
304 | .tbuf = buf, | ||
305 | .tbuflen = sizeof(buf), | ||
306 | |||
307 | .table_ctx = &ctx->lrw_table, | ||
308 | .crypt_ctx = &crypt_ctx, | ||
309 | .crypt_fn = decrypt_callback, | ||
310 | }; | ||
311 | int ret; | ||
312 | |||
313 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
314 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
315 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
316 | |||
317 | return ret; | ||
318 | } | ||
319 | |||
320 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
321 | struct scatterlist *src, unsigned int nbytes) | ||
322 | { | ||
323 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
324 | |||
325 | return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes, | ||
326 | XTS_TWEAK_CAST(twofish_enc_blk), | ||
327 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
328 | } | ||
329 | |||
330 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
331 | struct scatterlist *src, unsigned int nbytes) | ||
332 | { | ||
333 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
334 | |||
335 | return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes, | ||
336 | XTS_TWEAK_CAST(twofish_enc_blk), | ||
337 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
338 | } | ||
339 | |||
340 | static struct crypto_alg tf_algs[10] = { { | ||
341 | .cra_name = "__ecb-twofish-avx2", | ||
342 | .cra_driver_name = "__driver-ecb-twofish-avx2", | ||
343 | .cra_priority = 0, | ||
344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
345 | .cra_blocksize = TF_BLOCK_SIZE, | ||
346 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
347 | .cra_alignmask = 0, | ||
348 | .cra_type = &crypto_blkcipher_type, | ||
349 | .cra_module = THIS_MODULE, | ||
350 | .cra_u = { | ||
351 | .blkcipher = { | ||
352 | .min_keysize = TF_MIN_KEY_SIZE, | ||
353 | .max_keysize = TF_MAX_KEY_SIZE, | ||
354 | .setkey = twofish_setkey, | ||
355 | .encrypt = ecb_encrypt, | ||
356 | .decrypt = ecb_decrypt, | ||
357 | }, | ||
358 | }, | ||
359 | }, { | ||
360 | .cra_name = "__cbc-twofish-avx2", | ||
361 | .cra_driver_name = "__driver-cbc-twofish-avx2", | ||
362 | .cra_priority = 0, | ||
363 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
364 | .cra_blocksize = TF_BLOCK_SIZE, | ||
365 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
366 | .cra_alignmask = 0, | ||
367 | .cra_type = &crypto_blkcipher_type, | ||
368 | .cra_module = THIS_MODULE, | ||
369 | .cra_u = { | ||
370 | .blkcipher = { | ||
371 | .min_keysize = TF_MIN_KEY_SIZE, | ||
372 | .max_keysize = TF_MAX_KEY_SIZE, | ||
373 | .setkey = twofish_setkey, | ||
374 | .encrypt = cbc_encrypt, | ||
375 | .decrypt = cbc_decrypt, | ||
376 | }, | ||
377 | }, | ||
378 | }, { | ||
379 | .cra_name = "__ctr-twofish-avx2", | ||
380 | .cra_driver_name = "__driver-ctr-twofish-avx2", | ||
381 | .cra_priority = 0, | ||
382 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
383 | .cra_blocksize = 1, | ||
384 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
385 | .cra_alignmask = 0, | ||
386 | .cra_type = &crypto_blkcipher_type, | ||
387 | .cra_module = THIS_MODULE, | ||
388 | .cra_u = { | ||
389 | .blkcipher = { | ||
390 | .min_keysize = TF_MIN_KEY_SIZE, | ||
391 | .max_keysize = TF_MAX_KEY_SIZE, | ||
392 | .ivsize = TF_BLOCK_SIZE, | ||
393 | .setkey = twofish_setkey, | ||
394 | .encrypt = ctr_crypt, | ||
395 | .decrypt = ctr_crypt, | ||
396 | }, | ||
397 | }, | ||
398 | }, { | ||
399 | .cra_name = "__lrw-twofish-avx2", | ||
400 | .cra_driver_name = "__driver-lrw-twofish-avx2", | ||
401 | .cra_priority = 0, | ||
402 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
403 | .cra_blocksize = TF_BLOCK_SIZE, | ||
404 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | ||
405 | .cra_alignmask = 0, | ||
406 | .cra_type = &crypto_blkcipher_type, | ||
407 | .cra_module = THIS_MODULE, | ||
408 | .cra_exit = lrw_twofish_exit_tfm, | ||
409 | .cra_u = { | ||
410 | .blkcipher = { | ||
411 | .min_keysize = TF_MIN_KEY_SIZE + | ||
412 | TF_BLOCK_SIZE, | ||
413 | .max_keysize = TF_MAX_KEY_SIZE + | ||
414 | TF_BLOCK_SIZE, | ||
415 | .ivsize = TF_BLOCK_SIZE, | ||
416 | .setkey = lrw_twofish_setkey, | ||
417 | .encrypt = lrw_encrypt, | ||
418 | .decrypt = lrw_decrypt, | ||
419 | }, | ||
420 | }, | ||
421 | }, { | ||
422 | .cra_name = "__xts-twofish-avx2", | ||
423 | .cra_driver_name = "__driver-xts-twofish-avx2", | ||
424 | .cra_priority = 0, | ||
425 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
426 | .cra_blocksize = TF_BLOCK_SIZE, | ||
427 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | ||
428 | .cra_alignmask = 0, | ||
429 | .cra_type = &crypto_blkcipher_type, | ||
430 | .cra_module = THIS_MODULE, | ||
431 | .cra_u = { | ||
432 | .blkcipher = { | ||
433 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
434 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
435 | .ivsize = TF_BLOCK_SIZE, | ||
436 | .setkey = xts_twofish_setkey, | ||
437 | .encrypt = xts_encrypt, | ||
438 | .decrypt = xts_decrypt, | ||
439 | }, | ||
440 | }, | ||
441 | }, { | ||
442 | .cra_name = "ecb(twofish)", | ||
443 | .cra_driver_name = "ecb-twofish-avx2", | ||
444 | .cra_priority = 500, | ||
445 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
446 | .cra_blocksize = TF_BLOCK_SIZE, | ||
447 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
448 | .cra_alignmask = 0, | ||
449 | .cra_type = &crypto_ablkcipher_type, | ||
450 | .cra_module = THIS_MODULE, | ||
451 | .cra_init = ablk_init, | ||
452 | .cra_exit = ablk_exit, | ||
453 | .cra_u = { | ||
454 | .ablkcipher = { | ||
455 | .min_keysize = TF_MIN_KEY_SIZE, | ||
456 | .max_keysize = TF_MAX_KEY_SIZE, | ||
457 | .setkey = ablk_set_key, | ||
458 | .encrypt = ablk_encrypt, | ||
459 | .decrypt = ablk_decrypt, | ||
460 | }, | ||
461 | }, | ||
462 | }, { | ||
463 | .cra_name = "cbc(twofish)", | ||
464 | .cra_driver_name = "cbc-twofish-avx2", | ||
465 | .cra_priority = 500, | ||
466 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
467 | .cra_blocksize = TF_BLOCK_SIZE, | ||
468 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
469 | .cra_alignmask = 0, | ||
470 | .cra_type = &crypto_ablkcipher_type, | ||
471 | .cra_module = THIS_MODULE, | ||
472 | .cra_init = ablk_init, | ||
473 | .cra_exit = ablk_exit, | ||
474 | .cra_u = { | ||
475 | .ablkcipher = { | ||
476 | .min_keysize = TF_MIN_KEY_SIZE, | ||
477 | .max_keysize = TF_MAX_KEY_SIZE, | ||
478 | .ivsize = TF_BLOCK_SIZE, | ||
479 | .setkey = ablk_set_key, | ||
480 | .encrypt = __ablk_encrypt, | ||
481 | .decrypt = ablk_decrypt, | ||
482 | }, | ||
483 | }, | ||
484 | }, { | ||
485 | .cra_name = "ctr(twofish)", | ||
486 | .cra_driver_name = "ctr-twofish-avx2", | ||
487 | .cra_priority = 500, | ||
488 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
489 | .cra_blocksize = 1, | ||
490 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
491 | .cra_alignmask = 0, | ||
492 | .cra_type = &crypto_ablkcipher_type, | ||
493 | .cra_module = THIS_MODULE, | ||
494 | .cra_init = ablk_init, | ||
495 | .cra_exit = ablk_exit, | ||
496 | .cra_u = { | ||
497 | .ablkcipher = { | ||
498 | .min_keysize = TF_MIN_KEY_SIZE, | ||
499 | .max_keysize = TF_MAX_KEY_SIZE, | ||
500 | .ivsize = TF_BLOCK_SIZE, | ||
501 | .setkey = ablk_set_key, | ||
502 | .encrypt = ablk_encrypt, | ||
503 | .decrypt = ablk_encrypt, | ||
504 | .geniv = "chainiv", | ||
505 | }, | ||
506 | }, | ||
507 | }, { | ||
508 | .cra_name = "lrw(twofish)", | ||
509 | .cra_driver_name = "lrw-twofish-avx2", | ||
510 | .cra_priority = 500, | ||
511 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
512 | .cra_blocksize = TF_BLOCK_SIZE, | ||
513 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
514 | .cra_alignmask = 0, | ||
515 | .cra_type = &crypto_ablkcipher_type, | ||
516 | .cra_module = THIS_MODULE, | ||
517 | .cra_init = ablk_init, | ||
518 | .cra_exit = ablk_exit, | ||
519 | .cra_u = { | ||
520 | .ablkcipher = { | ||
521 | .min_keysize = TF_MIN_KEY_SIZE + | ||
522 | TF_BLOCK_SIZE, | ||
523 | .max_keysize = TF_MAX_KEY_SIZE + | ||
524 | TF_BLOCK_SIZE, | ||
525 | .ivsize = TF_BLOCK_SIZE, | ||
526 | .setkey = ablk_set_key, | ||
527 | .encrypt = ablk_encrypt, | ||
528 | .decrypt = ablk_decrypt, | ||
529 | }, | ||
530 | }, | ||
531 | }, { | ||
532 | .cra_name = "xts(twofish)", | ||
533 | .cra_driver_name = "xts-twofish-avx2", | ||
534 | .cra_priority = 500, | ||
535 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
536 | .cra_blocksize = TF_BLOCK_SIZE, | ||
537 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
538 | .cra_alignmask = 0, | ||
539 | .cra_type = &crypto_ablkcipher_type, | ||
540 | .cra_module = THIS_MODULE, | ||
541 | .cra_init = ablk_init, | ||
542 | .cra_exit = ablk_exit, | ||
543 | .cra_u = { | ||
544 | .ablkcipher = { | ||
545 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
546 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
547 | .ivsize = TF_BLOCK_SIZE, | ||
548 | .setkey = ablk_set_key, | ||
549 | .encrypt = ablk_encrypt, | ||
550 | .decrypt = ablk_decrypt, | ||
551 | }, | ||
552 | }, | ||
553 | } }; | ||
554 | |||
555 | static int __init init(void) | ||
556 | { | ||
557 | u64 xcr0; | ||
558 | |||
559 | if (!cpu_has_avx2 || !cpu_has_osxsave) { | ||
560 | pr_info("AVX2 instructions are not detected.\n"); | ||
561 | return -ENODEV; | ||
562 | } | ||
563 | |||
564 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
565 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
566 | pr_info("AVX2 detected but unusable.\n"); | ||
567 | return -ENODEV; | ||
568 | } | ||
569 | |||
570 | return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); | ||
571 | } | ||
572 | |||
573 | static void __exit fini(void) | ||
574 | { | ||
575 | crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); | ||
576 | } | ||
577 | |||
578 | module_init(init); | ||
579 | module_exit(fini); | ||
580 | |||
581 | MODULE_LICENSE("GPL"); | ||
582 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized"); | ||
583 | MODULE_ALIAS("twofish"); | ||
584 | MODULE_ALIAS("twofish-asm"); | ||
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 2047a562f6b3..a62ba541884e 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -50,26 +50,18 @@ | |||
50 | /* 8-way parallel cipher functions */ | 50 | /* 8-way parallel cipher functions */ |
51 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, | 51 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, |
52 | const u8 *src); | 52 | const u8 *src); |
53 | EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way); | ||
54 | |||
55 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, | 53 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, |
56 | const u8 *src); | 54 | const u8 *src); |
57 | EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way); | ||
58 | 55 | ||
59 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, | 56 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, |
60 | const u8 *src); | 57 | const u8 *src); |
61 | EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way); | ||
62 | |||
63 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, | 58 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, |
64 | const u8 *src, le128 *iv); | 59 | const u8 *src, le128 *iv); |
65 | EXPORT_SYMBOL_GPL(twofish_ctr_8way); | ||
66 | 60 | ||
67 | asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, | 61 | asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, |
68 | const u8 *src, le128 *iv); | 62 | const u8 *src, le128 *iv); |
69 | EXPORT_SYMBOL_GPL(twofish_xts_enc_8way); | ||
70 | asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, | 63 | asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, |
71 | const u8 *src, le128 *iv); | 64 | const u8 *src, le128 *iv); |
72 | EXPORT_SYMBOL_GPL(twofish_xts_dec_8way); | ||
73 | 65 | ||
74 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 66 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
75 | const u8 *src) | 67 | const u8 *src) |
@@ -77,19 +69,17 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | |||
77 | __twofish_enc_blk_3way(ctx, dst, src, false); | 69 | __twofish_enc_blk_3way(ctx, dst, src, false); |
78 | } | 70 | } |
79 | 71 | ||
80 | void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 72 | static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) |
81 | { | 73 | { |
82 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | 74 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, |
83 | GLUE_FUNC_CAST(twofish_enc_blk)); | 75 | GLUE_FUNC_CAST(twofish_enc_blk)); |
84 | } | 76 | } |
85 | EXPORT_SYMBOL_GPL(twofish_xts_enc); | ||
86 | 77 | ||
87 | void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 78 | static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) |
88 | { | 79 | { |
89 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | 80 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, |
90 | GLUE_FUNC_CAST(twofish_dec_blk)); | 81 | GLUE_FUNC_CAST(twofish_dec_blk)); |
91 | } | 82 | } |
92 | EXPORT_SYMBOL_GPL(twofish_xts_dec); | ||
93 | 83 | ||
94 | 84 | ||
95 | static const struct common_glue_ctx twofish_enc = { | 85 | static const struct common_glue_ctx twofish_enc = { |
diff --git a/arch/x86/include/asm/crypto/blowfish.h b/arch/x86/include/asm/crypto/blowfish.h deleted file mode 100644 index f097b2face10..000000000000 --- a/arch/x86/include/asm/crypto/blowfish.h +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | #ifndef ASM_X86_BLOWFISH_H | ||
2 | #define ASM_X86_BLOWFISH_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/blowfish.h> | ||
6 | |||
7 | #define BF_PARALLEL_BLOCKS 4 | ||
8 | |||
9 | /* regular block cipher functions */ | ||
10 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, | ||
11 | bool xor); | ||
12 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); | ||
13 | |||
14 | /* 4-way parallel cipher functions */ | ||
15 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
16 | const u8 *src, bool xor); | ||
17 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
18 | const u8 *src); | ||
19 | |||
20 | static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) | ||
21 | { | ||
22 | __blowfish_enc_blk(ctx, dst, src, false); | ||
23 | } | ||
24 | |||
25 | static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, | ||
26 | const u8 *src) | ||
27 | { | ||
28 | __blowfish_enc_blk(ctx, dst, src, true); | ||
29 | } | ||
30 | |||
31 | static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
32 | const u8 *src) | ||
33 | { | ||
34 | __blowfish_enc_blk_4way(ctx, dst, src, false); | ||
35 | } | ||
36 | |||
37 | static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, | ||
38 | const u8 *src) | ||
39 | { | ||
40 | __blowfish_enc_blk_4way(ctx, dst, src, true); | ||
41 | } | ||
42 | |||
43 | #endif | ||
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index e655c6029b45..878c51ceebb5 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h | |||
@@ -28,20 +28,6 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | |||
28 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 28 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
29 | const u8 *src); | 29 | const u8 *src); |
30 | 30 | ||
31 | /* 8-way parallel cipher functions */ | ||
32 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, | ||
33 | const u8 *src); | ||
34 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
35 | const u8 *src); | ||
36 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
37 | const u8 *src); | ||
38 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, | ||
39 | const u8 *src, le128 *iv); | ||
40 | asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, | ||
41 | const u8 *src, le128 *iv); | ||
42 | asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
43 | const u8 *src, le128 *iv); | ||
44 | |||
45 | /* helpers from twofish_x86_64-3way module */ | 31 | /* helpers from twofish_x86_64-3way module */ |
46 | extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); | 32 | extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); |
47 | extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, | 33 | extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, |
@@ -57,8 +43,4 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); | |||
57 | extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | 43 | extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
58 | unsigned int keylen); | 44 | unsigned int keylen); |
59 | 45 | ||
60 | /* helpers from twofish-avx module */ | ||
61 | extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
62 | extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
63 | |||
64 | #endif /* ASM_X86_TWOFISH_H */ | 46 | #endif /* ASM_X86_TWOFISH_H */ |
diff --git a/crypto/Kconfig b/crypto/Kconfig index bf8148e74e73..904ffe838567 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
@@ -376,6 +376,25 @@ config CRYPTO_CRC32_PCLMUL | |||
376 | which will enable any routine to use the CRC-32-IEEE 802.3 checksum | 376 | which will enable any routine to use the CRC-32-IEEE 802.3 checksum |
377 | and gain better performance as compared with the table implementation. | 377 | and gain better performance as compared with the table implementation. |
378 | 378 | ||
379 | config CRYPTO_CRCT10DIF | ||
380 | tristate "CRCT10DIF algorithm" | ||
381 | select CRYPTO_HASH | ||
382 | help | ||
383 | CRC T10 Data Integrity Field computation is being cast as | ||
384 | a crypto transform. This allows for faster crc t10 diff | ||
385 | transforms to be used if they are available. | ||
386 | |||
387 | config CRYPTO_CRCT10DIF_PCLMUL | ||
388 | tristate "CRCT10DIF PCLMULQDQ hardware acceleration" | ||
389 | depends on X86 && 64BIT && CRC_T10DIF | ||
390 | select CRYPTO_HASH | ||
391 | help | ||
392 | For x86_64 processors with SSE4.2 and PCLMULQDQ supported, | ||
393 | CRC T10 DIF PCLMULQDQ computation can be hardware | ||
394 | accelerated PCLMULQDQ instruction. This option will create | ||
395 | 'crct10dif-plcmul' module, which is faster when computing the | ||
396 | crct10dif checksum as compared with the generic table implementation. | ||
397 | |||
379 | config CRYPTO_GHASH | 398 | config CRYPTO_GHASH |
380 | tristate "GHASH digest algorithm" | 399 | tristate "GHASH digest algorithm" |
381 | select CRYPTO_GF128MUL | 400 | select CRYPTO_GF128MUL |
@@ -820,25 +839,6 @@ config CRYPTO_BLOWFISH_X86_64 | |||
820 | See also: | 839 | See also: |
821 | <http://www.schneier.com/blowfish.html> | 840 | <http://www.schneier.com/blowfish.html> |
822 | 841 | ||
823 | config CRYPTO_BLOWFISH_AVX2_X86_64 | ||
824 | tristate "Blowfish cipher algorithm (x86_64/AVX2)" | ||
825 | depends on X86 && 64BIT | ||
826 | depends on BROKEN | ||
827 | select CRYPTO_ALGAPI | ||
828 | select CRYPTO_CRYPTD | ||
829 | select CRYPTO_ABLK_HELPER_X86 | ||
830 | select CRYPTO_BLOWFISH_COMMON | ||
831 | select CRYPTO_BLOWFISH_X86_64 | ||
832 | help | ||
833 | Blowfish cipher algorithm (x86_64/AVX2), by Bruce Schneier. | ||
834 | |||
835 | This is a variable key length cipher which can use keys from 32 | ||
836 | bits to 448 bits in length. It's fast, simple and specifically | ||
837 | designed for use on "large microprocessors". | ||
838 | |||
839 | See also: | ||
840 | <http://www.schneier.com/blowfish.html> | ||
841 | |||
842 | config CRYPTO_CAMELLIA | 842 | config CRYPTO_CAMELLIA |
843 | tristate "Camellia cipher algorithms" | 843 | tristate "Camellia cipher algorithms" |
844 | depends on CRYPTO | 844 | depends on CRYPTO |
@@ -1297,31 +1297,6 @@ config CRYPTO_TWOFISH_AVX_X86_64 | |||
1297 | See also: | 1297 | See also: |
1298 | <http://www.schneier.com/twofish.html> | 1298 | <http://www.schneier.com/twofish.html> |
1299 | 1299 | ||
1300 | config CRYPTO_TWOFISH_AVX2_X86_64 | ||
1301 | tristate "Twofish cipher algorithm (x86_64/AVX2)" | ||
1302 | depends on X86 && 64BIT | ||
1303 | depends on BROKEN | ||
1304 | select CRYPTO_ALGAPI | ||
1305 | select CRYPTO_CRYPTD | ||
1306 | select CRYPTO_ABLK_HELPER_X86 | ||
1307 | select CRYPTO_GLUE_HELPER_X86 | ||
1308 | select CRYPTO_TWOFISH_COMMON | ||
1309 | select CRYPTO_TWOFISH_X86_64 | ||
1310 | select CRYPTO_TWOFISH_X86_64_3WAY | ||
1311 | select CRYPTO_TWOFISH_AVX_X86_64 | ||
1312 | select CRYPTO_LRW | ||
1313 | select CRYPTO_XTS | ||
1314 | help | ||
1315 | Twofish cipher algorithm (x86_64/AVX2). | ||
1316 | |||
1317 | Twofish was submitted as an AES (Advanced Encryption Standard) | ||
1318 | candidate cipher by researchers at CounterPane Systems. It is a | ||
1319 | 16 round block cipher supporting key sizes of 128, 192, and 256 | ||
1320 | bits. | ||
1321 | |||
1322 | See also: | ||
1323 | <http://www.schneier.com/twofish.html> | ||
1324 | |||
1325 | comment "Compression" | 1300 | comment "Compression" |
1326 | 1301 | ||
1327 | config CRYPTO_DEFLATE | 1302 | config CRYPTO_DEFLATE |
diff --git a/crypto/Makefile b/crypto/Makefile index a8e9b0fefbe9..62af87df8729 100644 --- a/crypto/Makefile +++ b/crypto/Makefile | |||
@@ -83,6 +83,7 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o | |||
83 | obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o | 83 | obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o |
84 | obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o | 84 | obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o |
85 | obj-$(CONFIG_CRYPTO_CRC32) += crc32.o | 85 | obj-$(CONFIG_CRYPTO_CRC32) += crc32.o |
86 | obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o | ||
86 | obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o | 87 | obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o |
87 | obj-$(CONFIG_CRYPTO_LZO) += lzo.o | 88 | obj-$(CONFIG_CRYPTO_LZO) += lzo.o |
88 | obj-$(CONFIG_CRYPTO_842) += 842.o | 89 | obj-$(CONFIG_CRYPTO_842) += 842.o |
diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c new file mode 100644 index 000000000000..92aca96d6b98 --- /dev/null +++ b/crypto/crct10dif.c | |||
@@ -0,0 +1,178 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * T10 Data Integrity Field CRC16 Crypto Transform | ||
5 | * | ||
6 | * Copyright (c) 2007 Oracle Corporation. All rights reserved. | ||
7 | * Written by Martin K. Petersen <martin.petersen@oracle.com> | ||
8 | * Copyright (C) 2013 Intel Corporation | ||
9 | * Author: Tim Chen <tim.c.chen@linux.intel.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
19 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
20 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
21 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
22 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
23 | * SOFTWARE. | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/crc-t10dif.h> | ||
30 | #include <crypto/internal/hash.h> | ||
31 | #include <linux/init.h> | ||
32 | #include <linux/string.h> | ||
33 | #include <linux/kernel.h> | ||
34 | |||
35 | struct chksum_desc_ctx { | ||
36 | __u16 crc; | ||
37 | }; | ||
38 | |||
39 | /* Table generated using the following polynomium: | ||
40 | * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 | ||
41 | * gt: 0x8bb7 | ||
42 | */ | ||
43 | static const __u16 t10_dif_crc_table[256] = { | ||
44 | 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, | ||
45 | 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, | ||
46 | 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, | ||
47 | 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, | ||
48 | 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, | ||
49 | 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, | ||
50 | 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, | ||
51 | 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, | ||
52 | 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, | ||
53 | 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, | ||
54 | 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, | ||
55 | 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, | ||
56 | 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, | ||
57 | 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, | ||
58 | 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, | ||
59 | 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, | ||
60 | 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, | ||
61 | 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, | ||
62 | 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, | ||
63 | 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, | ||
64 | 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, | ||
65 | 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, | ||
66 | 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, | ||
67 | 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, | ||
68 | 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, | ||
69 | 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, | ||
70 | 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, | ||
71 | 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, | ||
72 | 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, | ||
73 | 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, | ||
74 | 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, | ||
75 | 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 | ||
76 | }; | ||
77 | |||
78 | __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len) | ||
79 | { | ||
80 | unsigned int i; | ||
81 | |||
82 | for (i = 0 ; i < len ; i++) | ||
83 | crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; | ||
84 | |||
85 | return crc; | ||
86 | } | ||
87 | EXPORT_SYMBOL(crc_t10dif_generic); | ||
88 | |||
89 | /* | ||
90 | * Steps through buffer one byte at at time, calculates reflected | ||
91 | * crc using table. | ||
92 | */ | ||
93 | |||
94 | static int chksum_init(struct shash_desc *desc) | ||
95 | { | ||
96 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
97 | |||
98 | ctx->crc = 0; | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static int chksum_update(struct shash_desc *desc, const u8 *data, | ||
104 | unsigned int length) | ||
105 | { | ||
106 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
107 | |||
108 | ctx->crc = crc_t10dif_generic(ctx->crc, data, length); | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | static int chksum_final(struct shash_desc *desc, u8 *out) | ||
113 | { | ||
114 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
115 | |||
116 | *(__u16 *)out = ctx->crc; | ||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, | ||
121 | u8 *out) | ||
122 | { | ||
123 | *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | static int chksum_finup(struct shash_desc *desc, const u8 *data, | ||
128 | unsigned int len, u8 *out) | ||
129 | { | ||
130 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
131 | |||
132 | return __chksum_finup(&ctx->crc, data, len, out); | ||
133 | } | ||
134 | |||
135 | static int chksum_digest(struct shash_desc *desc, const u8 *data, | ||
136 | unsigned int length, u8 *out) | ||
137 | { | ||
138 | struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); | ||
139 | |||
140 | return __chksum_finup(&ctx->crc, data, length, out); | ||
141 | } | ||
142 | |||
143 | static struct shash_alg alg = { | ||
144 | .digestsize = CRC_T10DIF_DIGEST_SIZE, | ||
145 | .init = chksum_init, | ||
146 | .update = chksum_update, | ||
147 | .final = chksum_final, | ||
148 | .finup = chksum_finup, | ||
149 | .digest = chksum_digest, | ||
150 | .descsize = sizeof(struct chksum_desc_ctx), | ||
151 | .base = { | ||
152 | .cra_name = "crct10dif", | ||
153 | .cra_driver_name = "crct10dif-generic", | ||
154 | .cra_priority = 100, | ||
155 | .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, | ||
156 | .cra_module = THIS_MODULE, | ||
157 | } | ||
158 | }; | ||
159 | |||
160 | static int __init crct10dif_mod_init(void) | ||
161 | { | ||
162 | int ret; | ||
163 | |||
164 | ret = crypto_register_shash(&alg); | ||
165 | return ret; | ||
166 | } | ||
167 | |||
168 | static void __exit crct10dif_mod_fini(void) | ||
169 | { | ||
170 | crypto_unregister_shash(&alg); | ||
171 | } | ||
172 | |||
173 | module_init(crct10dif_mod_init); | ||
174 | module_exit(crct10dif_mod_fini); | ||
175 | |||
176 | MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); | ||
177 | MODULE_DESCRIPTION("T10 DIF CRC calculation."); | ||
178 | MODULE_LICENSE("GPL"); | ||
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 4c5862095679..6ed124f3ea0f 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c | |||
@@ -251,6 +251,7 @@ static struct shash_alg sha512_algs[2] = { { | |||
251 | .descsize = sizeof(struct sha512_state), | 251 | .descsize = sizeof(struct sha512_state), |
252 | .base = { | 252 | .base = { |
253 | .cra_name = "sha512", | 253 | .cra_name = "sha512", |
254 | .cra_driver_name = "sha512-generic", | ||
254 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | 255 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, |
255 | .cra_blocksize = SHA512_BLOCK_SIZE, | 256 | .cra_blocksize = SHA512_BLOCK_SIZE, |
256 | .cra_module = THIS_MODULE, | 257 | .cra_module = THIS_MODULE, |
@@ -263,6 +264,7 @@ static struct shash_alg sha512_algs[2] = { { | |||
263 | .descsize = sizeof(struct sha512_state), | 264 | .descsize = sizeof(struct sha512_state), |
264 | .base = { | 265 | .base = { |
265 | .cra_name = "sha384", | 266 | .cra_name = "sha384", |
267 | .cra_driver_name = "sha384-generic", | ||
266 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | 268 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, |
267 | .cra_blocksize = SHA384_BLOCK_SIZE, | 269 | .cra_blocksize = SHA384_BLOCK_SIZE, |
268 | .cra_module = THIS_MODULE, | 270 | .cra_module = THIS_MODULE, |
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 66d254ce0d11..25a5934f0e50 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c | |||
@@ -1174,6 +1174,10 @@ static int do_test(int m) | |||
1174 | ret += tcrypt_test("ghash"); | 1174 | ret += tcrypt_test("ghash"); |
1175 | break; | 1175 | break; |
1176 | 1176 | ||
1177 | case 47: | ||
1178 | ret += tcrypt_test("crct10dif"); | ||
1179 | break; | ||
1180 | |||
1177 | case 100: | 1181 | case 100: |
1178 | ret += tcrypt_test("hmac(md5)"); | 1182 | ret += tcrypt_test("hmac(md5)"); |
1179 | break; | 1183 | break; |
@@ -1498,6 +1502,10 @@ static int do_test(int m) | |||
1498 | test_hash_speed("crc32c", sec, generic_hash_speed_template); | 1502 | test_hash_speed("crc32c", sec, generic_hash_speed_template); |
1499 | if (mode > 300 && mode < 400) break; | 1503 | if (mode > 300 && mode < 400) break; |
1500 | 1504 | ||
1505 | case 320: | ||
1506 | test_hash_speed("crct10dif", sec, generic_hash_speed_template); | ||
1507 | if (mode > 300 && mode < 400) break; | ||
1508 | |||
1501 | case 399: | 1509 | case 399: |
1502 | break; | 1510 | break; |
1503 | 1511 | ||
diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 5823735cf381..2f00607039e2 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c | |||
@@ -184,8 +184,9 @@ static int do_one_async_hash_op(struct ahash_request *req, | |||
184 | return ret; | 184 | return ret; |
185 | } | 185 | } |
186 | 186 | ||
187 | static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, | 187 | static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, |
188 | unsigned int tcount, bool use_digest) | 188 | unsigned int tcount, bool use_digest, |
189 | const int align_offset) | ||
189 | { | 190 | { |
190 | const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); | 191 | const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); |
191 | unsigned int i, j, k, temp; | 192 | unsigned int i, j, k, temp; |
@@ -216,10 +217,15 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, | |||
216 | if (template[i].np) | 217 | if (template[i].np) |
217 | continue; | 218 | continue; |
218 | 219 | ||
220 | ret = -EINVAL; | ||
221 | if (WARN_ON(align_offset + template[i].psize > PAGE_SIZE)) | ||
222 | goto out; | ||
223 | |||
219 | j++; | 224 | j++; |
220 | memset(result, 0, 64); | 225 | memset(result, 0, 64); |
221 | 226 | ||
222 | hash_buff = xbuf[0]; | 227 | hash_buff = xbuf[0]; |
228 | hash_buff += align_offset; | ||
223 | 229 | ||
224 | memcpy(hash_buff, template[i].plaintext, template[i].psize); | 230 | memcpy(hash_buff, template[i].plaintext, template[i].psize); |
225 | sg_init_one(&sg[0], hash_buff, template[i].psize); | 231 | sg_init_one(&sg[0], hash_buff, template[i].psize); |
@@ -281,6 +287,10 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, | |||
281 | 287 | ||
282 | j = 0; | 288 | j = 0; |
283 | for (i = 0; i < tcount; i++) { | 289 | for (i = 0; i < tcount; i++) { |
290 | /* alignment tests are only done with continuous buffers */ | ||
291 | if (align_offset != 0) | ||
292 | break; | ||
293 | |||
284 | if (template[i].np) { | 294 | if (template[i].np) { |
285 | j++; | 295 | j++; |
286 | memset(result, 0, 64); | 296 | memset(result, 0, 64); |
@@ -358,9 +368,36 @@ out_nobuf: | |||
358 | return ret; | 368 | return ret; |
359 | } | 369 | } |
360 | 370 | ||
371 | static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, | ||
372 | unsigned int tcount, bool use_digest) | ||
373 | { | ||
374 | unsigned int alignmask; | ||
375 | int ret; | ||
376 | |||
377 | ret = __test_hash(tfm, template, tcount, use_digest, 0); | ||
378 | if (ret) | ||
379 | return ret; | ||
380 | |||
381 | /* test unaligned buffers, check with one byte offset */ | ||
382 | ret = __test_hash(tfm, template, tcount, use_digest, 1); | ||
383 | if (ret) | ||
384 | return ret; | ||
385 | |||
386 | alignmask = crypto_tfm_alg_alignmask(&tfm->base); | ||
387 | if (alignmask) { | ||
388 | /* Check if alignment mask for tfm is correctly set. */ | ||
389 | ret = __test_hash(tfm, template, tcount, use_digest, | ||
390 | alignmask + 1); | ||
391 | if (ret) | ||
392 | return ret; | ||
393 | } | ||
394 | |||
395 | return 0; | ||
396 | } | ||
397 | |||
361 | static int __test_aead(struct crypto_aead *tfm, int enc, | 398 | static int __test_aead(struct crypto_aead *tfm, int enc, |
362 | struct aead_testvec *template, unsigned int tcount, | 399 | struct aead_testvec *template, unsigned int tcount, |
363 | const bool diff_dst) | 400 | const bool diff_dst, const int align_offset) |
364 | { | 401 | { |
365 | const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)); | 402 | const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)); |
366 | unsigned int i, j, k, n, temp; | 403 | unsigned int i, j, k, n, temp; |
@@ -423,15 +460,16 @@ static int __test_aead(struct crypto_aead *tfm, int enc, | |||
423 | if (!template[i].np) { | 460 | if (!template[i].np) { |
424 | j++; | 461 | j++; |
425 | 462 | ||
426 | /* some tepmplates have no input data but they will | 463 | /* some templates have no input data but they will |
427 | * touch input | 464 | * touch input |
428 | */ | 465 | */ |
429 | input = xbuf[0]; | 466 | input = xbuf[0]; |
467 | input += align_offset; | ||
430 | assoc = axbuf[0]; | 468 | assoc = axbuf[0]; |
431 | 469 | ||
432 | ret = -EINVAL; | 470 | ret = -EINVAL; |
433 | if (WARN_ON(template[i].ilen > PAGE_SIZE || | 471 | if (WARN_ON(align_offset + template[i].ilen > |
434 | template[i].alen > PAGE_SIZE)) | 472 | PAGE_SIZE || template[i].alen > PAGE_SIZE)) |
435 | goto out; | 473 | goto out; |
436 | 474 | ||
437 | memcpy(input, template[i].input, template[i].ilen); | 475 | memcpy(input, template[i].input, template[i].ilen); |
@@ -470,6 +508,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc, | |||
470 | 508 | ||
471 | if (diff_dst) { | 509 | if (diff_dst) { |
472 | output = xoutbuf[0]; | 510 | output = xoutbuf[0]; |
511 | output += align_offset; | ||
473 | sg_init_one(&sgout[0], output, | 512 | sg_init_one(&sgout[0], output, |
474 | template[i].ilen + | 513 | template[i].ilen + |
475 | (enc ? authsize : 0)); | 514 | (enc ? authsize : 0)); |
@@ -530,6 +569,10 @@ static int __test_aead(struct crypto_aead *tfm, int enc, | |||
530 | } | 569 | } |
531 | 570 | ||
532 | for (i = 0, j = 0; i < tcount; i++) { | 571 | for (i = 0, j = 0; i < tcount; i++) { |
572 | /* alignment tests are only done with continuous buffers */ | ||
573 | if (align_offset != 0) | ||
574 | break; | ||
575 | |||
533 | if (template[i].np) { | 576 | if (template[i].np) { |
534 | j++; | 577 | j++; |
535 | 578 | ||
@@ -732,15 +775,34 @@ out_noxbuf: | |||
732 | static int test_aead(struct crypto_aead *tfm, int enc, | 775 | static int test_aead(struct crypto_aead *tfm, int enc, |
733 | struct aead_testvec *template, unsigned int tcount) | 776 | struct aead_testvec *template, unsigned int tcount) |
734 | { | 777 | { |
778 | unsigned int alignmask; | ||
735 | int ret; | 779 | int ret; |
736 | 780 | ||
737 | /* test 'dst == src' case */ | 781 | /* test 'dst == src' case */ |
738 | ret = __test_aead(tfm, enc, template, tcount, false); | 782 | ret = __test_aead(tfm, enc, template, tcount, false, 0); |
739 | if (ret) | 783 | if (ret) |
740 | return ret; | 784 | return ret; |
741 | 785 | ||
742 | /* test 'dst != src' case */ | 786 | /* test 'dst != src' case */ |
743 | return __test_aead(tfm, enc, template, tcount, true); | 787 | ret = __test_aead(tfm, enc, template, tcount, true, 0); |
788 | if (ret) | ||
789 | return ret; | ||
790 | |||
791 | /* test unaligned buffers, check with one byte offset */ | ||
792 | ret = __test_aead(tfm, enc, template, tcount, true, 1); | ||
793 | if (ret) | ||
794 | return ret; | ||
795 | |||
796 | alignmask = crypto_tfm_alg_alignmask(&tfm->base); | ||
797 | if (alignmask) { | ||
798 | /* Check if alignment mask for tfm is correctly set. */ | ||
799 | ret = __test_aead(tfm, enc, template, tcount, true, | ||
800 | alignmask + 1); | ||
801 | if (ret) | ||
802 | return ret; | ||
803 | } | ||
804 | |||
805 | return 0; | ||
744 | } | 806 | } |
745 | 807 | ||
746 | static int test_cipher(struct crypto_cipher *tfm, int enc, | 808 | static int test_cipher(struct crypto_cipher *tfm, int enc, |
@@ -820,7 +882,7 @@ out_nobuf: | |||
820 | 882 | ||
821 | static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, | 883 | static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, |
822 | struct cipher_testvec *template, unsigned int tcount, | 884 | struct cipher_testvec *template, unsigned int tcount, |
823 | const bool diff_dst) | 885 | const bool diff_dst, const int align_offset) |
824 | { | 886 | { |
825 | const char *algo = | 887 | const char *algo = |
826 | crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm)); | 888 | crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm)); |
@@ -876,10 +938,12 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, | |||
876 | j++; | 938 | j++; |
877 | 939 | ||
878 | ret = -EINVAL; | 940 | ret = -EINVAL; |
879 | if (WARN_ON(template[i].ilen > PAGE_SIZE)) | 941 | if (WARN_ON(align_offset + template[i].ilen > |
942 | PAGE_SIZE)) | ||
880 | goto out; | 943 | goto out; |
881 | 944 | ||
882 | data = xbuf[0]; | 945 | data = xbuf[0]; |
946 | data += align_offset; | ||
883 | memcpy(data, template[i].input, template[i].ilen); | 947 | memcpy(data, template[i].input, template[i].ilen); |
884 | 948 | ||
885 | crypto_ablkcipher_clear_flags(tfm, ~0); | 949 | crypto_ablkcipher_clear_flags(tfm, ~0); |
@@ -900,6 +964,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, | |||
900 | sg_init_one(&sg[0], data, template[i].ilen); | 964 | sg_init_one(&sg[0], data, template[i].ilen); |
901 | if (diff_dst) { | 965 | if (diff_dst) { |
902 | data = xoutbuf[0]; | 966 | data = xoutbuf[0]; |
967 | data += align_offset; | ||
903 | sg_init_one(&sgout[0], data, template[i].ilen); | 968 | sg_init_one(&sgout[0], data, template[i].ilen); |
904 | } | 969 | } |
905 | 970 | ||
@@ -941,6 +1006,9 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, | |||
941 | 1006 | ||
942 | j = 0; | 1007 | j = 0; |
943 | for (i = 0; i < tcount; i++) { | 1008 | for (i = 0; i < tcount; i++) { |
1009 | /* alignment tests are only done with continuous buffers */ | ||
1010 | if (align_offset != 0) | ||
1011 | break; | ||
944 | 1012 | ||
945 | if (template[i].iv) | 1013 | if (template[i].iv) |
946 | memcpy(iv, template[i].iv, MAX_IVLEN); | 1014 | memcpy(iv, template[i].iv, MAX_IVLEN); |
@@ -1075,15 +1143,34 @@ out_nobuf: | |||
1075 | static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, | 1143 | static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, |
1076 | struct cipher_testvec *template, unsigned int tcount) | 1144 | struct cipher_testvec *template, unsigned int tcount) |
1077 | { | 1145 | { |
1146 | unsigned int alignmask; | ||
1078 | int ret; | 1147 | int ret; |
1079 | 1148 | ||
1080 | /* test 'dst == src' case */ | 1149 | /* test 'dst == src' case */ |
1081 | ret = __test_skcipher(tfm, enc, template, tcount, false); | 1150 | ret = __test_skcipher(tfm, enc, template, tcount, false, 0); |
1082 | if (ret) | 1151 | if (ret) |
1083 | return ret; | 1152 | return ret; |
1084 | 1153 | ||
1085 | /* test 'dst != src' case */ | 1154 | /* test 'dst != src' case */ |
1086 | return __test_skcipher(tfm, enc, template, tcount, true); | 1155 | ret = __test_skcipher(tfm, enc, template, tcount, true, 0); |
1156 | if (ret) | ||
1157 | return ret; | ||
1158 | |||
1159 | /* test unaligned buffers, check with one byte offset */ | ||
1160 | ret = __test_skcipher(tfm, enc, template, tcount, true, 1); | ||
1161 | if (ret) | ||
1162 | return ret; | ||
1163 | |||
1164 | alignmask = crypto_tfm_alg_alignmask(&tfm->base); | ||
1165 | if (alignmask) { | ||
1166 | /* Check if alignment mask for tfm is correctly set. */ | ||
1167 | ret = __test_skcipher(tfm, enc, template, tcount, true, | ||
1168 | alignmask + 1); | ||
1169 | if (ret) | ||
1170 | return ret; | ||
1171 | } | ||
1172 | |||
1173 | return 0; | ||
1087 | } | 1174 | } |
1088 | 1175 | ||
1089 | static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate, | 1176 | static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate, |
@@ -1654,16 +1741,10 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1654 | .alg = "__cbc-twofish-avx", | 1741 | .alg = "__cbc-twofish-avx", |
1655 | .test = alg_test_null, | 1742 | .test = alg_test_null, |
1656 | }, { | 1743 | }, { |
1657 | .alg = "__cbc-twofish-avx2", | ||
1658 | .test = alg_test_null, | ||
1659 | }, { | ||
1660 | .alg = "__driver-cbc-aes-aesni", | 1744 | .alg = "__driver-cbc-aes-aesni", |
1661 | .test = alg_test_null, | 1745 | .test = alg_test_null, |
1662 | .fips_allowed = 1, | 1746 | .fips_allowed = 1, |
1663 | }, { | 1747 | }, { |
1664 | .alg = "__driver-cbc-blowfish-avx2", | ||
1665 | .test = alg_test_null, | ||
1666 | }, { | ||
1667 | .alg = "__driver-cbc-camellia-aesni", | 1748 | .alg = "__driver-cbc-camellia-aesni", |
1668 | .test = alg_test_null, | 1749 | .test = alg_test_null, |
1669 | }, { | 1750 | }, { |
@@ -1688,16 +1769,10 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1688 | .alg = "__driver-cbc-twofish-avx", | 1769 | .alg = "__driver-cbc-twofish-avx", |
1689 | .test = alg_test_null, | 1770 | .test = alg_test_null, |
1690 | }, { | 1771 | }, { |
1691 | .alg = "__driver-cbc-twofish-avx2", | ||
1692 | .test = alg_test_null, | ||
1693 | }, { | ||
1694 | .alg = "__driver-ecb-aes-aesni", | 1772 | .alg = "__driver-ecb-aes-aesni", |
1695 | .test = alg_test_null, | 1773 | .test = alg_test_null, |
1696 | .fips_allowed = 1, | 1774 | .fips_allowed = 1, |
1697 | }, { | 1775 | }, { |
1698 | .alg = "__driver-ecb-blowfish-avx2", | ||
1699 | .test = alg_test_null, | ||
1700 | }, { | ||
1701 | .alg = "__driver-ecb-camellia-aesni", | 1776 | .alg = "__driver-ecb-camellia-aesni", |
1702 | .test = alg_test_null, | 1777 | .test = alg_test_null, |
1703 | }, { | 1778 | }, { |
@@ -1722,9 +1797,6 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1722 | .alg = "__driver-ecb-twofish-avx", | 1797 | .alg = "__driver-ecb-twofish-avx", |
1723 | .test = alg_test_null, | 1798 | .test = alg_test_null, |
1724 | }, { | 1799 | }, { |
1725 | .alg = "__driver-ecb-twofish-avx2", | ||
1726 | .test = alg_test_null, | ||
1727 | }, { | ||
1728 | .alg = "__ghash-pclmulqdqni", | 1800 | .alg = "__ghash-pclmulqdqni", |
1729 | .test = alg_test_null, | 1801 | .test = alg_test_null, |
1730 | .fips_allowed = 1, | 1802 | .fips_allowed = 1, |
@@ -1974,12 +2046,19 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1974 | } | 2046 | } |
1975 | } | 2047 | } |
1976 | }, { | 2048 | }, { |
1977 | .alg = "cryptd(__driver-cbc-aes-aesni)", | 2049 | .alg = "crct10dif", |
1978 | .test = alg_test_null, | 2050 | .test = alg_test_hash, |
1979 | .fips_allowed = 1, | 2051 | .fips_allowed = 1, |
2052 | .suite = { | ||
2053 | .hash = { | ||
2054 | .vecs = crct10dif_tv_template, | ||
2055 | .count = CRCT10DIF_TEST_VECTORS | ||
2056 | } | ||
2057 | } | ||
1980 | }, { | 2058 | }, { |
1981 | .alg = "cryptd(__driver-cbc-blowfish-avx2)", | 2059 | .alg = "cryptd(__driver-cbc-aes-aesni)", |
1982 | .test = alg_test_null, | 2060 | .test = alg_test_null, |
2061 | .fips_allowed = 1, | ||
1983 | }, { | 2062 | }, { |
1984 | .alg = "cryptd(__driver-cbc-camellia-aesni)", | 2063 | .alg = "cryptd(__driver-cbc-camellia-aesni)", |
1985 | .test = alg_test_null, | 2064 | .test = alg_test_null, |
@@ -1994,9 +2073,6 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1994 | .test = alg_test_null, | 2073 | .test = alg_test_null, |
1995 | .fips_allowed = 1, | 2074 | .fips_allowed = 1, |
1996 | }, { | 2075 | }, { |
1997 | .alg = "cryptd(__driver-ecb-blowfish-avx2)", | ||
1998 | .test = alg_test_null, | ||
1999 | }, { | ||
2000 | .alg = "cryptd(__driver-ecb-camellia-aesni)", | 2076 | .alg = "cryptd(__driver-ecb-camellia-aesni)", |
2001 | .test = alg_test_null, | 2077 | .test = alg_test_null, |
2002 | }, { | 2078 | }, { |
@@ -2021,9 +2097,6 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
2021 | .alg = "cryptd(__driver-ecb-twofish-avx)", | 2097 | .alg = "cryptd(__driver-ecb-twofish-avx)", |
2022 | .test = alg_test_null, | 2098 | .test = alg_test_null, |
2023 | }, { | 2099 | }, { |
2024 | .alg = "cryptd(__driver-ecb-twofish-avx2)", | ||
2025 | .test = alg_test_null, | ||
2026 | }, { | ||
2027 | .alg = "cryptd(__driver-gcm-aes-aesni)", | 2100 | .alg = "cryptd(__driver-gcm-aes-aesni)", |
2028 | .test = alg_test_null, | 2101 | .test = alg_test_null, |
2029 | .fips_allowed = 1, | 2102 | .fips_allowed = 1, |
@@ -3068,6 +3141,35 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
3068 | } | 3141 | } |
3069 | }; | 3142 | }; |
3070 | 3143 | ||
3144 | static bool alg_test_descs_checked; | ||
3145 | |||
3146 | static void alg_test_descs_check_order(void) | ||
3147 | { | ||
3148 | int i; | ||
3149 | |||
3150 | /* only check once */ | ||
3151 | if (alg_test_descs_checked) | ||
3152 | return; | ||
3153 | |||
3154 | alg_test_descs_checked = true; | ||
3155 | |||
3156 | for (i = 1; i < ARRAY_SIZE(alg_test_descs); i++) { | ||
3157 | int diff = strcmp(alg_test_descs[i - 1].alg, | ||
3158 | alg_test_descs[i].alg); | ||
3159 | |||
3160 | if (WARN_ON(diff > 0)) { | ||
3161 | pr_warn("testmgr: alg_test_descs entries in wrong order: '%s' before '%s'\n", | ||
3162 | alg_test_descs[i - 1].alg, | ||
3163 | alg_test_descs[i].alg); | ||
3164 | } | ||
3165 | |||
3166 | if (WARN_ON(diff == 0)) { | ||
3167 | pr_warn("testmgr: duplicate alg_test_descs entry: '%s'\n", | ||
3168 | alg_test_descs[i].alg); | ||
3169 | } | ||
3170 | } | ||
3171 | } | ||
3172 | |||
3071 | static int alg_find_test(const char *alg) | 3173 | static int alg_find_test(const char *alg) |
3072 | { | 3174 | { |
3073 | int start = 0; | 3175 | int start = 0; |
@@ -3099,6 +3201,8 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask) | |||
3099 | int j; | 3201 | int j; |
3100 | int rc; | 3202 | int rc; |
3101 | 3203 | ||
3204 | alg_test_descs_check_order(); | ||
3205 | |||
3102 | if ((type & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_CIPHER) { | 3206 | if ((type & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_CIPHER) { |
3103 | char nalg[CRYPTO_MAX_ALG_NAME]; | 3207 | char nalg[CRYPTO_MAX_ALG_NAME]; |
3104 | 3208 | ||
diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 1e701bc075b9..7d44aa3d6b44 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h | |||
@@ -450,6 +450,39 @@ static struct hash_testvec rmd320_tv_template[] = { | |||
450 | } | 450 | } |
451 | }; | 451 | }; |
452 | 452 | ||
453 | #define CRCT10DIF_TEST_VECTORS 3 | ||
454 | static struct hash_testvec crct10dif_tv_template[] = { | ||
455 | { | ||
456 | .plaintext = "abc", | ||
457 | .psize = 3, | ||
458 | #ifdef __LITTLE_ENDIAN | ||
459 | .digest = "\x3b\x44", | ||
460 | #else | ||
461 | .digest = "\x44\x3b", | ||
462 | #endif | ||
463 | }, { | ||
464 | .plaintext = "1234567890123456789012345678901234567890" | ||
465 | "123456789012345678901234567890123456789", | ||
466 | .psize = 79, | ||
467 | #ifdef __LITTLE_ENDIAN | ||
468 | .digest = "\x70\x4b", | ||
469 | #else | ||
470 | .digest = "\x4b\x70", | ||
471 | #endif | ||
472 | }, { | ||
473 | .plaintext = | ||
474 | "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd", | ||
475 | .psize = 56, | ||
476 | #ifdef __LITTLE_ENDIAN | ||
477 | .digest = "\xe3\x9c", | ||
478 | #else | ||
479 | .digest = "\x9c\xe3", | ||
480 | #endif | ||
481 | .np = 2, | ||
482 | .tap = { 28, 28 } | ||
483 | } | ||
484 | }; | ||
485 | |||
453 | /* | 486 | /* |
454 | * SHA1 test vectors from from FIPS PUB 180-1 | 487 | * SHA1 test vectors from from FIPS PUB 180-1 |
455 | * Long vector from CAVS 5.0 | 488 | * Long vector from CAVS 5.0 |
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index 7c73d4aca36b..bf9fc6b79328 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c | |||
@@ -108,8 +108,6 @@ static int atmel_trng_remove(struct platform_device *pdev) | |||
108 | clk_disable(trng->clk); | 108 | clk_disable(trng->clk); |
109 | clk_put(trng->clk); | 109 | clk_put(trng->clk); |
110 | 110 | ||
111 | platform_set_drvdata(pdev, NULL); | ||
112 | |||
113 | return 0; | 111 | return 0; |
114 | } | 112 | } |
115 | 113 | ||
diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c index f343b7d0dfa1..36581ea562cb 100644 --- a/drivers/char/hw_random/bcm63xx-rng.c +++ b/drivers/char/hw_random/bcm63xx-rng.c | |||
@@ -137,7 +137,6 @@ static int bcm63xx_rng_probe(struct platform_device *pdev) | |||
137 | out_clk_disable: | 137 | out_clk_disable: |
138 | clk_disable(clk); | 138 | clk_disable(clk); |
139 | out_free_rng: | 139 | out_free_rng: |
140 | platform_set_drvdata(pdev, NULL); | ||
141 | kfree(rng); | 140 | kfree(rng); |
142 | out_free_priv: | 141 | out_free_priv: |
143 | kfree(priv); | 142 | kfree(priv); |
@@ -154,7 +153,6 @@ static int bcm63xx_rng_remove(struct platform_device *pdev) | |||
154 | clk_disable(priv->clk); | 153 | clk_disable(priv->clk); |
155 | kfree(priv); | 154 | kfree(priv); |
156 | kfree(rng); | 155 | kfree(rng); |
157 | platform_set_drvdata(pdev, NULL); | ||
158 | 156 | ||
159 | return 0; | 157 | return 0; |
160 | } | 158 | } |
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 20b962e1d832..f9beed54d0c8 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c | |||
@@ -700,7 +700,7 @@ static int n2rng_probe(struct platform_device *op) | |||
700 | if (err) | 700 | if (err) |
701 | goto out_free_units; | 701 | goto out_free_units; |
702 | 702 | ||
703 | dev_set_drvdata(&op->dev, np); | 703 | platform_set_drvdata(op, np); |
704 | 704 | ||
705 | schedule_delayed_work(&np->work, 0); | 705 | schedule_delayed_work(&np->work, 0); |
706 | 706 | ||
@@ -721,7 +721,7 @@ out: | |||
721 | 721 | ||
722 | static int n2rng_remove(struct platform_device *op) | 722 | static int n2rng_remove(struct platform_device *op) |
723 | { | 723 | { |
724 | struct n2rng *np = dev_get_drvdata(&op->dev); | 724 | struct n2rng *np = platform_get_drvdata(op); |
725 | 725 | ||
726 | np->flags |= N2RNG_FLAG_SHUTDOWN; | 726 | np->flags |= N2RNG_FLAG_SHUTDOWN; |
727 | 727 | ||
@@ -736,8 +736,6 @@ static int n2rng_remove(struct platform_device *op) | |||
736 | 736 | ||
737 | kfree(np); | 737 | kfree(np); |
738 | 738 | ||
739 | dev_set_drvdata(&op->dev, NULL); | ||
740 | |||
741 | return 0; | 739 | return 0; |
742 | } | 740 | } |
743 | 741 | ||
diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c index 96de0249e595..232b87fb5fc9 100644 --- a/drivers/char/hw_random/nomadik-rng.c +++ b/drivers/char/hw_random/nomadik-rng.c | |||
@@ -51,7 +51,7 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id) | |||
51 | return ret; | 51 | return ret; |
52 | } | 52 | } |
53 | 53 | ||
54 | clk_enable(rng_clk); | 54 | clk_prepare_enable(rng_clk); |
55 | 55 | ||
56 | ret = amba_request_regions(dev, dev->dev.init_name); | 56 | ret = amba_request_regions(dev, dev->dev.init_name); |
57 | if (ret) | 57 | if (ret) |
diff --git a/drivers/char/hw_random/octeon-rng.c b/drivers/char/hw_random/octeon-rng.c index 1eada566ca70..f2885dbe1849 100644 --- a/drivers/char/hw_random/octeon-rng.c +++ b/drivers/char/hw_random/octeon-rng.c | |||
@@ -96,7 +96,7 @@ static int octeon_rng_probe(struct platform_device *pdev) | |||
96 | 96 | ||
97 | rng->ops = ops; | 97 | rng->ops = ops; |
98 | 98 | ||
99 | dev_set_drvdata(&pdev->dev, &rng->ops); | 99 | platform_set_drvdata(pdev, &rng->ops); |
100 | ret = hwrng_register(&rng->ops); | 100 | ret = hwrng_register(&rng->ops); |
101 | if (ret) | 101 | if (ret) |
102 | return -ENOENT; | 102 | return -ENOENT; |
@@ -108,7 +108,7 @@ static int octeon_rng_probe(struct platform_device *pdev) | |||
108 | 108 | ||
109 | static int __exit octeon_rng_remove(struct platform_device *pdev) | 109 | static int __exit octeon_rng_remove(struct platform_device *pdev) |
110 | { | 110 | { |
111 | struct hwrng *rng = dev_get_drvdata(&pdev->dev); | 111 | struct hwrng *rng = platform_get_drvdata(pdev); |
112 | 112 | ||
113 | hwrng_unregister(rng); | 113 | hwrng_unregister(rng); |
114 | 114 | ||
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index d2903e772270..6843ec87b98b 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c | |||
@@ -116,7 +116,7 @@ static int omap_rng_probe(struct platform_device *pdev) | |||
116 | }; | 116 | }; |
117 | 117 | ||
118 | omap_rng_ops.priv = (unsigned long)priv; | 118 | omap_rng_ops.priv = (unsigned long)priv; |
119 | dev_set_drvdata(&pdev->dev, priv); | 119 | platform_set_drvdata(pdev, priv); |
120 | 120 | ||
121 | priv->mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | 121 | priv->mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
122 | priv->base = devm_ioremap_resource(&pdev->dev, priv->mem_res); | 122 | priv->base = devm_ioremap_resource(&pdev->dev, priv->mem_res); |
@@ -124,7 +124,7 @@ static int omap_rng_probe(struct platform_device *pdev) | |||
124 | ret = PTR_ERR(priv->base); | 124 | ret = PTR_ERR(priv->base); |
125 | goto err_ioremap; | 125 | goto err_ioremap; |
126 | } | 126 | } |
127 | dev_set_drvdata(&pdev->dev, priv); | 127 | platform_set_drvdata(pdev, priv); |
128 | 128 | ||
129 | pm_runtime_enable(&pdev->dev); | 129 | pm_runtime_enable(&pdev->dev); |
130 | pm_runtime_get_sync(&pdev->dev); | 130 | pm_runtime_get_sync(&pdev->dev); |
@@ -151,7 +151,7 @@ err_ioremap: | |||
151 | 151 | ||
152 | static int __exit omap_rng_remove(struct platform_device *pdev) | 152 | static int __exit omap_rng_remove(struct platform_device *pdev) |
153 | { | 153 | { |
154 | struct omap_rng_private_data *priv = dev_get_drvdata(&pdev->dev); | 154 | struct omap_rng_private_data *priv = platform_get_drvdata(pdev); |
155 | 155 | ||
156 | hwrng_unregister(&omap_rng_ops); | 156 | hwrng_unregister(&omap_rng_ops); |
157 | 157 | ||
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index 3e75737f5fe1..d2120ba8f3f9 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c | |||
@@ -192,7 +192,6 @@ out_release_io: | |||
192 | out_timer: | 192 | out_timer: |
193 | del_timer_sync(&priv->timer); | 193 | del_timer_sync(&priv->timer); |
194 | out_free: | 194 | out_free: |
195 | platform_set_drvdata(pdev, NULL); | ||
196 | kfree(priv); | 195 | kfree(priv); |
197 | return err; | 196 | return err; |
198 | } | 197 | } |
@@ -209,7 +208,6 @@ static int timeriomem_rng_remove(struct platform_device *pdev) | |||
209 | del_timer_sync(&priv->timer); | 208 | del_timer_sync(&priv->timer); |
210 | iounmap(priv->io_base); | 209 | iounmap(priv->io_base); |
211 | release_mem_region(res->start, resource_size(res)); | 210 | release_mem_region(res->start, resource_size(res)); |
212 | platform_set_drvdata(pdev, NULL); | ||
213 | kfree(priv); | 211 | kfree(priv); |
214 | 212 | ||
215 | return 0; | 213 | return 0; |
diff --git a/drivers/char/hw_random/tx4939-rng.c b/drivers/char/hw_random/tx4939-rng.c index d34a24a0d484..00593c847cf0 100644 --- a/drivers/char/hw_random/tx4939-rng.c +++ b/drivers/char/hw_random/tx4939-rng.c | |||
@@ -154,7 +154,6 @@ static int __exit tx4939_rng_remove(struct platform_device *dev) | |||
154 | struct tx4939_rng *rngdev = platform_get_drvdata(dev); | 154 | struct tx4939_rng *rngdev = platform_get_drvdata(dev); |
155 | 155 | ||
156 | hwrng_unregister(&rngdev->rng); | 156 | hwrng_unregister(&rngdev->rng); |
157 | platform_set_drvdata(dev, NULL); | ||
158 | return 0; | 157 | return 0; |
159 | } | 158 | } |
160 | 159 | ||
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index dffb85525368..8ff7c230d82e 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig | |||
@@ -278,7 +278,7 @@ config CRYPTO_DEV_PICOXCELL | |||
278 | 278 | ||
279 | config CRYPTO_DEV_SAHARA | 279 | config CRYPTO_DEV_SAHARA |
280 | tristate "Support for SAHARA crypto accelerator" | 280 | tristate "Support for SAHARA crypto accelerator" |
281 | depends on ARCH_MXC && EXPERIMENTAL && OF | 281 | depends on ARCH_MXC && OF |
282 | select CRYPTO_BLKCIPHER | 282 | select CRYPTO_BLKCIPHER |
283 | select CRYPTO_AES | 283 | select CRYPTO_AES |
284 | select CRYPTO_ECB | 284 | select CRYPTO_ECB |
@@ -286,6 +286,16 @@ config CRYPTO_DEV_SAHARA | |||
286 | This option enables support for the SAHARA HW crypto accelerator | 286 | This option enables support for the SAHARA HW crypto accelerator |
287 | found in some Freescale i.MX chips. | 287 | found in some Freescale i.MX chips. |
288 | 288 | ||
289 | config CRYPTO_DEV_DCP | ||
290 | tristate "Support for the DCP engine" | ||
291 | depends on ARCH_MXS && OF | ||
292 | select CRYPTO_BLKCIPHER | ||
293 | select CRYPTO_AES | ||
294 | select CRYPTO_CBC | ||
295 | help | ||
296 | This options enables support for the hardware crypto-acceleration | ||
297 | capabilities of the DCP co-processor | ||
298 | |||
289 | config CRYPTO_DEV_S5P | 299 | config CRYPTO_DEV_S5P |
290 | tristate "Support for Samsung S5PV210 crypto accelerator" | 300 | tristate "Support for Samsung S5PV210 crypto accelerator" |
291 | depends on ARCH_S5PV210 | 301 | depends on ARCH_S5PV210 |
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 38ce13d3b79b..b4946ddd2550 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile | |||
@@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o | |||
13 | obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o | 13 | obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o |
14 | obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o | 14 | obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o |
15 | obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o | 15 | obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o |
16 | obj-$(CONFIG_CRYPTO_DEV_DCP) += dcp.o | ||
16 | obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o | 17 | obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o |
17 | obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o | 18 | obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o |
18 | obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ | 19 | obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ |
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 6e94bcd94678..f5d6deced1cb 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c | |||
@@ -202,6 +202,7 @@ static int caam_probe(struct platform_device *pdev) | |||
202 | #ifdef CONFIG_DEBUG_FS | 202 | #ifdef CONFIG_DEBUG_FS |
203 | struct caam_perfmon *perfmon; | 203 | struct caam_perfmon *perfmon; |
204 | #endif | 204 | #endif |
205 | u64 cha_vid; | ||
205 | 206 | ||
206 | ctrlpriv = kzalloc(sizeof(struct caam_drv_private), GFP_KERNEL); | 207 | ctrlpriv = kzalloc(sizeof(struct caam_drv_private), GFP_KERNEL); |
207 | if (!ctrlpriv) | 208 | if (!ctrlpriv) |
@@ -293,11 +294,14 @@ static int caam_probe(struct platform_device *pdev) | |||
293 | return -ENOMEM; | 294 | return -ENOMEM; |
294 | } | 295 | } |
295 | 296 | ||
297 | cha_vid = rd_reg64(&topregs->ctrl.perfmon.cha_id); | ||
298 | |||
296 | /* | 299 | /* |
297 | * RNG4 based SECs (v5+) need special initialization prior | 300 | * If SEC has RNG version >= 4 and RNG state handle has not been |
298 | * to executing any descriptors | 301 | * already instantiated ,do RNG instantiation |
299 | */ | 302 | */ |
300 | if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) { | 303 | if ((cha_vid & CHA_ID_RNG_MASK) >> CHA_ID_RNG_SHIFT >= 4 && |
304 | !(rd_reg32(&topregs->ctrl.r4tst[0].rdsta) & RDSTA_IF0)) { | ||
301 | kick_trng(pdev); | 305 | kick_trng(pdev); |
302 | ret = instantiate_rng(ctrlpriv->jrdev[0]); | 306 | ret = instantiate_rng(ctrlpriv->jrdev[0]); |
303 | if (ret) { | 307 | if (ret) { |
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index f7f833be8c67..53b296f78b0d 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h | |||
@@ -231,7 +231,12 @@ struct sec4_sg_entry { | |||
231 | #define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT) | 231 | #define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT) |
232 | #define LDST_SRCDST_WORD_PKHA_N_SZ (0x12 << LDST_SRCDST_SHIFT) | 232 | #define LDST_SRCDST_WORD_PKHA_N_SZ (0x12 << LDST_SRCDST_SHIFT) |
233 | #define LDST_SRCDST_WORD_PKHA_E_SZ (0x13 << LDST_SRCDST_SHIFT) | 233 | #define LDST_SRCDST_WORD_PKHA_E_SZ (0x13 << LDST_SRCDST_SHIFT) |
234 | #define LDST_SRCDST_WORD_CLASS_CTX (0x20 << LDST_SRCDST_SHIFT) | ||
234 | #define LDST_SRCDST_WORD_DESCBUF (0x40 << LDST_SRCDST_SHIFT) | 235 | #define LDST_SRCDST_WORD_DESCBUF (0x40 << LDST_SRCDST_SHIFT) |
236 | #define LDST_SRCDST_WORD_DESCBUF_JOB (0x41 << LDST_SRCDST_SHIFT) | ||
237 | #define LDST_SRCDST_WORD_DESCBUF_SHARED (0x42 << LDST_SRCDST_SHIFT) | ||
238 | #define LDST_SRCDST_WORD_DESCBUF_JOB_WE (0x45 << LDST_SRCDST_SHIFT) | ||
239 | #define LDST_SRCDST_WORD_DESCBUF_SHARED_WE (0x46 << LDST_SRCDST_SHIFT) | ||
235 | #define LDST_SRCDST_WORD_INFO_FIFO (0x7a << LDST_SRCDST_SHIFT) | 240 | #define LDST_SRCDST_WORD_INFO_FIFO (0x7a << LDST_SRCDST_SHIFT) |
236 | 241 | ||
237 | /* Offset in source/destination */ | 242 | /* Offset in source/destination */ |
@@ -366,6 +371,7 @@ struct sec4_sg_entry { | |||
366 | #define FIFOLD_TYPE_LAST2FLUSH1 (0x05 << FIFOLD_TYPE_SHIFT) | 371 | #define FIFOLD_TYPE_LAST2FLUSH1 (0x05 << FIFOLD_TYPE_SHIFT) |
367 | #define FIFOLD_TYPE_LASTBOTH (0x06 << FIFOLD_TYPE_SHIFT) | 372 | #define FIFOLD_TYPE_LASTBOTH (0x06 << FIFOLD_TYPE_SHIFT) |
368 | #define FIFOLD_TYPE_LASTBOTHFL (0x07 << FIFOLD_TYPE_SHIFT) | 373 | #define FIFOLD_TYPE_LASTBOTHFL (0x07 << FIFOLD_TYPE_SHIFT) |
374 | #define FIFOLD_TYPE_NOINFOFIFO (0x0F << FIFOLD_TYPE_SHIFT) | ||
369 | 375 | ||
370 | #define FIFOLDST_LEN_MASK 0xffff | 376 | #define FIFOLDST_LEN_MASK 0xffff |
371 | #define FIFOLDST_EXT_LEN_MASK 0xffffffff | 377 | #define FIFOLDST_EXT_LEN_MASK 0xffffffff |
@@ -1294,10 +1300,10 @@ struct sec4_sg_entry { | |||
1294 | #define SQOUT_SGF 0x01000000 | 1300 | #define SQOUT_SGF 0x01000000 |
1295 | 1301 | ||
1296 | /* Appends to a previous pointer */ | 1302 | /* Appends to a previous pointer */ |
1297 | #define SQOUT_PRE 0x00800000 | 1303 | #define SQOUT_PRE SQIN_PRE |
1298 | 1304 | ||
1299 | /* Restore sequence with pointer/length */ | 1305 | /* Restore sequence with pointer/length */ |
1300 | #define SQOUT_RTO 0x00200000 | 1306 | #define SQOUT_RTO SQIN_RTO |
1301 | 1307 | ||
1302 | /* Use extended length following pointer */ | 1308 | /* Use extended length following pointer */ |
1303 | #define SQOUT_EXT 0x00400000 | 1309 | #define SQOUT_EXT 0x00400000 |
@@ -1359,6 +1365,7 @@ struct sec4_sg_entry { | |||
1359 | #define MOVE_DEST_MATH3 (0x07 << MOVE_DEST_SHIFT) | 1365 | #define MOVE_DEST_MATH3 (0x07 << MOVE_DEST_SHIFT) |
1360 | #define MOVE_DEST_CLASS1INFIFO (0x08 << MOVE_DEST_SHIFT) | 1366 | #define MOVE_DEST_CLASS1INFIFO (0x08 << MOVE_DEST_SHIFT) |
1361 | #define MOVE_DEST_CLASS2INFIFO (0x09 << MOVE_DEST_SHIFT) | 1367 | #define MOVE_DEST_CLASS2INFIFO (0x09 << MOVE_DEST_SHIFT) |
1368 | #define MOVE_DEST_INFIFO_NOINFO (0x0a << MOVE_DEST_SHIFT) | ||
1362 | #define MOVE_DEST_PK_A (0x0c << MOVE_DEST_SHIFT) | 1369 | #define MOVE_DEST_PK_A (0x0c << MOVE_DEST_SHIFT) |
1363 | #define MOVE_DEST_CLASS1KEY (0x0d << MOVE_DEST_SHIFT) | 1370 | #define MOVE_DEST_CLASS1KEY (0x0d << MOVE_DEST_SHIFT) |
1364 | #define MOVE_DEST_CLASS2KEY (0x0e << MOVE_DEST_SHIFT) | 1371 | #define MOVE_DEST_CLASS2KEY (0x0e << MOVE_DEST_SHIFT) |
@@ -1411,6 +1418,7 @@ struct sec4_sg_entry { | |||
1411 | #define MATH_SRC0_REG2 (0x02 << MATH_SRC0_SHIFT) | 1418 | #define MATH_SRC0_REG2 (0x02 << MATH_SRC0_SHIFT) |
1412 | #define MATH_SRC0_REG3 (0x03 << MATH_SRC0_SHIFT) | 1419 | #define MATH_SRC0_REG3 (0x03 << MATH_SRC0_SHIFT) |
1413 | #define MATH_SRC0_IMM (0x04 << MATH_SRC0_SHIFT) | 1420 | #define MATH_SRC0_IMM (0x04 << MATH_SRC0_SHIFT) |
1421 | #define MATH_SRC0_DPOVRD (0x07 << MATH_SRC0_SHIFT) | ||
1414 | #define MATH_SRC0_SEQINLEN (0x08 << MATH_SRC0_SHIFT) | 1422 | #define MATH_SRC0_SEQINLEN (0x08 << MATH_SRC0_SHIFT) |
1415 | #define MATH_SRC0_SEQOUTLEN (0x09 << MATH_SRC0_SHIFT) | 1423 | #define MATH_SRC0_SEQOUTLEN (0x09 << MATH_SRC0_SHIFT) |
1416 | #define MATH_SRC0_VARSEQINLEN (0x0a << MATH_SRC0_SHIFT) | 1424 | #define MATH_SRC0_VARSEQINLEN (0x0a << MATH_SRC0_SHIFT) |
@@ -1425,6 +1433,7 @@ struct sec4_sg_entry { | |||
1425 | #define MATH_SRC1_REG2 (0x02 << MATH_SRC1_SHIFT) | 1433 | #define MATH_SRC1_REG2 (0x02 << MATH_SRC1_SHIFT) |
1426 | #define MATH_SRC1_REG3 (0x03 << MATH_SRC1_SHIFT) | 1434 | #define MATH_SRC1_REG3 (0x03 << MATH_SRC1_SHIFT) |
1427 | #define MATH_SRC1_IMM (0x04 << MATH_SRC1_SHIFT) | 1435 | #define MATH_SRC1_IMM (0x04 << MATH_SRC1_SHIFT) |
1436 | #define MATH_SRC1_DPOVRD (0x07 << MATH_SRC0_SHIFT) | ||
1428 | #define MATH_SRC1_INFIFO (0x0a << MATH_SRC1_SHIFT) | 1437 | #define MATH_SRC1_INFIFO (0x0a << MATH_SRC1_SHIFT) |
1429 | #define MATH_SRC1_OUTFIFO (0x0b << MATH_SRC1_SHIFT) | 1438 | #define MATH_SRC1_OUTFIFO (0x0b << MATH_SRC1_SHIFT) |
1430 | #define MATH_SRC1_ONE (0x0c << MATH_SRC1_SHIFT) | 1439 | #define MATH_SRC1_ONE (0x0c << MATH_SRC1_SHIFT) |
@@ -1600,4 +1609,13 @@ struct sec4_sg_entry { | |||
1600 | #define NFIFOENTRY_PLEN_SHIFT 0 | 1609 | #define NFIFOENTRY_PLEN_SHIFT 0 |
1601 | #define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT) | 1610 | #define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT) |
1602 | 1611 | ||
1612 | /* Append Load Immediate Command */ | ||
1613 | #define FD_CMD_APPEND_LOAD_IMMEDIATE 0x80000000 | ||
1614 | |||
1615 | /* Set SEQ LIODN equal to the Non-SEQ LIODN for the job */ | ||
1616 | #define FD_CMD_SET_SEQ_LIODN_EQUAL_NONSEQ_LIODN 0x40000000 | ||
1617 | |||
1618 | /* Frame Descriptor Command for Replacement Job Descriptor */ | ||
1619 | #define FD_CMD_REPLACE_JOB_DESC 0x20000000 | ||
1620 | |||
1603 | #endif /* DESC_H */ | 1621 | #endif /* DESC_H */ |
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index c85c1f058401..fe3bfd1b08ca 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h | |||
@@ -110,6 +110,26 @@ static inline void append_cmd(u32 *desc, u32 command) | |||
110 | (*desc)++; | 110 | (*desc)++; |
111 | } | 111 | } |
112 | 112 | ||
113 | #define append_u32 append_cmd | ||
114 | |||
115 | static inline void append_u64(u32 *desc, u64 data) | ||
116 | { | ||
117 | u32 *offset = desc_end(desc); | ||
118 | |||
119 | *offset = upper_32_bits(data); | ||
120 | *(++offset) = lower_32_bits(data); | ||
121 | |||
122 | (*desc) += 2; | ||
123 | } | ||
124 | |||
125 | /* Write command without affecting header, and return pointer to next word */ | ||
126 | static inline u32 *write_cmd(u32 *desc, u32 command) | ||
127 | { | ||
128 | *desc = command; | ||
129 | |||
130 | return desc + 1; | ||
131 | } | ||
132 | |||
113 | static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, | 133 | static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, |
114 | u32 command) | 134 | u32 command) |
115 | { | 135 | { |
@@ -122,7 +142,8 @@ static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr, | |||
122 | unsigned int len, u32 command) | 142 | unsigned int len, u32 command) |
123 | { | 143 | { |
124 | append_cmd(desc, command); | 144 | append_cmd(desc, command); |
125 | append_ptr(desc, ptr); | 145 | if (!(command & (SQIN_RTO | SQIN_PRE))) |
146 | append_ptr(desc, ptr); | ||
126 | append_cmd(desc, len); | 147 | append_cmd(desc, len); |
127 | } | 148 | } |
128 | 149 | ||
@@ -176,17 +197,36 @@ static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \ | |||
176 | } | 197 | } |
177 | APPEND_CMD_PTR(key, KEY) | 198 | APPEND_CMD_PTR(key, KEY) |
178 | APPEND_CMD_PTR(load, LOAD) | 199 | APPEND_CMD_PTR(load, LOAD) |
179 | APPEND_CMD_PTR(store, STORE) | ||
180 | APPEND_CMD_PTR(fifo_load, FIFO_LOAD) | 200 | APPEND_CMD_PTR(fifo_load, FIFO_LOAD) |
181 | APPEND_CMD_PTR(fifo_store, FIFO_STORE) | 201 | APPEND_CMD_PTR(fifo_store, FIFO_STORE) |
182 | 202 | ||
203 | static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len, | ||
204 | u32 options) | ||
205 | { | ||
206 | u32 cmd_src; | ||
207 | |||
208 | cmd_src = options & LDST_SRCDST_MASK; | ||
209 | |||
210 | append_cmd(desc, CMD_STORE | options | len); | ||
211 | |||
212 | /* The following options do not require pointer */ | ||
213 | if (!(cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED || | ||
214 | cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB || | ||
215 | cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB_WE || | ||
216 | cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED_WE)) | ||
217 | append_ptr(desc, ptr); | ||
218 | } | ||
219 | |||
183 | #define APPEND_SEQ_PTR_INTLEN(cmd, op) \ | 220 | #define APPEND_SEQ_PTR_INTLEN(cmd, op) \ |
184 | static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \ | 221 | static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \ |
185 | unsigned int len, \ | 222 | unsigned int len, \ |
186 | u32 options) \ | 223 | u32 options) \ |
187 | { \ | 224 | { \ |
188 | PRINT_POS; \ | 225 | PRINT_POS; \ |
189 | append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \ | 226 | if (options & (SQIN_RTO | SQIN_PRE)) \ |
227 | append_cmd(desc, CMD_SEQ_##op##_PTR | len | options); \ | ||
228 | else \ | ||
229 | append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \ | ||
190 | } | 230 | } |
191 | APPEND_SEQ_PTR_INTLEN(in, IN) | 231 | APPEND_SEQ_PTR_INTLEN(in, IN) |
192 | APPEND_SEQ_PTR_INTLEN(out, OUT) | 232 | APPEND_SEQ_PTR_INTLEN(out, OUT) |
@@ -259,7 +299,7 @@ APPEND_CMD_RAW_IMM(load, LOAD, u32); | |||
259 | */ | 299 | */ |
260 | #define APPEND_MATH(op, desc, dest, src_0, src_1, len) \ | 300 | #define APPEND_MATH(op, desc, dest, src_0, src_1, len) \ |
261 | append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \ | 301 | append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \ |
262 | MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32) (len & MATH_LEN_MASK)); | 302 | MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32)len); |
263 | 303 | ||
264 | #define append_math_add(desc, dest, src0, src1, len) \ | 304 | #define append_math_add(desc, dest, src0, src1, len) \ |
265 | APPEND_MATH(ADD, desc, dest, src0, src1, len) | 305 | APPEND_MATH(ADD, desc, dest, src0, src1, len) |
@@ -279,6 +319,8 @@ append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \ | |||
279 | APPEND_MATH(LSHIFT, desc, dest, src0, src1, len) | 319 | APPEND_MATH(LSHIFT, desc, dest, src0, src1, len) |
280 | #define append_math_rshift(desc, dest, src0, src1, len) \ | 320 | #define append_math_rshift(desc, dest, src0, src1, len) \ |
281 | APPEND_MATH(RSHIFT, desc, dest, src0, src1, len) | 321 | APPEND_MATH(RSHIFT, desc, dest, src0, src1, len) |
322 | #define append_math_ldshift(desc, dest, src0, src1, len) \ | ||
323 | APPEND_MATH(SHLD, desc, dest, src0, src1, len) | ||
282 | 324 | ||
283 | /* Exactly one source is IMM. Data is passed in as u32 value */ | 325 | /* Exactly one source is IMM. Data is passed in as u32 value */ |
284 | #define APPEND_MATH_IMM_u32(op, desc, dest, src_0, src_1, data) \ | 326 | #define APPEND_MATH_IMM_u32(op, desc, dest, src_0, src_1, data) \ |
@@ -305,3 +347,34 @@ do { \ | |||
305 | APPEND_MATH_IMM_u32(LSHIFT, desc, dest, src0, src1, data) | 347 | APPEND_MATH_IMM_u32(LSHIFT, desc, dest, src0, src1, data) |
306 | #define append_math_rshift_imm_u32(desc, dest, src0, src1, data) \ | 348 | #define append_math_rshift_imm_u32(desc, dest, src0, src1, data) \ |
307 | APPEND_MATH_IMM_u32(RSHIFT, desc, dest, src0, src1, data) | 349 | APPEND_MATH_IMM_u32(RSHIFT, desc, dest, src0, src1, data) |
350 | |||
351 | /* Exactly one source is IMM. Data is passed in as u64 value */ | ||
352 | #define APPEND_MATH_IMM_u64(op, desc, dest, src_0, src_1, data) \ | ||
353 | do { \ | ||
354 | u32 upper = (data >> 16) >> 16; \ | ||
355 | APPEND_MATH(op, desc, dest, src_0, src_1, CAAM_CMD_SZ * 2 | \ | ||
356 | (upper ? 0 : MATH_IFB)); \ | ||
357 | if (upper) \ | ||
358 | append_u64(desc, data); \ | ||
359 | else \ | ||
360 | append_u32(desc, data); \ | ||
361 | } while (0) | ||
362 | |||
363 | #define append_math_add_imm_u64(desc, dest, src0, src1, data) \ | ||
364 | APPEND_MATH_IMM_u64(ADD, desc, dest, src0, src1, data) | ||
365 | #define append_math_sub_imm_u64(desc, dest, src0, src1, data) \ | ||
366 | APPEND_MATH_IMM_u64(SUB, desc, dest, src0, src1, data) | ||
367 | #define append_math_add_c_imm_u64(desc, dest, src0, src1, data) \ | ||
368 | APPEND_MATH_IMM_u64(ADDC, desc, dest, src0, src1, data) | ||
369 | #define append_math_sub_b_imm_u64(desc, dest, src0, src1, data) \ | ||
370 | APPEND_MATH_IMM_u64(SUBB, desc, dest, src0, src1, data) | ||
371 | #define append_math_and_imm_u64(desc, dest, src0, src1, data) \ | ||
372 | APPEND_MATH_IMM_u64(AND, desc, dest, src0, src1, data) | ||
373 | #define append_math_or_imm_u64(desc, dest, src0, src1, data) \ | ||
374 | APPEND_MATH_IMM_u64(OR, desc, dest, src0, src1, data) | ||
375 | #define append_math_xor_imm_u64(desc, dest, src0, src1, data) \ | ||
376 | APPEND_MATH_IMM_u64(XOR, desc, dest, src0, src1, data) | ||
377 | #define append_math_lshift_imm_u64(desc, dest, src0, src1, data) \ | ||
378 | APPEND_MATH_IMM_u64(LSHIFT, desc, dest, src0, src1, data) | ||
379 | #define append_math_rshift_imm_u64(desc, dest, src0, src1, data) \ | ||
380 | APPEND_MATH_IMM_u64(RSHIFT, desc, dest, src0, src1, data) | ||
diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index 62950d22ac13..3a87c0cf879a 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h | |||
@@ -44,6 +44,7 @@ | |||
44 | #define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */ | 44 | #define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */ |
45 | #define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */ | 45 | #define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */ |
46 | #define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */ | 46 | #define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */ |
47 | #define PDBOPTS_ESP_AOFL 0x04 /* adjust out frame len (decap, SEC>=5.3)*/ | ||
47 | #define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */ | 48 | #define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */ |
48 | #define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */ | 49 | #define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */ |
49 | #define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */ | 50 | #define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */ |
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index cd6fedad9935..c09142fc13e3 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h | |||
@@ -117,6 +117,43 @@ struct jr_outentry { | |||
117 | #define CHA_NUM_DECONUM_SHIFT 56 | 117 | #define CHA_NUM_DECONUM_SHIFT 56 |
118 | #define CHA_NUM_DECONUM_MASK (0xfull << CHA_NUM_DECONUM_SHIFT) | 118 | #define CHA_NUM_DECONUM_MASK (0xfull << CHA_NUM_DECONUM_SHIFT) |
119 | 119 | ||
120 | /* CHA Version IDs */ | ||
121 | #define CHA_ID_AES_SHIFT 0 | ||
122 | #define CHA_ID_AES_MASK (0xfull << CHA_ID_AES_SHIFT) | ||
123 | |||
124 | #define CHA_ID_DES_SHIFT 4 | ||
125 | #define CHA_ID_DES_MASK (0xfull << CHA_ID_DES_SHIFT) | ||
126 | |||
127 | #define CHA_ID_ARC4_SHIFT 8 | ||
128 | #define CHA_ID_ARC4_MASK (0xfull << CHA_ID_ARC4_SHIFT) | ||
129 | |||
130 | #define CHA_ID_MD_SHIFT 12 | ||
131 | #define CHA_ID_MD_MASK (0xfull << CHA_ID_MD_SHIFT) | ||
132 | |||
133 | #define CHA_ID_RNG_SHIFT 16 | ||
134 | #define CHA_ID_RNG_MASK (0xfull << CHA_ID_RNG_SHIFT) | ||
135 | |||
136 | #define CHA_ID_SNW8_SHIFT 20 | ||
137 | #define CHA_ID_SNW8_MASK (0xfull << CHA_ID_SNW8_SHIFT) | ||
138 | |||
139 | #define CHA_ID_KAS_SHIFT 24 | ||
140 | #define CHA_ID_KAS_MASK (0xfull << CHA_ID_KAS_SHIFT) | ||
141 | |||
142 | #define CHA_ID_PK_SHIFT 28 | ||
143 | #define CHA_ID_PK_MASK (0xfull << CHA_ID_PK_SHIFT) | ||
144 | |||
145 | #define CHA_ID_CRC_SHIFT 32 | ||
146 | #define CHA_ID_CRC_MASK (0xfull << CHA_ID_CRC_SHIFT) | ||
147 | |||
148 | #define CHA_ID_SNW9_SHIFT 36 | ||
149 | #define CHA_ID_SNW9_MASK (0xfull << CHA_ID_SNW9_SHIFT) | ||
150 | |||
151 | #define CHA_ID_DECO_SHIFT 56 | ||
152 | #define CHA_ID_DECO_MASK (0xfull << CHA_ID_DECO_SHIFT) | ||
153 | |||
154 | #define CHA_ID_JR_SHIFT 60 | ||
155 | #define CHA_ID_JR_MASK (0xfull << CHA_ID_JR_SHIFT) | ||
156 | |||
120 | struct sec_vid { | 157 | struct sec_vid { |
121 | u16 ip_id; | 158 | u16 ip_id; |
122 | u8 maj_rev; | 159 | u8 maj_rev; |
@@ -228,7 +265,10 @@ struct rng4tst { | |||
228 | u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */ | 265 | u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */ |
229 | u32 rtfrqcnt; /* PRGM=0: freq. count register */ | 266 | u32 rtfrqcnt; /* PRGM=0: freq. count register */ |
230 | }; | 267 | }; |
231 | u32 rsvd1[56]; | 268 | u32 rsvd1[40]; |
269 | #define RDSTA_IF0 0x00000001 | ||
270 | u32 rdsta; | ||
271 | u32 rsvd2[15]; | ||
232 | }; | 272 | }; |
233 | 273 | ||
234 | /* | 274 | /* |
diff --git a/drivers/crypto/dcp.c b/drivers/crypto/dcp.c new file mode 100644 index 000000000000..a8a7dd4b0d25 --- /dev/null +++ b/drivers/crypto/dcp.c | |||
@@ -0,0 +1,912 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * Support for DCP cryptographic accelerator. | ||
5 | * | ||
6 | * Copyright (c) 2013 | ||
7 | * Author: Tobias Rauter <tobias.rauter@gmail.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License version 2 as published | ||
11 | * by the Free Software Foundation. | ||
12 | * | ||
13 | * Based on tegra-aes.c, dcp.c (from freescale SDK) and sahara.c | ||
14 | */ | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/platform_device.h> | ||
20 | #include <linux/dma-mapping.h> | ||
21 | #include <linux/io.h> | ||
22 | #include <linux/mutex.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/completion.h> | ||
25 | #include <linux/workqueue.h> | ||
26 | #include <linux/delay.h> | ||
27 | #include <linux/crypto.h> | ||
28 | #include <linux/miscdevice.h> | ||
29 | |||
30 | #include <crypto/scatterwalk.h> | ||
31 | #include <crypto/aes.h> | ||
32 | |||
33 | |||
34 | /* IOCTL for DCP OTP Key AES - taken from Freescale's SDK*/ | ||
35 | #define DBS_IOCTL_BASE 'd' | ||
36 | #define DBS_ENC _IOW(DBS_IOCTL_BASE, 0x00, uint8_t[16]) | ||
37 | #define DBS_DEC _IOW(DBS_IOCTL_BASE, 0x01, uint8_t[16]) | ||
38 | |||
39 | /* DCP channel used for AES */ | ||
40 | #define USED_CHANNEL 1 | ||
41 | /* Ring Buffers' maximum size */ | ||
42 | #define DCP_MAX_PKG 20 | ||
43 | |||
44 | /* Control Register */ | ||
45 | #define DCP_REG_CTRL 0x000 | ||
46 | #define DCP_CTRL_SFRST (1<<31) | ||
47 | #define DCP_CTRL_CLKGATE (1<<30) | ||
48 | #define DCP_CTRL_CRYPTO_PRESENT (1<<29) | ||
49 | #define DCP_CTRL_SHA_PRESENT (1<<28) | ||
50 | #define DCP_CTRL_GATHER_RES_WRITE (1<<23) | ||
51 | #define DCP_CTRL_ENABLE_CONTEXT_CACHE (1<<22) | ||
52 | #define DCP_CTRL_ENABLE_CONTEXT_SWITCH (1<<21) | ||
53 | #define DCP_CTRL_CH_IRQ_E_0 0x01 | ||
54 | #define DCP_CTRL_CH_IRQ_E_1 0x02 | ||
55 | #define DCP_CTRL_CH_IRQ_E_2 0x04 | ||
56 | #define DCP_CTRL_CH_IRQ_E_3 0x08 | ||
57 | |||
58 | /* Status register */ | ||
59 | #define DCP_REG_STAT 0x010 | ||
60 | #define DCP_STAT_OTP_KEY_READY (1<<28) | ||
61 | #define DCP_STAT_CUR_CHANNEL(stat) ((stat>>24)&0x0F) | ||
62 | #define DCP_STAT_READY_CHANNEL(stat) ((stat>>16)&0x0F) | ||
63 | #define DCP_STAT_IRQ(stat) (stat&0x0F) | ||
64 | #define DCP_STAT_CHAN_0 (0x01) | ||
65 | #define DCP_STAT_CHAN_1 (0x02) | ||
66 | #define DCP_STAT_CHAN_2 (0x04) | ||
67 | #define DCP_STAT_CHAN_3 (0x08) | ||
68 | |||
69 | /* Channel Control Register */ | ||
70 | #define DCP_REG_CHAN_CTRL 0x020 | ||
71 | #define DCP_CHAN_CTRL_CH0_IRQ_MERGED (1<<16) | ||
72 | #define DCP_CHAN_CTRL_HIGH_PRIO_0 (0x0100) | ||
73 | #define DCP_CHAN_CTRL_HIGH_PRIO_1 (0x0200) | ||
74 | #define DCP_CHAN_CTRL_HIGH_PRIO_2 (0x0400) | ||
75 | #define DCP_CHAN_CTRL_HIGH_PRIO_3 (0x0800) | ||
76 | #define DCP_CHAN_CTRL_ENABLE_0 (0x01) | ||
77 | #define DCP_CHAN_CTRL_ENABLE_1 (0x02) | ||
78 | #define DCP_CHAN_CTRL_ENABLE_2 (0x04) | ||
79 | #define DCP_CHAN_CTRL_ENABLE_3 (0x08) | ||
80 | |||
81 | /* | ||
82 | * Channel Registers: | ||
83 | * The DCP has 4 channels. Each of this channels | ||
84 | * has 4 registers (command pointer, semaphore, status and options). | ||
85 | * The address of register REG of channel CHAN is obtained by | ||
86 | * dcp_chan_reg(REG, CHAN) | ||
87 | */ | ||
88 | #define DCP_REG_CHAN_PTR 0x00000100 | ||
89 | #define DCP_REG_CHAN_SEMA 0x00000110 | ||
90 | #define DCP_REG_CHAN_STAT 0x00000120 | ||
91 | #define DCP_REG_CHAN_OPT 0x00000130 | ||
92 | |||
93 | #define DCP_CHAN_STAT_NEXT_CHAIN_IS_0 0x010000 | ||
94 | #define DCP_CHAN_STAT_NO_CHAIN 0x020000 | ||
95 | #define DCP_CHAN_STAT_CONTEXT_ERROR 0x030000 | ||
96 | #define DCP_CHAN_STAT_PAYLOAD_ERROR 0x040000 | ||
97 | #define DCP_CHAN_STAT_INVALID_MODE 0x050000 | ||
98 | #define DCP_CHAN_STAT_PAGEFAULT 0x40 | ||
99 | #define DCP_CHAN_STAT_DST 0x20 | ||
100 | #define DCP_CHAN_STAT_SRC 0x10 | ||
101 | #define DCP_CHAN_STAT_PACKET 0x08 | ||
102 | #define DCP_CHAN_STAT_SETUP 0x04 | ||
103 | #define DCP_CHAN_STAT_MISMATCH 0x02 | ||
104 | |||
105 | /* hw packet control*/ | ||
106 | |||
107 | #define DCP_PKT_PAYLOAD_KEY (1<<11) | ||
108 | #define DCP_PKT_OTP_KEY (1<<10) | ||
109 | #define DCP_PKT_CIPHER_INIT (1<<9) | ||
110 | #define DCP_PKG_CIPHER_ENCRYPT (1<<8) | ||
111 | #define DCP_PKT_CIPHER_ENABLE (1<<5) | ||
112 | #define DCP_PKT_DECR_SEM (1<<1) | ||
113 | #define DCP_PKT_CHAIN (1<<2) | ||
114 | #define DCP_PKT_IRQ 1 | ||
115 | |||
116 | #define DCP_PKT_MODE_CBC (1<<4) | ||
117 | #define DCP_PKT_KEYSELECT_OTP (0xFF<<8) | ||
118 | |||
119 | /* cipher flags */ | ||
120 | #define DCP_ENC 0x0001 | ||
121 | #define DCP_DEC 0x0002 | ||
122 | #define DCP_ECB 0x0004 | ||
123 | #define DCP_CBC 0x0008 | ||
124 | #define DCP_CBC_INIT 0x0010 | ||
125 | #define DCP_NEW_KEY 0x0040 | ||
126 | #define DCP_OTP_KEY 0x0080 | ||
127 | #define DCP_AES 0x1000 | ||
128 | |||
129 | /* DCP Flags */ | ||
130 | #define DCP_FLAG_BUSY 0x01 | ||
131 | #define DCP_FLAG_PRODUCING 0x02 | ||
132 | |||
133 | /* clock defines */ | ||
134 | #define CLOCK_ON 1 | ||
135 | #define CLOCK_OFF 0 | ||
136 | |||
137 | struct dcp_dev_req_ctx { | ||
138 | int mode; | ||
139 | }; | ||
140 | |||
141 | struct dcp_op { | ||
142 | unsigned int flags; | ||
143 | u8 key[AES_KEYSIZE_128]; | ||
144 | int keylen; | ||
145 | |||
146 | struct ablkcipher_request *req; | ||
147 | struct crypto_ablkcipher *fallback; | ||
148 | |||
149 | uint32_t stat; | ||
150 | uint32_t pkt1; | ||
151 | uint32_t pkt2; | ||
152 | struct ablkcipher_walk walk; | ||
153 | }; | ||
154 | |||
155 | struct dcp_dev { | ||
156 | struct device *dev; | ||
157 | void __iomem *dcp_regs_base; | ||
158 | |||
159 | int dcp_vmi_irq; | ||
160 | int dcp_irq; | ||
161 | |||
162 | spinlock_t queue_lock; | ||
163 | struct crypto_queue queue; | ||
164 | |||
165 | uint32_t pkt_produced; | ||
166 | uint32_t pkt_consumed; | ||
167 | |||
168 | struct dcp_hw_packet *hw_pkg[DCP_MAX_PKG]; | ||
169 | dma_addr_t hw_phys_pkg; | ||
170 | |||
171 | /* [KEY][IV] Both with 16 Bytes */ | ||
172 | u8 *payload_base; | ||
173 | dma_addr_t payload_base_dma; | ||
174 | |||
175 | |||
176 | struct tasklet_struct done_task; | ||
177 | struct tasklet_struct queue_task; | ||
178 | struct timer_list watchdog; | ||
179 | |||
180 | unsigned long flags; | ||
181 | |||
182 | struct dcp_op *ctx; | ||
183 | |||
184 | struct miscdevice dcp_bootstream_misc; | ||
185 | }; | ||
186 | |||
187 | struct dcp_hw_packet { | ||
188 | uint32_t next; | ||
189 | uint32_t pkt1; | ||
190 | uint32_t pkt2; | ||
191 | uint32_t src; | ||
192 | uint32_t dst; | ||
193 | uint32_t size; | ||
194 | uint32_t payload; | ||
195 | uint32_t stat; | ||
196 | }; | ||
197 | |||
198 | static struct dcp_dev *global_dev; | ||
199 | |||
200 | static inline u32 dcp_chan_reg(u32 reg, int chan) | ||
201 | { | ||
202 | return reg + (chan) * 0x40; | ||
203 | } | ||
204 | |||
205 | static inline void dcp_write(struct dcp_dev *dev, u32 data, u32 reg) | ||
206 | { | ||
207 | writel(data, dev->dcp_regs_base + reg); | ||
208 | } | ||
209 | |||
210 | static inline void dcp_set(struct dcp_dev *dev, u32 data, u32 reg) | ||
211 | { | ||
212 | writel(data, dev->dcp_regs_base + (reg | 0x04)); | ||
213 | } | ||
214 | |||
215 | static inline void dcp_clear(struct dcp_dev *dev, u32 data, u32 reg) | ||
216 | { | ||
217 | writel(data, dev->dcp_regs_base + (reg | 0x08)); | ||
218 | } | ||
219 | |||
220 | static inline void dcp_toggle(struct dcp_dev *dev, u32 data, u32 reg) | ||
221 | { | ||
222 | writel(data, dev->dcp_regs_base + (reg | 0x0C)); | ||
223 | } | ||
224 | |||
225 | static inline unsigned int dcp_read(struct dcp_dev *dev, u32 reg) | ||
226 | { | ||
227 | return readl(dev->dcp_regs_base + reg); | ||
228 | } | ||
229 | |||
230 | static void dcp_dma_unmap(struct dcp_dev *dev, struct dcp_hw_packet *pkt) | ||
231 | { | ||
232 | dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE); | ||
233 | dma_unmap_page(dev->dev, pkt->dst, pkt->size, DMA_FROM_DEVICE); | ||
234 | dev_dbg(dev->dev, "unmap packet %x", (unsigned int) pkt); | ||
235 | } | ||
236 | |||
237 | static int dcp_dma_map(struct dcp_dev *dev, | ||
238 | struct ablkcipher_walk *walk, struct dcp_hw_packet *pkt) | ||
239 | { | ||
240 | dev_dbg(dev->dev, "map packet %x", (unsigned int) pkt); | ||
241 | /* align to length = 16 */ | ||
242 | pkt->size = walk->nbytes - (walk->nbytes % 16); | ||
243 | |||
244 | pkt->src = dma_map_page(dev->dev, walk->src.page, walk->src.offset, | ||
245 | pkt->size, DMA_TO_DEVICE); | ||
246 | |||
247 | if (pkt->src == 0) { | ||
248 | dev_err(dev->dev, "Unable to map src"); | ||
249 | return -ENOMEM; | ||
250 | } | ||
251 | |||
252 | pkt->dst = dma_map_page(dev->dev, walk->dst.page, walk->dst.offset, | ||
253 | pkt->size, DMA_FROM_DEVICE); | ||
254 | |||
255 | if (pkt->dst == 0) { | ||
256 | dev_err(dev->dev, "Unable to map dst"); | ||
257 | dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE); | ||
258 | return -ENOMEM; | ||
259 | } | ||
260 | |||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static void dcp_op_one(struct dcp_dev *dev, struct dcp_hw_packet *pkt, | ||
265 | uint8_t last) | ||
266 | { | ||
267 | struct dcp_op *ctx = dev->ctx; | ||
268 | pkt->pkt1 = ctx->pkt1; | ||
269 | pkt->pkt2 = ctx->pkt2; | ||
270 | |||
271 | pkt->payload = (u32) dev->payload_base_dma; | ||
272 | pkt->stat = 0; | ||
273 | |||
274 | if (ctx->flags & DCP_CBC_INIT) { | ||
275 | pkt->pkt1 |= DCP_PKT_CIPHER_INIT; | ||
276 | ctx->flags &= ~DCP_CBC_INIT; | ||
277 | } | ||
278 | |||
279 | mod_timer(&dev->watchdog, jiffies + msecs_to_jiffies(500)); | ||
280 | pkt->pkt1 |= DCP_PKT_IRQ; | ||
281 | if (!last) | ||
282 | pkt->pkt1 |= DCP_PKT_CHAIN; | ||
283 | |||
284 | dev->pkt_produced++; | ||
285 | |||
286 | dcp_write(dev, 1, | ||
287 | dcp_chan_reg(DCP_REG_CHAN_SEMA, USED_CHANNEL)); | ||
288 | } | ||
289 | |||
290 | static void dcp_op_proceed(struct dcp_dev *dev) | ||
291 | { | ||
292 | struct dcp_op *ctx = dev->ctx; | ||
293 | struct dcp_hw_packet *pkt; | ||
294 | |||
295 | while (ctx->walk.nbytes) { | ||
296 | int err = 0; | ||
297 | |||
298 | pkt = dev->hw_pkg[dev->pkt_produced % DCP_MAX_PKG]; | ||
299 | err = dcp_dma_map(dev, &ctx->walk, pkt); | ||
300 | if (err) { | ||
301 | dev->ctx->stat |= err; | ||
302 | /* start timer to wait for already set up calls */ | ||
303 | mod_timer(&dev->watchdog, | ||
304 | jiffies + msecs_to_jiffies(500)); | ||
305 | break; | ||
306 | } | ||
307 | |||
308 | |||
309 | err = ctx->walk.nbytes - pkt->size; | ||
310 | ablkcipher_walk_done(dev->ctx->req, &dev->ctx->walk, err); | ||
311 | |||
312 | dcp_op_one(dev, pkt, ctx->walk.nbytes == 0); | ||
313 | /* we have to wait if no space is left in buffer */ | ||
314 | if (dev->pkt_produced - dev->pkt_consumed == DCP_MAX_PKG) | ||
315 | break; | ||
316 | } | ||
317 | clear_bit(DCP_FLAG_PRODUCING, &dev->flags); | ||
318 | } | ||
319 | |||
320 | static void dcp_op_start(struct dcp_dev *dev, uint8_t use_walk) | ||
321 | { | ||
322 | struct dcp_op *ctx = dev->ctx; | ||
323 | |||
324 | if (ctx->flags & DCP_NEW_KEY) { | ||
325 | memcpy(dev->payload_base, ctx->key, ctx->keylen); | ||
326 | ctx->flags &= ~DCP_NEW_KEY; | ||
327 | } | ||
328 | |||
329 | ctx->pkt1 = 0; | ||
330 | ctx->pkt1 |= DCP_PKT_CIPHER_ENABLE; | ||
331 | ctx->pkt1 |= DCP_PKT_DECR_SEM; | ||
332 | |||
333 | if (ctx->flags & DCP_OTP_KEY) | ||
334 | ctx->pkt1 |= DCP_PKT_OTP_KEY; | ||
335 | else | ||
336 | ctx->pkt1 |= DCP_PKT_PAYLOAD_KEY; | ||
337 | |||
338 | if (ctx->flags & DCP_ENC) | ||
339 | ctx->pkt1 |= DCP_PKG_CIPHER_ENCRYPT; | ||
340 | |||
341 | ctx->pkt2 = 0; | ||
342 | if (ctx->flags & DCP_CBC) | ||
343 | ctx->pkt2 |= DCP_PKT_MODE_CBC; | ||
344 | |||
345 | dev->pkt_produced = 0; | ||
346 | dev->pkt_consumed = 0; | ||
347 | |||
348 | ctx->stat = 0; | ||
349 | dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL)); | ||
350 | dcp_write(dev, (u32) dev->hw_phys_pkg, | ||
351 | dcp_chan_reg(DCP_REG_CHAN_PTR, USED_CHANNEL)); | ||
352 | |||
353 | set_bit(DCP_FLAG_PRODUCING, &dev->flags); | ||
354 | |||
355 | if (use_walk) { | ||
356 | ablkcipher_walk_init(&ctx->walk, ctx->req->dst, | ||
357 | ctx->req->src, ctx->req->nbytes); | ||
358 | ablkcipher_walk_phys(ctx->req, &ctx->walk); | ||
359 | dcp_op_proceed(dev); | ||
360 | } else { | ||
361 | dcp_op_one(dev, dev->hw_pkg[0], 1); | ||
362 | clear_bit(DCP_FLAG_PRODUCING, &dev->flags); | ||
363 | } | ||
364 | } | ||
365 | |||
366 | static void dcp_done_task(unsigned long data) | ||
367 | { | ||
368 | struct dcp_dev *dev = (struct dcp_dev *)data; | ||
369 | struct dcp_hw_packet *last_packet; | ||
370 | int fin; | ||
371 | fin = 0; | ||
372 | |||
373 | for (last_packet = dev->hw_pkg[(dev->pkt_consumed) % DCP_MAX_PKG]; | ||
374 | last_packet->stat == 1; | ||
375 | last_packet = | ||
376 | dev->hw_pkg[++(dev->pkt_consumed) % DCP_MAX_PKG]) { | ||
377 | |||
378 | dcp_dma_unmap(dev, last_packet); | ||
379 | last_packet->stat = 0; | ||
380 | fin++; | ||
381 | } | ||
382 | /* the last call of this function already consumed this IRQ's packet */ | ||
383 | if (fin == 0) | ||
384 | return; | ||
385 | |||
386 | dev_dbg(dev->dev, | ||
387 | "Packet(s) done with status %x; finished: %d, produced:%d, complete consumed: %d", | ||
388 | dev->ctx->stat, fin, dev->pkt_produced, dev->pkt_consumed); | ||
389 | |||
390 | last_packet = dev->hw_pkg[(dev->pkt_consumed - 1) % DCP_MAX_PKG]; | ||
391 | if (!dev->ctx->stat && last_packet->pkt1 & DCP_PKT_CHAIN) { | ||
392 | if (!test_and_set_bit(DCP_FLAG_PRODUCING, &dev->flags)) | ||
393 | dcp_op_proceed(dev); | ||
394 | return; | ||
395 | } | ||
396 | |||
397 | while (unlikely(dev->pkt_consumed < dev->pkt_produced)) { | ||
398 | dcp_dma_unmap(dev, | ||
399 | dev->hw_pkg[dev->pkt_consumed++ % DCP_MAX_PKG]); | ||
400 | } | ||
401 | |||
402 | if (dev->ctx->flags & DCP_OTP_KEY) { | ||
403 | /* we used the miscdevice, no walk to finish */ | ||
404 | clear_bit(DCP_FLAG_BUSY, &dev->flags); | ||
405 | return; | ||
406 | } | ||
407 | |||
408 | ablkcipher_walk_complete(&dev->ctx->walk); | ||
409 | dev->ctx->req->base.complete(&dev->ctx->req->base, | ||
410 | dev->ctx->stat); | ||
411 | dev->ctx->req = NULL; | ||
412 | /* in case there are other requests in the queue */ | ||
413 | tasklet_schedule(&dev->queue_task); | ||
414 | } | ||
415 | |||
416 | static void dcp_watchdog(unsigned long data) | ||
417 | { | ||
418 | struct dcp_dev *dev = (struct dcp_dev *)data; | ||
419 | dev->ctx->stat |= dcp_read(dev, | ||
420 | dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL)); | ||
421 | |||
422 | dev_err(dev->dev, "Timeout, Channel status: %x", dev->ctx->stat); | ||
423 | |||
424 | if (!dev->ctx->stat) | ||
425 | dev->ctx->stat = -ETIMEDOUT; | ||
426 | |||
427 | dcp_done_task(data); | ||
428 | } | ||
429 | |||
430 | |||
431 | static irqreturn_t dcp_common_irq(int irq, void *context) | ||
432 | { | ||
433 | u32 msk; | ||
434 | struct dcp_dev *dev = (struct dcp_dev *) context; | ||
435 | |||
436 | del_timer(&dev->watchdog); | ||
437 | |||
438 | msk = DCP_STAT_IRQ(dcp_read(dev, DCP_REG_STAT)); | ||
439 | dcp_clear(dev, msk, DCP_REG_STAT); | ||
440 | if (msk == 0) | ||
441 | return IRQ_NONE; | ||
442 | |||
443 | dev->ctx->stat |= dcp_read(dev, | ||
444 | dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL)); | ||
445 | |||
446 | if (msk & DCP_STAT_CHAN_1) | ||
447 | tasklet_schedule(&dev->done_task); | ||
448 | |||
449 | return IRQ_HANDLED; | ||
450 | } | ||
451 | |||
452 | static irqreturn_t dcp_vmi_irq(int irq, void *context) | ||
453 | { | ||
454 | return dcp_common_irq(irq, context); | ||
455 | } | ||
456 | |||
457 | static irqreturn_t dcp_irq(int irq, void *context) | ||
458 | { | ||
459 | return dcp_common_irq(irq, context); | ||
460 | } | ||
461 | |||
462 | static void dcp_crypt(struct dcp_dev *dev, struct dcp_op *ctx) | ||
463 | { | ||
464 | dev->ctx = ctx; | ||
465 | |||
466 | if ((ctx->flags & DCP_CBC) && ctx->req->info) { | ||
467 | ctx->flags |= DCP_CBC_INIT; | ||
468 | memcpy(dev->payload_base + AES_KEYSIZE_128, | ||
469 | ctx->req->info, AES_KEYSIZE_128); | ||
470 | } | ||
471 | |||
472 | dcp_op_start(dev, 1); | ||
473 | } | ||
474 | |||
475 | static void dcp_queue_task(unsigned long data) | ||
476 | { | ||
477 | struct dcp_dev *dev = (struct dcp_dev *) data; | ||
478 | struct crypto_async_request *async_req, *backlog; | ||
479 | struct crypto_ablkcipher *tfm; | ||
480 | struct dcp_op *ctx; | ||
481 | struct dcp_dev_req_ctx *rctx; | ||
482 | struct ablkcipher_request *req; | ||
483 | unsigned long flags; | ||
484 | |||
485 | spin_lock_irqsave(&dev->queue_lock, flags); | ||
486 | |||
487 | backlog = crypto_get_backlog(&dev->queue); | ||
488 | async_req = crypto_dequeue_request(&dev->queue); | ||
489 | |||
490 | spin_unlock_irqrestore(&dev->queue_lock, flags); | ||
491 | |||
492 | if (!async_req) | ||
493 | goto ret_nothing_done; | ||
494 | |||
495 | if (backlog) | ||
496 | backlog->complete(backlog, -EINPROGRESS); | ||
497 | |||
498 | req = ablkcipher_request_cast(async_req); | ||
499 | tfm = crypto_ablkcipher_reqtfm(req); | ||
500 | rctx = ablkcipher_request_ctx(req); | ||
501 | ctx = crypto_ablkcipher_ctx(tfm); | ||
502 | |||
503 | if (!req->src || !req->dst) | ||
504 | goto ret_nothing_done; | ||
505 | |||
506 | ctx->flags |= rctx->mode; | ||
507 | ctx->req = req; | ||
508 | |||
509 | dcp_crypt(dev, ctx); | ||
510 | |||
511 | return; | ||
512 | |||
513 | ret_nothing_done: | ||
514 | clear_bit(DCP_FLAG_BUSY, &dev->flags); | ||
515 | } | ||
516 | |||
517 | |||
518 | static int dcp_cra_init(struct crypto_tfm *tfm) | ||
519 | { | ||
520 | const char *name = tfm->__crt_alg->cra_name; | ||
521 | struct dcp_op *ctx = crypto_tfm_ctx(tfm); | ||
522 | |||
523 | tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_dev_req_ctx); | ||
524 | |||
525 | ctx->fallback = crypto_alloc_ablkcipher(name, 0, | ||
526 | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); | ||
527 | |||
528 | if (IS_ERR(ctx->fallback)) { | ||
529 | dev_err(global_dev->dev, "Error allocating fallback algo %s\n", | ||
530 | name); | ||
531 | return PTR_ERR(ctx->fallback); | ||
532 | } | ||
533 | |||
534 | return 0; | ||
535 | } | ||
536 | |||
537 | static void dcp_cra_exit(struct crypto_tfm *tfm) | ||
538 | { | ||
539 | struct dcp_op *ctx = crypto_tfm_ctx(tfm); | ||
540 | |||
541 | if (ctx->fallback) | ||
542 | crypto_free_ablkcipher(ctx->fallback); | ||
543 | |||
544 | ctx->fallback = NULL; | ||
545 | } | ||
546 | |||
547 | /* async interface */ | ||
548 | static int dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, | ||
549 | unsigned int len) | ||
550 | { | ||
551 | struct dcp_op *ctx = crypto_ablkcipher_ctx(tfm); | ||
552 | unsigned int ret = 0; | ||
553 | ctx->keylen = len; | ||
554 | ctx->flags = 0; | ||
555 | if (len == AES_KEYSIZE_128) { | ||
556 | if (memcmp(ctx->key, key, AES_KEYSIZE_128)) { | ||
557 | memcpy(ctx->key, key, len); | ||
558 | ctx->flags |= DCP_NEW_KEY; | ||
559 | } | ||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; | ||
564 | ctx->fallback->base.crt_flags |= | ||
565 | (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); | ||
566 | |||
567 | ret = crypto_ablkcipher_setkey(ctx->fallback, key, len); | ||
568 | if (ret) { | ||
569 | struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm); | ||
570 | |||
571 | tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK; | ||
572 | tfm_aux->crt_flags |= | ||
573 | (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK); | ||
574 | } | ||
575 | return ret; | ||
576 | } | ||
577 | |||
578 | static int dcp_aes_cbc_crypt(struct ablkcipher_request *req, int mode) | ||
579 | { | ||
580 | struct dcp_dev_req_ctx *rctx = ablkcipher_request_ctx(req); | ||
581 | struct dcp_dev *dev = global_dev; | ||
582 | unsigned long flags; | ||
583 | int err = 0; | ||
584 | |||
585 | if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) | ||
586 | return -EINVAL; | ||
587 | |||
588 | rctx->mode = mode; | ||
589 | |||
590 | spin_lock_irqsave(&dev->queue_lock, flags); | ||
591 | err = ablkcipher_enqueue_request(&dev->queue, req); | ||
592 | spin_unlock_irqrestore(&dev->queue_lock, flags); | ||
593 | |||
594 | flags = test_and_set_bit(DCP_FLAG_BUSY, &dev->flags); | ||
595 | |||
596 | if (!(flags & DCP_FLAG_BUSY)) | ||
597 | tasklet_schedule(&dev->queue_task); | ||
598 | |||
599 | return err; | ||
600 | } | ||
601 | |||
602 | static int dcp_aes_cbc_encrypt(struct ablkcipher_request *req) | ||
603 | { | ||
604 | struct crypto_tfm *tfm = | ||
605 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
606 | struct dcp_op *ctx = crypto_ablkcipher_ctx( | ||
607 | crypto_ablkcipher_reqtfm(req)); | ||
608 | |||
609 | if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { | ||
610 | int err = 0; | ||
611 | ablkcipher_request_set_tfm(req, ctx->fallback); | ||
612 | err = crypto_ablkcipher_encrypt(req); | ||
613 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
614 | return err; | ||
615 | } | ||
616 | |||
617 | return dcp_aes_cbc_crypt(req, DCP_AES | DCP_ENC | DCP_CBC); | ||
618 | } | ||
619 | |||
620 | static int dcp_aes_cbc_decrypt(struct ablkcipher_request *req) | ||
621 | { | ||
622 | struct crypto_tfm *tfm = | ||
623 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
624 | struct dcp_op *ctx = crypto_ablkcipher_ctx( | ||
625 | crypto_ablkcipher_reqtfm(req)); | ||
626 | |||
627 | if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { | ||
628 | int err = 0; | ||
629 | ablkcipher_request_set_tfm(req, ctx->fallback); | ||
630 | err = crypto_ablkcipher_decrypt(req); | ||
631 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
632 | return err; | ||
633 | } | ||
634 | return dcp_aes_cbc_crypt(req, DCP_AES | DCP_DEC | DCP_CBC); | ||
635 | } | ||
636 | |||
637 | static struct crypto_alg algs[] = { | ||
638 | { | ||
639 | .cra_name = "cbc(aes)", | ||
640 | .cra_driver_name = "dcp-cbc-aes", | ||
641 | .cra_alignmask = 3, | ||
642 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC | | ||
643 | CRYPTO_ALG_NEED_FALLBACK, | ||
644 | .cra_blocksize = AES_KEYSIZE_128, | ||
645 | .cra_type = &crypto_ablkcipher_type, | ||
646 | .cra_priority = 300, | ||
647 | .cra_u.ablkcipher = { | ||
648 | .min_keysize = AES_KEYSIZE_128, | ||
649 | .max_keysize = AES_KEYSIZE_128, | ||
650 | .setkey = dcp_aes_setkey, | ||
651 | .encrypt = dcp_aes_cbc_encrypt, | ||
652 | .decrypt = dcp_aes_cbc_decrypt, | ||
653 | .ivsize = AES_KEYSIZE_128, | ||
654 | } | ||
655 | |||
656 | }, | ||
657 | }; | ||
658 | |||
659 | /* DCP bootstream verification interface: uses OTP key for crypto */ | ||
660 | static int dcp_bootstream_open(struct inode *inode, struct file *file) | ||
661 | { | ||
662 | file->private_data = container_of((file->private_data), | ||
663 | struct dcp_dev, dcp_bootstream_misc); | ||
664 | return 0; | ||
665 | } | ||
666 | |||
667 | static long dcp_bootstream_ioctl(struct file *file, | ||
668 | unsigned int cmd, unsigned long arg) | ||
669 | { | ||
670 | struct dcp_dev *dev = (struct dcp_dev *) file->private_data; | ||
671 | void __user *argp = (void __user *)arg; | ||
672 | int ret; | ||
673 | |||
674 | if (dev == NULL) | ||
675 | return -EBADF; | ||
676 | |||
677 | if (cmd != DBS_ENC && cmd != DBS_DEC) | ||
678 | return -EINVAL; | ||
679 | |||
680 | if (copy_from_user(dev->payload_base, argp, 16)) | ||
681 | return -EFAULT; | ||
682 | |||
683 | if (test_and_set_bit(DCP_FLAG_BUSY, &dev->flags)) | ||
684 | return -EAGAIN; | ||
685 | |||
686 | dev->ctx = kzalloc(sizeof(struct dcp_op), GFP_KERNEL); | ||
687 | if (!dev->ctx) { | ||
688 | dev_err(dev->dev, | ||
689 | "cannot allocate context for OTP crypto"); | ||
690 | clear_bit(DCP_FLAG_BUSY, &dev->flags); | ||
691 | return -ENOMEM; | ||
692 | } | ||
693 | |||
694 | dev->ctx->flags = DCP_AES | DCP_ECB | DCP_OTP_KEY | DCP_CBC_INIT; | ||
695 | dev->ctx->flags |= (cmd == DBS_ENC) ? DCP_ENC : DCP_DEC; | ||
696 | dev->hw_pkg[0]->src = dev->payload_base_dma; | ||
697 | dev->hw_pkg[0]->dst = dev->payload_base_dma; | ||
698 | dev->hw_pkg[0]->size = 16; | ||
699 | |||
700 | dcp_op_start(dev, 0); | ||
701 | |||
702 | while (test_bit(DCP_FLAG_BUSY, &dev->flags)) | ||
703 | cpu_relax(); | ||
704 | |||
705 | ret = dev->ctx->stat; | ||
706 | if (!ret && copy_to_user(argp, dev->payload_base, 16)) | ||
707 | ret = -EFAULT; | ||
708 | |||
709 | kfree(dev->ctx); | ||
710 | |||
711 | return ret; | ||
712 | } | ||
713 | |||
714 | static const struct file_operations dcp_bootstream_fops = { | ||
715 | .owner = THIS_MODULE, | ||
716 | .unlocked_ioctl = dcp_bootstream_ioctl, | ||
717 | .open = dcp_bootstream_open, | ||
718 | }; | ||
719 | |||
720 | static int dcp_probe(struct platform_device *pdev) | ||
721 | { | ||
722 | struct dcp_dev *dev = NULL; | ||
723 | struct resource *r; | ||
724 | int i, ret, j; | ||
725 | |||
726 | dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); | ||
727 | if (!dev) | ||
728 | return -ENOMEM; | ||
729 | |||
730 | global_dev = dev; | ||
731 | dev->dev = &pdev->dev; | ||
732 | |||
733 | platform_set_drvdata(pdev, dev); | ||
734 | |||
735 | r = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
736 | if (!r) { | ||
737 | dev_err(&pdev->dev, "failed to get IORESOURCE_MEM\n"); | ||
738 | return -ENXIO; | ||
739 | } | ||
740 | dev->dcp_regs_base = devm_ioremap(&pdev->dev, r->start, | ||
741 | resource_size(r)); | ||
742 | |||
743 | dcp_set(dev, DCP_CTRL_SFRST, DCP_REG_CTRL); | ||
744 | udelay(10); | ||
745 | dcp_clear(dev, DCP_CTRL_SFRST | DCP_CTRL_CLKGATE, DCP_REG_CTRL); | ||
746 | |||
747 | dcp_write(dev, DCP_CTRL_GATHER_RES_WRITE | | ||
748 | DCP_CTRL_ENABLE_CONTEXT_CACHE | DCP_CTRL_CH_IRQ_E_1, | ||
749 | DCP_REG_CTRL); | ||
750 | |||
751 | dcp_write(dev, DCP_CHAN_CTRL_ENABLE_1, DCP_REG_CHAN_CTRL); | ||
752 | |||
753 | for (i = 0; i < 4; i++) | ||
754 | dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, i)); | ||
755 | |||
756 | dcp_clear(dev, -1, DCP_REG_STAT); | ||
757 | |||
758 | |||
759 | r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); | ||
760 | if (!r) { | ||
761 | dev_err(&pdev->dev, "can't get IRQ resource (0)\n"); | ||
762 | return -EIO; | ||
763 | } | ||
764 | dev->dcp_vmi_irq = r->start; | ||
765 | ret = request_irq(dev->dcp_vmi_irq, dcp_vmi_irq, 0, "dcp", dev); | ||
766 | if (ret != 0) { | ||
767 | dev_err(&pdev->dev, "can't request_irq (0)\n"); | ||
768 | return -EIO; | ||
769 | } | ||
770 | |||
771 | r = platform_get_resource(pdev, IORESOURCE_IRQ, 1); | ||
772 | if (!r) { | ||
773 | dev_err(&pdev->dev, "can't get IRQ resource (1)\n"); | ||
774 | ret = -EIO; | ||
775 | goto err_free_irq0; | ||
776 | } | ||
777 | dev->dcp_irq = r->start; | ||
778 | ret = request_irq(dev->dcp_irq, dcp_irq, 0, "dcp", dev); | ||
779 | if (ret != 0) { | ||
780 | dev_err(&pdev->dev, "can't request_irq (1)\n"); | ||
781 | ret = -EIO; | ||
782 | goto err_free_irq0; | ||
783 | } | ||
784 | |||
785 | dev->hw_pkg[0] = dma_alloc_coherent(&pdev->dev, | ||
786 | DCP_MAX_PKG * sizeof(struct dcp_hw_packet), | ||
787 | &dev->hw_phys_pkg, | ||
788 | GFP_KERNEL); | ||
789 | if (!dev->hw_pkg[0]) { | ||
790 | dev_err(&pdev->dev, "Could not allocate hw descriptors\n"); | ||
791 | ret = -ENOMEM; | ||
792 | goto err_free_irq1; | ||
793 | } | ||
794 | |||
795 | for (i = 1; i < DCP_MAX_PKG; i++) { | ||
796 | dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg | ||
797 | + i * sizeof(struct dcp_hw_packet); | ||
798 | dev->hw_pkg[i] = dev->hw_pkg[i - 1] + 1; | ||
799 | } | ||
800 | dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg; | ||
801 | |||
802 | |||
803 | dev->payload_base = dma_alloc_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, | ||
804 | &dev->payload_base_dma, GFP_KERNEL); | ||
805 | if (!dev->payload_base) { | ||
806 | dev_err(&pdev->dev, "Could not allocate memory for key\n"); | ||
807 | ret = -ENOMEM; | ||
808 | goto err_free_hw_packet; | ||
809 | } | ||
810 | tasklet_init(&dev->queue_task, dcp_queue_task, | ||
811 | (unsigned long) dev); | ||
812 | tasklet_init(&dev->done_task, dcp_done_task, | ||
813 | (unsigned long) dev); | ||
814 | spin_lock_init(&dev->queue_lock); | ||
815 | |||
816 | crypto_init_queue(&dev->queue, 10); | ||
817 | |||
818 | init_timer(&dev->watchdog); | ||
819 | dev->watchdog.function = &dcp_watchdog; | ||
820 | dev->watchdog.data = (unsigned long)dev; | ||
821 | |||
822 | dev->dcp_bootstream_misc.minor = MISC_DYNAMIC_MINOR, | ||
823 | dev->dcp_bootstream_misc.name = "dcpboot", | ||
824 | dev->dcp_bootstream_misc.fops = &dcp_bootstream_fops, | ||
825 | ret = misc_register(&dev->dcp_bootstream_misc); | ||
826 | if (ret != 0) { | ||
827 | dev_err(dev->dev, "Unable to register misc device\n"); | ||
828 | goto err_free_key_iv; | ||
829 | } | ||
830 | |||
831 | for (i = 0; i < ARRAY_SIZE(algs); i++) { | ||
832 | algs[i].cra_priority = 300; | ||
833 | algs[i].cra_ctxsize = sizeof(struct dcp_op); | ||
834 | algs[i].cra_module = THIS_MODULE; | ||
835 | algs[i].cra_init = dcp_cra_init; | ||
836 | algs[i].cra_exit = dcp_cra_exit; | ||
837 | if (crypto_register_alg(&algs[i])) { | ||
838 | dev_err(&pdev->dev, "register algorithm failed\n"); | ||
839 | ret = -ENOMEM; | ||
840 | goto err_unregister; | ||
841 | } | ||
842 | } | ||
843 | dev_notice(&pdev->dev, "DCP crypto enabled.!\n"); | ||
844 | |||
845 | return 0; | ||
846 | |||
847 | err_unregister: | ||
848 | for (j = 0; j < i; j++) | ||
849 | crypto_unregister_alg(&algs[j]); | ||
850 | err_free_key_iv: | ||
851 | dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base, | ||
852 | dev->payload_base_dma); | ||
853 | err_free_hw_packet: | ||
854 | dma_free_coherent(&pdev->dev, DCP_MAX_PKG * | ||
855 | sizeof(struct dcp_hw_packet), dev->hw_pkg[0], | ||
856 | dev->hw_phys_pkg); | ||
857 | err_free_irq1: | ||
858 | free_irq(dev->dcp_irq, dev); | ||
859 | err_free_irq0: | ||
860 | free_irq(dev->dcp_vmi_irq, dev); | ||
861 | |||
862 | return ret; | ||
863 | } | ||
864 | |||
865 | static int dcp_remove(struct platform_device *pdev) | ||
866 | { | ||
867 | struct dcp_dev *dev; | ||
868 | int j; | ||
869 | dev = platform_get_drvdata(pdev); | ||
870 | |||
871 | dma_free_coherent(&pdev->dev, | ||
872 | DCP_MAX_PKG * sizeof(struct dcp_hw_packet), | ||
873 | dev->hw_pkg[0], dev->hw_phys_pkg); | ||
874 | |||
875 | dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base, | ||
876 | dev->payload_base_dma); | ||
877 | |||
878 | free_irq(dev->dcp_irq, dev); | ||
879 | free_irq(dev->dcp_vmi_irq, dev); | ||
880 | |||
881 | tasklet_kill(&dev->done_task); | ||
882 | tasklet_kill(&dev->queue_task); | ||
883 | |||
884 | for (j = 0; j < ARRAY_SIZE(algs); j++) | ||
885 | crypto_unregister_alg(&algs[j]); | ||
886 | |||
887 | misc_deregister(&dev->dcp_bootstream_misc); | ||
888 | |||
889 | return 0; | ||
890 | } | ||
891 | |||
892 | static struct of_device_id fs_dcp_of_match[] = { | ||
893 | { .compatible = "fsl-dcp"}, | ||
894 | {}, | ||
895 | }; | ||
896 | |||
897 | static struct platform_driver fs_dcp_driver = { | ||
898 | .probe = dcp_probe, | ||
899 | .remove = dcp_remove, | ||
900 | .driver = { | ||
901 | .name = "fsl-dcp", | ||
902 | .owner = THIS_MODULE, | ||
903 | .of_match_table = fs_dcp_of_match | ||
904 | } | ||
905 | }; | ||
906 | |||
907 | module_platform_driver(fs_dcp_driver); | ||
908 | |||
909 | |||
910 | MODULE_AUTHOR("Tobias Rauter <tobias.rauter@gmail.com>"); | ||
911 | MODULE_DESCRIPTION("Freescale DCP Crypto Driver"); | ||
912 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index ebf130e894b5..12fea3e22348 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c | |||
@@ -2676,7 +2676,7 @@ err_out_stop_device: | |||
2676 | hifn_reset_dma(dev, 1); | 2676 | hifn_reset_dma(dev, 1); |
2677 | hifn_stop_device(dev); | 2677 | hifn_stop_device(dev); |
2678 | err_out_free_irq: | 2678 | err_out_free_irq: |
2679 | free_irq(dev->irq, dev->name); | 2679 | free_irq(dev->irq, dev); |
2680 | tasklet_kill(&dev->tasklet); | 2680 | tasklet_kill(&dev->tasklet); |
2681 | err_out_free_desc: | 2681 | err_out_free_desc: |
2682 | pci_free_consistent(pdev, sizeof(struct hifn_dma), | 2682 | pci_free_consistent(pdev, sizeof(struct hifn_dma), |
@@ -2711,7 +2711,7 @@ static void hifn_remove(struct pci_dev *pdev) | |||
2711 | hifn_reset_dma(dev, 1); | 2711 | hifn_reset_dma(dev, 1); |
2712 | hifn_stop_device(dev); | 2712 | hifn_stop_device(dev); |
2713 | 2713 | ||
2714 | free_irq(dev->irq, dev->name); | 2714 | free_irq(dev->irq, dev); |
2715 | tasklet_kill(&dev->tasklet); | 2715 | tasklet_kill(&dev->tasklet); |
2716 | 2716 | ||
2717 | hifn_flush(dev); | 2717 | hifn_flush(dev); |
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index ce6290e5471a..3374a3ebe4c7 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c | |||
@@ -1146,7 +1146,6 @@ err_unmap_reg: | |||
1146 | err: | 1146 | err: |
1147 | kfree(cp); | 1147 | kfree(cp); |
1148 | cpg = NULL; | 1148 | cpg = NULL; |
1149 | platform_set_drvdata(pdev, NULL); | ||
1150 | return ret; | 1149 | return ret; |
1151 | } | 1150 | } |
1152 | 1151 | ||
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index ee15b0f7849a..5f7980586850 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c | |||
@@ -203,13 +203,6 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset, | |||
203 | 203 | ||
204 | static int omap_aes_hw_init(struct omap_aes_dev *dd) | 204 | static int omap_aes_hw_init(struct omap_aes_dev *dd) |
205 | { | 205 | { |
206 | /* | ||
207 | * clocks are enabled when request starts and disabled when finished. | ||
208 | * It may be long delays between requests. | ||
209 | * Device might go to off mode to save power. | ||
210 | */ | ||
211 | pm_runtime_get_sync(dd->dev); | ||
212 | |||
213 | if (!(dd->flags & FLAGS_INIT)) { | 206 | if (!(dd->flags & FLAGS_INIT)) { |
214 | dd->flags |= FLAGS_INIT; | 207 | dd->flags |= FLAGS_INIT; |
215 | dd->err = 0; | 208 | dd->err = 0; |
@@ -636,7 +629,6 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) | |||
636 | 629 | ||
637 | pr_debug("err: %d\n", err); | 630 | pr_debug("err: %d\n", err); |
638 | 631 | ||
639 | pm_runtime_put(dd->dev); | ||
640 | dd->flags &= ~FLAGS_BUSY; | 632 | dd->flags &= ~FLAGS_BUSY; |
641 | 633 | ||
642 | req->base.complete(&req->base, err); | 634 | req->base.complete(&req->base, err); |
@@ -837,8 +829,16 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req) | |||
837 | 829 | ||
838 | static int omap_aes_cra_init(struct crypto_tfm *tfm) | 830 | static int omap_aes_cra_init(struct crypto_tfm *tfm) |
839 | { | 831 | { |
840 | pr_debug("enter\n"); | 832 | struct omap_aes_dev *dd = NULL; |
833 | |||
834 | /* Find AES device, currently picks the first device */ | ||
835 | spin_lock_bh(&list_lock); | ||
836 | list_for_each_entry(dd, &dev_list, list) { | ||
837 | break; | ||
838 | } | ||
839 | spin_unlock_bh(&list_lock); | ||
841 | 840 | ||
841 | pm_runtime_get_sync(dd->dev); | ||
842 | tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx); | 842 | tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx); |
843 | 843 | ||
844 | return 0; | 844 | return 0; |
@@ -846,7 +846,16 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm) | |||
846 | 846 | ||
847 | static void omap_aes_cra_exit(struct crypto_tfm *tfm) | 847 | static void omap_aes_cra_exit(struct crypto_tfm *tfm) |
848 | { | 848 | { |
849 | pr_debug("enter\n"); | 849 | struct omap_aes_dev *dd = NULL; |
850 | |||
851 | /* Find AES device, currently picks the first device */ | ||
852 | spin_lock_bh(&list_lock); | ||
853 | list_for_each_entry(dd, &dev_list, list) { | ||
854 | break; | ||
855 | } | ||
856 | spin_unlock_bh(&list_lock); | ||
857 | |||
858 | pm_runtime_put_sync(dd->dev); | ||
850 | } | 859 | } |
851 | 860 | ||
852 | /* ********************** ALGS ************************************ */ | 861 | /* ********************** ALGS ************************************ */ |
@@ -1125,10 +1134,9 @@ static int omap_aes_probe(struct platform_device *pdev) | |||
1125 | if (err) | 1134 | if (err) |
1126 | goto err_res; | 1135 | goto err_res; |
1127 | 1136 | ||
1128 | dd->io_base = devm_request_and_ioremap(dev, &res); | 1137 | dd->io_base = devm_ioremap_resource(dev, &res); |
1129 | if (!dd->io_base) { | 1138 | if (IS_ERR(dd->io_base)) { |
1130 | dev_err(dev, "can't ioremap\n"); | 1139 | err = PTR_ERR(dd->io_base); |
1131 | err = -ENOMEM; | ||
1132 | goto err_res; | 1140 | goto err_res; |
1133 | } | 1141 | } |
1134 | dd->phys_base = res.start; | 1142 | dd->phys_base = res.start; |
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index a1e1b4756ee5..4bb67652c200 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c | |||
@@ -1686,10 +1686,9 @@ static int omap_sham_probe(struct platform_device *pdev) | |||
1686 | if (err) | 1686 | if (err) |
1687 | goto res_err; | 1687 | goto res_err; |
1688 | 1688 | ||
1689 | dd->io_base = devm_request_and_ioremap(dev, &res); | 1689 | dd->io_base = devm_ioremap_resource(dev, &res); |
1690 | if (!dd->io_base) { | 1690 | if (IS_ERR(dd->io_base)) { |
1691 | dev_err(dev, "can't ioremap\n"); | 1691 | err = PTR_ERR(dd->io_base); |
1692 | err = -ENOMEM; | ||
1693 | goto res_err; | 1692 | goto res_err; |
1694 | } | 1693 | } |
1695 | dd->phys_base = res.start; | 1694 | dd->phys_base = res.start; |
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index ac30724d923d..888f7f4a6d3f 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c | |||
@@ -1298,7 +1298,7 @@ static ssize_t spacc_stat_irq_thresh_store(struct device *dev, | |||
1298 | struct spacc_engine *engine = spacc_dev_to_engine(dev); | 1298 | struct spacc_engine *engine = spacc_dev_to_engine(dev); |
1299 | unsigned long thresh; | 1299 | unsigned long thresh; |
1300 | 1300 | ||
1301 | if (strict_strtoul(buf, 0, &thresh)) | 1301 | if (kstrtoul(buf, 0, &thresh)) |
1302 | return -EINVAL; | 1302 | return -EINVAL; |
1303 | 1303 | ||
1304 | thresh = clamp(thresh, 1UL, engine->fifo_sz - 1); | 1304 | thresh = clamp(thresh, 1UL, engine->fifo_sz - 1); |
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 4b314326f48a..cf149b19ff47 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c | |||
@@ -647,7 +647,6 @@ static int s5p_aes_probe(struct platform_device *pdev) | |||
647 | clk_disable(pdata->clk); | 647 | clk_disable(pdata->clk); |
648 | 648 | ||
649 | s5p_dev = NULL; | 649 | s5p_dev = NULL; |
650 | platform_set_drvdata(pdev, NULL); | ||
651 | 650 | ||
652 | return err; | 651 | return err; |
653 | } | 652 | } |
@@ -668,7 +667,6 @@ static int s5p_aes_remove(struct platform_device *pdev) | |||
668 | clk_disable(pdata->clk); | 667 | clk_disable(pdata->clk); |
669 | 668 | ||
670 | s5p_dev = NULL; | 669 | s5p_dev = NULL; |
671 | platform_set_drvdata(pdev, NULL); | ||
672 | 670 | ||
673 | return 0; | 671 | return 0; |
674 | } | 672 | } |
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index 83d79b964d12..a999f537228f 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c | |||
@@ -1629,7 +1629,7 @@ static int ux500_cryp_remove(struct platform_device *pdev) | |||
1629 | 1629 | ||
1630 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | 1630 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
1631 | if (res) | 1631 | if (res) |
1632 | release_mem_region(res->start, res->end - res->start + 1); | 1632 | release_mem_region(res->start, resource_size(res)); |
1633 | 1633 | ||
1634 | kfree(device_data); | 1634 | kfree(device_data); |
1635 | 1635 | ||
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index a9c96d865ee7..b3cb71f0d3b0 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h | |||
@@ -3,6 +3,10 @@ | |||
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
6 | #define CRC_T10DIF_DIGEST_SIZE 2 | ||
7 | #define CRC_T10DIF_BLOCK_SIZE 1 | ||
8 | |||
9 | __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); | ||
6 | __u16 crc_t10dif(unsigned char const *, size_t); | 10 | __u16 crc_t10dif(unsigned char const *, size_t); |
7 | 11 | ||
8 | #endif | 12 | #endif |
diff --git a/lib/Kconfig b/lib/Kconfig index 5a5203ded0dd..f1ed53c3aa44 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
@@ -66,6 +66,8 @@ config CRC16 | |||
66 | 66 | ||
67 | config CRC_T10DIF | 67 | config CRC_T10DIF |
68 | tristate "CRC calculation for the T10 Data Integrity Field" | 68 | tristate "CRC calculation for the T10 Data Integrity Field" |
69 | select CRYPTO | ||
70 | select CRYPTO_CRCT10DIF | ||
69 | help | 71 | help |
70 | This option is only needed if a module that's not in the | 72 | This option is only needed if a module that's not in the |
71 | kernel tree needs to calculate CRC checks for use with the | 73 | kernel tree needs to calculate CRC checks for use with the |
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index fbbd66ed86cd..fe3428c07b47 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c | |||
@@ -11,57 +11,44 @@ | |||
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/crc-t10dif.h> | 13 | #include <linux/crc-t10dif.h> |
14 | #include <linux/err.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <crypto/hash.h> | ||
14 | 17 | ||
15 | /* Table generated using the following polynomium: | 18 | static struct crypto_shash *crct10dif_tfm; |
16 | * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 | ||
17 | * gt: 0x8bb7 | ||
18 | */ | ||
19 | static const __u16 t10_dif_crc_table[256] = { | ||
20 | 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, | ||
21 | 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, | ||
22 | 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, | ||
23 | 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, | ||
24 | 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, | ||
25 | 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, | ||
26 | 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, | ||
27 | 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, | ||
28 | 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, | ||
29 | 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, | ||
30 | 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, | ||
31 | 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, | ||
32 | 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, | ||
33 | 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, | ||
34 | 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, | ||
35 | 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, | ||
36 | 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, | ||
37 | 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, | ||
38 | 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, | ||
39 | 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, | ||
40 | 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, | ||
41 | 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, | ||
42 | 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, | ||
43 | 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, | ||
44 | 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, | ||
45 | 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, | ||
46 | 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, | ||
47 | 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, | ||
48 | 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, | ||
49 | 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, | ||
50 | 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, | ||
51 | 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 | ||
52 | }; | ||
53 | 19 | ||
54 | __u16 crc_t10dif(const unsigned char *buffer, size_t len) | 20 | __u16 crc_t10dif(const unsigned char *buffer, size_t len) |
55 | { | 21 | { |
56 | __u16 crc = 0; | 22 | struct { |
57 | unsigned int i; | 23 | struct shash_desc shash; |
24 | char ctx[2]; | ||
25 | } desc; | ||
26 | int err; | ||
27 | |||
28 | desc.shash.tfm = crct10dif_tfm; | ||
29 | desc.shash.flags = 0; | ||
30 | *(__u16 *)desc.ctx = 0; | ||
58 | 31 | ||
59 | for (i = 0 ; i < len ; i++) | 32 | err = crypto_shash_update(&desc.shash, buffer, len); |
60 | crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; | 33 | BUG_ON(err); |
61 | 34 | ||
62 | return crc; | 35 | return *(__u16 *)desc.ctx; |
63 | } | 36 | } |
64 | EXPORT_SYMBOL(crc_t10dif); | 37 | EXPORT_SYMBOL(crc_t10dif); |
65 | 38 | ||
39 | static int __init crc_t10dif_mod_init(void) | ||
40 | { | ||
41 | crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); | ||
42 | return PTR_RET(crct10dif_tfm); | ||
43 | } | ||
44 | |||
45 | static void __exit crc_t10dif_mod_fini(void) | ||
46 | { | ||
47 | crypto_free_shash(crct10dif_tfm); | ||
48 | } | ||
49 | |||
50 | module_init(crc_t10dif_mod_init); | ||
51 | module_exit(crc_t10dif_mod_fini); | ||
52 | |||
66 | MODULE_DESCRIPTION("T10 DIF CRC calculation"); | 53 | MODULE_DESCRIPTION("T10 DIF CRC calculation"); |
67 | MODULE_LICENSE("GPL"); | 54 | MODULE_LICENSE("GPL"); |