author	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-05 15:12:33 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-05 15:12:33 -0400
commit	b2c311075db578f1433d9b303698491bfa21279a (patch)
tree	41d5f1b5ad6f45be7211f524328de81f7e9754be
parent	45175476ae2dbebc860d5cf486f2916044343513 (diff)
parent	02c0241b600e4ab8a732c89749e252165145d60c (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
 - Do not idle omap device between crypto operations in one session.
 - Added sha224/sha384 shims for SSSE3.
 - More optimisations for camellia-aesni-avx2.
 - Removed defunct blowfish/twofish AVX2 implementations.
 - Added unaligned buffer self-tests.
 - Added PCLMULQDQ optimisation for CRCT10DIF.
 - Added support for Freescale's DCP co-processor
 - Misc fixes.

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (44 commits)
  crypto: testmgr - test hash implementations with unaligned buffers
  crypto: testmgr - test AEADs with unaligned buffers
  crypto: testmgr - test skciphers with unaligned buffers
  crypto: testmgr - check that entries in alg_test_descs are in correct order
  Revert "crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher"
  Revert "crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher"
  crypto: camellia-aesni-avx2 - tune assembly code for more performance
  hwrng: bcm2835 - fix MODULE_LICENSE tag
  hwrng: nomadik - use clk_prepare_enable()
  crypto: picoxcell - replace strict_strtoul() with kstrtoul()
  crypto: dcp - Staticize local symbols
  crypto: dcp - Use NULL instead of 0
  crypto: dcp - Use devm_* APIs
  crypto: dcp - Remove redundant platform_set_drvdata()
  hwrng: use platform_{get,set}_drvdata()
  crypto: omap-aes - Don't idle/start AES device between Encrypt operations
  crypto: crct10dif - Use PTR_RET
  crypto: ux500 - Cocci spatch "resource_size.spatch"
  crypto: sha256_ssse3 - add sha224 support
  crypto: sha512_ssse3 - add sha384 support
  ...
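The unaligned-buffer self-tests listed above all follow the same pattern: a known-answer vector is re-run from a deliberately misaligned pointer so that implementations which silently assume alignment fail the comparison. A minimal sketch of that pattern (illustrative signatures only; test_unaligned and its arguments are assumptions, not the testmgr.c interface):

#include <string.h>
#include <stddef.h>

/* Re-run a known-answer vector from an odd offset so implementations that
 * silently require aligned input are caught by the final comparison. */
static int test_unaligned(int (*hash)(const unsigned char *in, size_t len,
				      unsigned char *out),
			  const unsigned char *vec, size_t len,
			  const unsigned char *expect, size_t digest_len,
			  unsigned char *scratch)	/* >= len + 1 bytes */
{
	unsigned char digest[64];	/* assumes digest_len <= 64 */

	memcpy(scratch + 1, vec, len);	/* force a misaligned buffer */
	if (hash(scratch + 1, len, digest))
		return -1;

	return memcmp(digest, expect, digest_len) ? -1 : 0;
}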
-rw-r--r--	arch/arm/boot/dts/imx28.dtsi	| 2
-rw-r--r--	arch/x86/crypto/Makefile	| 8
-rw-r--r--	arch/x86/crypto/blowfish-avx2-asm_64.S	| 449
-rw-r--r--	arch/x86/crypto/blowfish_avx2_glue.c	| 585
-rw-r--r--	arch/x86/crypto/blowfish_glue.c	| 32
-rw-r--r--	arch/x86/crypto/camellia-aesni-avx2-asm_64.S	| 160
-rw-r--r--	arch/x86/crypto/crct10dif-pcl-asm_64.S	| 643
-rw-r--r--	arch/x86/crypto/crct10dif-pclmul_glue.c	| 151
-rw-r--r--	arch/x86/crypto/sha256_ssse3_glue.c	| 57
-rw-r--r--	arch/x86/crypto/sha512_ssse3_glue.c	| 58
-rw-r--r--	arch/x86/crypto/twofish-avx2-asm_64.S	| 600
-rw-r--r--	arch/x86/crypto/twofish_avx2_glue.c	| 584
-rw-r--r--	arch/x86/crypto/twofish_avx_glue.c	| 14
-rw-r--r--	arch/x86/include/asm/crypto/blowfish.h	| 43
-rw-r--r--	arch/x86/include/asm/crypto/twofish.h	| 18
-rw-r--r--	crypto/Kconfig	| 63
-rw-r--r--	crypto/Makefile	| 1
-rw-r--r--	crypto/crct10dif.c	| 178
-rw-r--r--	crypto/sha512_generic.c	| 2
-rw-r--r--	crypto/tcrypt.c	| 8
-rw-r--r--	crypto/testmgr.c	| 176
-rw-r--r--	crypto/testmgr.h	| 33
-rw-r--r--	drivers/char/hw_random/atmel-rng.c	| 2
-rw-r--r--	drivers/char/hw_random/bcm63xx-rng.c	| 2
-rw-r--r--	drivers/char/hw_random/n2-drv.c	| 6
-rw-r--r--	drivers/char/hw_random/nomadik-rng.c	| 2
-rw-r--r--	drivers/char/hw_random/octeon-rng.c	| 4
-rw-r--r--	drivers/char/hw_random/omap-rng.c	| 6
-rw-r--r--	drivers/char/hw_random/timeriomem-rng.c	| 2
-rw-r--r--	drivers/char/hw_random/tx4939-rng.c	| 1
-rw-r--r--	drivers/crypto/Kconfig	| 12
-rw-r--r--	drivers/crypto/Makefile	| 1
-rw-r--r--	drivers/crypto/caam/ctrl.c	| 10
-rw-r--r--	drivers/crypto/caam/desc.h	| 22
-rw-r--r--	drivers/crypto/caam/desc_constr.h	| 81
-rw-r--r--	drivers/crypto/caam/pdb.h	| 1
-rw-r--r--	drivers/crypto/caam/regs.h	| 42
-rw-r--r--	drivers/crypto/dcp.c	| 912
-rw-r--r--	drivers/crypto/hifn_795x.c	| 4
-rw-r--r--	drivers/crypto/mv_cesa.c	| 1
-rw-r--r--	drivers/crypto/omap-aes.c	| 36
-rw-r--r--	drivers/crypto/omap-sham.c	| 7
-rw-r--r--	drivers/crypto/picoxcell_crypto.c	| 2
-rw-r--r--	drivers/crypto/s5p-sss.c	| 2
-rw-r--r--	drivers/crypto/ux500/cryp/cryp_core.c	| 2
-rw-r--r--	include/linux/crc-t10dif.h	| 4
-rw-r--r--	lib/Kconfig	| 2
-rw-r--r--	lib/crc-t10dif.c	| 73
48 files changed, 2541 insertions, 2563 deletions
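The CRCT10DIF side of this pull (crypto/crct10dif.c, lib/crc-t10dif.c and the PCLMULQDQ files above) reroutes the library helper over the crypto API so that accelerated implementations are picked up transparently. A minimal sketch of the consumer-facing interface, assuming only the long-standing crc_t10dif() helper from include/linux/crc-t10dif.h (the wrapper name checksum_sector is hypothetical):

#include <linux/crc-t10dif.h>

/* Compute the 16-bit T10 DIF CRC of a buffer; whichever "crct10dif"
 * implementation wins priority selection (generic C or PCLMULQDQ) is
 * used underneath once lib/crc-t10dif.c is routed through the crypto API. */
static __u16 checksum_sector(const unsigned char *data, size_t len)
{
	return crc_t10dif(data, len);
}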
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 195451bf7706..6a8acb01b1d3 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -736,7 +736,7 @@
 			dcp@80028000 {
 				reg = <0x80028000 0x2000>;
 				interrupts = <52 53 54>;
-				status = "disabled";
+				compatible = "fsl-dcp";
 			};
 
 			pxp@8002a000 {
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a3a0ed80f17c..7d6ba9db1be9 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -3,8 +3,6 @@
 #
 
 avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
-			$(comma)4)$(comma)%ymm2,yes,no)
 
 obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
 obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -29,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
 obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
+obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
 
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
@@ -42,10 +41,8 @@ endif
 
 # These modules require assembler to support AVX2.
 ifeq ($(avx2_supported),yes)
-	obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
 	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
 	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
-	obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
 endif
 
 aes-i586-y := aes-i586-asm_32.o aes_glue.o
@@ -73,10 +70,8 @@ ifeq ($(avx_supported),yes)
 endif
 
 ifeq ($(avx2_supported),yes)
-	blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
 	camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
 	serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
-	twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
 endif
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
@@ -87,3 +82,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
 sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
+crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S
deleted file mode 100644
index 784452e0d05d..000000000000
--- a/arch/x86/crypto/blowfish-avx2-asm_64.S
+++ /dev/null
@@ -1,449 +0,0 @@
1/*
2 * x86_64/AVX2 assembler optimized version of Blowfish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/linkage.h>
14
15.file "blowfish-avx2-asm_64.S"
16
17.data
18.align 32
19
20.Lprefetch_mask:
21.long 0*64
22.long 1*64
23.long 2*64
24.long 3*64
25.long 4*64
26.long 5*64
27.long 6*64
28.long 7*64
29
30.Lbswap32_mask:
31.long 0x00010203
32.long 0x04050607
33.long 0x08090a0b
34.long 0x0c0d0e0f
35
36.Lbswap128_mask:
37 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
38.Lbswap_iv_mask:
39 .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
40
41.text
42/* structure of crypto context */
43#define p 0
44#define s0 ((16 + 2) * 4)
45#define s1 ((16 + 2 + (1 * 256)) * 4)
46#define s2 ((16 + 2 + (2 * 256)) * 4)
47#define s3 ((16 + 2 + (3 * 256)) * 4)
48
49/* register macros */
50#define CTX %rdi
51#define RIO %rdx
52
53#define RS0 %rax
54#define RS1 %r8
55#define RS2 %r9
56#define RS3 %r10
57
58#define RLOOP %r11
59#define RLOOPd %r11d
60
61#define RXr0 %ymm8
62#define RXr1 %ymm9
63#define RXr2 %ymm10
64#define RXr3 %ymm11
65#define RXl0 %ymm12
66#define RXl1 %ymm13
67#define RXl2 %ymm14
68#define RXl3 %ymm15
69
70/* temp regs */
71#define RT0 %ymm0
72#define RT0x %xmm0
73#define RT1 %ymm1
74#define RT1x %xmm1
75#define RIDX0 %ymm2
76#define RIDX1 %ymm3
77#define RIDX1x %xmm3
78#define RIDX2 %ymm4
79#define RIDX3 %ymm5
80
81/* vpgatherdd mask and '-1' */
82#define RNOT %ymm6
83
84/* byte mask, (-1 >> 24) */
85#define RBYTE %ymm7
86
87/***********************************************************************
88 * 32-way AVX2 blowfish
89 ***********************************************************************/
90#define F(xl, xr) \
91 vpsrld $24, xl, RIDX0; \
92 vpsrld $16, xl, RIDX1; \
93 vpsrld $8, xl, RIDX2; \
94 vpand RBYTE, RIDX1, RIDX1; \
95 vpand RBYTE, RIDX2, RIDX2; \
96 vpand RBYTE, xl, RIDX3; \
97 \
98 vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \
99 vpcmpeqd RNOT, RNOT, RNOT; \
100 vpcmpeqd RIDX0, RIDX0, RIDX0; \
101 \
102 vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \
103 vpcmpeqd RIDX1, RIDX1, RIDX1; \
104 vpaddd RT0, RT1, RT0; \
105 \
106 vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \
107 vpxor RT0, RT1, RT0; \
108 \
109 vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \
110 vpcmpeqd RNOT, RNOT, RNOT; \
111 vpaddd RT0, RT1, RT0; \
112 \
113 vpxor RT0, xr, xr;
114
115#define add_roundkey(xl, nmem) \
116 vpbroadcastd nmem, RT0; \
117 vpxor RT0, xl ## 0, xl ## 0; \
118 vpxor RT0, xl ## 1, xl ## 1; \
119 vpxor RT0, xl ## 2, xl ## 2; \
120 vpxor RT0, xl ## 3, xl ## 3;
121
122#define round_enc() \
123 add_roundkey(RXr, p(CTX,RLOOP,4)); \
124 F(RXl0, RXr0); \
125 F(RXl1, RXr1); \
126 F(RXl2, RXr2); \
127 F(RXl3, RXr3); \
128 \
129 add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
130 F(RXr0, RXl0); \
131 F(RXr1, RXl1); \
132 F(RXr2, RXl2); \
133 F(RXr3, RXl3);
134
135#define round_dec() \
136 add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \
137 F(RXl0, RXr0); \
138 F(RXl1, RXr1); \
139 F(RXl2, RXr2); \
140 F(RXl3, RXr3); \
141 \
142 add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
143 F(RXr0, RXl0); \
144 F(RXr1, RXl1); \
145 F(RXr2, RXl2); \
146 F(RXr3, RXl3);
147
148#define init_round_constants() \
149 vpcmpeqd RNOT, RNOT, RNOT; \
150 leaq s0(CTX), RS0; \
151 leaq s1(CTX), RS1; \
152 leaq s2(CTX), RS2; \
153 leaq s3(CTX), RS3; \
154 vpsrld $24, RNOT, RBYTE;
155
156#define transpose_2x2(x0, x1, t0) \
157 vpunpckldq x0, x1, t0; \
158 vpunpckhdq x0, x1, x1; \
159 \
160 vpunpcklqdq t0, x1, x0; \
161 vpunpckhqdq t0, x1, x1;
162
163#define read_block(xl, xr) \
164 vbroadcasti128 .Lbswap32_mask, RT1; \
165 \
166 vpshufb RT1, xl ## 0, xl ## 0; \
167 vpshufb RT1, xr ## 0, xr ## 0; \
168 vpshufb RT1, xl ## 1, xl ## 1; \
169 vpshufb RT1, xr ## 1, xr ## 1; \
170 vpshufb RT1, xl ## 2, xl ## 2; \
171 vpshufb RT1, xr ## 2, xr ## 2; \
172 vpshufb RT1, xl ## 3, xl ## 3; \
173 vpshufb RT1, xr ## 3, xr ## 3; \
174 \
175 transpose_2x2(xl ## 0, xr ## 0, RT0); \
176 transpose_2x2(xl ## 1, xr ## 1, RT0); \
177 transpose_2x2(xl ## 2, xr ## 2, RT0); \
178 transpose_2x2(xl ## 3, xr ## 3, RT0);
179
180#define write_block(xl, xr) \
181 vbroadcasti128 .Lbswap32_mask, RT1; \
182 \
183 transpose_2x2(xl ## 0, xr ## 0, RT0); \
184 transpose_2x2(xl ## 1, xr ## 1, RT0); \
185 transpose_2x2(xl ## 2, xr ## 2, RT0); \
186 transpose_2x2(xl ## 3, xr ## 3, RT0); \
187 \
188 vpshufb RT1, xl ## 0, xl ## 0; \
189 vpshufb RT1, xr ## 0, xr ## 0; \
190 vpshufb RT1, xl ## 1, xl ## 1; \
191 vpshufb RT1, xr ## 1, xr ## 1; \
192 vpshufb RT1, xl ## 2, xl ## 2; \
193 vpshufb RT1, xr ## 2, xr ## 2; \
194 vpshufb RT1, xl ## 3, xl ## 3; \
195 vpshufb RT1, xr ## 3, xr ## 3;
196
197.align 8
198__blowfish_enc_blk32:
199 /* input:
200 * %rdi: ctx, CTX
201 * RXl0..4, RXr0..4: plaintext
202 * output:
203 * RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped)
204 */
205 init_round_constants();
206
207 read_block(RXl, RXr);
208
209 movl $1, RLOOPd;
210 add_roundkey(RXl, p+4*(0)(CTX));
211
212.align 4
213.L__enc_loop:
214 round_enc();
215
216 leal 2(RLOOPd), RLOOPd;
217 cmpl $17, RLOOPd;
218 jne .L__enc_loop;
219
220 add_roundkey(RXr, p+4*(17)(CTX));
221
222 write_block(RXl, RXr);
223
224 ret;
225ENDPROC(__blowfish_enc_blk32)
226
227.align 8
228__blowfish_dec_blk32:
229 /* input:
230 * %rdi: ctx, CTX
231 * RXl0..4, RXr0..4: ciphertext
232 * output:
233 * RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped)
234 */
235 init_round_constants();
236
237 read_block(RXl, RXr);
238
239 movl $14, RLOOPd;
240 add_roundkey(RXl, p+4*(17)(CTX));
241
242.align 4
243.L__dec_loop:
244 round_dec();
245
246 addl $-2, RLOOPd;
247 jns .L__dec_loop;
248
249 add_roundkey(RXr, p+4*(0)(CTX));
250
251 write_block(RXl, RXr);
252
253 ret;
254ENDPROC(__blowfish_dec_blk32)
255
256ENTRY(blowfish_ecb_enc_32way)
257 /* input:
258 * %rdi: ctx, CTX
259 * %rsi: dst
260 * %rdx: src
261 */
262
263 vzeroupper;
264
265 vmovdqu 0*32(%rdx), RXl0;
266 vmovdqu 1*32(%rdx), RXr0;
267 vmovdqu 2*32(%rdx), RXl1;
268 vmovdqu 3*32(%rdx), RXr1;
269 vmovdqu 4*32(%rdx), RXl2;
270 vmovdqu 5*32(%rdx), RXr2;
271 vmovdqu 6*32(%rdx), RXl3;
272 vmovdqu 7*32(%rdx), RXr3;
273
274 call __blowfish_enc_blk32;
275
276 vmovdqu RXr0, 0*32(%rsi);
277 vmovdqu RXl0, 1*32(%rsi);
278 vmovdqu RXr1, 2*32(%rsi);
279 vmovdqu RXl1, 3*32(%rsi);
280 vmovdqu RXr2, 4*32(%rsi);
281 vmovdqu RXl2, 5*32(%rsi);
282 vmovdqu RXr3, 6*32(%rsi);
283 vmovdqu RXl3, 7*32(%rsi);
284
285 vzeroupper;
286
287 ret;
288ENDPROC(blowfish_ecb_enc_32way)
289
290ENTRY(blowfish_ecb_dec_32way)
291 /* input:
292 * %rdi: ctx, CTX
293 * %rsi: dst
294 * %rdx: src
295 */
296
297 vzeroupper;
298
299 vmovdqu 0*32(%rdx), RXl0;
300 vmovdqu 1*32(%rdx), RXr0;
301 vmovdqu 2*32(%rdx), RXl1;
302 vmovdqu 3*32(%rdx), RXr1;
303 vmovdqu 4*32(%rdx), RXl2;
304 vmovdqu 5*32(%rdx), RXr2;
305 vmovdqu 6*32(%rdx), RXl3;
306 vmovdqu 7*32(%rdx), RXr3;
307
308 call __blowfish_dec_blk32;
309
310 vmovdqu RXr0, 0*32(%rsi);
311 vmovdqu RXl0, 1*32(%rsi);
312 vmovdqu RXr1, 2*32(%rsi);
313 vmovdqu RXl1, 3*32(%rsi);
314 vmovdqu RXr2, 4*32(%rsi);
315 vmovdqu RXl2, 5*32(%rsi);
316 vmovdqu RXr3, 6*32(%rsi);
317 vmovdqu RXl3, 7*32(%rsi);
318
319 vzeroupper;
320
321 ret;
322ENDPROC(blowfish_ecb_dec_32way)
323
324ENTRY(blowfish_cbc_dec_32way)
325 /* input:
326 * %rdi: ctx, CTX
327 * %rsi: dst
328 * %rdx: src
329 */
330
331 vzeroupper;
332
333 vmovdqu 0*32(%rdx), RXl0;
334 vmovdqu 1*32(%rdx), RXr0;
335 vmovdqu 2*32(%rdx), RXl1;
336 vmovdqu 3*32(%rdx), RXr1;
337 vmovdqu 4*32(%rdx), RXl2;
338 vmovdqu 5*32(%rdx), RXr2;
339 vmovdqu 6*32(%rdx), RXl3;
340 vmovdqu 7*32(%rdx), RXr3;
341
342 call __blowfish_dec_blk32;
343
344 /* xor with src */
345 vmovq (%rdx), RT0x;
346 vpshufd $0x4f, RT0x, RT0x;
347 vinserti128 $1, 8(%rdx), RT0, RT0;
348 vpxor RT0, RXr0, RXr0;
349 vpxor 0*32+24(%rdx), RXl0, RXl0;
350 vpxor 1*32+24(%rdx), RXr1, RXr1;
351 vpxor 2*32+24(%rdx), RXl1, RXl1;
352 vpxor 3*32+24(%rdx), RXr2, RXr2;
353 vpxor 4*32+24(%rdx), RXl2, RXl2;
354 vpxor 5*32+24(%rdx), RXr3, RXr3;
355 vpxor 6*32+24(%rdx), RXl3, RXl3;
356
357 vmovdqu RXr0, (0*32)(%rsi);
358 vmovdqu RXl0, (1*32)(%rsi);
359 vmovdqu RXr1, (2*32)(%rsi);
360 vmovdqu RXl1, (3*32)(%rsi);
361 vmovdqu RXr2, (4*32)(%rsi);
362 vmovdqu RXl2, (5*32)(%rsi);
363 vmovdqu RXr3, (6*32)(%rsi);
364 vmovdqu RXl3, (7*32)(%rsi);
365
366 vzeroupper;
367
368 ret;
369ENDPROC(blowfish_cbc_dec_32way)
370
371ENTRY(blowfish_ctr_32way)
372 /* input:
373 * %rdi: ctx, CTX
374 * %rsi: dst
375 * %rdx: src
376 * %rcx: iv (big endian, 64bit)
377 */
378
379 vzeroupper;
380
381 vpcmpeqd RT0, RT0, RT0;
382 vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */
383
384 vpcmpeqd RT1x, RT1x, RT1x;
385 vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */
386 vpxor RIDX0, RIDX0, RIDX0;
387 vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */
388
389 vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */
390
391 vpcmpeqd RT1, RT1, RT1;
392 vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */
393 vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */
394
395 vbroadcasti128 .Lbswap_iv_mask, RIDX0;
396 vbroadcasti128 .Lbswap128_mask, RIDX1;
397
398 /* load IV and byteswap */
399 vmovq (%rcx), RT1x;
400 vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */
401 vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */
402
403 /* construct IVs */
404 vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */
405 vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */
406 vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */
407 vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */
408 vpsubq RIDX2, RT1, RT1;
409 vpshufb RIDX1, RT1, RXl1;
410 vpsubq RIDX2, RT1, RT1;
411 vpshufb RIDX1, RT1, RXr1;
412 vpsubq RIDX2, RT1, RT1;
413 vpshufb RIDX1, RT1, RXl2;
414 vpsubq RIDX2, RT1, RT1;
415 vpshufb RIDX1, RT1, RXr2;
416 vpsubq RIDX2, RT1, RT1;
417 vpshufb RIDX1, RT1, RXl3;
418 vpsubq RIDX2, RT1, RT1;
419 vpshufb RIDX1, RT1, RXr3;
420
421 /* store last IV */
422 vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */
423 vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */
424 vmovq RT1x, (%rcx);
425
426 call __blowfish_enc_blk32;
427
428 /* dst = src ^ iv */
429 vpxor 0*32(%rdx), RXr0, RXr0;
430 vpxor 1*32(%rdx), RXl0, RXl0;
431 vpxor 2*32(%rdx), RXr1, RXr1;
432 vpxor 3*32(%rdx), RXl1, RXl1;
433 vpxor 4*32(%rdx), RXr2, RXr2;
434 vpxor 5*32(%rdx), RXl2, RXl2;
435 vpxor 6*32(%rdx), RXr3, RXr3;
436 vpxor 7*32(%rdx), RXl3, RXl3;
437 vmovdqu RXr0, (0*32)(%rsi);
438 vmovdqu RXl0, (1*32)(%rsi);
439 vmovdqu RXr1, (2*32)(%rsi);
440 vmovdqu RXl1, (3*32)(%rsi);
441 vmovdqu RXr2, (4*32)(%rsi);
442 vmovdqu RXl2, (5*32)(%rsi);
443 vmovdqu RXr3, (6*32)(%rsi);
444 vmovdqu RXl3, (7*32)(%rsi);
445
446 vzeroupper;
447
448 ret;
449ENDPROC(blowfish_ctr_32way)
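For reference, the 32-way F() macro in the file deleted above is a vectorisation of the standard Blowfish F-function: four byte-indexed S-box lookups (performed with vpgatherdd), combined as add, xor, add modulo 2^32. A scalar C sketch of the same computation (the struct layout is inferred from the p/s0..s3 offsets in the deleted file, not taken from the kernel's bf_ctx declaration):

#include <stdint.h>

/* Layout mirrors the context offsets above: 18 P-array subkeys followed
 * by four 256-entry S-boxes of 32-bit words. */
struct bf_sboxes {
	uint32_t p[18];
	uint32_t s[4][256];
};

/* Scalar Blowfish F-function: the deleted F() macro computes exactly
 * this, but for 32 blocks at once, one vpgatherdd per S-box. */
static uint32_t blowfish_f(const struct bf_sboxes *c, uint32_t x)
{
	uint32_t a = x >> 24;
	uint32_t b = (x >> 16) & 0xff;
	uint32_t d = (x >> 8) & 0xff;
	uint32_t e = x & 0xff;

	/* additions wrap naturally modulo 2^32 in uint32_t */
	return ((c->s[0][a] + c->s[1][b]) ^ c->s[2][d]) + c->s[3][e];
}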
diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c
deleted file mode 100644
index 4417e9aea78d..000000000000
--- a/arch/x86/crypto/blowfish_avx2_glue.c
+++ /dev/null
@@ -1,585 +0,0 @@
1/*
2 * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8 * CTR part based on code (crypto/ctr.c) by:
9 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/types.h>
25#include <linux/crypto.h>
26#include <linux/err.h>
27#include <crypto/algapi.h>
28#include <crypto/blowfish.h>
29#include <crypto/cryptd.h>
30#include <crypto/ctr.h>
31#include <asm/i387.h>
32#include <asm/xcr.h>
33#include <asm/xsave.h>
34#include <asm/crypto/blowfish.h>
35#include <asm/crypto/ablk_helper.h>
36#include <crypto/scatterwalk.h>
37
38#define BF_AVX2_PARALLEL_BLOCKS 32
39
40/* 32-way AVX2 parallel cipher functions */
41asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst,
42 const u8 *src);
43asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst,
44 const u8 *src);
45asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst,
46 const u8 *src);
47asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src,
48 __be64 *iv);
49
50static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes)
51{
52 if (fpu_enabled)
53 return true;
54
55 /* FPU is only used when chunk to be processed is large enough, so
56 * do not enable FPU until it is necessary.
57 */
58 if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS)
59 return false;
60
61 kernel_fpu_begin();
62 return true;
63}
64
65static inline void bf_fpu_end(bool fpu_enabled)
66{
67 if (fpu_enabled)
68 kernel_fpu_end();
69}
70
71static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
72 bool enc)
73{
74 bool fpu_enabled = false;
75 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
76 const unsigned int bsize = BF_BLOCK_SIZE;
77 unsigned int nbytes;
78 int err;
79
80 err = blkcipher_walk_virt(desc, walk);
81 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
82
83 while ((nbytes = walk->nbytes)) {
84 u8 *wsrc = walk->src.virt.addr;
85 u8 *wdst = walk->dst.virt.addr;
86
87 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
88
89 /* Process multi-block AVX2 batch */
90 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
91 do {
92 if (enc)
93 blowfish_ecb_enc_32way(ctx, wdst, wsrc);
94 else
95 blowfish_ecb_dec_32way(ctx, wdst, wsrc);
96
97 wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS;
98 wdst += bsize * BF_AVX2_PARALLEL_BLOCKS;
99 nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
100 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
101
102 if (nbytes < bsize)
103 goto done;
104 }
105
106 /* Process multi-block batch */
107 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
108 do {
109 if (enc)
110 blowfish_enc_blk_4way(ctx, wdst, wsrc);
111 else
112 blowfish_dec_blk_4way(ctx, wdst, wsrc);
113
114 wsrc += bsize * BF_PARALLEL_BLOCKS;
115 wdst += bsize * BF_PARALLEL_BLOCKS;
116 nbytes -= bsize * BF_PARALLEL_BLOCKS;
117 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
118
119 if (nbytes < bsize)
120 goto done;
121 }
122
123 /* Handle leftovers */
124 do {
125 if (enc)
126 blowfish_enc_blk(ctx, wdst, wsrc);
127 else
128 blowfish_dec_blk(ctx, wdst, wsrc);
129
130 wsrc += bsize;
131 wdst += bsize;
132 nbytes -= bsize;
133 } while (nbytes >= bsize);
134
135done:
136 err = blkcipher_walk_done(desc, walk, nbytes);
137 }
138
139 bf_fpu_end(fpu_enabled);
140 return err;
141}
142
143static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
144 struct scatterlist *src, unsigned int nbytes)
145{
146 struct blkcipher_walk walk;
147
148 blkcipher_walk_init(&walk, dst, src, nbytes);
149 return ecb_crypt(desc, &walk, true);
150}
151
152static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
153 struct scatterlist *src, unsigned int nbytes)
154{
155 struct blkcipher_walk walk;
156
157 blkcipher_walk_init(&walk, dst, src, nbytes);
158 return ecb_crypt(desc, &walk, false);
159}
160
161static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
162 struct blkcipher_walk *walk)
163{
164 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
165 unsigned int bsize = BF_BLOCK_SIZE;
166 unsigned int nbytes = walk->nbytes;
167 u64 *src = (u64 *)walk->src.virt.addr;
168 u64 *dst = (u64 *)walk->dst.virt.addr;
169 u64 *iv = (u64 *)walk->iv;
170
171 do {
172 *dst = *src ^ *iv;
173 blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
174 iv = dst;
175
176 src += 1;
177 dst += 1;
178 nbytes -= bsize;
179 } while (nbytes >= bsize);
180
181 *(u64 *)walk->iv = *iv;
182 return nbytes;
183}
184
185static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
186 struct scatterlist *src, unsigned int nbytes)
187{
188 struct blkcipher_walk walk;
189 int err;
190
191 blkcipher_walk_init(&walk, dst, src, nbytes);
192 err = blkcipher_walk_virt(desc, &walk);
193
194 while ((nbytes = walk.nbytes)) {
195 nbytes = __cbc_encrypt(desc, &walk);
196 err = blkcipher_walk_done(desc, &walk, nbytes);
197 }
198
199 return err;
200}
201
202static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
203 struct blkcipher_walk *walk)
204{
205 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
206 const unsigned int bsize = BF_BLOCK_SIZE;
207 unsigned int nbytes = walk->nbytes;
208 u64 *src = (u64 *)walk->src.virt.addr;
209 u64 *dst = (u64 *)walk->dst.virt.addr;
210 u64 last_iv;
211 int i;
212
213 /* Start of the last block. */
214 src += nbytes / bsize - 1;
215 dst += nbytes / bsize - 1;
216
217 last_iv = *src;
218
219 /* Process multi-block AVX2 batch */
220 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
221 do {
222 nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1);
223 src -= BF_AVX2_PARALLEL_BLOCKS - 1;
224 dst -= BF_AVX2_PARALLEL_BLOCKS - 1;
225
226 blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src);
227
228 nbytes -= bsize;
229 if (nbytes < bsize)
230 goto done;
231
232 *dst ^= *(src - 1);
233 src -= 1;
234 dst -= 1;
235 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
236
237 if (nbytes < bsize)
238 goto done;
239 }
240
241 /* Process multi-block batch */
242 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
243 u64 ivs[BF_PARALLEL_BLOCKS - 1];
244
245 do {
246 nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1);
247 src -= BF_PARALLEL_BLOCKS - 1;
248 dst -= BF_PARALLEL_BLOCKS - 1;
249
250 for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
251 ivs[i] = src[i];
252
253 blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
254
255 for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
256 dst[i + 1] ^= ivs[i];
257
258 nbytes -= bsize;
259 if (nbytes < bsize)
260 goto done;
261
262 *dst ^= *(src - 1);
263 src -= 1;
264 dst -= 1;
265 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
266
267 if (nbytes < bsize)
268 goto done;
269 }
270
271 /* Handle leftovers */
272 for (;;) {
273 blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
274
275 nbytes -= bsize;
276 if (nbytes < bsize)
277 break;
278
279 *dst ^= *(src - 1);
280 src -= 1;
281 dst -= 1;
282 }
283
284done:
285 *dst ^= *(u64 *)walk->iv;
286 *(u64 *)walk->iv = last_iv;
287
288 return nbytes;
289}
290
291static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
292 struct scatterlist *src, unsigned int nbytes)
293{
294 bool fpu_enabled = false;
295 struct blkcipher_walk walk;
296 int err;
297
298 blkcipher_walk_init(&walk, dst, src, nbytes);
299 err = blkcipher_walk_virt(desc, &walk);
300 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
301
302 while ((nbytes = walk.nbytes)) {
303 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
304 nbytes = __cbc_decrypt(desc, &walk);
305 err = blkcipher_walk_done(desc, &walk, nbytes);
306 }
307
308 bf_fpu_end(fpu_enabled);
309 return err;
310}
311
312static void ctr_crypt_final(struct blkcipher_desc *desc,
313 struct blkcipher_walk *walk)
314{
315 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
316 u8 *ctrblk = walk->iv;
317 u8 keystream[BF_BLOCK_SIZE];
318 u8 *src = walk->src.virt.addr;
319 u8 *dst = walk->dst.virt.addr;
320 unsigned int nbytes = walk->nbytes;
321
322 blowfish_enc_blk(ctx, keystream, ctrblk);
323 crypto_xor(keystream, src, nbytes);
324 memcpy(dst, keystream, nbytes);
325
326 crypto_inc(ctrblk, BF_BLOCK_SIZE);
327}
328
329static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
330 struct blkcipher_walk *walk)
331{
332 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
333 unsigned int bsize = BF_BLOCK_SIZE;
334 unsigned int nbytes = walk->nbytes;
335 u64 *src = (u64 *)walk->src.virt.addr;
336 u64 *dst = (u64 *)walk->dst.virt.addr;
337 int i;
338
339 /* Process multi-block AVX2 batch */
340 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
341 do {
342 blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src,
343 (__be64 *)walk->iv);
344
345 src += BF_AVX2_PARALLEL_BLOCKS;
346 dst += BF_AVX2_PARALLEL_BLOCKS;
347 nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
348 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
349
350 if (nbytes < bsize)
351 goto done;
352 }
353
354 /* Process four block batch */
355 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
356 __be64 ctrblocks[BF_PARALLEL_BLOCKS];
357 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
358
359 do {
360 /* create ctrblks for parallel encrypt */
361 for (i = 0; i < BF_PARALLEL_BLOCKS; i++) {
362 if (dst != src)
363 dst[i] = src[i];
364
365 ctrblocks[i] = cpu_to_be64(ctrblk++);
366 }
367
368 blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
369 (u8 *)ctrblocks);
370
371 src += BF_PARALLEL_BLOCKS;
372 dst += BF_PARALLEL_BLOCKS;
373 nbytes -= bsize * BF_PARALLEL_BLOCKS;
374 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
375
376 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
377
378 if (nbytes < bsize)
379 goto done;
380 }
381
382 /* Handle leftovers */
383 do {
384 u64 ctrblk;
385
386 if (dst != src)
387 *dst = *src;
388
389 ctrblk = *(u64 *)walk->iv;
390 be64_add_cpu((__be64 *)walk->iv, 1);
391
392 blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
393
394 src += 1;
395 dst += 1;
396 } while ((nbytes -= bsize) >= bsize);
397
398done:
399 return nbytes;
400}
401
402static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
403 struct scatterlist *src, unsigned int nbytes)
404{
405 bool fpu_enabled = false;
406 struct blkcipher_walk walk;
407 int err;
408
409 blkcipher_walk_init(&walk, dst, src, nbytes);
410 err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
411 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
412
413 while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
414 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
415 nbytes = __ctr_crypt(desc, &walk);
416 err = blkcipher_walk_done(desc, &walk, nbytes);
417 }
418
419 bf_fpu_end(fpu_enabled);
420
421 if (walk.nbytes) {
422 ctr_crypt_final(desc, &walk);
423 err = blkcipher_walk_done(desc, &walk, 0);
424 }
425
426 return err;
427}
428
429static struct crypto_alg bf_algs[6] = { {
430 .cra_name = "__ecb-blowfish-avx2",
431 .cra_driver_name = "__driver-ecb-blowfish-avx2",
432 .cra_priority = 0,
433 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
434 .cra_blocksize = BF_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct bf_ctx),
436 .cra_alignmask = 0,
437 .cra_type = &crypto_blkcipher_type,
438 .cra_module = THIS_MODULE,
439 .cra_u = {
440 .blkcipher = {
441 .min_keysize = BF_MIN_KEY_SIZE,
442 .max_keysize = BF_MAX_KEY_SIZE,
443 .setkey = blowfish_setkey,
444 .encrypt = ecb_encrypt,
445 .decrypt = ecb_decrypt,
446 },
447 },
448}, {
449 .cra_name = "__cbc-blowfish-avx2",
450 .cra_driver_name = "__driver-cbc-blowfish-avx2",
451 .cra_priority = 0,
452 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
453 .cra_blocksize = BF_BLOCK_SIZE,
454 .cra_ctxsize = sizeof(struct bf_ctx),
455 .cra_alignmask = 0,
456 .cra_type = &crypto_blkcipher_type,
457 .cra_module = THIS_MODULE,
458 .cra_u = {
459 .blkcipher = {
460 .min_keysize = BF_MIN_KEY_SIZE,
461 .max_keysize = BF_MAX_KEY_SIZE,
462 .setkey = blowfish_setkey,
463 .encrypt = cbc_encrypt,
464 .decrypt = cbc_decrypt,
465 },
466 },
467}, {
468 .cra_name = "__ctr-blowfish-avx2",
469 .cra_driver_name = "__driver-ctr-blowfish-avx2",
470 .cra_priority = 0,
471 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
472 .cra_blocksize = 1,
473 .cra_ctxsize = sizeof(struct bf_ctx),
474 .cra_alignmask = 0,
475 .cra_type = &crypto_blkcipher_type,
476 .cra_module = THIS_MODULE,
477 .cra_u = {
478 .blkcipher = {
479 .min_keysize = BF_MIN_KEY_SIZE,
480 .max_keysize = BF_MAX_KEY_SIZE,
481 .ivsize = BF_BLOCK_SIZE,
482 .setkey = blowfish_setkey,
483 .encrypt = ctr_crypt,
484 .decrypt = ctr_crypt,
485 },
486 },
487}, {
488 .cra_name = "ecb(blowfish)",
489 .cra_driver_name = "ecb-blowfish-avx2",
490 .cra_priority = 400,
491 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
492 .cra_blocksize = BF_BLOCK_SIZE,
493 .cra_ctxsize = sizeof(struct async_helper_ctx),
494 .cra_alignmask = 0,
495 .cra_type = &crypto_ablkcipher_type,
496 .cra_module = THIS_MODULE,
497 .cra_init = ablk_init,
498 .cra_exit = ablk_exit,
499 .cra_u = {
500 .ablkcipher = {
501 .min_keysize = BF_MIN_KEY_SIZE,
502 .max_keysize = BF_MAX_KEY_SIZE,
503 .setkey = ablk_set_key,
504 .encrypt = ablk_encrypt,
505 .decrypt = ablk_decrypt,
506 },
507 },
508}, {
509 .cra_name = "cbc(blowfish)",
510 .cra_driver_name = "cbc-blowfish-avx2",
511 .cra_priority = 400,
512 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
513 .cra_blocksize = BF_BLOCK_SIZE,
514 .cra_ctxsize = sizeof(struct async_helper_ctx),
515 .cra_alignmask = 0,
516 .cra_type = &crypto_ablkcipher_type,
517 .cra_module = THIS_MODULE,
518 .cra_init = ablk_init,
519 .cra_exit = ablk_exit,
520 .cra_u = {
521 .ablkcipher = {
522 .min_keysize = BF_MIN_KEY_SIZE,
523 .max_keysize = BF_MAX_KEY_SIZE,
524 .ivsize = BF_BLOCK_SIZE,
525 .setkey = ablk_set_key,
526 .encrypt = __ablk_encrypt,
527 .decrypt = ablk_decrypt,
528 },
529 },
530}, {
531 .cra_name = "ctr(blowfish)",
532 .cra_driver_name = "ctr-blowfish-avx2",
533 .cra_priority = 400,
534 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
535 .cra_blocksize = 1,
536 .cra_ctxsize = sizeof(struct async_helper_ctx),
537 .cra_alignmask = 0,
538 .cra_type = &crypto_ablkcipher_type,
539 .cra_module = THIS_MODULE,
540 .cra_init = ablk_init,
541 .cra_exit = ablk_exit,
542 .cra_u = {
543 .ablkcipher = {
544 .min_keysize = BF_MIN_KEY_SIZE,
545 .max_keysize = BF_MAX_KEY_SIZE,
546 .ivsize = BF_BLOCK_SIZE,
547 .setkey = ablk_set_key,
548 .encrypt = ablk_encrypt,
549 .decrypt = ablk_encrypt,
550 .geniv = "chainiv",
551 },
552 },
553} };
554
555
556static int __init init(void)
557{
558 u64 xcr0;
559
560 if (!cpu_has_avx2 || !cpu_has_osxsave) {
561 pr_info("AVX2 instructions are not detected.\n");
562 return -ENODEV;
563 }
564
565 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
566 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
567 pr_info("AVX detected but unusable.\n");
568 return -ENODEV;
569 }
570
571 return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
572}
573
574static void __exit fini(void)
575{
576 crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
577}
578
579module_init(init);
580module_exit(fini);
581
582MODULE_LICENSE("GPL");
583MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
584MODULE_ALIAS("blowfish");
585MODULE_ALIAS("blowfish-asm");
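The trickiest part of the glue code deleted above is __cbc_decrypt(), which walks the data tail-first so that every ciphertext block is still intact when it is needed as the chaining value of the block after it. A standalone C sketch of that idea, stripped of the multi-way batching (all names here are illustrative; decrypt_block stands in for a single-block primitive such as blowfish_dec_blk):

#include <stdint.h>
#include <stddef.h>

typedef void (*decrypt_block_t)(uint64_t *dst, const uint64_t *src);

static void cbc_decrypt_inplace(decrypt_block_t decrypt_block,
				uint64_t *buf, size_t nblocks, uint64_t *iv)
{
	/* The last ciphertext block becomes the chaining value for the
	 * next call, so save it before it is overwritten. */
	uint64_t last_iv = buf[nblocks - 1];

	for (size_t i = nblocks; i-- > 0; ) {
		/* buf[i - 1] is still ciphertext because the walk is
		 * tail-first; it is this block's chaining input. */
		uint64_t prev = i ? buf[i - 1] : *iv;

		decrypt_block(&buf[i], &buf[i]);
		buf[i] ^= prev;		/* undo the CBC chaining XOR */
	}

	*iv = last_iv;
}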
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 3548d76dbaa9..50ec333b70e6 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -1,7 +1,7 @@
 /*
  * Glue Code for assembler optimized version of Blowfish
  *
- * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
  *
  * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
  *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@@ -32,24 +32,40 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
-#include <asm/crypto/blowfish.h>
 
 /* regular block cipher functions */
 asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
 				   bool xor);
-EXPORT_SYMBOL_GPL(__blowfish_enc_blk);
-
 asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
-EXPORT_SYMBOL_GPL(blowfish_dec_blk);
 
 /* 4-way parallel cipher functions */
 asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
 					const u8 *src, bool xor);
-EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way);
-
 asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
 				      const u8 *src);
-EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way);
+
+static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
+{
+	__blowfish_enc_blk(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	__blowfish_enc_blk(ctx, dst, src, true);
+}
+
+static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
+					 const u8 *src)
+{
+	__blowfish_enc_blk_4way(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
+					     const u8 *src)
+{
+	__blowfish_enc_blk_4way(ctx, dst, src, true);
+}
 
 static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 91a1878fcc3e..0e0b8863a34b 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -51,16 +51,6 @@
 #define ymm14_x xmm14
 #define ymm15_x xmm15
 
-/*
- * AES-NI instructions do not support ymmX registers, so we need splitting and
- * merging.
- */
-#define vaesenclast256(zero, yreg, tmp) \
-	vextracti128 $1, yreg, tmp##_x; \
-	vaesenclast zero##_x, yreg##_x, yreg##_x; \
-	vaesenclast zero##_x, tmp##_x, tmp##_x; \
-	vinserti128 $1, tmp##_x, yreg, yreg;
-
 /**********************************************************************
   32-way camellia
  **********************************************************************/
@@ -79,46 +69,70 @@
 	 * S-function with AES subbytes \
 	 */ \
 	vbroadcasti128 .Linv_shift_row, t4; \
-	vpbroadcastb .L0f0f0f0f, t7; \
-	vbroadcasti128 .Lpre_tf_lo_s1, t0; \
-	vbroadcasti128 .Lpre_tf_hi_s1, t1; \
+	vpbroadcastd .L0f0f0f0f, t7; \
+	vbroadcasti128 .Lpre_tf_lo_s1, t5; \
+	vbroadcasti128 .Lpre_tf_hi_s1, t6; \
+	vbroadcasti128 .Lpre_tf_lo_s4, t2; \
+	vbroadcasti128 .Lpre_tf_hi_s4, t3; \
 	\
 	/* AES inverse shift rows */ \
 	vpshufb t4, x0, x0; \
 	vpshufb t4, x7, x7; \
-	vpshufb t4, x1, x1; \
-	vpshufb t4, x4, x4; \
-	vpshufb t4, x2, x2; \
-	vpshufb t4, x5, x5; \
 	vpshufb t4, x3, x3; \
 	vpshufb t4, x6, x6; \
+	vpshufb t4, x2, x2; \
+	vpshufb t4, x5, x5; \
+	vpshufb t4, x1, x1; \
+	vpshufb t4, x4, x4; \
 	\
 	/* prefilter sboxes 1, 2 and 3 */ \
-	vbroadcasti128 .Lpre_tf_lo_s4, t2; \
-	vbroadcasti128 .Lpre_tf_hi_s4, t3; \
-	filter_8bit(x0, t0, t1, t7, t6); \
-	filter_8bit(x7, t0, t1, t7, t6); \
-	filter_8bit(x1, t0, t1, t7, t6); \
-	filter_8bit(x4, t0, t1, t7, t6); \
-	filter_8bit(x2, t0, t1, t7, t6); \
-	filter_8bit(x5, t0, t1, t7, t6); \
-	\
 	/* prefilter sbox 4 */ \
+	filter_8bit(x0, t5, t6, t7, t4); \
+	filter_8bit(x7, t5, t6, t7, t4); \
+	vextracti128 $1, x0, t0##_x; \
+	vextracti128 $1, x7, t1##_x; \
+	filter_8bit(x3, t2, t3, t7, t4); \
+	filter_8bit(x6, t2, t3, t7, t4); \
+	vextracti128 $1, x3, t3##_x; \
+	vextracti128 $1, x6, t2##_x; \
+	filter_8bit(x2, t5, t6, t7, t4); \
+	filter_8bit(x5, t5, t6, t7, t4); \
+	filter_8bit(x1, t5, t6, t7, t4); \
+	filter_8bit(x4, t5, t6, t7, t4); \
+	\
 	vpxor t4##_x, t4##_x, t4##_x; \
-	filter_8bit(x3, t2, t3, t7, t6); \
-	filter_8bit(x6, t2, t3, t7, t6); \
 	\
 	/* AES subbytes + AES shift rows */ \
+	vextracti128 $1, x2, t6##_x; \
+	vextracti128 $1, x5, t5##_x; \
+	vaesenclast t4##_x, x0##_x, x0##_x; \
+	vaesenclast t4##_x, t0##_x, t0##_x; \
+	vinserti128 $1, t0##_x, x0, x0; \
+	vaesenclast t4##_x, x7##_x, x7##_x; \
+	vaesenclast t4##_x, t1##_x, t1##_x; \
+	vinserti128 $1, t1##_x, x7, x7; \
+	vaesenclast t4##_x, x3##_x, x3##_x; \
+	vaesenclast t4##_x, t3##_x, t3##_x; \
+	vinserti128 $1, t3##_x, x3, x3; \
+	vaesenclast t4##_x, x6##_x, x6##_x; \
+	vaesenclast t4##_x, t2##_x, t2##_x; \
+	vinserti128 $1, t2##_x, x6, x6; \
+	vextracti128 $1, x1, t3##_x; \
+	vextracti128 $1, x4, t2##_x; \
 	vbroadcasti128 .Lpost_tf_lo_s1, t0; \
 	vbroadcasti128 .Lpost_tf_hi_s1, t1; \
-	vaesenclast256(t4, x0, t5); \
-	vaesenclast256(t4, x7, t5); \
-	vaesenclast256(t4, x1, t5); \
-	vaesenclast256(t4, x4, t5); \
-	vaesenclast256(t4, x2, t5); \
-	vaesenclast256(t4, x5, t5); \
-	vaesenclast256(t4, x3, t5); \
-	vaesenclast256(t4, x6, t5); \
+	vaesenclast t4##_x, x2##_x, x2##_x; \
+	vaesenclast t4##_x, t6##_x, t6##_x; \
+	vinserti128 $1, t6##_x, x2, x2; \
+	vaesenclast t4##_x, x5##_x, x5##_x; \
+	vaesenclast t4##_x, t5##_x, t5##_x; \
+	vinserti128 $1, t5##_x, x5, x5; \
+	vaesenclast t4##_x, x1##_x, x1##_x; \
+	vaesenclast t4##_x, t3##_x, t3##_x; \
+	vinserti128 $1, t3##_x, x1, x1; \
+	vaesenclast t4##_x, x4##_x, x4##_x; \
+	vaesenclast t4##_x, t2##_x, t2##_x; \
+	vinserti128 $1, t2##_x, x4, x4; \
 	\
 	/* postfilter sboxes 1 and 4 */ \
 	vbroadcasti128 .Lpost_tf_lo_s3, t2; \
@@ -139,22 +153,12 @@
 	/* postfilter sbox 2 */ \
 	filter_8bit(x1, t4, t5, t7, t2); \
 	filter_8bit(x4, t4, t5, t7, t2); \
+	vpxor t7, t7, t7; \
 	\
 	vpsrldq $1, t0, t1; \
 	vpsrldq $2, t0, t2; \
+	vpshufb t7, t1, t1; \
 	vpsrldq $3, t0, t3; \
-	vpsrldq $4, t0, t4; \
-	vpsrldq $5, t0, t5; \
-	vpsrldq $6, t0, t6; \
-	vpsrldq $7, t0, t7; \
-	vpbroadcastb t0##_x, t0; \
-	vpbroadcastb t1##_x, t1; \
-	vpbroadcastb t2##_x, t2; \
-	vpbroadcastb t3##_x, t3; \
-	vpbroadcastb t4##_x, t4; \
-	vpbroadcastb t6##_x, t6; \
-	vpbroadcastb t5##_x, t5; \
-	vpbroadcastb t7##_x, t7; \
 	\
 	/* P-function */ \
 	vpxor x5, x0, x0; \
@@ -162,11 +166,21 @@
 	vpxor x6, x1, x1; \
 	vpxor x7, x2, x2; \
 	vpxor x4, x3, x3; \
 	\
+	vpshufb t7, t2, t2; \
+	vpsrldq $4, t0, t4; \
+	vpshufb t7, t3, t3; \
+	vpsrldq $5, t0, t5; \
+	vpshufb t7, t4, t4; \
+	\
 	vpxor x2, x4, x4; \
 	vpxor x3, x5, x5; \
 	vpxor x0, x6, x6; \
 	vpxor x1, x7, x7; \
 	\
+	vpsrldq $6, t0, t6; \
+	vpshufb t7, t5, t5; \
+	vpshufb t7, t6, t6; \
+	\
 	vpxor x7, x0, x0; \
 	vpxor x4, x1, x1; \
 	vpxor x5, x2, x2; \
@@ -179,12 +193,16 @@
 	\
 	/* Add key material and result to CD (x becomes new CD) */ \
 	\
-	vpxor t7, x0, x0; \
-	vpxor 4 * 32(mem_cd), x0, x0; \
-	\
 	vpxor t6, x1, x1; \
 	vpxor 5 * 32(mem_cd), x1, x1; \
 	\
+	vpsrldq $7, t0, t6; \
+	vpshufb t7, t0, t0; \
+	vpshufb t7, t6, t7; \
+	\
+	vpxor t7, x0, x0; \
+	vpxor 4 * 32(mem_cd), x0, x0; \
+	\
 	vpxor t5, x2, x2; \
 	vpxor 6 * 32(mem_cd), x2, x2; \
 	\
@@ -204,7 +222,7 @@
 	vpxor 3 * 32(mem_cd), x7, x7;
 
 /*
- * Size optimization... with inlined roundsm16 binary would be over 5 times
+ * Size optimization... with inlined roundsm32 binary would be over 5 times
  * larger and would only marginally faster.
  */
 .align 8
@@ -324,13 +342,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	 */ \
 	vpbroadcastd kll, t0; /* only lowest 32-bit used */ \
 	vpxor tt0, tt0, tt0; \
-	vpbroadcastb t0##_x, t3; \
+	vpshufb tt0, t0, t3; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t2; \
+	vpshufb tt0, t0, t2; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t1; \
+	vpshufb tt0, t0, t1; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t0; \
+	vpshufb tt0, t0, t0; \
 	\
 	vpand l0, t0, t0; \
 	vpand l1, t1, t1; \
@@ -340,6 +358,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
 	\
 	vpxor l4, t0, l4; \
+	vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
 	vmovdqu l4, 4 * 32(l); \
 	vpxor l5, t1, l5; \
 	vmovdqu l5, 5 * 32(l); \
@@ -354,14 +373,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	 * rl ^= t2; \
 	 */ \
 	\
-	vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
-	vpbroadcastb t0##_x, t3; \
+	vpshufb tt0, t0, t3; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t2; \
+	vpshufb tt0, t0, t2; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t1; \
+	vpshufb tt0, t0, t1; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t0; \
+	vpshufb tt0, t0, t0; \
 	\
 	vpor 4 * 32(r), t0, t0; \
 	vpor 5 * 32(r), t1, t1; \
@@ -373,6 +391,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	vpxor 2 * 32(r), t2, t2; \
 	vpxor 3 * 32(r), t3, t3; \
 	vmovdqu t0, 0 * 32(r); \
+	vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
 	vmovdqu t1, 1 * 32(r); \
 	vmovdqu t2, 2 * 32(r); \
 	vmovdqu t3, 3 * 32(r); \
@@ -382,14 +401,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	 * t2 &= rl; \
 	 * rr ^= rol32(t2, 1); \
 	 */ \
-	vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
-	vpbroadcastb t0##_x, t3; \
+	vpshufb tt0, t0, t3; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t2; \
+	vpshufb tt0, t0, t2; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t1; \
+	vpshufb tt0, t0, t1; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t0; \
+	vpshufb tt0, t0, t0; \
 	\
 	vpand 0 * 32(r), t0, t0; \
 	vpand 1 * 32(r), t1, t1; \
@@ -403,6 +421,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	vpxor 6 * 32(r), t2, t2; \
 	vpxor 7 * 32(r), t3, t3; \
 	vmovdqu t0, 4 * 32(r); \
+	vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
 	vmovdqu t1, 5 * 32(r); \
 	vmovdqu t2, 6 * 32(r); \
 	vmovdqu t3, 7 * 32(r); \
@@ -413,14 +432,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	 * ll ^= t0; \
 	 */ \
 	\
-	vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
-	vpbroadcastb t0##_x, t3; \
+	vpshufb tt0, t0, t3; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t2; \
+	vpshufb tt0, t0, t2; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t1; \
+	vpshufb tt0, t0, t1; \
 	vpsrldq $1, t0, t0; \
-	vpbroadcastb t0##_x, t0; \
+	vpshufb tt0, t0, t0; \
 	\
 	vpor l4, t0, t0; \
 	vpor l5, t1, t1; \
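The deleted vaesenclast256 macro, and the hand-scheduled vextracti128/vaesenclast/vinserti128 sequences that replace it above, both work around the fact that AESENCLAST only accepts 128-bit registers. A rough intrinsics equivalent of the split/merge idiom (assumptions: an AVX2+AES-NI target built with -mavx2 -maes; this is not code from the kernel tree):

#include <immintrin.h>

/* Apply AESENCLAST to both 128-bit lanes of a 256-bit register: split,
 * run the xmm-only instruction on each half, then merge the halves back. */
static __m256i aesenclast_256(__m256i state, __m128i round_key)
{
	__m128i lo = _mm256_castsi256_si128(state);      /* lane 0 */
	__m128i hi = _mm256_extracti128_si256(state, 1); /* lane 1 */

	lo = _mm_aesenclast_si128(lo, round_key);
	hi = _mm_aesenclast_si128(hi, round_key);

	return _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1);
}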
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
new file mode 100644
index 000000000000..35e97569d05f
--- /dev/null
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -0,0 +1,643 @@
1########################################################################
2# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
3#
4# Copyright (c) 2013, Intel Corporation
5#
6# Authors:
7# Erdinc Ozturk <erdinc.ozturk@intel.com>
8# Vinodh Gopal <vinodh.gopal@intel.com>
9# James Guilford <james.guilford@intel.com>
10# Tim Chen <tim.c.chen@linux.intel.com>
11#
12# This software is available to you under a choice of one of two
13# licenses. You may choose to be licensed under the terms of the GNU
14# General Public License (GPL) Version 2, available from the file
15# COPYING in the main directory of this source tree, or the
16# OpenIB.org BSD license below:
17#
18# Redistribution and use in source and binary forms, with or without
19# modification, are permitted provided that the following conditions are
20# met:
21#
22# * Redistributions of source code must retain the above copyright
23# notice, this list of conditions and the following disclaimer.
24#
25# * Redistributions in binary form must reproduce the above copyright
26# notice, this list of conditions and the following disclaimer in the
27# documentation and/or other materials provided with the
28# distribution.
29#
30# * Neither the name of the Intel Corporation nor the names of its
31# contributors may be used to endorse or promote products derived from
32# this software without specific prior written permission.
33#
34#
35# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
36# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
39# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
40# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
41# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
42# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
43# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
44# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46########################################################################
47# Function API:
48# UINT16 crc_t10dif_pcl(
49# UINT16 init_crc, //initial CRC value, 16 bits
50# const unsigned char *buf, //buffer pointer to calculate CRC on
51# UINT64 len //buffer length in bytes (64-bit data)
52# );
53#
54# Reference paper titled "Fast CRC Computation for Generic
55# Polynomials Using PCLMULQDQ Instruction"
56# URL: http://www.intel.com/content/dam/www/public/us/en/documents
57# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
58#
59#
60
61#include <linux/linkage.h>
62
63.text
64
65#define arg1 %rdi
66#define arg2 %rsi
67#define arg3 %rdx
68
69#define arg1_low32 %edi
70
71ENTRY(crc_t10dif_pcl)
72.align 16
73
74 # adjust the 16-bit initial_crc value, scale it to 32 bits
75 shl $16, arg1_low32
76
77 # Allocate Stack Space
78 mov %rsp, %rcx
79 sub $16*2, %rsp
80 # align stack to 16 byte boundary
81 and $~(0x10 - 1), %rsp
82
83 # check if smaller than 256
84 cmp $256, arg3
85
86 # for sizes less than 128, we can't fold 64B at a time...
87 jl _less_than_128
88
89
90 # load the initial crc value
91 movd arg1_low32, %xmm10 # initial crc
92
93 # crc value does not need to be byte-reflected, but it needs
94 # to be moved to the high part of the register.
95 # because data will be byte-reflected and will align with
96 # initial crc at correct place.
97 pslldq $12, %xmm10
98
99 movdqa SHUF_MASK(%rip), %xmm11
100 # receive the initial 64B data, xor the initial crc value
101 movdqu 16*0(arg2), %xmm0
102 movdqu 16*1(arg2), %xmm1
103 movdqu 16*2(arg2), %xmm2
104 movdqu 16*3(arg2), %xmm3
105 movdqu 16*4(arg2), %xmm4
106 movdqu 16*5(arg2), %xmm5
107 movdqu 16*6(arg2), %xmm6
108 movdqu 16*7(arg2), %xmm7
109
110 pshufb %xmm11, %xmm0
111 # XOR the initial_crc value
112 pxor %xmm10, %xmm0
113 pshufb %xmm11, %xmm1
114 pshufb %xmm11, %xmm2
115 pshufb %xmm11, %xmm3
116 pshufb %xmm11, %xmm4
117 pshufb %xmm11, %xmm5
118 pshufb %xmm11, %xmm6
119 pshufb %xmm11, %xmm7
120
121 movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4
122 #imm value of pclmulqdq instruction
123 #will determine which constant to use
124
125 #################################################################
126 # we subtract 256 instead of 128 to save one instruction from the loop
127 sub $256, arg3
128
129 # at this section of the code, there is 64*x+y (0<=y<64) bytes of
130 # buffer. The _fold_64_B_loop will fold 64B at a time
131 # until we have 64+y Bytes of buffer
132
133
134 # fold 64B at a time. This section of the code folds 4 xmm
135 # registers in parallel
136_fold_64_B_loop:
137
138 # update the buffer pointer
139 add $128, arg2 # buf += 64#
140
141 movdqu 16*0(arg2), %xmm9
142 movdqu 16*1(arg2), %xmm12
143 pshufb %xmm11, %xmm9
144 pshufb %xmm11, %xmm12
145 movdqa %xmm0, %xmm8
146 movdqa %xmm1, %xmm13
147 pclmulqdq $0x0 , %xmm10, %xmm0
148 pclmulqdq $0x11, %xmm10, %xmm8
149 pclmulqdq $0x0 , %xmm10, %xmm1
150 pclmulqdq $0x11, %xmm10, %xmm13
151 pxor %xmm9 , %xmm0
152 xorps %xmm8 , %xmm0
153 pxor %xmm12, %xmm1
154 xorps %xmm13, %xmm1
155
156 movdqu 16*2(arg2), %xmm9
157 movdqu 16*3(arg2), %xmm12
158 pshufb %xmm11, %xmm9
159 pshufb %xmm11, %xmm12
160 movdqa %xmm2, %xmm8
161 movdqa %xmm3, %xmm13
162 pclmulqdq $0x0, %xmm10, %xmm2
163 pclmulqdq $0x11, %xmm10, %xmm8
164 pclmulqdq $0x0, %xmm10, %xmm3
165 pclmulqdq $0x11, %xmm10, %xmm13
166 pxor %xmm9 , %xmm2
167 xorps %xmm8 , %xmm2
168 pxor %xmm12, %xmm3
169 xorps %xmm13, %xmm3
170
171 movdqu 16*4(arg2), %xmm9
172 movdqu 16*5(arg2), %xmm12
173 pshufb %xmm11, %xmm9
174 pshufb %xmm11, %xmm12
175 movdqa %xmm4, %xmm8
176 movdqa %xmm5, %xmm13
177 pclmulqdq $0x0, %xmm10, %xmm4
178 pclmulqdq $0x11, %xmm10, %xmm8
179 pclmulqdq $0x0, %xmm10, %xmm5
180 pclmulqdq $0x11, %xmm10, %xmm13
181 pxor %xmm9 , %xmm4
182 xorps %xmm8 , %xmm4
183 pxor %xmm12, %xmm5
184 xorps %xmm13, %xmm5
185
186 movdqu 16*6(arg2), %xmm9
187 movdqu 16*7(arg2), %xmm12
188 pshufb %xmm11, %xmm9
189 pshufb %xmm11, %xmm12
190 movdqa %xmm6 , %xmm8
191 movdqa %xmm7 , %xmm13
192 pclmulqdq $0x0 , %xmm10, %xmm6
193 pclmulqdq $0x11, %xmm10, %xmm8
194 pclmulqdq $0x0 , %xmm10, %xmm7
195 pclmulqdq $0x11, %xmm10, %xmm13
196 pxor %xmm9 , %xmm6
197 xorps %xmm8 , %xmm6
198 pxor %xmm12, %xmm7
199 xorps %xmm13, %xmm7
200
201 sub $128, arg3
202
203 # check if there is another 64B in the buffer to be able to fold
204 jge _fold_64_B_loop
205 ##################################################################
206
207
208 add $128, arg2
209 # at this point, the buffer pointer is pointing at the last y Bytes
210 # of the buffer the 64B of folded data is in 4 of the xmm
211 # registers: xmm0, xmm1, xmm2, xmm3
212
213
214 # fold the 8 xmm registers to 1 xmm register with different constants
215
216 movdqa rk9(%rip), %xmm10
217 movdqa %xmm0, %xmm8
218 pclmulqdq $0x11, %xmm10, %xmm0
219 pclmulqdq $0x0 , %xmm10, %xmm8
220 pxor %xmm8, %xmm7
221 xorps %xmm0, %xmm7
222
223 movdqa rk11(%rip), %xmm10
224 movdqa %xmm1, %xmm8
225 pclmulqdq $0x11, %xmm10, %xmm1
226 pclmulqdq $0x0 , %xmm10, %xmm8
227 pxor %xmm8, %xmm7
228 xorps %xmm1, %xmm7
229
230 movdqa rk13(%rip), %xmm10
231 movdqa %xmm2, %xmm8
232 pclmulqdq $0x11, %xmm10, %xmm2
233 pclmulqdq $0x0 , %xmm10, %xmm8
234 pxor %xmm8, %xmm7
235 pxor %xmm2, %xmm7
236
237 movdqa rk15(%rip), %xmm10
238 movdqa %xmm3, %xmm8
239 pclmulqdq $0x11, %xmm10, %xmm3
240 pclmulqdq $0x0 , %xmm10, %xmm8
241 pxor %xmm8, %xmm7
242 xorps %xmm3, %xmm7
243
244 movdqa rk17(%rip), %xmm10
245 movdqa %xmm4, %xmm8
246 pclmulqdq $0x11, %xmm10, %xmm4
247 pclmulqdq $0x0 , %xmm10, %xmm8
248 pxor %xmm8, %xmm7
249 pxor %xmm4, %xmm7
250
251 movdqa rk19(%rip), %xmm10
252 movdqa %xmm5, %xmm8
253 pclmulqdq $0x11, %xmm10, %xmm5
254 pclmulqdq $0x0 , %xmm10, %xmm8
255 pxor %xmm8, %xmm7
256 xorps %xmm5, %xmm7
257
258 movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2
259 #imm value of pclmulqdq instruction
260 #will determine which constant to use
261 movdqa %xmm6, %xmm8
262 pclmulqdq $0x11, %xmm10, %xmm6
263 pclmulqdq $0x0 , %xmm10, %xmm8
264 pxor %xmm8, %xmm7
265 pxor %xmm6, %xmm7
266
267
268	# instead of 128, we add 112 (128-16) to the loop counter to save
269	# one instruction from the loop; instead of a cmp instruction, we
270	# use the negative flag with the jl instruction
271 add $128-16, arg3
272 jl _final_reduction_for_128
273
274 # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
275 # and the rest is in memory. We can fold 16 bytes at a time if y>=16
276 # continue folding 16B at a time
277
278_16B_reduction_loop:
279 movdqa %xmm7, %xmm8
280 pclmulqdq $0x11, %xmm10, %xmm7
281 pclmulqdq $0x0 , %xmm10, %xmm8
282 pxor %xmm8, %xmm7
283 movdqu (arg2), %xmm0
284 pshufb %xmm11, %xmm0
285 pxor %xmm0 , %xmm7
286 add $16, arg2
287 sub $16, arg3
288	# instead of a cmp instruction, we utilize the flags with the
289	# jge instruction; equivalent of: cmp $16-16, arg3
290	# check if there are any more 16B blocks in the buffer to fold
291 jge _16B_reduction_loop
292
293	# now we have 16+z bytes left to reduce, where 0 <= z < 16.
294	# first, we reduce the data in the xmm7 register
295
296
297_final_reduction_for_128:
298 # check if any more data to fold. If not, compute the CRC of
299 # the final 128 bits
300 add $16, arg3
301 je _128_done
302
303	# here we have fewer than 16 bytes of data left.
304	# since we know that there was data before the pointer, we can
305	# offset the input pointer back before the current point to load
306	# exactly 16 bytes; after that, the registers need to be adjusted.
307_get_last_two_xmms:
308 movdqa %xmm7, %xmm2
309
310 movdqu -16(arg2, arg3), %xmm1
311 pshufb %xmm11, %xmm1
312
313 # get rid of the extra data that was loaded before
314 # load the shift constant
315 lea pshufb_shf_table+16(%rip), %rax
316 sub arg3, %rax
317 movdqu (%rax), %xmm0
318
319 # shift xmm2 to the left by arg3 bytes
320 pshufb %xmm0, %xmm2
321
322 # shift xmm7 to the right by 16-arg3 bytes
323 pxor mask1(%rip), %xmm0
324 pshufb %xmm0, %xmm7
325 pblendvb %xmm2, %xmm1 #xmm0 is implicit
326
327 # fold 16 Bytes
328 movdqa %xmm1, %xmm2
329 movdqa %xmm7, %xmm8
330 pclmulqdq $0x11, %xmm10, %xmm7
331 pclmulqdq $0x0 , %xmm10, %xmm8
332 pxor %xmm8, %xmm7
333 pxor %xmm2, %xmm7
334
335_128_done:
336 # compute crc of a 128-bit value
337 movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10
338 movdqa %xmm7, %xmm0
339
340 #64b fold
341 pclmulqdq $0x1, %xmm10, %xmm7
342 pslldq $8 , %xmm0
343 pxor %xmm0, %xmm7
344
345 #32b fold
346 movdqa %xmm7, %xmm0
347
348 pand mask2(%rip), %xmm0
349
350 psrldq $12, %xmm7
351 pclmulqdq $0x10, %xmm10, %xmm7
352 pxor %xmm0, %xmm7
353
354 #barrett reduction
355_barrett:
356 movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10
357 movdqa %xmm7, %xmm0
358 pclmulqdq $0x01, %xmm10, %xmm7
359 pslldq $4, %xmm7
360 pclmulqdq $0x11, %xmm10, %xmm7
361
362 pslldq $4, %xmm7
363 pxor %xmm0, %xmm7
364 pextrd $1, %xmm7, %eax
365
366_cleanup:
367 # scale the result back to 16 bits
368 shr $16, %eax
369 mov %rcx, %rsp
370 ret
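	# The _128_done/_barrett sequence above is Barrett reduction: with
	# the degree-32 polynomial Q = 0x18BB70000 (rk8) and
	# mu = floor(x^64/Q) (rk7), a 64-bit remainder R reduces as
	# t1 = floor(R/x^32)*mu, t2 = floor(t1/x^32)*Q, and
	# R mod Q = low 32 bits of (R ^ t2). Every value in this routine is
	# a multiple of x^16 (Q is the T10 DIF polynomial pre-scaled by
	# x^16), so the final shr $16 recovers the 16-bit crc. A C sketch,
	# reusing the loop-based clmul() model shown earlier:
	#
	#	static uint16_t barrett16(uint64_t R)
	#	{
	#		const uint64_t Q  = 0x18BB70000ULL;	/* rk8 */
	#		const uint64_t mu = 0x1F65A57F8ULL;	/* rk7 */
	#		u128 t1 = clmul(R >> 32, mu);
	#		u128 t2 = clmul((uint64_t)(t1 >> 32), Q);
	#		return (uint32_t)(R ^ (uint64_t)t2) >> 16;
	#	}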
371
372########################################################################
373
374.align 16
375_less_than_128:
376
377 # check if there is enough buffer to be able to fold 16B at a time
378 cmp $32, arg3
379 jl _less_than_32
380 movdqa SHUF_MASK(%rip), %xmm11
381
382 # now if there is, load the constants
383 movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
384
385 movd arg1_low32, %xmm0 # get the initial crc value
386 pslldq $12, %xmm0 # align it to its correct place
387 movdqu (arg2), %xmm7 # load the plaintext
388 pshufb %xmm11, %xmm7 # byte-reflect the plaintext
389 pxor %xmm0, %xmm7
390
391
392 # update the buffer pointer
393 add $16, arg2
394
395 # update the counter. subtract 32 instead of 16 to save one
396 # instruction from the loop
397 sub $32, arg3
398
399 jmp _16B_reduction_loop
400
401
402.align 16
403_less_than_32:
404 # mov initial crc to the return value. this is necessary for
405 # zero-length buffers.
406 mov arg1_low32, %eax
407 test arg3, arg3
408 je _cleanup
409
410 movdqa SHUF_MASK(%rip), %xmm11
411
412 movd arg1_low32, %xmm0 # get the initial crc value
413 pslldq $12, %xmm0 # align it to its correct place
414
415 cmp $16, arg3
416 je _exact_16_left
417 jl _less_than_16_left
418
419 movdqu (arg2), %xmm7 # load the plaintext
420 pshufb %xmm11, %xmm7 # byte-reflect the plaintext
421 pxor %xmm0 , %xmm7 # xor the initial crc value
422 add $16, arg2
423 sub $16, arg3
424 movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
425 jmp _get_last_two_xmms
426
427
428.align 16
429_less_than_16_left:
430 # use stack space to load data less than 16 bytes, zero-out
431 # the 16B in memory first.
432
433 pxor %xmm1, %xmm1
434 mov %rsp, %r11
435 movdqa %xmm1, (%r11)
436
437 cmp $4, arg3
438 jl _only_less_than_4
439
440 # backup the counter value
441 mov arg3, %r9
442 cmp $8, arg3
443 jl _less_than_8_left
444
445 # load 8 Bytes
446 mov (arg2), %rax
447 mov %rax, (%r11)
448 add $8, %r11
449 sub $8, arg3
450 add $8, arg2
451_less_than_8_left:
452
453 cmp $4, arg3
454 jl _less_than_4_left
455
456 # load 4 Bytes
457 mov (arg2), %eax
458 mov %eax, (%r11)
459 add $4, %r11
460 sub $4, arg3
461 add $4, arg2
462_less_than_4_left:
463
464 cmp $2, arg3
465 jl _less_than_2_left
466
467 # load 2 Bytes
468 mov (arg2), %ax
469 mov %ax, (%r11)
470 add $2, %r11
471 sub $2, arg3
472 add $2, arg2
473_less_than_2_left:
474 cmp $1, arg3
475 jl _zero_left
476
477 # load 1 Byte
478 mov (arg2), %al
479 mov %al, (%r11)
480_zero_left:
481 movdqa (%rsp), %xmm7
482 pshufb %xmm11, %xmm7
483 pxor %xmm0 , %xmm7 # xor the initial crc value
484
485	# look up the pshufb shift constant for r9 (the saved length) bytes
486 lea pshufb_shf_table+16(%rip), %rax
487 sub %r9, %rax
488 movdqu (%rax), %xmm0
489 pxor mask1(%rip), %xmm0
490
491 pshufb %xmm0, %xmm7
492 jmp _128_done
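	# The 8/4/2/1-byte copy ladder above is just staging a short tail
	# into a zeroed 16B stack slot; in C terms (a sketch of the idea):
	#
	#	uint8_t block[16] = { 0 };
	#	memcpy(block, buf, len);	/* len < 16 */
	#
	# after which the pshufb, via pshufb_shf_table, moves the bytes
	# into their final position in the register.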
493
494.align 16
495_exact_16_left:
496 movdqu (arg2), %xmm7
497 pshufb %xmm11, %xmm7
498 pxor %xmm0 , %xmm7 # xor the initial crc value
499
500 jmp _128_done
501
502_only_less_than_4:
503 cmp $3, arg3
504 jl _only_less_than_3
505
506 # load 3 Bytes
507 mov (arg2), %al
508 mov %al, (%r11)
509
510 mov 1(arg2), %al
511 mov %al, 1(%r11)
512
513 mov 2(arg2), %al
514 mov %al, 2(%r11)
515
516 movdqa (%rsp), %xmm7
517 pshufb %xmm11, %xmm7
518 pxor %xmm0 , %xmm7 # xor the initial crc value
519
520 psrldq $5, %xmm7
521
522 jmp _barrett
523_only_less_than_3:
524 cmp $2, arg3
525 jl _only_less_than_2
526
527 # load 2 Bytes
528 mov (arg2), %al
529 mov %al, (%r11)
530
531 mov 1(arg2), %al
532 mov %al, 1(%r11)
533
534 movdqa (%rsp), %xmm7
535 pshufb %xmm11, %xmm7
536 pxor %xmm0 , %xmm7 # xor the initial crc value
537
538 psrldq $6, %xmm7
539
540 jmp _barrett
541_only_less_than_2:
542
543 # load 1 Byte
544 mov (arg2), %al
545 mov %al, (%r11)
546
547 movdqa (%rsp), %xmm7
548 pshufb %xmm11, %xmm7
549 pxor %xmm0 , %xmm7 # xor the initial crc value
550
551 psrldq $7, %xmm7
552
553 jmp _barrett
554
555ENDPROC(crc_t10dif_pcl)
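# For reference, the routine above computes the plain (non-reflected)
# CRC16 over the T10 DIF polynomial 0x8BB7, i.e. it must agree with this
# bit-at-a-time C model (a sketch of what crc_t10dif_generic computes):
#
#	static uint16_t crc_t10dif_bitwise(uint16_t crc,
#					   const uint8_t *buf, size_t len)
#	{
#		while (len--) {
#			crc ^= (uint16_t)(*buf++) << 8;
#			for (int i = 0; i < 8; i++)
#				crc = (crc & 0x8000) ?
#				      (crc << 1) ^ 0x8BB7 : (crc << 1);
#		}
#		return crc;
#	}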
556
557.data
558
559# precomputed constants
560# these constants are precomputed from the poly:
561# 0x8bb70000 (0x8bb7 scaled to 32 bits)
562.align 16
563# Q = 0x18BB70000
564# rk1 = 2^(32*3) mod Q << 32
565# rk2 = 2^(32*5) mod Q << 32
566# rk3 = 2^(32*15) mod Q << 32
567# rk4 = 2^(32*17) mod Q << 32
568# rk5 = 2^(32*3) mod Q << 32
569# rk6 = 2^(32*2) mod Q << 32
570# rk7 = floor(2^64/Q)
571# rk8 = Q
572rk1:
573.quad 0x2d56000000000000
574rk2:
575.quad 0x06df000000000000
576rk3:
577.quad 0x9d9d000000000000
578rk4:
579.quad 0x7cf5000000000000
580rk5:
581.quad 0x2d56000000000000
582rk6:
583.quad 0x1368000000000000
584rk7:
585.quad 0x00000001f65a57f8
586rk8:
587.quad 0x000000018bb70000
588
589rk9:
590.quad 0xceae000000000000
591rk10:
592.quad 0xbfd6000000000000
593rk11:
594.quad 0x1e16000000000000
595rk12:
596.quad 0x713c000000000000
597rk13:
598.quad 0xf7f9000000000000
599rk14:
600.quad 0x80a6000000000000
601rk15:
602.quad 0x044c000000000000
603rk16:
604.quad 0xe658000000000000
605rk17:
606.quad 0xad18000000000000
607rk18:
608.quad 0xa497000000000000
609rk19:
610.quad 0x6ee3000000000000
611rk20:
612.quad 0xe7b5000000000000
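# The rk values follow from the formulas above with plain shift-and-
# reduce arithmetic in GF(2); a small C sketch (xpow_mod() is an
# illustrative helper, Q = 0x18BB70000 as documented above):
#
#	static uint32_t xpow_mod(unsigned int e)	/* x^e mod Q */
#	{
#		uint64_t r = 1;
#		while (e--) {
#			r <<= 1;			/* multiply by x */
#			if (r & (1ULL << 32))
#				r ^= 0x18BB70000ULL;	/* reduce by Q */
#		}
#		return (uint32_t)r;
#	}
#
# so that, per the table above, (uint64_t)xpow_mod(32 * 3) << 32 should
# reproduce rk1 = 0x2d56000000000000.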
613
614
615
616mask1:
617.octa 0x80808080808080808080808080808080
618mask2:
619.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
620
621SHUF_MASK:
622.octa 0x000102030405060708090A0B0C0D0E0F
623
624pshufb_shf_table:
625# use these values for shift constants for the pshufb instruction
626# different alignments result in values as shown:
627# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
628# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
629# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
630# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
631# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
632# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
633# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
634# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
635# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
636# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
637# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
638# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
639# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
640# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
641# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
642.octa 0x8f8e8d8c8b8a89888786858483828100
643.octa 0x000e0d0c0b0a09080706050403020100
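# The table above works because pshufb treats each mask byte as a source
# index (low 4 bits), or as zero when its top bit is set; loading 16
# bytes at pshufb_shf_table+16-len therefore yields a mask that shifts a
# register by len bytes, and xoring that mask with mask1 (0x80 in every
# lane) flips which lanes are zeroed, giving the complementary shift.
# A C model of the instruction itself:
#
#	static void pshufb(uint8_t out[16], const uint8_t in[16],
#			   const uint8_t mask[16])
#	{
#		for (int i = 0; i < 16; i++)
#			out[i] = (mask[i] & 0x80) ? 0 : in[mask[i] & 0x0f];
#	}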
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
new file mode 100644
index 000000000000..7845d7fd54c0
--- /dev/null
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -0,0 +1,151 @@
1/*
2 * Cryptographic API.
3 *
4 * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
5 *
6 * Copyright (C) 2013 Intel Corporation
7 * Author: Tim Chen <tim.c.chen@linux.intel.com>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation; either version 2 of the License, or (at your option)
12 * any later version.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 */
24
25#include <linux/types.h>
26#include <linux/module.h>
27#include <linux/crc-t10dif.h>
28#include <crypto/internal/hash.h>
29#include <linux/init.h>
30#include <linux/string.h>
31#include <linux/kernel.h>
32#include <asm/i387.h>
33#include <asm/cpufeature.h>
34#include <asm/cpu_device_id.h>
35
36asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
37 size_t len);
38
39struct chksum_desc_ctx {
40 __u16 crc;
41};
42
43/*
44 * Computes the CRC16 T10 DIF checksum using the PCLMULQDQ routine when
45 * the FPU is usable, falling back to the table-driven generic code.
46 */
47
48static int chksum_init(struct shash_desc *desc)
49{
50 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
51
52 ctx->crc = 0;
53
54 return 0;
55}
56
57static int chksum_update(struct shash_desc *desc, const u8 *data,
58 unsigned int length)
59{
60 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
61
62 if (irq_fpu_usable()) {
63 kernel_fpu_begin();
64 ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
65 kernel_fpu_end();
66 } else
67 ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
68 return 0;
69}
70
71static int chksum_final(struct shash_desc *desc, u8 *out)
72{
73 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
74
75 *(__u16 *)out = ctx->crc;
76 return 0;
77}
78
79static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
80 u8 *out)
81{
82 if (irq_fpu_usable()) {
83 kernel_fpu_begin();
84 *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
85 kernel_fpu_end();
86 } else
87 *(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
88 return 0;
89}
90
91static int chksum_finup(struct shash_desc *desc, const u8 *data,
92 unsigned int len, u8 *out)
93{
94 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
95
96 return __chksum_finup(&ctx->crc, data, len, out);
97}
98
99static int chksum_digest(struct shash_desc *desc, const u8 *data,
100 unsigned int length, u8 *out)
101{
102 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
103
104 return __chksum_finup(&ctx->crc, data, length, out);
105}
106
107static struct shash_alg alg = {
108 .digestsize = CRC_T10DIF_DIGEST_SIZE,
109 .init = chksum_init,
110 .update = chksum_update,
111 .final = chksum_final,
112 .finup = chksum_finup,
113 .digest = chksum_digest,
114 .descsize = sizeof(struct chksum_desc_ctx),
115 .base = {
116 .cra_name = "crct10dif",
117 .cra_driver_name = "crct10dif-pclmul",
118 .cra_priority = 200,
119 .cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
120 .cra_module = THIS_MODULE,
121 }
122};
123
124static const struct x86_cpu_id crct10dif_cpu_id[] = {
125 X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
126 {}
127};
128MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
129
130static int __init crct10dif_intel_mod_init(void)
131{
132 if (!x86_match_cpu(crct10dif_cpu_id))
133 return -ENODEV;
134
135 return crypto_register_shash(&alg);
136}
137
138static void __exit crct10dif_intel_mod_fini(void)
139{
140 crypto_unregister_shash(&alg);
141}
142
143module_init(crct10dif_intel_mod_init);
144module_exit(crct10dif_intel_mod_fini);
145
146MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
147MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
148MODULE_LICENSE("GPL");
149
150MODULE_ALIAS("crct10dif");
151MODULE_ALIAS("crct10dif-pclmul");
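The transform registers as "crct10dif" at priority 200, above the generic
table-driven implementation, so existing callers pick it up transparently. A
sketch of reaching it through the shash API (crct10dif_demo is a hypothetical
helper name; error handling is minimal):

	#include <crypto/hash.h>
	#include <linux/err.h>
	#include <linux/slab.h>

	static int crct10dif_demo(const u8 *data, unsigned int len, __u16 *out)
	{
		struct crypto_shash *tfm = crypto_alloc_shash("crct10dif", 0, 0);
		struct shash_desc *desc;
		int err;

		if (IS_ERR(tfm))
			return PTR_ERR(tfm);
		desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm),
			       GFP_KERNEL);
		if (!desc) {
			crypto_free_shash(tfm);
			return -ENOMEM;
		}
		desc->tfm = tfm;
		desc->flags = 0;

		err = crypto_shash_digest(desc, data, len, (u8 *)out);

		kfree(desc);
		crypto_free_shash(tfm);
		return err;
	}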
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 597d4da69656..50226c4b86ed 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -187,7 +187,36 @@ static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
 	return 0;
 }
 
-static struct shash_alg alg = {
+static int sha224_ssse3_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA224_H0;
+	sctx->state[1] = SHA224_H1;
+	sctx->state[2] = SHA224_H2;
+	sctx->state[3] = SHA224_H3;
+	sctx->state[4] = SHA224_H4;
+	sctx->state[5] = SHA224_H5;
+	sctx->state[6] = SHA224_H6;
+	sctx->state[7] = SHA224_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
+{
+	u8 D[SHA256_DIGEST_SIZE];
+
+	sha256_ssse3_final(desc, D);
+
+	memcpy(hash, D, SHA224_DIGEST_SIZE);
+	memset(D, 0, SHA256_DIGEST_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg algs[] = { {
 	.digestsize	=	SHA256_DIGEST_SIZE,
 	.init		=	sha256_ssse3_init,
 	.update		=	sha256_ssse3_update,
@@ -204,7 +233,24 @@ static struct shash_alg alg = {
 		.cra_blocksize	=	SHA256_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_ssse3_init,
+	.update		=	sha256_ssse3_update,
+	.final		=	sha224_ssse3_final,
+	.export		=	sha256_ssse3_export,
+	.import		=	sha256_ssse3_import,
+	.descsize	=	sizeof(struct sha256_state),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name =	"sha224-ssse3",
+		.cra_priority	=	150,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
 
 #ifdef CONFIG_AS_AVX
 static bool __init avx_usable(void)
@@ -227,7 +273,7 @@ static bool __init avx_usable(void)
 
 static int __init sha256_ssse3_mod_init(void)
 {
-	/* test for SSE3 first */
+	/* test for SSSE3 first */
 	if (cpu_has_ssse3)
 		sha256_transform_asm = sha256_transform_ssse3;
 
@@ -254,7 +300,7 @@ static int __init sha256_ssse3_mod_init(void)
 	else
 #endif
 		pr_info("Using SSSE3 optimized SHA-256 implementation\n");
-		return crypto_register_shash(&alg);
+		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
 	}
 	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
 
@@ -263,7 +309,7 @@ static int __init sha256_ssse3_mod_init(void)
 
 static void __exit sha256_ssse3_mod_fini(void)
 {
-	crypto_unregister_shash(&alg);
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
 }
 
 module_init(sha256_ssse3_mod_init);
@@ -273,3 +319,4 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
 
 MODULE_ALIAS("sha256");
+MODULE_ALIAS("sha224");
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 6cbd8df348d2..f30cd10293f0 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -194,7 +194,37 @@ static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
 	return 0;
 }
 
-static struct shash_alg alg = {
+static int sha384_ssse3_init(struct shash_desc *desc)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA384_H0;
+	sctx->state[1] = SHA384_H1;
+	sctx->state[2] = SHA384_H2;
+	sctx->state[3] = SHA384_H3;
+	sctx->state[4] = SHA384_H4;
+	sctx->state[5] = SHA384_H5;
+	sctx->state[6] = SHA384_H6;
+	sctx->state[7] = SHA384_H7;
+
+	sctx->count[0] = sctx->count[1] = 0;
+
+	return 0;
+}
+
+static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
+{
+	u8 D[SHA512_DIGEST_SIZE];
+
+	sha512_ssse3_final(desc, D);
+
+	memcpy(hash, D, SHA384_DIGEST_SIZE);
+	memset(D, 0, SHA512_DIGEST_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg algs[] = { {
 	.digestsize	=	SHA512_DIGEST_SIZE,
 	.init		=	sha512_ssse3_init,
 	.update		=	sha512_ssse3_update,
@@ -211,7 +241,24 @@ static struct shash_alg alg = {
 		.cra_blocksize	=	SHA512_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
+}, {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	sha384_ssse3_init,
+	.update		=	sha512_ssse3_update,
+	.final		=	sha384_ssse3_final,
+	.export		=	sha512_ssse3_export,
+	.import		=	sha512_ssse3_import,
+	.descsize	=	sizeof(struct sha512_state),
+	.statesize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name =	"sha384-ssse3",
+		.cra_priority	=	150,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
 
 #ifdef CONFIG_AS_AVX
 static bool __init avx_usable(void)
@@ -234,7 +281,7 @@ static bool __init avx_usable(void)
 
 static int __init sha512_ssse3_mod_init(void)
 {
-	/* test for SSE3 first */
+	/* test for SSSE3 first */
 	if (cpu_has_ssse3)
 		sha512_transform_asm = sha512_transform_ssse3;
 
@@ -261,7 +308,7 @@ static int __init sha512_ssse3_mod_init(void)
 	else
 #endif
 		pr_info("Using SSSE3 optimized SHA-512 implementation\n");
-		return crypto_register_shash(&alg);
+		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
 	}
 	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
 
@@ -270,7 +317,7 @@ static int __init sha512_ssse3_mod_init(void)
 
 static void __exit sha512_ssse3_mod_fini(void)
 {
-	crypto_unregister_shash(&alg);
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
 
 module_init(sha512_ssse3_mod_init);
@@ -280,3 +327,4 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
 
 MODULE_ALIAS("sha512");
+MODULE_ALIAS("sha384");
diff --git a/arch/x86/crypto/twofish-avx2-asm_64.S b/arch/x86/crypto/twofish-avx2-asm_64.S
deleted file mode 100644
index e1a83b9cd389..000000000000
--- a/arch/x86/crypto/twofish-avx2-asm_64.S
+++ /dev/null
@@ -1,600 +0,0 @@
1/*
2 * x86_64/AVX2 assembler optimized version of Twofish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/linkage.h>
14#include "glue_helper-asm-avx2.S"
15
16.file "twofish-avx2-asm_64.S"
17
18.data
19.align 16
20
21.Lvpshufb_mask0:
22.long 0x80808000
23.long 0x80808004
24.long 0x80808008
25.long 0x8080800c
26
27.Lbswap128_mask:
28 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
29.Lxts_gf128mul_and_shl1_mask_0:
30 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
31.Lxts_gf128mul_and_shl1_mask_1:
32 .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
33
34.text
35
36/* structure of crypto context */
37#define s0 0
38#define s1 1024
39#define s2 2048
40#define s3 3072
41#define w 4096
42#define k 4128
43
44/* register macros */
45#define CTX %rdi
46
47#define RS0 CTX
48#define RS1 %r8
49#define RS2 %r9
50#define RS3 %r10
51#define RK %r11
52#define RW %rax
53#define RROUND %r12
54#define RROUNDd %r12d
55
56#define RA0 %ymm8
57#define RB0 %ymm9
58#define RC0 %ymm10
59#define RD0 %ymm11
60#define RA1 %ymm12
61#define RB1 %ymm13
62#define RC1 %ymm14
63#define RD1 %ymm15
64
65/* temp regs */
66#define RX0 %ymm0
67#define RY0 %ymm1
68#define RX1 %ymm2
69#define RY1 %ymm3
70#define RT0 %ymm4
71#define RIDX %ymm5
72
73#define RX0x %xmm0
74#define RY0x %xmm1
75#define RX1x %xmm2
76#define RY1x %xmm3
77#define RT0x %xmm4
78
79/* vpgatherdd mask and '-1' */
80#define RNOT %ymm6
81
82/* byte mask, (-1 >> 24) */
83#define RBYTE %ymm7
84
85/**********************************************************************
86 16-way AVX2 twofish
87 **********************************************************************/
88#define init_round_constants() \
89 vpcmpeqd RNOT, RNOT, RNOT; \
90 vpsrld $24, RNOT, RBYTE; \
91 leaq k(CTX), RK; \
92 leaq w(CTX), RW; \
93 leaq s1(CTX), RS1; \
94 leaq s2(CTX), RS2; \
95 leaq s3(CTX), RS3; \
96
97#define g16(ab, rs0, rs1, rs2, rs3, xy) \
98 vpand RBYTE, ab ## 0, RIDX; \
99 vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \
100 vpcmpeqd RNOT, RNOT, RNOT; \
101 \
102 vpand RBYTE, ab ## 1, RIDX; \
103 vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \
104 vpcmpeqd RNOT, RNOT, RNOT; \
105 \
106 vpsrld $8, ab ## 0, RIDX; \
107 vpand RBYTE, RIDX, RIDX; \
108 vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
109 vpcmpeqd RNOT, RNOT, RNOT; \
110 vpxor RT0, xy ## 0, xy ## 0; \
111 \
112 vpsrld $8, ab ## 1, RIDX; \
113 vpand RBYTE, RIDX, RIDX; \
114 vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
115 vpcmpeqd RNOT, RNOT, RNOT; \
116 vpxor RT0, xy ## 1, xy ## 1; \
117 \
118 vpsrld $16, ab ## 0, RIDX; \
119 vpand RBYTE, RIDX, RIDX; \
120 vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
121 vpcmpeqd RNOT, RNOT, RNOT; \
122 vpxor RT0, xy ## 0, xy ## 0; \
123 \
124 vpsrld $16, ab ## 1, RIDX; \
125 vpand RBYTE, RIDX, RIDX; \
126 vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
127 vpcmpeqd RNOT, RNOT, RNOT; \
128 vpxor RT0, xy ## 1, xy ## 1; \
129 \
130 vpsrld $24, ab ## 0, RIDX; \
131 vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
132 vpcmpeqd RNOT, RNOT, RNOT; \
133 vpxor RT0, xy ## 0, xy ## 0; \
134 \
135 vpsrld $24, ab ## 1, RIDX; \
136 vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
137 vpcmpeqd RNOT, RNOT, RNOT; \
138 vpxor RT0, xy ## 1, xy ## 1;
139
140#define g1_16(a, x) \
141 g16(a, RS0, RS1, RS2, RS3, x);
142
143#define g2_16(b, y) \
144 g16(b, RS1, RS2, RS3, RS0, y);
145
146#define encrypt_round_end16(a, b, c, d, nk) \
147 vpaddd RY0, RX0, RX0; \
148 vpaddd RX0, RY0, RY0; \
149 vpbroadcastd nk(RK,RROUND,8), RT0; \
150 vpaddd RT0, RX0, RX0; \
151 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
152 vpaddd RT0, RY0, RY0; \
153 \
154 vpxor RY0, d ## 0, d ## 0; \
155 \
156 vpxor RX0, c ## 0, c ## 0; \
157 vpsrld $1, c ## 0, RT0; \
158 vpslld $31, c ## 0, c ## 0; \
159 vpor RT0, c ## 0, c ## 0; \
160 \
161 vpaddd RY1, RX1, RX1; \
162 vpaddd RX1, RY1, RY1; \
163 vpbroadcastd nk(RK,RROUND,8), RT0; \
164 vpaddd RT0, RX1, RX1; \
165 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
166 vpaddd RT0, RY1, RY1; \
167 \
168 vpxor RY1, d ## 1, d ## 1; \
169 \
170 vpxor RX1, c ## 1, c ## 1; \
171 vpsrld $1, c ## 1, RT0; \
172 vpslld $31, c ## 1, c ## 1; \
173 vpor RT0, c ## 1, c ## 1; \
174
175#define encrypt_round16(a, b, c, d, nk) \
176 g2_16(b, RY); \
177 \
178 vpslld $1, b ## 0, RT0; \
179 vpsrld $31, b ## 0, b ## 0; \
180 vpor RT0, b ## 0, b ## 0; \
181 \
182 vpslld $1, b ## 1, RT0; \
183 vpsrld $31, b ## 1, b ## 1; \
184 vpor RT0, b ## 1, b ## 1; \
185 \
186 g1_16(a, RX); \
187 \
188 encrypt_round_end16(a, b, c, d, nk);
189
190#define encrypt_round_first16(a, b, c, d, nk) \
191 vpslld $1, d ## 0, RT0; \
192 vpsrld $31, d ## 0, d ## 0; \
193 vpor RT0, d ## 0, d ## 0; \
194 \
195 vpslld $1, d ## 1, RT0; \
196 vpsrld $31, d ## 1, d ## 1; \
197 vpor RT0, d ## 1, d ## 1; \
198 \
199 encrypt_round16(a, b, c, d, nk);
200
201#define encrypt_round_last16(a, b, c, d, nk) \
202 g2_16(b, RY); \
203 \
204 g1_16(a, RX); \
205 \
206 encrypt_round_end16(a, b, c, d, nk);
207
208#define decrypt_round_end16(a, b, c, d, nk) \
209 vpaddd RY0, RX0, RX0; \
210 vpaddd RX0, RY0, RY0; \
211 vpbroadcastd nk(RK,RROUND,8), RT0; \
212 vpaddd RT0, RX0, RX0; \
213 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
214 vpaddd RT0, RY0, RY0; \
215 \
216 vpxor RX0, c ## 0, c ## 0; \
217 \
218 vpxor RY0, d ## 0, d ## 0; \
219 vpsrld $1, d ## 0, RT0; \
220 vpslld $31, d ## 0, d ## 0; \
221 vpor RT0, d ## 0, d ## 0; \
222 \
223 vpaddd RY1, RX1, RX1; \
224 vpaddd RX1, RY1, RY1; \
225 vpbroadcastd nk(RK,RROUND,8), RT0; \
226 vpaddd RT0, RX1, RX1; \
227 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
228 vpaddd RT0, RY1, RY1; \
229 \
230 vpxor RX1, c ## 1, c ## 1; \
231 \
232 vpxor RY1, d ## 1, d ## 1; \
233 vpsrld $1, d ## 1, RT0; \
234 vpslld $31, d ## 1, d ## 1; \
235 vpor RT0, d ## 1, d ## 1;
236
237#define decrypt_round16(a, b, c, d, nk) \
238 g1_16(a, RX); \
239 \
240 vpslld $1, a ## 0, RT0; \
241 vpsrld $31, a ## 0, a ## 0; \
242 vpor RT0, a ## 0, a ## 0; \
243 \
244 vpslld $1, a ## 1, RT0; \
245 vpsrld $31, a ## 1, a ## 1; \
246 vpor RT0, a ## 1, a ## 1; \
247 \
248 g2_16(b, RY); \
249 \
250 decrypt_round_end16(a, b, c, d, nk);
251
252#define decrypt_round_first16(a, b, c, d, nk) \
253 vpslld $1, c ## 0, RT0; \
254 vpsrld $31, c ## 0, c ## 0; \
255 vpor RT0, c ## 0, c ## 0; \
256 \
257 vpslld $1, c ## 1, RT0; \
258 vpsrld $31, c ## 1, c ## 1; \
259 vpor RT0, c ## 1, c ## 1; \
260 \
261 decrypt_round16(a, b, c, d, nk)
262
263#define decrypt_round_last16(a, b, c, d, nk) \
264 g1_16(a, RX); \
265 \
266 g2_16(b, RY); \
267 \
268 decrypt_round_end16(a, b, c, d, nk);
269
270#define encrypt_cycle16() \
271 encrypt_round16(RA, RB, RC, RD, 0); \
272 encrypt_round16(RC, RD, RA, RB, 8);
273
274#define encrypt_cycle_first16() \
275 encrypt_round_first16(RA, RB, RC, RD, 0); \
276 encrypt_round16(RC, RD, RA, RB, 8);
277
278#define encrypt_cycle_last16() \
279 encrypt_round16(RA, RB, RC, RD, 0); \
280 encrypt_round_last16(RC, RD, RA, RB, 8);
281
282#define decrypt_cycle16(n) \
283 decrypt_round16(RC, RD, RA, RB, 8); \
284 decrypt_round16(RA, RB, RC, RD, 0);
285
286#define decrypt_cycle_first16(n) \
287 decrypt_round_first16(RC, RD, RA, RB, 8); \
288 decrypt_round16(RA, RB, RC, RD, 0);
289
290#define decrypt_cycle_last16(n) \
291 decrypt_round16(RC, RD, RA, RB, 8); \
292 decrypt_round_last16(RA, RB, RC, RD, 0);
293
294#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
295 vpunpckhdq x1, x0, t2; \
296 vpunpckldq x1, x0, x0; \
297 \
298 vpunpckldq x3, x2, t1; \
299 vpunpckhdq x3, x2, x2; \
300 \
301 vpunpckhqdq t1, x0, x1; \
302 vpunpcklqdq t1, x0, x0; \
303 \
304 vpunpckhqdq x2, t2, x3; \
305 vpunpcklqdq x2, t2, x2;
306
307#define read_blocks8(offs,a,b,c,d) \
308 transpose_4x4(a, b, c, d, RX0, RY0);
309
310#define write_blocks8(offs,a,b,c,d) \
311 transpose_4x4(a, b, c, d, RX0, RY0);
312
313#define inpack_enc8(a,b,c,d) \
314 vpbroadcastd 4*0(RW), RT0; \
315 vpxor RT0, a, a; \
316 \
317 vpbroadcastd 4*1(RW), RT0; \
318 vpxor RT0, b, b; \
319 \
320 vpbroadcastd 4*2(RW), RT0; \
321 vpxor RT0, c, c; \
322 \
323 vpbroadcastd 4*3(RW), RT0; \
324 vpxor RT0, d, d;
325
326#define outunpack_enc8(a,b,c,d) \
327 vpbroadcastd 4*4(RW), RX0; \
328 vpbroadcastd 4*5(RW), RY0; \
329 vpxor RX0, c, RX0; \
330 vpxor RY0, d, RY0; \
331 \
332 vpbroadcastd 4*6(RW), RT0; \
333 vpxor RT0, a, c; \
334 vpbroadcastd 4*7(RW), RT0; \
335 vpxor RT0, b, d; \
336 \
337 vmovdqa RX0, a; \
338 vmovdqa RY0, b;
339
340#define inpack_dec8(a,b,c,d) \
341 vpbroadcastd 4*4(RW), RX0; \
342 vpbroadcastd 4*5(RW), RY0; \
343 vpxor RX0, a, RX0; \
344 vpxor RY0, b, RY0; \
345 \
346 vpbroadcastd 4*6(RW), RT0; \
347 vpxor RT0, c, a; \
348 vpbroadcastd 4*7(RW), RT0; \
349 vpxor RT0, d, b; \
350 \
351 vmovdqa RX0, c; \
352 vmovdqa RY0, d;
353
354#define outunpack_dec8(a,b,c,d) \
355 vpbroadcastd 4*0(RW), RT0; \
356 vpxor RT0, a, a; \
357 \
358 vpbroadcastd 4*1(RW), RT0; \
359 vpxor RT0, b, b; \
360 \
361 vpbroadcastd 4*2(RW), RT0; \
362 vpxor RT0, c, c; \
363 \
364 vpbroadcastd 4*3(RW), RT0; \
365 vpxor RT0, d, d;
366
367#define read_blocks16(a,b,c,d) \
368 read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
369 read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
370
371#define write_blocks16(a,b,c,d) \
372 write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
373 write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
374
375#define xor_blocks16(a,b,c,d) \
376 xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
377 xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
378
379#define inpack_enc16(a,b,c,d) \
380 inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
381 inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
382
383#define outunpack_enc16(a,b,c,d) \
384 outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
385 outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
386
387#define inpack_dec16(a,b,c,d) \
388 inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
389 inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
390
391#define outunpack_dec16(a,b,c,d) \
392 outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
393 outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
394
395.align 8
396__twofish_enc_blk16:
397 /* input:
398 * %rdi: ctx, CTX
399 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
400 * output:
401 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
402 */
403 init_round_constants();
404
405 read_blocks16(RA, RB, RC, RD);
406 inpack_enc16(RA, RB, RC, RD);
407
408 xorl RROUNDd, RROUNDd;
409 encrypt_cycle_first16();
410 movl $2, RROUNDd;
411
412.align 4
413.L__enc_loop:
414 encrypt_cycle16();
415
416 addl $2, RROUNDd;
417 cmpl $14, RROUNDd;
418 jne .L__enc_loop;
419
420 encrypt_cycle_last16();
421
422 outunpack_enc16(RA, RB, RC, RD);
423 write_blocks16(RA, RB, RC, RD);
424
425 ret;
426ENDPROC(__twofish_enc_blk16)
427
428.align 8
429__twofish_dec_blk16:
430 /* input:
431 * %rdi: ctx, CTX
432 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
433 * output:
434 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
435 */
436 init_round_constants();
437
438 read_blocks16(RA, RB, RC, RD);
439 inpack_dec16(RA, RB, RC, RD);
440
441 movl $14, RROUNDd;
442 decrypt_cycle_first16();
443 movl $12, RROUNDd;
444
445.align 4
446.L__dec_loop:
447 decrypt_cycle16();
448
449 addl $-2, RROUNDd;
450 jnz .L__dec_loop;
451
452 decrypt_cycle_last16();
453
454 outunpack_dec16(RA, RB, RC, RD);
455 write_blocks16(RA, RB, RC, RD);
456
457 ret;
458ENDPROC(__twofish_dec_blk16)
459
460ENTRY(twofish_ecb_enc_16way)
461 /* input:
462 * %rdi: ctx, CTX
463 * %rsi: dst
464 * %rdx: src
465 */
466
467 vzeroupper;
468 pushq %r12;
469
470 load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
471
472 call __twofish_enc_blk16;
473
474 store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
475
476 popq %r12;
477 vzeroupper;
478
479 ret;
480ENDPROC(twofish_ecb_enc_16way)
481
482ENTRY(twofish_ecb_dec_16way)
483 /* input:
484 * %rdi: ctx, CTX
485 * %rsi: dst
486 * %rdx: src
487 */
488
489 vzeroupper;
490 pushq %r12;
491
492 load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
493
494 call __twofish_dec_blk16;
495
496 store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
497
498 popq %r12;
499 vzeroupper;
500
501 ret;
502ENDPROC(twofish_ecb_dec_16way)
503
504ENTRY(twofish_cbc_dec_16way)
505 /* input:
506 * %rdi: ctx, CTX
507 * %rsi: dst
508 * %rdx: src
509 */
510
511 vzeroupper;
512 pushq %r12;
513
514 load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
515
516 call __twofish_dec_blk16;
517
518 store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1,
519 RX0);
520
521 popq %r12;
522 vzeroupper;
523
524 ret;
525ENDPROC(twofish_cbc_dec_16way)
526
527ENTRY(twofish_ctr_16way)
528 /* input:
529 * %rdi: ctx, CTX
530 * %rsi: dst (16 blocks)
531 * %rdx: src (16 blocks)
532 * %rcx: iv (little endian, 128bit)
533 */
534
535 vzeroupper;
536 pushq %r12;
537
538 load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
539 RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
540 RBYTE);
541
542 call __twofish_enc_blk16;
543
544 store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
545
546 popq %r12;
547 vzeroupper;
548
549 ret;
550ENDPROC(twofish_ctr_16way)
551
552.align 8
553twofish_xts_crypt_16way:
554 /* input:
555 * %rdi: ctx, CTX
556 * %rsi: dst (16 blocks)
557 * %rdx: src (16 blocks)
558 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
559 * %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16
560 */
561
562 vzeroupper;
563 pushq %r12;
564
565 load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
566 RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
567 .Lxts_gf128mul_and_shl1_mask_0,
568 .Lxts_gf128mul_and_shl1_mask_1);
569
570 call *%r8;
571
572 store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
573
574 popq %r12;
575 vzeroupper;
576
577 ret;
578ENDPROC(twofish_xts_crypt_16way)
579
580ENTRY(twofish_xts_enc_16way)
581 /* input:
582 * %rdi: ctx, CTX
583 * %rsi: dst (16 blocks)
584 * %rdx: src (16 blocks)
585 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
586 */
587 leaq __twofish_enc_blk16, %r8;
588 jmp twofish_xts_crypt_16way;
589ENDPROC(twofish_xts_enc_16way)
590
591ENTRY(twofish_xts_dec_16way)
592 /* input:
593 * %rdi: ctx, CTX
594 * %rsi: dst (16 blocks)
595 * %rdx: src (16 blocks)
596 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
597 */
598 leaq __twofish_dec_blk16, %r8;
599 jmp twofish_xts_crypt_16way;
600ENDPROC(twofish_xts_dec_16way)
diff --git a/arch/x86/crypto/twofish_avx2_glue.c b/arch/x86/crypto/twofish_avx2_glue.c
deleted file mode 100644
index ce33b5be64ee..000000000000
--- a/arch/x86/crypto/twofish_avx2_glue.c
+++ /dev/null
@@ -1,584 +0,0 @@
1/*
2 * Glue Code for x86_64/AVX2 assembler optimized version of Twofish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/crypto.h>
16#include <linux/err.h>
17#include <crypto/algapi.h>
18#include <crypto/ctr.h>
19#include <crypto/twofish.h>
20#include <crypto/lrw.h>
21#include <crypto/xts.h>
22#include <asm/xcr.h>
23#include <asm/xsave.h>
24#include <asm/crypto/twofish.h>
25#include <asm/crypto/ablk_helper.h>
26#include <asm/crypto/glue_helper.h>
27#include <crypto/scatterwalk.h>
28
29#define TF_AVX2_PARALLEL_BLOCKS 16
30
31/* 16-way AVX2 parallel cipher functions */
32asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst,
33 const u8 *src);
34asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst,
35 const u8 *src);
36asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
37
38asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src,
39 le128 *iv);
40
41asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst,
42 const u8 *src, le128 *iv);
43asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst,
44 const u8 *src, le128 *iv);
45
46static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
47 const u8 *src)
48{
49 __twofish_enc_blk_3way(ctx, dst, src, false);
50}
51
52static const struct common_glue_ctx twofish_enc = {
53 .num_funcs = 4,
54 .fpu_blocks_limit = 8,
55
56 .funcs = { {
57 .num_blocks = 16,
58 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) }
59 }, {
60 .num_blocks = 8,
61 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
62 }, {
63 .num_blocks = 3,
64 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
65 }, {
66 .num_blocks = 1,
67 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
68 } }
69};
70
71static const struct common_glue_ctx twofish_ctr = {
72 .num_funcs = 4,
73 .fpu_blocks_limit = 8,
74
75 .funcs = { {
76 .num_blocks = 16,
77 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) }
78 }, {
79 .num_blocks = 8,
80 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
81 }, {
82 .num_blocks = 3,
83 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
84 }, {
85 .num_blocks = 1,
86 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
87 } }
88};
89
90static const struct common_glue_ctx twofish_enc_xts = {
91 .num_funcs = 3,
92 .fpu_blocks_limit = 8,
93
94 .funcs = { {
95 .num_blocks = 16,
96 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) }
97 }, {
98 .num_blocks = 8,
99 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
100 }, {
101 .num_blocks = 1,
102 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
103 } }
104};
105
106static const struct common_glue_ctx twofish_dec = {
107 .num_funcs = 4,
108 .fpu_blocks_limit = 8,
109
110 .funcs = { {
111 .num_blocks = 16,
112 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) }
113 }, {
114 .num_blocks = 8,
115 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
116 }, {
117 .num_blocks = 3,
118 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
119 }, {
120 .num_blocks = 1,
121 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
122 } }
123};
124
125static const struct common_glue_ctx twofish_dec_cbc = {
126 .num_funcs = 4,
127 .fpu_blocks_limit = 8,
128
129 .funcs = { {
130 .num_blocks = 16,
131 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) }
132 }, {
133 .num_blocks = 8,
134 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
135 }, {
136 .num_blocks = 3,
137 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
138 }, {
139 .num_blocks = 1,
140 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
141 } }
142};
143
144static const struct common_glue_ctx twofish_dec_xts = {
145 .num_funcs = 3,
146 .fpu_blocks_limit = 8,
147
148 .funcs = { {
149 .num_blocks = 16,
150 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) }
151 }, {
152 .num_blocks = 8,
153 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
154 }, {
155 .num_blocks = 1,
156 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
157 } }
158};
159
160static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
161 struct scatterlist *src, unsigned int nbytes)
162{
163 return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
164}
165
166static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167 struct scatterlist *src, unsigned int nbytes)
168{
169 return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
170}
171
172static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
173 struct scatterlist *src, unsigned int nbytes)
174{
175 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
176 dst, src, nbytes);
177}
178
179static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
180 struct scatterlist *src, unsigned int nbytes)
181{
182 return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
183 nbytes);
184}
185
186static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
187 struct scatterlist *src, unsigned int nbytes)
188{
189 return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
190}
191
192static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
193{
194 /* since reusing AVX functions, starts using FPU at 8 parallel blocks */
195 return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
196}
197
198static inline void twofish_fpu_end(bool fpu_enabled)
199{
200 glue_fpu_end(fpu_enabled);
201}
202
203struct crypt_priv {
204 struct twofish_ctx *ctx;
205 bool fpu_enabled;
206};
207
208static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
209{
210 const unsigned int bsize = TF_BLOCK_SIZE;
211 struct crypt_priv *ctx = priv;
212 int i;
213
214 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
215
216 while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
217 twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
218 srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
219 nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
220 }
221
222 while (nbytes >= 8 * bsize) {
223 twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
224 srcdst += bsize * 8;
225 nbytes -= bsize * 8;
226 }
227
228 while (nbytes >= 3 * bsize) {
229 twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
230 srcdst += bsize * 3;
231 nbytes -= bsize * 3;
232 }
233
234 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
235 twofish_enc_blk(ctx->ctx, srcdst, srcdst);
236}
237
238static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
239{
240 const unsigned int bsize = TF_BLOCK_SIZE;
241 struct crypt_priv *ctx = priv;
242 int i;
243
244 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
245
246 while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
247 twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
248 srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
249 nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
250 }
251
252 while (nbytes >= 8 * bsize) {
253 twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
254 srcdst += bsize * 8;
255 nbytes -= bsize * 8;
256 }
257
258 while (nbytes >= 3 * bsize) {
259 twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
260 srcdst += bsize * 3;
261 nbytes -= bsize * 3;
262 }
263
264 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
265 twofish_dec_blk(ctx->ctx, srcdst, srcdst);
266}
267
268static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
269 struct scatterlist *src, unsigned int nbytes)
270{
271 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
272 be128 buf[TF_AVX2_PARALLEL_BLOCKS];
273 struct crypt_priv crypt_ctx = {
274 .ctx = &ctx->twofish_ctx,
275 .fpu_enabled = false,
276 };
277 struct lrw_crypt_req req = {
278 .tbuf = buf,
279 .tbuflen = sizeof(buf),
280
281 .table_ctx = &ctx->lrw_table,
282 .crypt_ctx = &crypt_ctx,
283 .crypt_fn = encrypt_callback,
284 };
285 int ret;
286
287 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
288 ret = lrw_crypt(desc, dst, src, nbytes, &req);
289 twofish_fpu_end(crypt_ctx.fpu_enabled);
290
291 return ret;
292}
293
294static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
295 struct scatterlist *src, unsigned int nbytes)
296{
297 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
298 be128 buf[TF_AVX2_PARALLEL_BLOCKS];
299 struct crypt_priv crypt_ctx = {
300 .ctx = &ctx->twofish_ctx,
301 .fpu_enabled = false,
302 };
303 struct lrw_crypt_req req = {
304 .tbuf = buf,
305 .tbuflen = sizeof(buf),
306
307 .table_ctx = &ctx->lrw_table,
308 .crypt_ctx = &crypt_ctx,
309 .crypt_fn = decrypt_callback,
310 };
311 int ret;
312
313 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
314 ret = lrw_crypt(desc, dst, src, nbytes, &req);
315 twofish_fpu_end(crypt_ctx.fpu_enabled);
316
317 return ret;
318}
319
320static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
321 struct scatterlist *src, unsigned int nbytes)
322{
323 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
324
325 return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
326 XTS_TWEAK_CAST(twofish_enc_blk),
327 &ctx->tweak_ctx, &ctx->crypt_ctx);
328}
329
330static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
331 struct scatterlist *src, unsigned int nbytes)
332{
333 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
334
335 return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
336 XTS_TWEAK_CAST(twofish_enc_blk),
337 &ctx->tweak_ctx, &ctx->crypt_ctx);
338}
339
340static struct crypto_alg tf_algs[10] = { {
341 .cra_name = "__ecb-twofish-avx2",
342 .cra_driver_name = "__driver-ecb-twofish-avx2",
343 .cra_priority = 0,
344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
345 .cra_blocksize = TF_BLOCK_SIZE,
346 .cra_ctxsize = sizeof(struct twofish_ctx),
347 .cra_alignmask = 0,
348 .cra_type = &crypto_blkcipher_type,
349 .cra_module = THIS_MODULE,
350 .cra_u = {
351 .blkcipher = {
352 .min_keysize = TF_MIN_KEY_SIZE,
353 .max_keysize = TF_MAX_KEY_SIZE,
354 .setkey = twofish_setkey,
355 .encrypt = ecb_encrypt,
356 .decrypt = ecb_decrypt,
357 },
358 },
359}, {
360 .cra_name = "__cbc-twofish-avx2",
361 .cra_driver_name = "__driver-cbc-twofish-avx2",
362 .cra_priority = 0,
363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
364 .cra_blocksize = TF_BLOCK_SIZE,
365 .cra_ctxsize = sizeof(struct twofish_ctx),
366 .cra_alignmask = 0,
367 .cra_type = &crypto_blkcipher_type,
368 .cra_module = THIS_MODULE,
369 .cra_u = {
370 .blkcipher = {
371 .min_keysize = TF_MIN_KEY_SIZE,
372 .max_keysize = TF_MAX_KEY_SIZE,
373 .setkey = twofish_setkey,
374 .encrypt = cbc_encrypt,
375 .decrypt = cbc_decrypt,
376 },
377 },
378}, {
379 .cra_name = "__ctr-twofish-avx2",
380 .cra_driver_name = "__driver-ctr-twofish-avx2",
381 .cra_priority = 0,
382 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
383 .cra_blocksize = 1,
384 .cra_ctxsize = sizeof(struct twofish_ctx),
385 .cra_alignmask = 0,
386 .cra_type = &crypto_blkcipher_type,
387 .cra_module = THIS_MODULE,
388 .cra_u = {
389 .blkcipher = {
390 .min_keysize = TF_MIN_KEY_SIZE,
391 .max_keysize = TF_MAX_KEY_SIZE,
392 .ivsize = TF_BLOCK_SIZE,
393 .setkey = twofish_setkey,
394 .encrypt = ctr_crypt,
395 .decrypt = ctr_crypt,
396 },
397 },
398}, {
399 .cra_name = "__lrw-twofish-avx2",
400 .cra_driver_name = "__driver-lrw-twofish-avx2",
401 .cra_priority = 0,
402 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
403 .cra_blocksize = TF_BLOCK_SIZE,
404 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
405 .cra_alignmask = 0,
406 .cra_type = &crypto_blkcipher_type,
407 .cra_module = THIS_MODULE,
408 .cra_exit = lrw_twofish_exit_tfm,
409 .cra_u = {
410 .blkcipher = {
411 .min_keysize = TF_MIN_KEY_SIZE +
412 TF_BLOCK_SIZE,
413 .max_keysize = TF_MAX_KEY_SIZE +
414 TF_BLOCK_SIZE,
415 .ivsize = TF_BLOCK_SIZE,
416 .setkey = lrw_twofish_setkey,
417 .encrypt = lrw_encrypt,
418 .decrypt = lrw_decrypt,
419 },
420 },
421}, {
422 .cra_name = "__xts-twofish-avx2",
423 .cra_driver_name = "__driver-xts-twofish-avx2",
424 .cra_priority = 0,
425 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
426 .cra_blocksize = TF_BLOCK_SIZE,
427 .cra_ctxsize = sizeof(struct twofish_xts_ctx),
428 .cra_alignmask = 0,
429 .cra_type = &crypto_blkcipher_type,
430 .cra_module = THIS_MODULE,
431 .cra_u = {
432 .blkcipher = {
433 .min_keysize = TF_MIN_KEY_SIZE * 2,
434 .max_keysize = TF_MAX_KEY_SIZE * 2,
435 .ivsize = TF_BLOCK_SIZE,
436 .setkey = xts_twofish_setkey,
437 .encrypt = xts_encrypt,
438 .decrypt = xts_decrypt,
439 },
440 },
441}, {
442 .cra_name = "ecb(twofish)",
443 .cra_driver_name = "ecb-twofish-avx2",
444 .cra_priority = 500,
445 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
446 .cra_blocksize = TF_BLOCK_SIZE,
447 .cra_ctxsize = sizeof(struct async_helper_ctx),
448 .cra_alignmask = 0,
449 .cra_type = &crypto_ablkcipher_type,
450 .cra_module = THIS_MODULE,
451 .cra_init = ablk_init,
452 .cra_exit = ablk_exit,
453 .cra_u = {
454 .ablkcipher = {
455 .min_keysize = TF_MIN_KEY_SIZE,
456 .max_keysize = TF_MAX_KEY_SIZE,
457 .setkey = ablk_set_key,
458 .encrypt = ablk_encrypt,
459 .decrypt = ablk_decrypt,
460 },
461 },
462}, {
463 .cra_name = "cbc(twofish)",
464 .cra_driver_name = "cbc-twofish-avx2",
465 .cra_priority = 500,
466 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
467 .cra_blocksize = TF_BLOCK_SIZE,
468 .cra_ctxsize = sizeof(struct async_helper_ctx),
469 .cra_alignmask = 0,
470 .cra_type = &crypto_ablkcipher_type,
471 .cra_module = THIS_MODULE,
472 .cra_init = ablk_init,
473 .cra_exit = ablk_exit,
474 .cra_u = {
475 .ablkcipher = {
476 .min_keysize = TF_MIN_KEY_SIZE,
477 .max_keysize = TF_MAX_KEY_SIZE,
478 .ivsize = TF_BLOCK_SIZE,
479 .setkey = ablk_set_key,
480 .encrypt = __ablk_encrypt,
481 .decrypt = ablk_decrypt,
482 },
483 },
484}, {
485 .cra_name = "ctr(twofish)",
486 .cra_driver_name = "ctr-twofish-avx2",
487 .cra_priority = 500,
488 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
489 .cra_blocksize = 1,
490 .cra_ctxsize = sizeof(struct async_helper_ctx),
491 .cra_alignmask = 0,
492 .cra_type = &crypto_ablkcipher_type,
493 .cra_module = THIS_MODULE,
494 .cra_init = ablk_init,
495 .cra_exit = ablk_exit,
496 .cra_u = {
497 .ablkcipher = {
498 .min_keysize = TF_MIN_KEY_SIZE,
499 .max_keysize = TF_MAX_KEY_SIZE,
500 .ivsize = TF_BLOCK_SIZE,
501 .setkey = ablk_set_key,
502 .encrypt = ablk_encrypt,
503 .decrypt = ablk_encrypt,
504 .geniv = "chainiv",
505 },
506 },
507}, {
508 .cra_name = "lrw(twofish)",
509 .cra_driver_name = "lrw-twofish-avx2",
510 .cra_priority = 500,
511 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
512 .cra_blocksize = TF_BLOCK_SIZE,
513 .cra_ctxsize = sizeof(struct async_helper_ctx),
514 .cra_alignmask = 0,
515 .cra_type = &crypto_ablkcipher_type,
516 .cra_module = THIS_MODULE,
517 .cra_init = ablk_init,
518 .cra_exit = ablk_exit,
519 .cra_u = {
520 .ablkcipher = {
521 .min_keysize = TF_MIN_KEY_SIZE +
522 TF_BLOCK_SIZE,
523 .max_keysize = TF_MAX_KEY_SIZE +
524 TF_BLOCK_SIZE,
525 .ivsize = TF_BLOCK_SIZE,
526 .setkey = ablk_set_key,
527 .encrypt = ablk_encrypt,
528 .decrypt = ablk_decrypt,
529 },
530 },
531}, {
532 .cra_name = "xts(twofish)",
533 .cra_driver_name = "xts-twofish-avx2",
534 .cra_priority = 500,
535 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
536 .cra_blocksize = TF_BLOCK_SIZE,
537 .cra_ctxsize = sizeof(struct async_helper_ctx),
538 .cra_alignmask = 0,
539 .cra_type = &crypto_ablkcipher_type,
540 .cra_module = THIS_MODULE,
541 .cra_init = ablk_init,
542 .cra_exit = ablk_exit,
543 .cra_u = {
544 .ablkcipher = {
545 .min_keysize = TF_MIN_KEY_SIZE * 2,
546 .max_keysize = TF_MAX_KEY_SIZE * 2,
547 .ivsize = TF_BLOCK_SIZE,
548 .setkey = ablk_set_key,
549 .encrypt = ablk_encrypt,
550 .decrypt = ablk_decrypt,
551 },
552 },
553} };
554
555static int __init init(void)
556{
557 u64 xcr0;
558
559 if (!cpu_has_avx2 || !cpu_has_osxsave) {
560 pr_info("AVX2 instructions are not detected.\n");
561 return -ENODEV;
562 }
563
564 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
565 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
566 pr_info("AVX2 detected but unusable.\n");
567 return -ENODEV;
568 }
569
570 return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
571}
572
573static void __exit fini(void)
574{
575 crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
576}
577
578module_init(init);
579module_exit(fini);
580
581MODULE_LICENSE("GPL");
582MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized");
583MODULE_ALIAS("twofish");
584MODULE_ALIAS("twofish-asm");
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 2047a562f6b3..a62ba541884e 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -50,26 +50,18 @@
 /* 8-way parallel cipher functions */
 asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
 				     const u8 *src);
-EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way);
-
 asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
 				     const u8 *src);
-EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way);
 
 asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
 				     const u8 *src);
-EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way);
-
 asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
 				 const u8 *src, le128 *iv);
-EXPORT_SYMBOL_GPL(twofish_ctr_8way);
 
 asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
 				     const u8 *src, le128 *iv);
-EXPORT_SYMBOL_GPL(twofish_xts_enc_8way);
 asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
 				     const u8 *src, le128 *iv);
-EXPORT_SYMBOL_GPL(twofish_xts_dec_8way);
 
 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 					const u8 *src)
@@ -77,19 +69,17 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 	__twofish_enc_blk_3way(ctx, dst, src, false);
 }
 
-void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
 	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
 				  GLUE_FUNC_CAST(twofish_enc_blk));
 }
-EXPORT_SYMBOL_GPL(twofish_xts_enc);
 
-void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
 	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
 				  GLUE_FUNC_CAST(twofish_dec_blk));
 }
-EXPORT_SYMBOL_GPL(twofish_xts_dec);
 
 
 static const struct common_glue_ctx twofish_enc = {
diff --git a/arch/x86/include/asm/crypto/blowfish.h b/arch/x86/include/asm/crypto/blowfish.h
deleted file mode 100644
index f097b2face10..000000000000
--- a/arch/x86/include/asm/crypto/blowfish.h
+++ /dev/null
@@ -1,43 +0,0 @@
1#ifndef ASM_X86_BLOWFISH_H
2#define ASM_X86_BLOWFISH_H
3
4#include <linux/crypto.h>
5#include <crypto/blowfish.h>
6
7#define BF_PARALLEL_BLOCKS 4
8
9/* regular block cipher functions */
10asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
11 bool xor);
12asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
13
14/* 4-way parallel cipher functions */
15asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
16 const u8 *src, bool xor);
17asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
18 const u8 *src);
19
20static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
21{
22 __blowfish_enc_blk(ctx, dst, src, false);
23}
24
25static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
26 const u8 *src)
27{
28 __blowfish_enc_blk(ctx, dst, src, true);
29}
30
31static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
32 const u8 *src)
33{
34 __blowfish_enc_blk_4way(ctx, dst, src, false);
35}
36
37static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
38 const u8 *src)
39{
40 __blowfish_enc_blk_4way(ctx, dst, src, true);
41}
42
43#endif
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index e655c6029b45..878c51ceebb5 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -28,20 +28,6 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
28asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, 28asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
29 const u8 *src); 29 const u8 *src);
30 30
31/* 8-way parallel cipher functions */
32asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
33 const u8 *src);
34asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
35 const u8 *src);
36asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
37 const u8 *src);
38asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
39 const u8 *src, le128 *iv);
40asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
41 const u8 *src, le128 *iv);
42asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
43 const u8 *src, le128 *iv);
44
45/* helpers from twofish_x86_64-3way module */ 31/* helpers from twofish_x86_64-3way module */
46extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); 32extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
47extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, 33extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
@@ -57,8 +43,4 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
57extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, 43extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
58 unsigned int keylen); 44 unsigned int keylen);
59 45
60/* helpers from twofish-avx module */
61extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
62extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
63
64#endif /* ASM_X86_TWOFISH_H */ 46#endif /* ASM_X86_TWOFISH_H */
diff --git a/crypto/Kconfig b/crypto/Kconfig
index bf8148e74e73..904ffe838567 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -376,6 +376,25 @@ config CRYPTO_CRC32_PCLMUL
376 which will enable any routine to use the CRC-32-IEEE 802.3 checksum 376 which will enable any routine to use the CRC-32-IEEE 802.3 checksum
377 and gain better performance as compared with the table implementation. 377 and gain better performance as compared with the table implementation.
378 378
379config CRYPTO_CRCT10DIF
380 tristate "CRCT10DIF algorithm"
381 select CRYPTO_HASH
382 help
383	  CRC T10 Data Integrity Field computation is exposed as a
384	  crypto transform. This allows faster CRC T10 DIF
385	  implementations to be used when they are available.
386
387config CRYPTO_CRCT10DIF_PCLMUL
388 tristate "CRCT10DIF PCLMULQDQ hardware acceleration"
389 depends on X86 && 64BIT && CRC_T10DIF
390 select CRYPTO_HASH
391 help
392	  For x86_64 processors with SSE4.2 and PCLMULQDQ support,
393	  CRC T10 DIF computation can be hardware accelerated using
394	  the PCLMULQDQ instruction. This option will create the
395	  'crct10dif-pclmul' module, which is faster when computing
396	  the crct10dif checksum than the generic table implementation.
397
379config CRYPTO_GHASH 398config CRYPTO_GHASH
380 tristate "GHASH digest algorithm" 399 tristate "GHASH digest algorithm"
381 select CRYPTO_GF128MUL 400 select CRYPTO_GF128MUL
@@ -820,25 +839,6 @@ config CRYPTO_BLOWFISH_X86_64
820 See also: 839 See also:
821 <http://www.schneier.com/blowfish.html> 840 <http://www.schneier.com/blowfish.html>
822 841
823config CRYPTO_BLOWFISH_AVX2_X86_64
824 tristate "Blowfish cipher algorithm (x86_64/AVX2)"
825 depends on X86 && 64BIT
826 depends on BROKEN
827 select CRYPTO_ALGAPI
828 select CRYPTO_CRYPTD
829 select CRYPTO_ABLK_HELPER_X86
830 select CRYPTO_BLOWFISH_COMMON
831 select CRYPTO_BLOWFISH_X86_64
832 help
833 Blowfish cipher algorithm (x86_64/AVX2), by Bruce Schneier.
834
835 This is a variable key length cipher which can use keys from 32
836 bits to 448 bits in length. It's fast, simple and specifically
837 designed for use on "large microprocessors".
838
839 See also:
840 <http://www.schneier.com/blowfish.html>
841
842config CRYPTO_CAMELLIA 842config CRYPTO_CAMELLIA
843 tristate "Camellia cipher algorithms" 843 tristate "Camellia cipher algorithms"
844 depends on CRYPTO 844 depends on CRYPTO
@@ -1297,31 +1297,6 @@ config CRYPTO_TWOFISH_AVX_X86_64
1297 See also: 1297 See also:
1298 <http://www.schneier.com/twofish.html> 1298 <http://www.schneier.com/twofish.html>
1299 1299
1300config CRYPTO_TWOFISH_AVX2_X86_64
1301 tristate "Twofish cipher algorithm (x86_64/AVX2)"
1302 depends on X86 && 64BIT
1303 depends on BROKEN
1304 select CRYPTO_ALGAPI
1305 select CRYPTO_CRYPTD
1306 select CRYPTO_ABLK_HELPER_X86
1307 select CRYPTO_GLUE_HELPER_X86
1308 select CRYPTO_TWOFISH_COMMON
1309 select CRYPTO_TWOFISH_X86_64
1310 select CRYPTO_TWOFISH_X86_64_3WAY
1311 select CRYPTO_TWOFISH_AVX_X86_64
1312 select CRYPTO_LRW
1313 select CRYPTO_XTS
1314 help
1315 Twofish cipher algorithm (x86_64/AVX2).
1316
1317 Twofish was submitted as an AES (Advanced Encryption Standard)
1318 candidate cipher by researchers at CounterPane Systems. It is a
1319 16 round block cipher supporting key sizes of 128, 192, and 256
1320 bits.
1321
1322 See also:
1323 <http://www.schneier.com/twofish.html>
1324
1325comment "Compression" 1300comment "Compression"
1326 1301
1327config CRYPTO_DEFLATE 1302config CRYPTO_DEFLATE
diff --git a/crypto/Makefile b/crypto/Makefile
index a8e9b0fefbe9..62af87df8729 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
83obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o 83obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
84obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o 84obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
85obj-$(CONFIG_CRYPTO_CRC32) += crc32.o 85obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
86obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o
86obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o 87obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
87obj-$(CONFIG_CRYPTO_LZO) += lzo.o 88obj-$(CONFIG_CRYPTO_LZO) += lzo.o
88obj-$(CONFIG_CRYPTO_842) += 842.o 89obj-$(CONFIG_CRYPTO_842) += 842.o
diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c
new file mode 100644
index 000000000000..92aca96d6b98
--- /dev/null
+++ b/crypto/crct10dif.c
@@ -0,0 +1,178 @@
1/*
2 * Cryptographic API.
3 *
4 * T10 Data Integrity Field CRC16 Crypto Transform
5 *
6 * Copyright (c) 2007 Oracle Corporation. All rights reserved.
7 * Written by Martin K. Petersen <martin.petersen@oracle.com>
8 * Copyright (C) 2013 Intel Corporation
9 * Author: Tim Chen <tim.c.chen@linux.intel.com>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 */
26
27#include <linux/types.h>
28#include <linux/module.h>
29#include <linux/crc-t10dif.h>
30#include <crypto/internal/hash.h>
31#include <linux/init.h>
32#include <linux/string.h>
33#include <linux/kernel.h>
34
35struct chksum_desc_ctx {
36 __u16 crc;
37};
38
39/* Table generated using the following polynomial:
40 * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
41 * gt: 0x8bb7
42 */
43static const __u16 t10_dif_crc_table[256] = {
44 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
45 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
46 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
47 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
48 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
49 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
50 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
51 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
52 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
53 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
54 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
55 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
56 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
57 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
58 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
59 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
60 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
61 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
62 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
63 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
64 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
65 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
66 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
67 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
68 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
69 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
70 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
71 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
72 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
73 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
74 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
75 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
76};
77
78__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
79{
80 unsigned int i;
81
82 for (i = 0 ; i < len ; i++)
83 crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
84
85 return crc;
86}
87EXPORT_SYMBOL(crc_t10dif_generic);
88
89/*
90 * Steps through buffer one byte at a time, calculating the
91 * crc using the lookup table.
92 */
93
94static int chksum_init(struct shash_desc *desc)
95{
96 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
97
98 ctx->crc = 0;
99
100 return 0;
101}
102
103static int chksum_update(struct shash_desc *desc, const u8 *data,
104 unsigned int length)
105{
106 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
107
108 ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
109 return 0;
110}
111
112static int chksum_final(struct shash_desc *desc, u8 *out)
113{
114 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
115
116 *(__u16 *)out = ctx->crc;
117 return 0;
118}
119
120static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
121 u8 *out)
122{
123 *(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
124 return 0;
125}
126
127static int chksum_finup(struct shash_desc *desc, const u8 *data,
128 unsigned int len, u8 *out)
129{
130 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
131
132 return __chksum_finup(&ctx->crc, data, len, out);
133}
134
135static int chksum_digest(struct shash_desc *desc, const u8 *data,
136 unsigned int length, u8 *out)
137{
138 struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
139
140 return __chksum_finup(&ctx->crc, data, length, out);
141}
142
143static struct shash_alg alg = {
144 .digestsize = CRC_T10DIF_DIGEST_SIZE,
145 .init = chksum_init,
146 .update = chksum_update,
147 .final = chksum_final,
148 .finup = chksum_finup,
149 .digest = chksum_digest,
150 .descsize = sizeof(struct chksum_desc_ctx),
151 .base = {
152 .cra_name = "crct10dif",
153 .cra_driver_name = "crct10dif-generic",
154 .cra_priority = 100,
155 .cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
156 .cra_module = THIS_MODULE,
157 }
158};
159
160static int __init crct10dif_mod_init(void)
161{
162 int ret;
163
164 ret = crypto_register_shash(&alg);
165 return ret;
166}
167
168static void __exit crct10dif_mod_fini(void)
169{
170 crypto_unregister_shash(&alg);
171}
172
173module_init(crct10dif_mod_init);
174module_exit(crct10dif_mod_fini);
175
176MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
177MODULE_DESCRIPTION("T10 DIF CRC calculation.");
178MODULE_LICENSE("GPL");
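
For reference, a minimal standalone sketch (plain user-space C, not part of the patch) that regenerates t10_dif_crc_table from the 0x8bb7 generator polynomial named in the comment above and replays the same byte-at-a-time update as crc_t10dif_generic() against the "abc" test vector (CRC 0x443b) added to testmgr.h further down:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static uint16_t table[256];

/* Build the 256-entry table by clocking each byte through the
 * MSB-first CRC-16 with generator polynomial 0x8bb7. */
static void build_table(void)
{
	const uint16_t poly = 0x8bb7;
	unsigned int i, bit;

	for (i = 0; i < 256; i++) {
		uint16_t crc = i << 8;

		for (bit = 0; bit < 8; bit++)
			crc = (crc & 0x8000) ? (uint16_t)(crc << 1) ^ poly
					     : (uint16_t)(crc << 1);
		table[i] = crc;
	}
}

/* Same update step as crc_t10dif_generic() above. */
static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buf, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		crc = (crc << 8) ^ table[((crc >> 8) ^ buf[i]) & 0xff];
	return crc;
}

int main(void)
{
	build_table();
	printf("table[1]   = 0x%04x (expect 0x8bb7)\n", table[1]);
	printf("crc(\"abc\") = 0x%04x (expect 0x443b)\n",
	       crc_t10dif(0, (const unsigned char *)"abc", 3));
	return 0;
}

The left shift in the update step shows this is the plain MSB-first (non-reflected) form of the CRC; the 0x443b result matches the little-endian digest bytes "\x3b\x44" in the test vector because chksum_final() stores the 16-bit CRC in native byte order.
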
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 4c5862095679..6ed124f3ea0f 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -251,6 +251,7 @@ static struct shash_alg sha512_algs[2] = { {
251 .descsize = sizeof(struct sha512_state), 251 .descsize = sizeof(struct sha512_state),
252 .base = { 252 .base = {
253 .cra_name = "sha512", 253 .cra_name = "sha512",
254 .cra_driver_name = "sha512-generic",
254 .cra_flags = CRYPTO_ALG_TYPE_SHASH, 255 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
255 .cra_blocksize = SHA512_BLOCK_SIZE, 256 .cra_blocksize = SHA512_BLOCK_SIZE,
256 .cra_module = THIS_MODULE, 257 .cra_module = THIS_MODULE,
@@ -263,6 +264,7 @@ static struct shash_alg sha512_algs[2] = { {
263 .descsize = sizeof(struct sha512_state), 264 .descsize = sizeof(struct sha512_state),
264 .base = { 265 .base = {
265 .cra_name = "sha384", 266 .cra_name = "sha384",
267 .cra_driver_name = "sha384-generic",
266 .cra_flags = CRYPTO_ALG_TYPE_SHASH, 268 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
267 .cra_blocksize = SHA384_BLOCK_SIZE, 269 .cra_blocksize = SHA384_BLOCK_SIZE,
268 .cra_module = THIS_MODULE, 270 .cra_module = THIS_MODULE,
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 66d254ce0d11..25a5934f0e50 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1174,6 +1174,10 @@ static int do_test(int m)
1174 ret += tcrypt_test("ghash"); 1174 ret += tcrypt_test("ghash");
1175 break; 1175 break;
1176 1176
1177 case 47:
1178 ret += tcrypt_test("crct10dif");
1179 break;
1180
1177 case 100: 1181 case 100:
1178 ret += tcrypt_test("hmac(md5)"); 1182 ret += tcrypt_test("hmac(md5)");
1179 break; 1183 break;
@@ -1498,6 +1502,10 @@ static int do_test(int m)
1498 test_hash_speed("crc32c", sec, generic_hash_speed_template); 1502 test_hash_speed("crc32c", sec, generic_hash_speed_template);
1499 if (mode > 300 && mode < 400) break; 1503 if (mode > 300 && mode < 400) break;
1500 1504
1505 case 320:
1506 test_hash_speed("crct10dif", sec, generic_hash_speed_template);
1507 if (mode > 300 && mode < 400) break;
1508
1501 case 399: 1509 case 399:
1502 break; 1510 break;
1503 1511
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5823735cf381..2f00607039e2 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -184,8 +184,9 @@ static int do_one_async_hash_op(struct ahash_request *req,
184 return ret; 184 return ret;
185} 185}
186 186
187static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, 187static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
188 unsigned int tcount, bool use_digest) 188 unsigned int tcount, bool use_digest,
189 const int align_offset)
189{ 190{
190 const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); 191 const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm));
191 unsigned int i, j, k, temp; 192 unsigned int i, j, k, temp;
@@ -216,10 +217,15 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
216 if (template[i].np) 217 if (template[i].np)
217 continue; 218 continue;
218 219
220 ret = -EINVAL;
221 if (WARN_ON(align_offset + template[i].psize > PAGE_SIZE))
222 goto out;
223
219 j++; 224 j++;
220 memset(result, 0, 64); 225 memset(result, 0, 64);
221 226
222 hash_buff = xbuf[0]; 227 hash_buff = xbuf[0];
228 hash_buff += align_offset;
223 229
224 memcpy(hash_buff, template[i].plaintext, template[i].psize); 230 memcpy(hash_buff, template[i].plaintext, template[i].psize);
225 sg_init_one(&sg[0], hash_buff, template[i].psize); 231 sg_init_one(&sg[0], hash_buff, template[i].psize);
@@ -281,6 +287,10 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
281 287
282 j = 0; 288 j = 0;
283 for (i = 0; i < tcount; i++) { 289 for (i = 0; i < tcount; i++) {
290 /* alignment tests are only done with contiguous buffers */
291 if (align_offset != 0)
292 break;
293
284 if (template[i].np) { 294 if (template[i].np) {
285 j++; 295 j++;
286 memset(result, 0, 64); 296 memset(result, 0, 64);
@@ -358,9 +368,36 @@ out_nobuf:
358 return ret; 368 return ret;
359} 369}
360 370
371static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
372 unsigned int tcount, bool use_digest)
373{
374 unsigned int alignmask;
375 int ret;
376
377 ret = __test_hash(tfm, template, tcount, use_digest, 0);
378 if (ret)
379 return ret;
380
381 /* test unaligned buffers, check with one byte offset */
382 ret = __test_hash(tfm, template, tcount, use_digest, 1);
383 if (ret)
384 return ret;
385
386 alignmask = crypto_tfm_alg_alignmask(&tfm->base);
387 if (alignmask) {
388 /* Check if alignment mask for tfm is correctly set. */
389 ret = __test_hash(tfm, template, tcount, use_digest,
390 alignmask + 1);
391 if (ret)
392 return ret;
393 }
394
395 return 0;
396}
397
361static int __test_aead(struct crypto_aead *tfm, int enc, 398static int __test_aead(struct crypto_aead *tfm, int enc,
362 struct aead_testvec *template, unsigned int tcount, 399 struct aead_testvec *template, unsigned int tcount,
363 const bool diff_dst) 400 const bool diff_dst, const int align_offset)
364{ 401{
365 const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)); 402 const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm));
366 unsigned int i, j, k, n, temp; 403 unsigned int i, j, k, n, temp;
@@ -423,15 +460,16 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
423 if (!template[i].np) { 460 if (!template[i].np) {
424 j++; 461 j++;
425 462
426 /* some tepmplates have no input data but they will 463 /* some templates have no input data but they will
427 * touch input 464 * touch input
428 */ 465 */
429 input = xbuf[0]; 466 input = xbuf[0];
467 input += align_offset;
430 assoc = axbuf[0]; 468 assoc = axbuf[0];
431 469
432 ret = -EINVAL; 470 ret = -EINVAL;
433 if (WARN_ON(template[i].ilen > PAGE_SIZE || 471 if (WARN_ON(align_offset + template[i].ilen >
434 template[i].alen > PAGE_SIZE)) 472 PAGE_SIZE || template[i].alen > PAGE_SIZE))
435 goto out; 473 goto out;
436 474
437 memcpy(input, template[i].input, template[i].ilen); 475 memcpy(input, template[i].input, template[i].ilen);
@@ -470,6 +508,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
470 508
471 if (diff_dst) { 509 if (diff_dst) {
472 output = xoutbuf[0]; 510 output = xoutbuf[0];
511 output += align_offset;
473 sg_init_one(&sgout[0], output, 512 sg_init_one(&sgout[0], output,
474 template[i].ilen + 513 template[i].ilen +
475 (enc ? authsize : 0)); 514 (enc ? authsize : 0));
@@ -530,6 +569,10 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
530 } 569 }
531 570
532 for (i = 0, j = 0; i < tcount; i++) { 571 for (i = 0, j = 0; i < tcount; i++) {
572 /* alignment tests are only done with contiguous buffers */
573 if (align_offset != 0)
574 break;
575
533 if (template[i].np) { 576 if (template[i].np) {
534 j++; 577 j++;
535 578
@@ -732,15 +775,34 @@ out_noxbuf:
732static int test_aead(struct crypto_aead *tfm, int enc, 775static int test_aead(struct crypto_aead *tfm, int enc,
733 struct aead_testvec *template, unsigned int tcount) 776 struct aead_testvec *template, unsigned int tcount)
734{ 777{
778 unsigned int alignmask;
735 int ret; 779 int ret;
736 780
737 /* test 'dst == src' case */ 781 /* test 'dst == src' case */
738 ret = __test_aead(tfm, enc, template, tcount, false); 782 ret = __test_aead(tfm, enc, template, tcount, false, 0);
739 if (ret) 783 if (ret)
740 return ret; 784 return ret;
741 785
742 /* test 'dst != src' case */ 786 /* test 'dst != src' case */
743 return __test_aead(tfm, enc, template, tcount, true); 787 ret = __test_aead(tfm, enc, template, tcount, true, 0);
788 if (ret)
789 return ret;
790
791 /* test unaligned buffers, check with one byte offset */
792 ret = __test_aead(tfm, enc, template, tcount, true, 1);
793 if (ret)
794 return ret;
795
796 alignmask = crypto_tfm_alg_alignmask(&tfm->base);
797 if (alignmask) {
798 /* Check if alignment mask for tfm is correctly set. */
799 ret = __test_aead(tfm, enc, template, tcount, true,
800 alignmask + 1);
801 if (ret)
802 return ret;
803 }
804
805 return 0;
744} 806}
745 807
746static int test_cipher(struct crypto_cipher *tfm, int enc, 808static int test_cipher(struct crypto_cipher *tfm, int enc,
@@ -820,7 +882,7 @@ out_nobuf:
820 882
821static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, 883static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
822 struct cipher_testvec *template, unsigned int tcount, 884 struct cipher_testvec *template, unsigned int tcount,
823 const bool diff_dst) 885 const bool diff_dst, const int align_offset)
824{ 886{
825 const char *algo = 887 const char *algo =
826 crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm)); 888 crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm));
@@ -876,10 +938,12 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
876 j++; 938 j++;
877 939
878 ret = -EINVAL; 940 ret = -EINVAL;
879 if (WARN_ON(template[i].ilen > PAGE_SIZE)) 941 if (WARN_ON(align_offset + template[i].ilen >
942 PAGE_SIZE))
880 goto out; 943 goto out;
881 944
882 data = xbuf[0]; 945 data = xbuf[0];
946 data += align_offset;
883 memcpy(data, template[i].input, template[i].ilen); 947 memcpy(data, template[i].input, template[i].ilen);
884 948
885 crypto_ablkcipher_clear_flags(tfm, ~0); 949 crypto_ablkcipher_clear_flags(tfm, ~0);
@@ -900,6 +964,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
900 sg_init_one(&sg[0], data, template[i].ilen); 964 sg_init_one(&sg[0], data, template[i].ilen);
901 if (diff_dst) { 965 if (diff_dst) {
902 data = xoutbuf[0]; 966 data = xoutbuf[0];
967 data += align_offset;
903 sg_init_one(&sgout[0], data, template[i].ilen); 968 sg_init_one(&sgout[0], data, template[i].ilen);
904 } 969 }
905 970
@@ -941,6 +1006,9 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
941 1006
942 j = 0; 1007 j = 0;
943 for (i = 0; i < tcount; i++) { 1008 for (i = 0; i < tcount; i++) {
1009 /* alignment tests are only done with contiguous buffers */
1010 if (align_offset != 0)
1011 break;
944 1012
945 if (template[i].iv) 1013 if (template[i].iv)
946 memcpy(iv, template[i].iv, MAX_IVLEN); 1014 memcpy(iv, template[i].iv, MAX_IVLEN);
@@ -1075,15 +1143,34 @@ out_nobuf:
1075static int test_skcipher(struct crypto_ablkcipher *tfm, int enc, 1143static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
1076 struct cipher_testvec *template, unsigned int tcount) 1144 struct cipher_testvec *template, unsigned int tcount)
1077{ 1145{
1146 unsigned int alignmask;
1078 int ret; 1147 int ret;
1079 1148
1080 /* test 'dst == src' case */ 1149 /* test 'dst == src' case */
1081 ret = __test_skcipher(tfm, enc, template, tcount, false); 1150 ret = __test_skcipher(tfm, enc, template, tcount, false, 0);
1082 if (ret) 1151 if (ret)
1083 return ret; 1152 return ret;
1084 1153
1085 /* test 'dst != src' case */ 1154 /* test 'dst != src' case */
1086 return __test_skcipher(tfm, enc, template, tcount, true); 1155 ret = __test_skcipher(tfm, enc, template, tcount, true, 0);
1156 if (ret)
1157 return ret;
1158
1159 /* test unaligned buffers, check with one byte offset */
1160 ret = __test_skcipher(tfm, enc, template, tcount, true, 1);
1161 if (ret)
1162 return ret;
1163
1164 alignmask = crypto_tfm_alg_alignmask(&tfm->base);
1165 if (alignmask) {
1166 /* Check if alignment mask for tfm is correctly set. */
1167 ret = __test_skcipher(tfm, enc, template, tcount, true,
1168 alignmask + 1);
1169 if (ret)
1170 return ret;
1171 }
1172
1173 return 0;
1087} 1174}
1088 1175
1089static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate, 1176static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate,
@@ -1654,16 +1741,10 @@ static const struct alg_test_desc alg_test_descs[] = {
1654 .alg = "__cbc-twofish-avx", 1741 .alg = "__cbc-twofish-avx",
1655 .test = alg_test_null, 1742 .test = alg_test_null,
1656 }, { 1743 }, {
1657 .alg = "__cbc-twofish-avx2",
1658 .test = alg_test_null,
1659 }, {
1660 .alg = "__driver-cbc-aes-aesni", 1744 .alg = "__driver-cbc-aes-aesni",
1661 .test = alg_test_null, 1745 .test = alg_test_null,
1662 .fips_allowed = 1, 1746 .fips_allowed = 1,
1663 }, { 1747 }, {
1664 .alg = "__driver-cbc-blowfish-avx2",
1665 .test = alg_test_null,
1666 }, {
1667 .alg = "__driver-cbc-camellia-aesni", 1748 .alg = "__driver-cbc-camellia-aesni",
1668 .test = alg_test_null, 1749 .test = alg_test_null,
1669 }, { 1750 }, {
@@ -1688,16 +1769,10 @@ static const struct alg_test_desc alg_test_descs[] = {
1688 .alg = "__driver-cbc-twofish-avx", 1769 .alg = "__driver-cbc-twofish-avx",
1689 .test = alg_test_null, 1770 .test = alg_test_null,
1690 }, { 1771 }, {
1691 .alg = "__driver-cbc-twofish-avx2",
1692 .test = alg_test_null,
1693 }, {
1694 .alg = "__driver-ecb-aes-aesni", 1772 .alg = "__driver-ecb-aes-aesni",
1695 .test = alg_test_null, 1773 .test = alg_test_null,
1696 .fips_allowed = 1, 1774 .fips_allowed = 1,
1697 }, { 1775 }, {
1698 .alg = "__driver-ecb-blowfish-avx2",
1699 .test = alg_test_null,
1700 }, {
1701 .alg = "__driver-ecb-camellia-aesni", 1776 .alg = "__driver-ecb-camellia-aesni",
1702 .test = alg_test_null, 1777 .test = alg_test_null,
1703 }, { 1778 }, {
@@ -1722,9 +1797,6 @@ static const struct alg_test_desc alg_test_descs[] = {
1722 .alg = "__driver-ecb-twofish-avx", 1797 .alg = "__driver-ecb-twofish-avx",
1723 .test = alg_test_null, 1798 .test = alg_test_null,
1724 }, { 1799 }, {
1725 .alg = "__driver-ecb-twofish-avx2",
1726 .test = alg_test_null,
1727 }, {
1728 .alg = "__ghash-pclmulqdqni", 1800 .alg = "__ghash-pclmulqdqni",
1729 .test = alg_test_null, 1801 .test = alg_test_null,
1730 .fips_allowed = 1, 1802 .fips_allowed = 1,
@@ -1974,12 +2046,19 @@ static const struct alg_test_desc alg_test_descs[] = {
1974 } 2046 }
1975 } 2047 }
1976 }, { 2048 }, {
1977 .alg = "cryptd(__driver-cbc-aes-aesni)", 2049 .alg = "crct10dif",
1978 .test = alg_test_null, 2050 .test = alg_test_hash,
1979 .fips_allowed = 1, 2051 .fips_allowed = 1,
2052 .suite = {
2053 .hash = {
2054 .vecs = crct10dif_tv_template,
2055 .count = CRCT10DIF_TEST_VECTORS
2056 }
2057 }
1980 }, { 2058 }, {
1981 .alg = "cryptd(__driver-cbc-blowfish-avx2)", 2059 .alg = "cryptd(__driver-cbc-aes-aesni)",
1982 .test = alg_test_null, 2060 .test = alg_test_null,
2061 .fips_allowed = 1,
1983 }, { 2062 }, {
1984 .alg = "cryptd(__driver-cbc-camellia-aesni)", 2063 .alg = "cryptd(__driver-cbc-camellia-aesni)",
1985 .test = alg_test_null, 2064 .test = alg_test_null,
@@ -1994,9 +2073,6 @@ static const struct alg_test_desc alg_test_descs[] = {
1994 .test = alg_test_null, 2073 .test = alg_test_null,
1995 .fips_allowed = 1, 2074 .fips_allowed = 1,
1996 }, { 2075 }, {
1997 .alg = "cryptd(__driver-ecb-blowfish-avx2)",
1998 .test = alg_test_null,
1999 }, {
2000 .alg = "cryptd(__driver-ecb-camellia-aesni)", 2076 .alg = "cryptd(__driver-ecb-camellia-aesni)",
2001 .test = alg_test_null, 2077 .test = alg_test_null,
2002 }, { 2078 }, {
@@ -2021,9 +2097,6 @@ static const struct alg_test_desc alg_test_descs[] = {
2021 .alg = "cryptd(__driver-ecb-twofish-avx)", 2097 .alg = "cryptd(__driver-ecb-twofish-avx)",
2022 .test = alg_test_null, 2098 .test = alg_test_null,
2023 }, { 2099 }, {
2024 .alg = "cryptd(__driver-ecb-twofish-avx2)",
2025 .test = alg_test_null,
2026 }, {
2027 .alg = "cryptd(__driver-gcm-aes-aesni)", 2100 .alg = "cryptd(__driver-gcm-aes-aesni)",
2028 .test = alg_test_null, 2101 .test = alg_test_null,
2029 .fips_allowed = 1, 2102 .fips_allowed = 1,
@@ -3068,6 +3141,35 @@ static const struct alg_test_desc alg_test_descs[] = {
3068 } 3141 }
3069}; 3142};
3070 3143
3144static bool alg_test_descs_checked;
3145
3146static void alg_test_descs_check_order(void)
3147{
3148 int i;
3149
3150 /* only check once */
3151 if (alg_test_descs_checked)
3152 return;
3153
3154 alg_test_descs_checked = true;
3155
3156 for (i = 1; i < ARRAY_SIZE(alg_test_descs); i++) {
3157 int diff = strcmp(alg_test_descs[i - 1].alg,
3158 alg_test_descs[i].alg);
3159
3160 if (WARN_ON(diff > 0)) {
3161 pr_warn("testmgr: alg_test_descs entries in wrong order: '%s' before '%s'\n",
3162 alg_test_descs[i - 1].alg,
3163 alg_test_descs[i].alg);
3164 }
3165
3166 if (WARN_ON(diff == 0)) {
3167 pr_warn("testmgr: duplicate alg_test_descs entry: '%s'\n",
3168 alg_test_descs[i].alg);
3169 }
3170 }
3171}
3172
3071static int alg_find_test(const char *alg) 3173static int alg_find_test(const char *alg)
3072{ 3174{
3073 int start = 0; 3175 int start = 0;
@@ -3099,6 +3201,8 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
3099 int j; 3201 int j;
3100 int rc; 3202 int rc;
3101 3203
3204 alg_test_descs_check_order();
3205
3102 if ((type & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_CIPHER) { 3206 if ((type & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_CIPHER) {
3103 char nalg[CRYPTO_MAX_ALG_NAME]; 3207 char nalg[CRYPTO_MAX_ALG_NAME];
3104 3208
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 1e701bc075b9..7d44aa3d6b44 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -450,6 +450,39 @@ static struct hash_testvec rmd320_tv_template[] = {
450 } 450 }
451}; 451};
452 452
453#define CRCT10DIF_TEST_VECTORS 3
454static struct hash_testvec crct10dif_tv_template[] = {
455 {
456 .plaintext = "abc",
457 .psize = 3,
458#ifdef __LITTLE_ENDIAN
459 .digest = "\x3b\x44",
460#else
461 .digest = "\x44\x3b",
462#endif
463 }, {
464 .plaintext = "1234567890123456789012345678901234567890"
465 "123456789012345678901234567890123456789",
466 .psize = 79,
467#ifdef __LITTLE_ENDIAN
468 .digest = "\x70\x4b",
469#else
470 .digest = "\x4b\x70",
471#endif
472 }, {
473 .plaintext =
474 "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
475 .psize = 56,
476#ifdef __LITTLE_ENDIAN
477 .digest = "\xe3\x9c",
478#else
479 .digest = "\x9c\xe3",
480#endif
481 .np = 2,
482 .tap = { 28, 28 }
483 }
484};
485
453/* 486/*
454 * SHA1 test vectors from FIPS PUB 180-1 487
455 * Long vector from CAVS 5.0 488 * Long vector from CAVS 5.0
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index 7c73d4aca36b..bf9fc6b79328 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -108,8 +108,6 @@ static int atmel_trng_remove(struct platform_device *pdev)
108 clk_disable(trng->clk); 108 clk_disable(trng->clk);
109 clk_put(trng->clk); 109 clk_put(trng->clk);
110 110
111 platform_set_drvdata(pdev, NULL);
112
113 return 0; 111 return 0;
114} 112}
115 113
diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c
index f343b7d0dfa1..36581ea562cb 100644
--- a/drivers/char/hw_random/bcm63xx-rng.c
+++ b/drivers/char/hw_random/bcm63xx-rng.c
@@ -137,7 +137,6 @@ static int bcm63xx_rng_probe(struct platform_device *pdev)
137out_clk_disable: 137out_clk_disable:
138 clk_disable(clk); 138 clk_disable(clk);
139out_free_rng: 139out_free_rng:
140 platform_set_drvdata(pdev, NULL);
141 kfree(rng); 140 kfree(rng);
142out_free_priv: 141out_free_priv:
143 kfree(priv); 142 kfree(priv);
@@ -154,7 +153,6 @@ static int bcm63xx_rng_remove(struct platform_device *pdev)
154 clk_disable(priv->clk); 153 clk_disable(priv->clk);
155 kfree(priv); 154 kfree(priv);
156 kfree(rng); 155 kfree(rng);
157 platform_set_drvdata(pdev, NULL);
158 156
159 return 0; 157 return 0;
160} 158}
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index 20b962e1d832..f9beed54d0c8 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -700,7 +700,7 @@ static int n2rng_probe(struct platform_device *op)
700 if (err) 700 if (err)
701 goto out_free_units; 701 goto out_free_units;
702 702
703 dev_set_drvdata(&op->dev, np); 703 platform_set_drvdata(op, np);
704 704
705 schedule_delayed_work(&np->work, 0); 705 schedule_delayed_work(&np->work, 0);
706 706
@@ -721,7 +721,7 @@ out:
721 721
722static int n2rng_remove(struct platform_device *op) 722static int n2rng_remove(struct platform_device *op)
723{ 723{
724 struct n2rng *np = dev_get_drvdata(&op->dev); 724 struct n2rng *np = platform_get_drvdata(op);
725 725
726 np->flags |= N2RNG_FLAG_SHUTDOWN; 726 np->flags |= N2RNG_FLAG_SHUTDOWN;
727 727
@@ -736,8 +736,6 @@ static int n2rng_remove(struct platform_device *op)
736 736
737 kfree(np); 737 kfree(np);
738 738
739 dev_set_drvdata(&op->dev, NULL);
740
741 return 0; 739 return 0;
742} 740}
743 741
diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c
index 96de0249e595..232b87fb5fc9 100644
--- a/drivers/char/hw_random/nomadik-rng.c
+++ b/drivers/char/hw_random/nomadik-rng.c
@@ -51,7 +51,7 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
51 return ret; 51 return ret;
52 } 52 }
53 53
54 clk_enable(rng_clk); 54 clk_prepare_enable(rng_clk);
55 55
56 ret = amba_request_regions(dev, dev->dev.init_name); 56 ret = amba_request_regions(dev, dev->dev.init_name);
57 if (ret) 57 if (ret)
diff --git a/drivers/char/hw_random/octeon-rng.c b/drivers/char/hw_random/octeon-rng.c
index 1eada566ca70..f2885dbe1849 100644
--- a/drivers/char/hw_random/octeon-rng.c
+++ b/drivers/char/hw_random/octeon-rng.c
@@ -96,7 +96,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
96 96
97 rng->ops = ops; 97 rng->ops = ops;
98 98
99 dev_set_drvdata(&pdev->dev, &rng->ops); 99 platform_set_drvdata(pdev, &rng->ops);
100 ret = hwrng_register(&rng->ops); 100 ret = hwrng_register(&rng->ops);
101 if (ret) 101 if (ret)
102 return -ENOENT; 102 return -ENOENT;
@@ -108,7 +108,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
108 108
109static int __exit octeon_rng_remove(struct platform_device *pdev) 109static int __exit octeon_rng_remove(struct platform_device *pdev)
110{ 110{
111 struct hwrng *rng = dev_get_drvdata(&pdev->dev); 111 struct hwrng *rng = platform_get_drvdata(pdev);
112 112
113 hwrng_unregister(rng); 113 hwrng_unregister(rng);
114 114
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index d2903e772270..6843ec87b98b 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -116,7 +116,7 @@ static int omap_rng_probe(struct platform_device *pdev)
116 }; 116 };
117 117
118 omap_rng_ops.priv = (unsigned long)priv; 118 omap_rng_ops.priv = (unsigned long)priv;
119 dev_set_drvdata(&pdev->dev, priv); 119 platform_set_drvdata(pdev, priv);
120 120
121 priv->mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 121 priv->mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
122 priv->base = devm_ioremap_resource(&pdev->dev, priv->mem_res); 122 priv->base = devm_ioremap_resource(&pdev->dev, priv->mem_res);
@@ -124,7 +124,7 @@ static int omap_rng_probe(struct platform_device *pdev)
124 ret = PTR_ERR(priv->base); 124 ret = PTR_ERR(priv->base);
125 goto err_ioremap; 125 goto err_ioremap;
126 } 126 }
127 dev_set_drvdata(&pdev->dev, priv); 127 platform_set_drvdata(pdev, priv);
128 128
129 pm_runtime_enable(&pdev->dev); 129 pm_runtime_enable(&pdev->dev);
130 pm_runtime_get_sync(&pdev->dev); 130 pm_runtime_get_sync(&pdev->dev);
@@ -151,7 +151,7 @@ err_ioremap:
151 151
152static int __exit omap_rng_remove(struct platform_device *pdev) 152static int __exit omap_rng_remove(struct platform_device *pdev)
153{ 153{
154 struct omap_rng_private_data *priv = dev_get_drvdata(&pdev->dev); 154 struct omap_rng_private_data *priv = platform_get_drvdata(pdev);
155 155
156 hwrng_unregister(&omap_rng_ops); 156 hwrng_unregister(&omap_rng_ops);
157 157
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 3e75737f5fe1..d2120ba8f3f9 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -192,7 +192,6 @@ out_release_io:
192out_timer: 192out_timer:
193 del_timer_sync(&priv->timer); 193 del_timer_sync(&priv->timer);
194out_free: 194out_free:
195 platform_set_drvdata(pdev, NULL);
196 kfree(priv); 195 kfree(priv);
197 return err; 196 return err;
198} 197}
@@ -209,7 +208,6 @@ static int timeriomem_rng_remove(struct platform_device *pdev)
209 del_timer_sync(&priv->timer); 208 del_timer_sync(&priv->timer);
210 iounmap(priv->io_base); 209 iounmap(priv->io_base);
211 release_mem_region(res->start, resource_size(res)); 210 release_mem_region(res->start, resource_size(res));
212 platform_set_drvdata(pdev, NULL);
213 kfree(priv); 211 kfree(priv);
214 212
215 return 0; 213 return 0;
diff --git a/drivers/char/hw_random/tx4939-rng.c b/drivers/char/hw_random/tx4939-rng.c
index d34a24a0d484..00593c847cf0 100644
--- a/drivers/char/hw_random/tx4939-rng.c
+++ b/drivers/char/hw_random/tx4939-rng.c
@@ -154,7 +154,6 @@ static int __exit tx4939_rng_remove(struct platform_device *dev)
154 struct tx4939_rng *rngdev = platform_get_drvdata(dev); 154 struct tx4939_rng *rngdev = platform_get_drvdata(dev);
155 155
156 hwrng_unregister(&rngdev->rng); 156 hwrng_unregister(&rngdev->rng);
157 platform_set_drvdata(dev, NULL);
158 return 0; 157 return 0;
159} 158}
160 159
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index dffb85525368..8ff7c230d82e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -278,7 +278,7 @@ config CRYPTO_DEV_PICOXCELL
278 278
279config CRYPTO_DEV_SAHARA 279config CRYPTO_DEV_SAHARA
280 tristate "Support for SAHARA crypto accelerator" 280 tristate "Support for SAHARA crypto accelerator"
281 depends on ARCH_MXC && EXPERIMENTAL && OF 281 depends on ARCH_MXC && OF
282 select CRYPTO_BLKCIPHER 282 select CRYPTO_BLKCIPHER
283 select CRYPTO_AES 283 select CRYPTO_AES
284 select CRYPTO_ECB 284 select CRYPTO_ECB
@@ -286,6 +286,16 @@ config CRYPTO_DEV_SAHARA
286 This option enables support for the SAHARA HW crypto accelerator 286 This option enables support for the SAHARA HW crypto accelerator
287 found in some Freescale i.MX chips. 287 found in some Freescale i.MX chips.
288 288
289config CRYPTO_DEV_DCP
290 tristate "Support for the DCP engine"
291 depends on ARCH_MXS && OF
292 select CRYPTO_BLKCIPHER
293 select CRYPTO_AES
294 select CRYPTO_CBC
295 help
296	  This option enables support for the hardware crypto-acceleration
297	  capabilities of the DCP co-processor.
298
289config CRYPTO_DEV_S5P 299config CRYPTO_DEV_S5P
290 tristate "Support for Samsung S5PV210 crypto accelerator" 300 tristate "Support for Samsung S5PV210 crypto accelerator"
291 depends on ARCH_S5PV210 301 depends on ARCH_S5PV210
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 38ce13d3b79b..b4946ddd2550 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
13obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o 13obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
14obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o 14obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
15obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o 15obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
16obj-$(CONFIG_CRYPTO_DEV_DCP) += dcp.o
16obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o 17obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
17obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o 18obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
18obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ 19obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 6e94bcd94678..f5d6deced1cb 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -202,6 +202,7 @@ static int caam_probe(struct platform_device *pdev)
202#ifdef CONFIG_DEBUG_FS 202#ifdef CONFIG_DEBUG_FS
203 struct caam_perfmon *perfmon; 203 struct caam_perfmon *perfmon;
204#endif 204#endif
205 u64 cha_vid;
205 206
206 ctrlpriv = kzalloc(sizeof(struct caam_drv_private), GFP_KERNEL); 207 ctrlpriv = kzalloc(sizeof(struct caam_drv_private), GFP_KERNEL);
207 if (!ctrlpriv) 208 if (!ctrlpriv)
@@ -293,11 +294,14 @@ static int caam_probe(struct platform_device *pdev)
293 return -ENOMEM; 294 return -ENOMEM;
294 } 295 }
295 296
297 cha_vid = rd_reg64(&topregs->ctrl.perfmon.cha_id);
298
296 /* 299 /*
297	 * RNG4 based SECs (v5+) need special initialization prior 300	 * If SEC has RNG version >= 4 and the RNG state handle has not
298	 * to executing any descriptors 301	 * already been instantiated, do RNG instantiation
299 */ 302 */
300 if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) { 303 if ((cha_vid & CHA_ID_RNG_MASK) >> CHA_ID_RNG_SHIFT >= 4 &&
304 !(rd_reg32(&topregs->ctrl.r4tst[0].rdsta) & RDSTA_IF0)) {
301 kick_trng(pdev); 305 kick_trng(pdev);
302 ret = instantiate_rng(ctrlpriv->jrdev[0]); 306 ret = instantiate_rng(ctrlpriv->jrdev[0]);
303 if (ret) { 307 if (ret) {
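
The RNG version test above leans on the CHA version ID layout that this series adds to regs.h. A toy sketch (standalone C; the cha_vid value is invented, only the shift/mask mirror the new definitions):

#include <stdio.h>
#include <stdint.h>

#define CHA_ID_RNG_SHIFT 16
#define CHA_ID_RNG_MASK (0xfULL << CHA_ID_RNG_SHIFT)

int main(void)
{
	/* hypothetical register value with RNG CHA version 4 */
	uint64_t cha_vid = 0x0000000000040000ULL;
	unsigned int rng_vid = (cha_vid & CHA_ID_RNG_MASK) >> CHA_ID_RNG_SHIFT;

	printf("RNG CHA version %u -> %s\n", rng_vid,
	       rng_vid >= 4 ? "RNG4: check RDSTA_IF0, maybe instantiate"
			    : "pre-RNG4: no instantiation needed");
	return 0;
}
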
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index f7f833be8c67..53b296f78b0d 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -231,7 +231,12 @@ struct sec4_sg_entry {
231#define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT) 231#define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT)
232#define LDST_SRCDST_WORD_PKHA_N_SZ (0x12 << LDST_SRCDST_SHIFT) 232#define LDST_SRCDST_WORD_PKHA_N_SZ (0x12 << LDST_SRCDST_SHIFT)
233#define LDST_SRCDST_WORD_PKHA_E_SZ (0x13 << LDST_SRCDST_SHIFT) 233#define LDST_SRCDST_WORD_PKHA_E_SZ (0x13 << LDST_SRCDST_SHIFT)
234#define LDST_SRCDST_WORD_CLASS_CTX (0x20 << LDST_SRCDST_SHIFT)
234#define LDST_SRCDST_WORD_DESCBUF (0x40 << LDST_SRCDST_SHIFT) 235#define LDST_SRCDST_WORD_DESCBUF (0x40 << LDST_SRCDST_SHIFT)
236#define LDST_SRCDST_WORD_DESCBUF_JOB (0x41 << LDST_SRCDST_SHIFT)
237#define LDST_SRCDST_WORD_DESCBUF_SHARED (0x42 << LDST_SRCDST_SHIFT)
238#define LDST_SRCDST_WORD_DESCBUF_JOB_WE (0x45 << LDST_SRCDST_SHIFT)
239#define LDST_SRCDST_WORD_DESCBUF_SHARED_WE (0x46 << LDST_SRCDST_SHIFT)
235#define LDST_SRCDST_WORD_INFO_FIFO (0x7a << LDST_SRCDST_SHIFT) 240#define LDST_SRCDST_WORD_INFO_FIFO (0x7a << LDST_SRCDST_SHIFT)
236 241
237/* Offset in source/destination */ 242/* Offset in source/destination */
@@ -366,6 +371,7 @@ struct sec4_sg_entry {
366#define FIFOLD_TYPE_LAST2FLUSH1 (0x05 << FIFOLD_TYPE_SHIFT) 371#define FIFOLD_TYPE_LAST2FLUSH1 (0x05 << FIFOLD_TYPE_SHIFT)
367#define FIFOLD_TYPE_LASTBOTH (0x06 << FIFOLD_TYPE_SHIFT) 372#define FIFOLD_TYPE_LASTBOTH (0x06 << FIFOLD_TYPE_SHIFT)
368#define FIFOLD_TYPE_LASTBOTHFL (0x07 << FIFOLD_TYPE_SHIFT) 373#define FIFOLD_TYPE_LASTBOTHFL (0x07 << FIFOLD_TYPE_SHIFT)
374#define FIFOLD_TYPE_NOINFOFIFO (0x0F << FIFOLD_TYPE_SHIFT)
369 375
370#define FIFOLDST_LEN_MASK 0xffff 376#define FIFOLDST_LEN_MASK 0xffff
371#define FIFOLDST_EXT_LEN_MASK 0xffffffff 377#define FIFOLDST_EXT_LEN_MASK 0xffffffff
@@ -1294,10 +1300,10 @@ struct sec4_sg_entry {
1294#define SQOUT_SGF 0x01000000 1300#define SQOUT_SGF 0x01000000
1295 1301
1296/* Appends to a previous pointer */ 1302/* Appends to a previous pointer */
1297#define SQOUT_PRE 0x00800000 1303#define SQOUT_PRE SQIN_PRE
1298 1304
1299/* Restore sequence with pointer/length */ 1305/* Restore sequence with pointer/length */
1300#define SQOUT_RTO 0x00200000 1306#define SQOUT_RTO SQIN_RTO
1301 1307
1302/* Use extended length following pointer */ 1308/* Use extended length following pointer */
1303#define SQOUT_EXT 0x00400000 1309#define SQOUT_EXT 0x00400000
@@ -1359,6 +1365,7 @@ struct sec4_sg_entry {
1359#define MOVE_DEST_MATH3 (0x07 << MOVE_DEST_SHIFT) 1365#define MOVE_DEST_MATH3 (0x07 << MOVE_DEST_SHIFT)
1360#define MOVE_DEST_CLASS1INFIFO (0x08 << MOVE_DEST_SHIFT) 1366#define MOVE_DEST_CLASS1INFIFO (0x08 << MOVE_DEST_SHIFT)
1361#define MOVE_DEST_CLASS2INFIFO (0x09 << MOVE_DEST_SHIFT) 1367#define MOVE_DEST_CLASS2INFIFO (0x09 << MOVE_DEST_SHIFT)
1368#define MOVE_DEST_INFIFO_NOINFO (0x0a << MOVE_DEST_SHIFT)
1362#define MOVE_DEST_PK_A (0x0c << MOVE_DEST_SHIFT) 1369#define MOVE_DEST_PK_A (0x0c << MOVE_DEST_SHIFT)
1363#define MOVE_DEST_CLASS1KEY (0x0d << MOVE_DEST_SHIFT) 1370#define MOVE_DEST_CLASS1KEY (0x0d << MOVE_DEST_SHIFT)
1364#define MOVE_DEST_CLASS2KEY (0x0e << MOVE_DEST_SHIFT) 1371#define MOVE_DEST_CLASS2KEY (0x0e << MOVE_DEST_SHIFT)
@@ -1411,6 +1418,7 @@ struct sec4_sg_entry {
1411#define MATH_SRC0_REG2 (0x02 << MATH_SRC0_SHIFT) 1418#define MATH_SRC0_REG2 (0x02 << MATH_SRC0_SHIFT)
1412#define MATH_SRC0_REG3 (0x03 << MATH_SRC0_SHIFT) 1419#define MATH_SRC0_REG3 (0x03 << MATH_SRC0_SHIFT)
1413#define MATH_SRC0_IMM (0x04 << MATH_SRC0_SHIFT) 1420#define MATH_SRC0_IMM (0x04 << MATH_SRC0_SHIFT)
1421#define MATH_SRC0_DPOVRD (0x07 << MATH_SRC0_SHIFT)
1414#define MATH_SRC0_SEQINLEN (0x08 << MATH_SRC0_SHIFT) 1422#define MATH_SRC0_SEQINLEN (0x08 << MATH_SRC0_SHIFT)
1415#define MATH_SRC0_SEQOUTLEN (0x09 << MATH_SRC0_SHIFT) 1423#define MATH_SRC0_SEQOUTLEN (0x09 << MATH_SRC0_SHIFT)
1416#define MATH_SRC0_VARSEQINLEN (0x0a << MATH_SRC0_SHIFT) 1424#define MATH_SRC0_VARSEQINLEN (0x0a << MATH_SRC0_SHIFT)
@@ -1425,6 +1433,7 @@ struct sec4_sg_entry {
1425#define MATH_SRC1_REG2 (0x02 << MATH_SRC1_SHIFT) 1433#define MATH_SRC1_REG2 (0x02 << MATH_SRC1_SHIFT)
1426#define MATH_SRC1_REG3 (0x03 << MATH_SRC1_SHIFT) 1434#define MATH_SRC1_REG3 (0x03 << MATH_SRC1_SHIFT)
1427#define MATH_SRC1_IMM (0x04 << MATH_SRC1_SHIFT) 1435#define MATH_SRC1_IMM (0x04 << MATH_SRC1_SHIFT)
1436#define MATH_SRC1_DPOVRD (0x07 << MATH_SRC1_SHIFT)
1428#define MATH_SRC1_INFIFO (0x0a << MATH_SRC1_SHIFT) 1437#define MATH_SRC1_INFIFO (0x0a << MATH_SRC1_SHIFT)
1429#define MATH_SRC1_OUTFIFO (0x0b << MATH_SRC1_SHIFT) 1438#define MATH_SRC1_OUTFIFO (0x0b << MATH_SRC1_SHIFT)
1430#define MATH_SRC1_ONE (0x0c << MATH_SRC1_SHIFT) 1439#define MATH_SRC1_ONE (0x0c << MATH_SRC1_SHIFT)
@@ -1600,4 +1609,13 @@ struct sec4_sg_entry {
1600#define NFIFOENTRY_PLEN_SHIFT 0 1609#define NFIFOENTRY_PLEN_SHIFT 0
1601#define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT) 1610#define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT)
1602 1611
1612/* Append Load Immediate Command */
1613#define FD_CMD_APPEND_LOAD_IMMEDIATE 0x80000000
1614
1615/* Set SEQ LIODN equal to the Non-SEQ LIODN for the job */
1616#define FD_CMD_SET_SEQ_LIODN_EQUAL_NONSEQ_LIODN 0x40000000
1617
1618/* Frame Descriptor Command for Replacement Job Descriptor */
1619#define FD_CMD_REPLACE_JOB_DESC 0x20000000
1620
1603#endif /* DESC_H */ 1621#endif /* DESC_H */
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index c85c1f058401..fe3bfd1b08ca 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -110,6 +110,26 @@ static inline void append_cmd(u32 *desc, u32 command)
110 (*desc)++; 110 (*desc)++;
111} 111}
112 112
113#define append_u32 append_cmd
114
115static inline void append_u64(u32 *desc, u64 data)
116{
117 u32 *offset = desc_end(desc);
118
119 *offset = upper_32_bits(data);
120 *(++offset) = lower_32_bits(data);
121
122 (*desc) += 2;
123}
124
125/* Write command without affecting header, and return pointer to next word */
126static inline u32 *write_cmd(u32 *desc, u32 command)
127{
128 *desc = command;
129
130 return desc + 1;
131}
132
113static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, 133static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
114 u32 command) 134 u32 command)
115{ 135{
@@ -122,7 +142,8 @@ static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr,
122 unsigned int len, u32 command) 142 unsigned int len, u32 command)
123{ 143{
124 append_cmd(desc, command); 144 append_cmd(desc, command);
125 append_ptr(desc, ptr); 145 if (!(command & (SQIN_RTO | SQIN_PRE)))
146 append_ptr(desc, ptr);
126 append_cmd(desc, len); 147 append_cmd(desc, len);
127} 148}
128 149
@@ -176,17 +197,36 @@ static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \
176} 197}
177APPEND_CMD_PTR(key, KEY) 198APPEND_CMD_PTR(key, KEY)
178APPEND_CMD_PTR(load, LOAD) 199APPEND_CMD_PTR(load, LOAD)
179APPEND_CMD_PTR(store, STORE)
180APPEND_CMD_PTR(fifo_load, FIFO_LOAD) 200APPEND_CMD_PTR(fifo_load, FIFO_LOAD)
181APPEND_CMD_PTR(fifo_store, FIFO_STORE) 201APPEND_CMD_PTR(fifo_store, FIFO_STORE)
182 202
203static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len,
204 u32 options)
205{
206 u32 cmd_src;
207
208 cmd_src = options & LDST_SRCDST_MASK;
209
210 append_cmd(desc, CMD_STORE | options | len);
211
212 /* The following options do not require pointer */
213 if (!(cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED ||
214 cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB ||
215 cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB_WE ||
216 cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED_WE))
217 append_ptr(desc, ptr);
218}
219
183#define APPEND_SEQ_PTR_INTLEN(cmd, op) \ 220#define APPEND_SEQ_PTR_INTLEN(cmd, op) \
184static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \ 221static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \
185 unsigned int len, \ 222 unsigned int len, \
186 u32 options) \ 223 u32 options) \
187{ \ 224{ \
188 PRINT_POS; \ 225 PRINT_POS; \
189 append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \ 226 if (options & (SQIN_RTO | SQIN_PRE)) \
227 append_cmd(desc, CMD_SEQ_##op##_PTR | len | options); \
228 else \
229 append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \
190} 230}
191APPEND_SEQ_PTR_INTLEN(in, IN) 231APPEND_SEQ_PTR_INTLEN(in, IN)
192APPEND_SEQ_PTR_INTLEN(out, OUT) 232APPEND_SEQ_PTR_INTLEN(out, OUT)
@@ -259,7 +299,7 @@ APPEND_CMD_RAW_IMM(load, LOAD, u32);
259 */ 299 */
260#define APPEND_MATH(op, desc, dest, src_0, src_1, len) \ 300#define APPEND_MATH(op, desc, dest, src_0, src_1, len) \
261append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \ 301append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \
262 MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32) (len & MATH_LEN_MASK)); 302 MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32)len);
263 303
264#define append_math_add(desc, dest, src0, src1, len) \ 304#define append_math_add(desc, dest, src0, src1, len) \
265 APPEND_MATH(ADD, desc, dest, src0, src1, len) 305 APPEND_MATH(ADD, desc, dest, src0, src1, len)
@@ -279,6 +319,8 @@ append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \
279 APPEND_MATH(LSHIFT, desc, dest, src0, src1, len) 319 APPEND_MATH(LSHIFT, desc, dest, src0, src1, len)
280#define append_math_rshift(desc, dest, src0, src1, len) \ 320#define append_math_rshift(desc, dest, src0, src1, len) \
281 APPEND_MATH(RSHIFT, desc, dest, src0, src1, len) 321 APPEND_MATH(RSHIFT, desc, dest, src0, src1, len)
322#define append_math_ldshift(desc, dest, src0, src1, len) \
323 APPEND_MATH(SHLD, desc, dest, src0, src1, len)
282 324
283/* Exactly one source is IMM. Data is passed in as u32 value */ 325/* Exactly one source is IMM. Data is passed in as u32 value */
284#define APPEND_MATH_IMM_u32(op, desc, dest, src_0, src_1, data) \ 326#define APPEND_MATH_IMM_u32(op, desc, dest, src_0, src_1, data) \
@@ -305,3 +347,34 @@ do { \
305 APPEND_MATH_IMM_u32(LSHIFT, desc, dest, src0, src1, data) 347 APPEND_MATH_IMM_u32(LSHIFT, desc, dest, src0, src1, data)
306#define append_math_rshift_imm_u32(desc, dest, src0, src1, data) \ 348#define append_math_rshift_imm_u32(desc, dest, src0, src1, data) \
307 APPEND_MATH_IMM_u32(RSHIFT, desc, dest, src0, src1, data) 349 APPEND_MATH_IMM_u32(RSHIFT, desc, dest, src0, src1, data)
350
351/* Exactly one source is IMM. Data is passed in as u64 value */
352#define APPEND_MATH_IMM_u64(op, desc, dest, src_0, src_1, data) \
353do { \
354 u32 upper = (data >> 16) >> 16; \
355 APPEND_MATH(op, desc, dest, src_0, src_1, CAAM_CMD_SZ * 2 | \
356 (upper ? 0 : MATH_IFB)); \
357 if (upper) \
358 append_u64(desc, data); \
359 else \
360 append_u32(desc, data); \
361} while (0)
362
363#define append_math_add_imm_u64(desc, dest, src0, src1, data) \
364 APPEND_MATH_IMM_u64(ADD, desc, dest, src0, src1, data)
365#define append_math_sub_imm_u64(desc, dest, src0, src1, data) \
366 APPEND_MATH_IMM_u64(SUB, desc, dest, src0, src1, data)
367#define append_math_add_c_imm_u64(desc, dest, src0, src1, data) \
368 APPEND_MATH_IMM_u64(ADDC, desc, dest, src0, src1, data)
369#define append_math_sub_b_imm_u64(desc, dest, src0, src1, data) \
370 APPEND_MATH_IMM_u64(SUBB, desc, dest, src0, src1, data)
371#define append_math_and_imm_u64(desc, dest, src0, src1, data) \
372 APPEND_MATH_IMM_u64(AND, desc, dest, src0, src1, data)
373#define append_math_or_imm_u64(desc, dest, src0, src1, data) \
374 APPEND_MATH_IMM_u64(OR, desc, dest, src0, src1, data)
375#define append_math_xor_imm_u64(desc, dest, src0, src1, data) \
376 APPEND_MATH_IMM_u64(XOR, desc, dest, src0, src1, data)
377#define append_math_lshift_imm_u64(desc, dest, src0, src1, data) \
378 APPEND_MATH_IMM_u64(LSHIFT, desc, dest, src0, src1, data)
379#define append_math_rshift_imm_u64(desc, dest, src0, src1, data) \
380 APPEND_MATH_IMM_u64(RSHIFT, desc, dest, src0, src1, data)
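
A quick sketch of the width decision APPEND_MATH_IMM_u64 makes (standalone C, not part of the patch): when the upper 32 bits of the immediate are zero it appends a single word and sets MATH_IFB, otherwise it appends both words. The (data >> 16) >> 16 idiom is the kernel's upper_32_bits() trick and stays well-defined even if a caller passes a 32-bit expression, where a plain >> 32 would not.

#include <stdio.h>
#include <stdint.h>

static void choose_width(uint64_t data)
{
	/* upper_32_bits()-style extraction without a 32-bit shift */
	uint32_t upper = (data >> 16) >> 16;

	if (upper)
		printf("0x%016llx -> append_u64(): two immediate words\n",
		       (unsigned long long)data);
	else
		printf("0x%016llx -> append_u32() + MATH_IFB\n",
		       (unsigned long long)data);
}

int main(void)
{
	choose_width(0x00000000deadbeefULL);	/* fits in four bytes */
	choose_width(0x0000000100000000ULL);	/* needs both words */
	return 0;
}
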
diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h
index 62950d22ac13..3a87c0cf879a 100644
--- a/drivers/crypto/caam/pdb.h
+++ b/drivers/crypto/caam/pdb.h
@@ -44,6 +44,7 @@
44#define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */ 44#define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */
45#define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */ 45#define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */
46#define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */ 46#define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */
47#define PDBOPTS_ESP_AOFL 0x04 /* adjust out frame len (decap, SEC >= 5.3) */
47#define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */ 48#define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */
48#define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */ 49#define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */
49#define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */ 50#define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index cd6fedad9935..c09142fc13e3 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -117,6 +117,43 @@ struct jr_outentry {
 #define CHA_NUM_DECONUM_SHIFT	56
 #define CHA_NUM_DECONUM_MASK	(0xfull << CHA_NUM_DECONUM_SHIFT)
 
+/* CHA Version IDs */
+#define CHA_ID_AES_SHIFT	0
+#define CHA_ID_AES_MASK		(0xfull << CHA_ID_AES_SHIFT)
+
+#define CHA_ID_DES_SHIFT	4
+#define CHA_ID_DES_MASK		(0xfull << CHA_ID_DES_SHIFT)
+
+#define CHA_ID_ARC4_SHIFT	8
+#define CHA_ID_ARC4_MASK	(0xfull << CHA_ID_ARC4_SHIFT)
+
+#define CHA_ID_MD_SHIFT		12
+#define CHA_ID_MD_MASK		(0xfull << CHA_ID_MD_SHIFT)
+
+#define CHA_ID_RNG_SHIFT	16
+#define CHA_ID_RNG_MASK		(0xfull << CHA_ID_RNG_SHIFT)
+
+#define CHA_ID_SNW8_SHIFT	20
+#define CHA_ID_SNW8_MASK	(0xfull << CHA_ID_SNW8_SHIFT)
+
+#define CHA_ID_KAS_SHIFT	24
+#define CHA_ID_KAS_MASK		(0xfull << CHA_ID_KAS_SHIFT)
+
+#define CHA_ID_PK_SHIFT		28
+#define CHA_ID_PK_MASK		(0xfull << CHA_ID_PK_SHIFT)
+
+#define CHA_ID_CRC_SHIFT	32
+#define CHA_ID_CRC_MASK		(0xfull << CHA_ID_CRC_SHIFT)
+
+#define CHA_ID_SNW9_SHIFT	36
+#define CHA_ID_SNW9_MASK	(0xfull << CHA_ID_SNW9_SHIFT)
+
+#define CHA_ID_DECO_SHIFT	56
+#define CHA_ID_DECO_MASK	(0xfull << CHA_ID_DECO_SHIFT)
+
+#define CHA_ID_JR_SHIFT		60
+#define CHA_ID_JR_MASK		(0xfull << CHA_ID_JR_SHIFT)
+
 struct sec_vid {
 	u16 ip_id;
 	u8 maj_rev;
@@ -228,7 +265,10 @@ struct rng4tst {
 		u32 rtfrqmax;	/* PRGM=1: freq. count max. limit register */
 		u32 rtfrqcnt;	/* PRGM=0: freq. count register */
 	};
-	u32 rsvd1[56];
+	u32 rsvd1[40];
+#define RDSTA_IF0 0x00000001
+	u32 rdsta;
+	u32 rsvd2[15];
 };
 
 /*
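
Each CHA_ID field added above is a 4-bit version nibble packed into the 64-bit CHA version register, so a block's version is read by masking and shifting. A hedged sketch reusing two of the new masks (the register value is invented for illustration):

	#include <stdint.h>
	#include <stdio.h>

	#define CHA_ID_AES_SHIFT	0
	#define CHA_ID_AES_MASK		(0xfull << CHA_ID_AES_SHIFT)
	#define CHA_ID_RNG_SHIFT	16
	#define CHA_ID_RNG_MASK		(0xfull << CHA_ID_RNG_SHIFT)

	int main(void)
	{
		/* Hypothetical CHA version register value: AES v3, RNG v4. */
		uint64_t cha_id = (3ull << CHA_ID_AES_SHIFT) |
				  (4ull << CHA_ID_RNG_SHIFT);

		printf("AES CHA version: %llu\n", (unsigned long long)
		       ((cha_id & CHA_ID_AES_MASK) >> CHA_ID_AES_SHIFT));
		printf("RNG CHA version: %llu\n", (unsigned long long)
		       ((cha_id & CHA_ID_RNG_MASK) >> CHA_ID_RNG_SHIFT));
		return 0;
	}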
diff --git a/drivers/crypto/dcp.c b/drivers/crypto/dcp.c
new file mode 100644
index 000000000000..a8a7dd4b0d25
--- /dev/null
+++ b/drivers/crypto/dcp.c
@@ -0,0 +1,912 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for DCP cryptographic accelerator.
+ *
+ * Copyright (c) 2013
+ * Author: Tobias Rauter <tobias.rauter@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Based on tegra-aes.c, dcp.c (from freescale SDK) and sahara.c
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/miscdevice.h>
+
+#include <crypto/scatterwalk.h>
+#include <crypto/aes.h>
+
+
+/* IOCTL for DCP OTP Key AES - taken from Freescale's SDK*/
+#define DBS_IOCTL_BASE	'd'
+#define DBS_ENC	_IOW(DBS_IOCTL_BASE, 0x00, uint8_t[16])
+#define DBS_DEC	_IOW(DBS_IOCTL_BASE, 0x01, uint8_t[16])
+
+/* DCP channel used for AES */
+#define USED_CHANNEL 1
+/* Ring Buffers' maximum size */
+#define DCP_MAX_PKG 20
+
+/* Control Register */
+#define DCP_REG_CTRL 0x000
+#define DCP_CTRL_SFRST (1<<31)
+#define DCP_CTRL_CLKGATE (1<<30)
+#define DCP_CTRL_CRYPTO_PRESENT (1<<29)
+#define DCP_CTRL_SHA_PRESENT (1<<28)
+#define DCP_CTRL_GATHER_RES_WRITE (1<<23)
+#define DCP_CTRL_ENABLE_CONTEXT_CACHE (1<<22)
+#define DCP_CTRL_ENABLE_CONTEXT_SWITCH (1<<21)
+#define DCP_CTRL_CH_IRQ_E_0 0x01
+#define DCP_CTRL_CH_IRQ_E_1 0x02
+#define DCP_CTRL_CH_IRQ_E_2 0x04
+#define DCP_CTRL_CH_IRQ_E_3 0x08
+
+/* Status register */
+#define DCP_REG_STAT 0x010
+#define DCP_STAT_OTP_KEY_READY (1<<28)
+#define DCP_STAT_CUR_CHANNEL(stat) ((stat>>24)&0x0F)
+#define DCP_STAT_READY_CHANNEL(stat) ((stat>>16)&0x0F)
+#define DCP_STAT_IRQ(stat) (stat&0x0F)
+#define DCP_STAT_CHAN_0 (0x01)
+#define DCP_STAT_CHAN_1 (0x02)
+#define DCP_STAT_CHAN_2 (0x04)
+#define DCP_STAT_CHAN_3 (0x08)
+
+/* Channel Control Register */
+#define DCP_REG_CHAN_CTRL 0x020
+#define DCP_CHAN_CTRL_CH0_IRQ_MERGED (1<<16)
+#define DCP_CHAN_CTRL_HIGH_PRIO_0 (0x0100)
+#define DCP_CHAN_CTRL_HIGH_PRIO_1 (0x0200)
+#define DCP_CHAN_CTRL_HIGH_PRIO_2 (0x0400)
+#define DCP_CHAN_CTRL_HIGH_PRIO_3 (0x0800)
+#define DCP_CHAN_CTRL_ENABLE_0 (0x01)
+#define DCP_CHAN_CTRL_ENABLE_1 (0x02)
+#define DCP_CHAN_CTRL_ENABLE_2 (0x04)
+#define DCP_CHAN_CTRL_ENABLE_3 (0x08)
+
+/*
+ * Channel Registers:
+ * The DCP has 4 channels. Each of this channels
+ * has 4 registers (command pointer, semaphore, status and options).
+ * The address of register REG of channel CHAN is obtained by
+ * dcp_chan_reg(REG, CHAN)
+ */
+#define DCP_REG_CHAN_PTR	0x00000100
+#define DCP_REG_CHAN_SEMA	0x00000110
+#define DCP_REG_CHAN_STAT	0x00000120
+#define DCP_REG_CHAN_OPT	0x00000130
+
+#define DCP_CHAN_STAT_NEXT_CHAIN_IS_0	0x010000
+#define DCP_CHAN_STAT_NO_CHAIN		0x020000
+#define DCP_CHAN_STAT_CONTEXT_ERROR	0x030000
+#define DCP_CHAN_STAT_PAYLOAD_ERROR	0x040000
+#define DCP_CHAN_STAT_INVALID_MODE	0x050000
+#define DCP_CHAN_STAT_PAGEFAULT		0x40
+#define DCP_CHAN_STAT_DST		0x20
+#define DCP_CHAN_STAT_SRC		0x10
+#define DCP_CHAN_STAT_PACKET		0x08
+#define DCP_CHAN_STAT_SETUP		0x04
+#define DCP_CHAN_STAT_MISMATCH		0x02
+
+/* hw packet control*/
+
+#define DCP_PKT_PAYLOAD_KEY	(1<<11)
+#define DCP_PKT_OTP_KEY		(1<<10)
+#define DCP_PKT_CIPHER_INIT	(1<<9)
+#define DCP_PKG_CIPHER_ENCRYPT	(1<<8)
+#define DCP_PKT_CIPHER_ENABLE	(1<<5)
+#define DCP_PKT_DECR_SEM	(1<<1)
+#define DCP_PKT_CHAIN		(1<<2)
+#define DCP_PKT_IRQ		1
+
+#define DCP_PKT_MODE_CBC	(1<<4)
+#define DCP_PKT_KEYSELECT_OTP	(0xFF<<8)
+
+/* cipher flags */
+#define DCP_ENC		0x0001
+#define DCP_DEC		0x0002
+#define DCP_ECB		0x0004
+#define DCP_CBC		0x0008
+#define DCP_CBC_INIT	0x0010
+#define DCP_NEW_KEY	0x0040
+#define DCP_OTP_KEY	0x0080
+#define DCP_AES		0x1000
+
+/* DCP Flags */
+#define DCP_FLAG_BUSY	0x01
+#define DCP_FLAG_PRODUCING	0x02
+
+/* clock defines */
+#define CLOCK_ON	1
+#define CLOCK_OFF	0
+
+struct dcp_dev_req_ctx {
+	int mode;
+};
+
+struct dcp_op {
+	unsigned int flags;
+	u8 key[AES_KEYSIZE_128];
+	int keylen;
+
+	struct ablkcipher_request *req;
+	struct crypto_ablkcipher *fallback;
+
+	uint32_t stat;
+	uint32_t pkt1;
+	uint32_t pkt2;
+	struct ablkcipher_walk walk;
+};
+
+struct dcp_dev {
+	struct device *dev;
+	void __iomem *dcp_regs_base;
+
+	int dcp_vmi_irq;
+	int dcp_irq;
+
+	spinlock_t queue_lock;
+	struct crypto_queue queue;
+
+	uint32_t pkt_produced;
+	uint32_t pkt_consumed;
+
+	struct dcp_hw_packet *hw_pkg[DCP_MAX_PKG];
+	dma_addr_t hw_phys_pkg;
+
+	/* [KEY][IV] Both with 16 Bytes */
+	u8 *payload_base;
+	dma_addr_t payload_base_dma;
+
+
+	struct tasklet_struct done_task;
+	struct tasklet_struct queue_task;
+	struct timer_list watchdog;
+
+	unsigned long flags;
+
+	struct dcp_op *ctx;
+
+	struct miscdevice dcp_bootstream_misc;
+};
+
+struct dcp_hw_packet {
+	uint32_t next;
+	uint32_t pkt1;
+	uint32_t pkt2;
+	uint32_t src;
+	uint32_t dst;
+	uint32_t size;
+	uint32_t payload;
+	uint32_t stat;
+};
+
+static struct dcp_dev *global_dev;
+
+static inline u32 dcp_chan_reg(u32 reg, int chan)
+{
+	return reg + (chan) * 0x40;
+}
+
+static inline void dcp_write(struct dcp_dev *dev, u32 data, u32 reg)
+{
+	writel(data, dev->dcp_regs_base + reg);
+}
+
+static inline void dcp_set(struct dcp_dev *dev, u32 data, u32 reg)
+{
+	writel(data, dev->dcp_regs_base + (reg | 0x04));
+}
+
+static inline void dcp_clear(struct dcp_dev *dev, u32 data, u32 reg)
+{
+	writel(data, dev->dcp_regs_base + (reg | 0x08));
+}
+
+static inline void dcp_toggle(struct dcp_dev *dev, u32 data, u32 reg)
+{
+	writel(data, dev->dcp_regs_base + (reg | 0x0C));
+}
+
+static inline unsigned int dcp_read(struct dcp_dev *dev, u32 reg)
+{
+	return readl(dev->dcp_regs_base + reg);
+}
+
+static void dcp_dma_unmap(struct dcp_dev *dev, struct dcp_hw_packet *pkt)
+{
+	dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE);
+	dma_unmap_page(dev->dev, pkt->dst, pkt->size, DMA_FROM_DEVICE);
+	dev_dbg(dev->dev, "unmap packet %x", (unsigned int) pkt);
+}
+
+static int dcp_dma_map(struct dcp_dev *dev,
+	struct ablkcipher_walk *walk, struct dcp_hw_packet *pkt)
+{
+	dev_dbg(dev->dev, "map packet %x", (unsigned int) pkt);
+	/* align to length = 16 */
+	pkt->size = walk->nbytes - (walk->nbytes % 16);
+
+	pkt->src = dma_map_page(dev->dev, walk->src.page, walk->src.offset,
+		pkt->size, DMA_TO_DEVICE);
+
+	if (pkt->src == 0) {
+		dev_err(dev->dev, "Unable to map src");
+		return -ENOMEM;
+	}
+
+	pkt->dst = dma_map_page(dev->dev, walk->dst.page, walk->dst.offset,
+		pkt->size, DMA_FROM_DEVICE);
+
+	if (pkt->dst == 0) {
+		dev_err(dev->dev, "Unable to map dst");
+		dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void dcp_op_one(struct dcp_dev *dev, struct dcp_hw_packet *pkt,
+			uint8_t last)
+{
+	struct dcp_op *ctx = dev->ctx;
+	pkt->pkt1 = ctx->pkt1;
+	pkt->pkt2 = ctx->pkt2;
+
+	pkt->payload = (u32) dev->payload_base_dma;
+	pkt->stat = 0;
+
+	if (ctx->flags & DCP_CBC_INIT) {
+		pkt->pkt1 |= DCP_PKT_CIPHER_INIT;
+		ctx->flags &= ~DCP_CBC_INIT;
+	}
+
+	mod_timer(&dev->watchdog, jiffies + msecs_to_jiffies(500));
+	pkt->pkt1 |= DCP_PKT_IRQ;
+	if (!last)
+		pkt->pkt1 |= DCP_PKT_CHAIN;
+
+	dev->pkt_produced++;
+
+	dcp_write(dev, 1,
+		dcp_chan_reg(DCP_REG_CHAN_SEMA, USED_CHANNEL));
+}
+
+static void dcp_op_proceed(struct dcp_dev *dev)
+{
+	struct dcp_op *ctx = dev->ctx;
+	struct dcp_hw_packet *pkt;
+
+	while (ctx->walk.nbytes) {
+		int err = 0;
+
+		pkt = dev->hw_pkg[dev->pkt_produced % DCP_MAX_PKG];
+		err = dcp_dma_map(dev, &ctx->walk, pkt);
+		if (err) {
+			dev->ctx->stat |= err;
+			/* start timer to wait for already set up calls */
+			mod_timer(&dev->watchdog,
+				jiffies + msecs_to_jiffies(500));
+			break;
+		}
+
+
+		err = ctx->walk.nbytes - pkt->size;
+		ablkcipher_walk_done(dev->ctx->req, &dev->ctx->walk, err);
+
+		dcp_op_one(dev, pkt, ctx->walk.nbytes == 0);
+		/* we have to wait if no space is left in buffer */
+		if (dev->pkt_produced - dev->pkt_consumed == DCP_MAX_PKG)
+			break;
+	}
+	clear_bit(DCP_FLAG_PRODUCING, &dev->flags);
+}
+
+static void dcp_op_start(struct dcp_dev *dev, uint8_t use_walk)
+{
+	struct dcp_op *ctx = dev->ctx;
+
+	if (ctx->flags & DCP_NEW_KEY) {
+		memcpy(dev->payload_base, ctx->key, ctx->keylen);
+		ctx->flags &= ~DCP_NEW_KEY;
+	}
+
+	ctx->pkt1 = 0;
+	ctx->pkt1 |= DCP_PKT_CIPHER_ENABLE;
+	ctx->pkt1 |= DCP_PKT_DECR_SEM;
+
+	if (ctx->flags & DCP_OTP_KEY)
+		ctx->pkt1 |= DCP_PKT_OTP_KEY;
+	else
+		ctx->pkt1 |= DCP_PKT_PAYLOAD_KEY;
+
+	if (ctx->flags & DCP_ENC)
+		ctx->pkt1 |= DCP_PKG_CIPHER_ENCRYPT;
+
+	ctx->pkt2 = 0;
+	if (ctx->flags & DCP_CBC)
+		ctx->pkt2 |= DCP_PKT_MODE_CBC;
+
+	dev->pkt_produced = 0;
+	dev->pkt_consumed = 0;
+
+	ctx->stat = 0;
+	dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
+	dcp_write(dev, (u32) dev->hw_phys_pkg,
+		dcp_chan_reg(DCP_REG_CHAN_PTR, USED_CHANNEL));
+
+	set_bit(DCP_FLAG_PRODUCING, &dev->flags);
+
+	if (use_walk) {
+		ablkcipher_walk_init(&ctx->walk, ctx->req->dst,
+				ctx->req->src, ctx->req->nbytes);
+		ablkcipher_walk_phys(ctx->req, &ctx->walk);
+		dcp_op_proceed(dev);
+	} else {
+		dcp_op_one(dev, dev->hw_pkg[0], 1);
+		clear_bit(DCP_FLAG_PRODUCING, &dev->flags);
+	}
+}
+
+static void dcp_done_task(unsigned long data)
+{
+	struct dcp_dev *dev = (struct dcp_dev *)data;
+	struct dcp_hw_packet *last_packet;
+	int fin;
+	fin = 0;
+
+	for (last_packet = dev->hw_pkg[(dev->pkt_consumed) % DCP_MAX_PKG];
+		last_packet->stat == 1;
+		last_packet =
+			dev->hw_pkg[++(dev->pkt_consumed) % DCP_MAX_PKG]) {
+
+		dcp_dma_unmap(dev, last_packet);
+		last_packet->stat = 0;
+		fin++;
+	}
+	/* the last call of this function already consumed this IRQ's packet */
+	if (fin == 0)
+		return;
+
+	dev_dbg(dev->dev,
+		"Packet(s) done with status %x; finished: %d, produced:%d, complete consumed: %d",
+		dev->ctx->stat, fin, dev->pkt_produced, dev->pkt_consumed);
+
+	last_packet = dev->hw_pkg[(dev->pkt_consumed - 1) % DCP_MAX_PKG];
+	if (!dev->ctx->stat && last_packet->pkt1 & DCP_PKT_CHAIN) {
+		if (!test_and_set_bit(DCP_FLAG_PRODUCING, &dev->flags))
+			dcp_op_proceed(dev);
+		return;
+	}
+
+	while (unlikely(dev->pkt_consumed < dev->pkt_produced)) {
+		dcp_dma_unmap(dev,
+			dev->hw_pkg[dev->pkt_consumed++ % DCP_MAX_PKG]);
+	}
+
+	if (dev->ctx->flags & DCP_OTP_KEY) {
+		/* we used the miscdevice, no walk to finish */
+		clear_bit(DCP_FLAG_BUSY, &dev->flags);
+		return;
+	}
+
+	ablkcipher_walk_complete(&dev->ctx->walk);
+	dev->ctx->req->base.complete(&dev->ctx->req->base,
+			dev->ctx->stat);
+	dev->ctx->req = NULL;
+	/* in case there are other requests in the queue */
+	tasklet_schedule(&dev->queue_task);
+}
+
+static void dcp_watchdog(unsigned long data)
+{
+	struct dcp_dev *dev = (struct dcp_dev *)data;
+	dev->ctx->stat |= dcp_read(dev,
+			dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
+
+	dev_err(dev->dev, "Timeout, Channel status: %x", dev->ctx->stat);
+
+	if (!dev->ctx->stat)
+		dev->ctx->stat = -ETIMEDOUT;
+
+	dcp_done_task(data);
+}
+
+
+static irqreturn_t dcp_common_irq(int irq, void *context)
+{
+	u32 msk;
+	struct dcp_dev *dev = (struct dcp_dev *) context;
+
+	del_timer(&dev->watchdog);
+
+	msk = DCP_STAT_IRQ(dcp_read(dev, DCP_REG_STAT));
+	dcp_clear(dev, msk, DCP_REG_STAT);
+	if (msk == 0)
+		return IRQ_NONE;
+
+	dev->ctx->stat |= dcp_read(dev,
+			dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
+
+	if (msk & DCP_STAT_CHAN_1)
+		tasklet_schedule(&dev->done_task);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t dcp_vmi_irq(int irq, void *context)
+{
+	return dcp_common_irq(irq, context);
+}
+
+static irqreturn_t dcp_irq(int irq, void *context)
+{
+	return dcp_common_irq(irq, context);
+}
+
+static void dcp_crypt(struct dcp_dev *dev, struct dcp_op *ctx)
+{
+	dev->ctx = ctx;
+
+	if ((ctx->flags & DCP_CBC) && ctx->req->info) {
+		ctx->flags |= DCP_CBC_INIT;
+		memcpy(dev->payload_base + AES_KEYSIZE_128,
+			ctx->req->info, AES_KEYSIZE_128);
+	}
+
+	dcp_op_start(dev, 1);
+}
+
+static void dcp_queue_task(unsigned long data)
+{
+	struct dcp_dev *dev = (struct dcp_dev *) data;
+	struct crypto_async_request *async_req, *backlog;
+	struct crypto_ablkcipher *tfm;
+	struct dcp_op *ctx;
+	struct dcp_dev_req_ctx *rctx;
+	struct ablkcipher_request *req;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->queue_lock, flags);
+
+	backlog = crypto_get_backlog(&dev->queue);
+	async_req = crypto_dequeue_request(&dev->queue);
+
+	spin_unlock_irqrestore(&dev->queue_lock, flags);
+
+	if (!async_req)
+		goto ret_nothing_done;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ablkcipher_request_cast(async_req);
+	tfm = crypto_ablkcipher_reqtfm(req);
+	rctx = ablkcipher_request_ctx(req);
+	ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!req->src || !req->dst)
+		goto ret_nothing_done;
+
+	ctx->flags |= rctx->mode;
+	ctx->req = req;
+
+	dcp_crypt(dev, ctx);
+
+	return;
+
+ret_nothing_done:
+	clear_bit(DCP_FLAG_BUSY, &dev->flags);
+}
+
+
+static int dcp_cra_init(struct crypto_tfm *tfm)
+{
+	const char *name = tfm->__crt_alg->cra_name;
+	struct dcp_op *ctx = crypto_tfm_ctx(tfm);
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_dev_req_ctx);
+
+	ctx->fallback = crypto_alloc_ablkcipher(name, 0,
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+
+	if (IS_ERR(ctx->fallback)) {
+		dev_err(global_dev->dev, "Error allocating fallback algo %s\n",
+			name);
+		return PTR_ERR(ctx->fallback);
+	}
+
+	return 0;
+}
+
+static void dcp_cra_exit(struct crypto_tfm *tfm)
+{
+	struct dcp_op *ctx = crypto_tfm_ctx(tfm);
+
+	if (ctx->fallback)
+		crypto_free_ablkcipher(ctx->fallback);
+
+	ctx->fallback = NULL;
+}
+
+/* async interface */
+static int dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+		unsigned int len)
+{
+	struct dcp_op *ctx = crypto_ablkcipher_ctx(tfm);
+	unsigned int ret = 0;
+	ctx->keylen = len;
+	ctx->flags = 0;
+	if (len == AES_KEYSIZE_128) {
+		if (memcmp(ctx->key, key, AES_KEYSIZE_128)) {
+			memcpy(ctx->key, key, len);
+			ctx->flags |= DCP_NEW_KEY;
+		}
+		return 0;
+	}
+
+	ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	ctx->fallback->base.crt_flags |=
+		(tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+
+	ret = crypto_ablkcipher_setkey(ctx->fallback, key, len);
+	if (ret) {
+		struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm);
+
+		tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+		tfm_aux->crt_flags |=
+			(ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK);
+	}
+	return ret;
+}
+
+static int dcp_aes_cbc_crypt(struct ablkcipher_request *req, int mode)
+{
+	struct dcp_dev_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct dcp_dev *dev = global_dev;
+	unsigned long flags;
+	int err = 0;
+
+	if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE))
+		return -EINVAL;
+
+	rctx->mode = mode;
+
+	spin_lock_irqsave(&dev->queue_lock, flags);
+	err = ablkcipher_enqueue_request(&dev->queue, req);
+	spin_unlock_irqrestore(&dev->queue_lock, flags);
+
+	flags = test_and_set_bit(DCP_FLAG_BUSY, &dev->flags);
+
+	if (!(flags & DCP_FLAG_BUSY))
+		tasklet_schedule(&dev->queue_task);
+
+	return err;
+}
+
+static int dcp_aes_cbc_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_tfm *tfm =
+		crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
+	struct dcp_op *ctx = crypto_ablkcipher_ctx(
+		crypto_ablkcipher_reqtfm(req));
+
+	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
+		int err = 0;
+		ablkcipher_request_set_tfm(req, ctx->fallback);
+		err = crypto_ablkcipher_encrypt(req);
+		ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
+		return err;
+	}
+
+	return dcp_aes_cbc_crypt(req, DCP_AES | DCP_ENC | DCP_CBC);
+}
+
+static int dcp_aes_cbc_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_tfm *tfm =
+		crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
+	struct dcp_op *ctx = crypto_ablkcipher_ctx(
+		crypto_ablkcipher_reqtfm(req));
+
+	if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
+		int err = 0;
+		ablkcipher_request_set_tfm(req, ctx->fallback);
+		err = crypto_ablkcipher_decrypt(req);
+		ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
+		return err;
+	}
+	return dcp_aes_cbc_crypt(req, DCP_AES | DCP_DEC | DCP_CBC);
+}
+
+static struct crypto_alg algs[] = {
+	{
+		.cra_name = "cbc(aes)",
+		.cra_driver_name = "dcp-cbc-aes",
+		.cra_alignmask = 3,
+		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
+			  CRYPTO_ALG_NEED_FALLBACK,
+		.cra_blocksize = AES_KEYSIZE_128,
+		.cra_type = &crypto_ablkcipher_type,
+		.cra_priority = 300,
+		.cra_u.ablkcipher = {
+			.min_keysize = AES_KEYSIZE_128,
+			.max_keysize = AES_KEYSIZE_128,
+			.setkey = dcp_aes_setkey,
+			.encrypt = dcp_aes_cbc_encrypt,
+			.decrypt = dcp_aes_cbc_decrypt,
+			.ivsize = AES_KEYSIZE_128,
+		}
+
+	},
+};
+
+/* DCP bootstream verification interface: uses OTP key for crypto */
+static int dcp_bootstream_open(struct inode *inode, struct file *file)
+{
+	file->private_data = container_of((file->private_data),
+			struct dcp_dev, dcp_bootstream_misc);
+	return 0;
+}
+
+static long dcp_bootstream_ioctl(struct file *file,
+					 unsigned int cmd, unsigned long arg)
+{
+	struct dcp_dev *dev = (struct dcp_dev *) file->private_data;
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	if (dev == NULL)
+		return -EBADF;
+
+	if (cmd != DBS_ENC && cmd != DBS_DEC)
+		return -EINVAL;
+
+	if (copy_from_user(dev->payload_base, argp, 16))
+		return -EFAULT;
+
+	if (test_and_set_bit(DCP_FLAG_BUSY, &dev->flags))
+		return -EAGAIN;
+
+	dev->ctx = kzalloc(sizeof(struct dcp_op), GFP_KERNEL);
+	if (!dev->ctx) {
+		dev_err(dev->dev,
+			"cannot allocate context for OTP crypto");
+		clear_bit(DCP_FLAG_BUSY, &dev->flags);
+		return -ENOMEM;
+	}
+
+	dev->ctx->flags = DCP_AES | DCP_ECB | DCP_OTP_KEY | DCP_CBC_INIT;
+	dev->ctx->flags |= (cmd == DBS_ENC) ? DCP_ENC : DCP_DEC;
+	dev->hw_pkg[0]->src = dev->payload_base_dma;
+	dev->hw_pkg[0]->dst = dev->payload_base_dma;
+	dev->hw_pkg[0]->size = 16;
+
+	dcp_op_start(dev, 0);
+
+	while (test_bit(DCP_FLAG_BUSY, &dev->flags))
+		cpu_relax();
+
+	ret = dev->ctx->stat;
+	if (!ret && copy_to_user(argp, dev->payload_base, 16))
+		ret = -EFAULT;
+
+	kfree(dev->ctx);
+
+	return ret;
+}
+
+static const struct file_operations dcp_bootstream_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = dcp_bootstream_ioctl,
+	.open = dcp_bootstream_open,
+};
+
+static int dcp_probe(struct platform_device *pdev)
+{
+	struct dcp_dev *dev = NULL;
+	struct resource *r;
+	int i, ret, j;
+
+	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	global_dev = dev;
+	dev->dev = &pdev->dev;
+
+	platform_set_drvdata(pdev, dev);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "failed to get IORESOURCE_MEM\n");
+		return -ENXIO;
+	}
+	dev->dcp_regs_base = devm_ioremap(&pdev->dev, r->start,
+					  resource_size(r));
+
+	dcp_set(dev, DCP_CTRL_SFRST, DCP_REG_CTRL);
+	udelay(10);
+	dcp_clear(dev, DCP_CTRL_SFRST | DCP_CTRL_CLKGATE, DCP_REG_CTRL);
+
+	dcp_write(dev, DCP_CTRL_GATHER_RES_WRITE |
+		DCP_CTRL_ENABLE_CONTEXT_CACHE | DCP_CTRL_CH_IRQ_E_1,
+		DCP_REG_CTRL);
+
+	dcp_write(dev, DCP_CHAN_CTRL_ENABLE_1, DCP_REG_CHAN_CTRL);
+
+	for (i = 0; i < 4; i++)
+		dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, i));
+
+	dcp_clear(dev, -1, DCP_REG_STAT);
+
+
+	r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "can't get IRQ resource (0)\n");
+		return -EIO;
+	}
+	dev->dcp_vmi_irq = r->start;
+	ret = request_irq(dev->dcp_vmi_irq, dcp_vmi_irq, 0, "dcp", dev);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "can't request_irq (0)\n");
+		return -EIO;
+	}
+
+	r = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
+	if (!r) {
+		dev_err(&pdev->dev, "can't get IRQ resource (1)\n");
+		ret = -EIO;
+		goto err_free_irq0;
+	}
+	dev->dcp_irq = r->start;
+	ret = request_irq(dev->dcp_irq, dcp_irq, 0, "dcp", dev);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "can't request_irq (1)\n");
+		ret = -EIO;
+		goto err_free_irq0;
+	}
+
+	dev->hw_pkg[0] = dma_alloc_coherent(&pdev->dev,
+			DCP_MAX_PKG * sizeof(struct dcp_hw_packet),
+			&dev->hw_phys_pkg,
+			GFP_KERNEL);
+	if (!dev->hw_pkg[0]) {
+		dev_err(&pdev->dev, "Could not allocate hw descriptors\n");
+		ret = -ENOMEM;
+		goto err_free_irq1;
+	}
+
+	for (i = 1; i < DCP_MAX_PKG; i++) {
+		dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg
+				+ i * sizeof(struct dcp_hw_packet);
+		dev->hw_pkg[i] = dev->hw_pkg[i - 1] + 1;
+	}
+	dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg;
+
+
+	dev->payload_base = dma_alloc_coherent(&pdev->dev, 2 * AES_KEYSIZE_128,
+			&dev->payload_base_dma, GFP_KERNEL);
+	if (!dev->payload_base) {
+		dev_err(&pdev->dev, "Could not allocate memory for key\n");
+		ret = -ENOMEM;
+		goto err_free_hw_packet;
+	}
+	tasklet_init(&dev->queue_task, dcp_queue_task,
+		(unsigned long) dev);
+	tasklet_init(&dev->done_task, dcp_done_task,
+		(unsigned long) dev);
+	spin_lock_init(&dev->queue_lock);
+
+	crypto_init_queue(&dev->queue, 10);
+
+	init_timer(&dev->watchdog);
+	dev->watchdog.function = &dcp_watchdog;
+	dev->watchdog.data = (unsigned long)dev;
+
+	dev->dcp_bootstream_misc.minor = MISC_DYNAMIC_MINOR,
+	dev->dcp_bootstream_misc.name = "dcpboot",
+	dev->dcp_bootstream_misc.fops = &dcp_bootstream_fops,
+	ret = misc_register(&dev->dcp_bootstream_misc);
+	if (ret != 0) {
+		dev_err(dev->dev, "Unable to register misc device\n");
+		goto err_free_key_iv;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(algs); i++) {
+		algs[i].cra_priority = 300;
+		algs[i].cra_ctxsize = sizeof(struct dcp_op);
+		algs[i].cra_module = THIS_MODULE;
+		algs[i].cra_init = dcp_cra_init;
+		algs[i].cra_exit = dcp_cra_exit;
+		if (crypto_register_alg(&algs[i])) {
+			dev_err(&pdev->dev, "register algorithm failed\n");
+			ret = -ENOMEM;
+			goto err_unregister;
+		}
+	}
+	dev_notice(&pdev->dev, "DCP crypto enabled.!\n");
+
+	return 0;
+
+err_unregister:
+	for (j = 0; j < i; j++)
+		crypto_unregister_alg(&algs[j]);
+err_free_key_iv:
+	dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base,
+			dev->payload_base_dma);
+err_free_hw_packet:
+	dma_free_coherent(&pdev->dev, DCP_MAX_PKG *
+		sizeof(struct dcp_hw_packet), dev->hw_pkg[0],
+		dev->hw_phys_pkg);
+err_free_irq1:
+	free_irq(dev->dcp_irq, dev);
+err_free_irq0:
+	free_irq(dev->dcp_vmi_irq, dev);
+
+	return ret;
+}
+
+static int dcp_remove(struct platform_device *pdev)
+{
+	struct dcp_dev *dev;
+	int j;
+	dev = platform_get_drvdata(pdev);
+
+	dma_free_coherent(&pdev->dev,
+			DCP_MAX_PKG * sizeof(struct dcp_hw_packet),
+			dev->hw_pkg[0], dev->hw_phys_pkg);
+
+	dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base,
+			dev->payload_base_dma);
+
+	free_irq(dev->dcp_irq, dev);
+	free_irq(dev->dcp_vmi_irq, dev);
+
+	tasklet_kill(&dev->done_task);
+	tasklet_kill(&dev->queue_task);
+
+	for (j = 0; j < ARRAY_SIZE(algs); j++)
+		crypto_unregister_alg(&algs[j]);
+
+	misc_deregister(&dev->dcp_bootstream_misc);
+
+	return 0;
+}
+
+static struct of_device_id fs_dcp_of_match[] = {
+	{	.compatible = "fsl-dcp"},
+	{},
+};
+
+static struct platform_driver fs_dcp_driver = {
+	.probe = dcp_probe,
+	.remove = dcp_remove,
+	.driver = {
+		.name = "fsl-dcp",
+		.owner = THIS_MODULE,
+		.of_match_table = fs_dcp_of_match
+	}
+};
+
+module_platform_driver(fs_dcp_driver);
+
+
+MODULE_AUTHOR("Tobias Rauter <tobias.rauter@gmail.com>");
+MODULE_DESCRIPTION("Freescale DCP Crypto Driver")
+MODULE_LICENSE("GPL");
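
The bootstream interface above registers a misc device named "dcpboot" and accepts exactly one 16-byte AES block per ioctl, encrypted or decrypted in place with the OTP key. A hedged user-space sketch of driving it (it assumes the node appears as /dev/dcpboot, which depends on the system's device manager; the ioctl numbers are copied from the driver):

	#include <stdint.h>
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>

	/* Same definitions as in dcp.c. */
	#define DBS_IOCTL_BASE	'd'
	#define DBS_ENC		_IOW(DBS_IOCTL_BASE, 0x00, uint8_t[16])
	#define DBS_DEC		_IOW(DBS_IOCTL_BASE, 0x01, uint8_t[16])

	int main(void)
	{
		uint8_t block[16] = "0123456789abcde";	/* one AES block */
		int fd = open("/dev/dcpboot", O_RDWR);	/* assumed node */

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Encrypt in place with the OTP key; DBS_DEC reverses it. */
		if (ioctl(fd, DBS_ENC, block) < 0)
			perror("DBS_ENC");
		close(fd);
		return 0;
	}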
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index ebf130e894b5..12fea3e22348 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -2676,7 +2676,7 @@ err_out_stop_device:
 	hifn_reset_dma(dev, 1);
 	hifn_stop_device(dev);
 err_out_free_irq:
-	free_irq(dev->irq, dev->name);
+	free_irq(dev->irq, dev);
 	tasklet_kill(&dev->tasklet);
 err_out_free_desc:
 	pci_free_consistent(pdev, sizeof(struct hifn_dma),
@@ -2711,7 +2711,7 @@ static void hifn_remove(struct pci_dev *pdev)
 	hifn_reset_dma(dev, 1);
 	hifn_stop_device(dev);
 
-	free_irq(dev->irq, dev->name);
+	free_irq(dev->irq, dev);
 	tasklet_kill(&dev->tasklet);
 
 	hifn_flush(dev);
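
Both hifn fixes correct the same mistake: free_irq() must receive the very dev_id cookie that was passed to request_irq(), otherwise the wrong action can be removed on a shared interrupt line. A small kernel-style sketch of the pairing (mydev, my_handler and example_setup are illustrative names, not from the patch):

	#include <linux/interrupt.h>

	struct mydev {			/* hypothetical device */
		int irq;
	};

	static irqreturn_t my_handler(int irq, void *dev_id)
	{
		return IRQ_HANDLED;
	}

	static int example_setup(struct mydev *dev)
	{
		int ret = request_irq(dev->irq, my_handler, 0, "mydrv", dev);

		if (ret)
			return ret;
		/* ...use the device... */
		free_irq(dev->irq, dev);	/* same cookie as request_irq() */
		return 0;
	}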
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c
index ce6290e5471a..3374a3ebe4c7 100644
--- a/drivers/crypto/mv_cesa.c
+++ b/drivers/crypto/mv_cesa.c
@@ -1146,7 +1146,6 @@ err_unmap_reg:
 err:
 	kfree(cp);
 	cpg = NULL;
-	platform_set_drvdata(pdev, NULL);
 	return ret;
 }
 
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index ee15b0f7849a..5f7980586850 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -203,13 +203,6 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset,
 
 static int omap_aes_hw_init(struct omap_aes_dev *dd)
 {
-	/*
-	 * clocks are enabled when request starts and disabled when finished.
-	 * It may be long delays between requests.
-	 * Device might go to off mode to save power.
-	 */
-	pm_runtime_get_sync(dd->dev);
-
 	if (!(dd->flags & FLAGS_INIT)) {
 		dd->flags |= FLAGS_INIT;
 		dd->err = 0;
@@ -636,7 +629,6 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 
 	pr_debug("err: %d\n", err);
 
-	pm_runtime_put(dd->dev);
 	dd->flags &= ~FLAGS_BUSY;
 
 	req->base.complete(&req->base, err);
@@ -837,8 +829,16 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
 
 static int omap_aes_cra_init(struct crypto_tfm *tfm)
 {
-	pr_debug("enter\n");
+	struct omap_aes_dev *dd = NULL;
+
+	/* Find AES device, currently picks the first device */
+	spin_lock_bh(&list_lock);
+	list_for_each_entry(dd, &dev_list, list) {
+		break;
+	}
+	spin_unlock_bh(&list_lock);
 
+	pm_runtime_get_sync(dd->dev);
 	tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
 
 	return 0;
@@ -846,7 +846,16 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
 
 static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 {
-	pr_debug("enter\n");
+	struct omap_aes_dev *dd = NULL;
+
+	/* Find AES device, currently picks the first device */
+	spin_lock_bh(&list_lock);
+	list_for_each_entry(dd, &dev_list, list) {
+		break;
+	}
+	spin_unlock_bh(&list_lock);
+
+	pm_runtime_put_sync(dd->dev);
 }
 
 /* ********************** ALGS ************************************ */
@@ -1125,10 +1134,9 @@ static int omap_aes_probe(struct platform_device *pdev)
 	if (err)
 		goto err_res;
 
-	dd->io_base = devm_request_and_ioremap(dev, &res);
-	if (!dd->io_base) {
-		dev_err(dev, "can't ioremap\n");
-		err = -ENOMEM;
+	dd->io_base = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(dd->io_base)) {
+		err = PTR_ERR(dd->io_base);
 		goto err_res;
 	}
 	dd->phys_base = res.start;
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index a1e1b4756ee5..4bb67652c200 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -1686,10 +1686,9 @@ static int omap_sham_probe(struct platform_device *pdev)
 	if (err)
 		goto res_err;
 
-	dd->io_base = devm_request_and_ioremap(dev, &res);
-	if (!dd->io_base) {
-		dev_err(dev, "can't ioremap\n");
-		err = -ENOMEM;
+	dd->io_base = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(dd->io_base)) {
+		err = PTR_ERR(dd->io_base);
 		goto res_err;
 	}
 	dd->phys_base = res.start;
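
The two OMAP conversions above follow the standard devm_ioremap_resource() idiom: failures come back as ERR_PTR-encoded pointers carrying the real reason, so callers test with IS_ERR() and propagate PTR_ERR() instead of a blanket -ENOMEM. A hedged sketch of the pattern (map_regs is an illustrative helper, not from the patch):

	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/io.h>
	#include <linux/ioport.h>

	/* Sketch of the devm_ioremap_resource() error-handling idiom. */
	static void __iomem *map_regs(struct device *dev, struct resource *res)
	{
		void __iomem *base = devm_ioremap_resource(dev, res);

		if (IS_ERR(base))
			dev_err(dev, "ioremap failed: %ld\n", PTR_ERR(base));

		return base;	/* caller still checks IS_ERR(base) */
	}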
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index ac30724d923d..888f7f4a6d3f 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -1298,7 +1298,7 @@ static ssize_t spacc_stat_irq_thresh_store(struct device *dev,
 	struct spacc_engine *engine = spacc_dev_to_engine(dev);
 	unsigned long thresh;
 
-	if (strict_strtoul(buf, 0, &thresh))
+	if (kstrtoul(buf, 0, &thresh))
 		return -EINVAL;
 
 	thresh = clamp(thresh, 1UL, engine->fifo_sz - 1);
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 4b314326f48a..cf149b19ff47 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -647,7 +647,6 @@ static int s5p_aes_probe(struct platform_device *pdev)
 	clk_disable(pdata->clk);
 
 	s5p_dev = NULL;
-	platform_set_drvdata(pdev, NULL);
 
 	return err;
 }
@@ -668,7 +667,6 @@ static int s5p_aes_remove(struct platform_device *pdev)
 	clk_disable(pdata->clk);
 
 	s5p_dev = NULL;
-	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index 83d79b964d12..a999f537228f 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -1629,7 +1629,7 @@ static int ux500_cryp_remove(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res)
-		release_mem_region(res->start, res->end - res->start + 1);
+		release_mem_region(res->start, resource_size(res));
 
 	kfree(device_data);
 
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index a9c96d865ee7..b3cb71f0d3b0 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -3,6 +3,10 @@
 
 #include <linux/types.h>
 
+#define CRC_T10DIF_DIGEST_SIZE 2
+#define CRC_T10DIF_BLOCK_SIZE 1
+
+__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len);
 __u16 crc_t10dif(unsigned char const *, size_t);
 
 #endif
diff --git a/lib/Kconfig b/lib/Kconfig
index 5a5203ded0dd..f1ed53c3aa44 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -66,6 +66,8 @@ config CRC16
66 66
67config CRC_T10DIF 67config CRC_T10DIF
68 tristate "CRC calculation for the T10 Data Integrity Field" 68 tristate "CRC calculation for the T10 Data Integrity Field"
69 select CRYPTO
70 select CRYPTO_CRCT10DIF
69 help 71 help
70 This option is only needed if a module that's not in the 72 This option is only needed if a module that's not in the
71 kernel tree needs to calculate CRC checks for use with the 73 kernel tree needs to calculate CRC checks for use with the
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index fbbd66ed86cd..fe3428c07b47 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -11,57 +11,44 @@
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/crc-t10dif.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <crypto/hash.h>
 
-/* Table generated using the following polynomium:
- * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
- * gt: 0x8bb7
- */
-static const __u16 t10_dif_crc_table[256] = {
-	0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
-	0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
-	0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
-	0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
-	0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
-	0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
-	0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
-	0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
-	0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
-	0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
-	0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
-	0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
-	0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
-	0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
-	0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
-	0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
-	0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
-	0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
-	0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
-	0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
-	0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
-	0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
-	0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
-	0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
-	0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
-	0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
-	0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
-	0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
-	0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
-	0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
-	0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
-	0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
-};
+static struct crypto_shash *crct10dif_tfm;
 
 __u16 crc_t10dif(const unsigned char *buffer, size_t len)
 {
-	__u16 crc = 0;
-	unsigned int i;
+	struct {
+		struct shash_desc shash;
+		char ctx[2];
+	} desc;
+	int err;
+
+	desc.shash.tfm = crct10dif_tfm;
+	desc.shash.flags = 0;
+	*(__u16 *)desc.ctx = 0;
 
-	for (i = 0 ; i < len ; i++)
-		crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
+	err = crypto_shash_update(&desc.shash, buffer, len);
+	BUG_ON(err);
 
-	return crc;
+	return *(__u16 *)desc.ctx;
 }
 EXPORT_SYMBOL(crc_t10dif);
 
+static int __init crc_t10dif_mod_init(void)
+{
+	crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
+	return PTR_RET(crct10dif_tfm);
+}
+
+static void __exit crc_t10dif_mod_fini(void)
+{
+	crypto_free_shash(crct10dif_tfm);
+}
+
+module_init(crc_t10dif_mod_init);
+module_exit(crc_t10dif_mod_fini);
+
 MODULE_DESCRIPTION("T10 DIF CRC calculation");
 MODULE_LICENSE("GPL");
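
With the table gone, lib/crc-t10dif.c now delegates to whichever "crct10dif" shash the crypto layer picks (the generic C version or the new PCLMULQDQ-accelerated one). The removed table was generated from the polynomial x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 (0x8bb7, MSB-first, initial value 0), so a bit-at-a-time version makes a handy independent cross-check of any backend; a stand-alone sketch:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Bit-at-a-time CRC-T10DIF: poly 0x8bb7, MSB first, init 0.
	 * This is the function the removed 256-entry lookup table was
	 * generated from, so its output must match crc_t10dif(). */
	static uint16_t crc_t10dif_bitwise(const uint8_t *buf, size_t len)
	{
		uint16_t crc = 0;
		size_t i;
		int bit;

		for (i = 0; i < len; i++) {
			crc ^= (uint16_t)buf[i] << 8;
			for (bit = 0; bit < 8; bit++)
				crc = (crc & 0x8000) ?
					(uint16_t)((crc << 1) ^ 0x8bb7) :
					(uint16_t)(crc << 1);
		}
		return crc;
	}

	int main(void)
	{
		const uint8_t msg[] = "123456789";

		/* Standard check value for CRC-16/T10-DIF is 0xd0db. */
		printf("crc-t10dif(\"123456789\") = 0x%04x\n",
		       crc_t10dif_bitwise(msg, 9));
		return 0;
	}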