diff options
-rw-r--r-- | arch/x86/crypto/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/crypto/glue_helper-asm-avx2.S | 180 | ||||
-rw-r--r-- | arch/x86/crypto/twofish-avx2-asm_64.S | 600 | ||||
-rw-r--r-- | arch/x86/crypto/twofish_avx2_glue.c | 584 | ||||
-rw-r--r-- | arch/x86/crypto/twofish_avx_glue.c | 14 | ||||
-rw-r--r-- | arch/x86/include/asm/crypto/twofish.h | 18 | ||||
-rw-r--r-- | crypto/Kconfig | 24 | ||||
-rw-r--r-- | crypto/testmgr.c | 12 |
8 files changed, 1432 insertions, 2 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 28464ef6fa52..1f6e0c2e9140 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -43,6 +43,7 @@ endif | |||
43 | # These modules require assembler to support AVX2. | 43 | # These modules require assembler to support AVX2. |
44 | ifeq ($(avx2_supported),yes) | 44 | ifeq ($(avx2_supported),yes) |
45 | obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o | 45 | obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o |
46 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o | ||
46 | endif | 47 | endif |
47 | 48 | ||
48 | aes-i586-y := aes-i586-asm_32.o aes_glue.o | 49 | aes-i586-y := aes-i586-asm_32.o aes_glue.o |
@@ -71,6 +72,7 @@ endif | |||
71 | 72 | ||
72 | ifeq ($(avx2_supported),yes) | 73 | ifeq ($(avx2_supported),yes) |
73 | blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o | 74 | blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o |
75 | twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o | ||
74 | endif | 76 | endif |
75 | 77 | ||
76 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 78 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S new file mode 100644 index 000000000000..a53ac11dd385 --- /dev/null +++ b/arch/x86/crypto/glue_helper-asm-avx2.S | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers, AVX2 assembler macros | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #define load_16way(src, x0, x1, x2, x3, x4, x5, x6, x7) /* load eight consecutive 32-byte vectors (256 bytes) from src into x0..x7 */ \ | ||
14 | vmovdqu (0*32)(src), x0; /* vmovdqu: no alignment requirement on src */ \ | ||
15 | vmovdqu (1*32)(src), x1; \ | ||
16 | vmovdqu (2*32)(src), x2; \ | ||
17 | vmovdqu (3*32)(src), x3; \ | ||
18 | vmovdqu (4*32)(src), x4; \ | ||
19 | vmovdqu (5*32)(src), x5; \ | ||
20 | vmovdqu (6*32)(src), x6; \ | ||
21 | vmovdqu (7*32)(src), x7; | ||
22 | |||
23 | #define store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) /* store x0..x7 as eight consecutive 32-byte vectors (256 bytes) at dst */ \ | ||
24 | vmovdqu x0, (0*32)(dst); /* vmovdqu: no alignment requirement on dst */ \ | ||
25 | vmovdqu x1, (1*32)(dst); \ | ||
26 | vmovdqu x2, (2*32)(dst); \ | ||
27 | vmovdqu x3, (3*32)(dst); \ | ||
28 | vmovdqu x4, (4*32)(dst); \ | ||
29 | vmovdqu x5, (5*32)(dst); \ | ||
30 | vmovdqu x6, (6*32)(dst); \ | ||
31 | vmovdqu x7, (7*32)(dst); | ||
32 | |||
33 | #define store_cbc_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7, t0) /* xor blocks 1..15 held in x0..x7 with src blocks 0..14, then store all 16 blocks to dst; t0 is clobbered */ \ | ||
34 | vpxor t0, t0, t0; /* t0 = 0 */ \ | ||
35 | vinserti128 $1, (src), t0, t0; /* high 128 bits = first 16 bytes of src, low 128 bits stay zero */ \ | ||
36 | vpxor t0, x0, x0; /* block 0 is left unchanged (NOTE(review): presumably the caller applies the IV to it - confirm in the glue code); block 1 ^= src block 0 */ \ | ||
37 | vpxor (0*32+16)(src), x1, x1; /* the +16 offset pairs every block with the src block that precedes it */ \ | ||
38 | vpxor (1*32+16)(src), x2, x2; \ | ||
39 | vpxor (2*32+16)(src), x3, x3; \ | ||
40 | vpxor (3*32+16)(src), x4, x4; \ | ||
41 | vpxor (4*32+16)(src), x5, x5; \ | ||
42 | vpxor (5*32+16)(src), x6, x6; \ | ||
43 | vpxor (6*32+16)(src), x7, x7; \ | ||
44 | store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | ||
45 | |||
46 | #define inc_le128(x, minus_one, tmp) /* add 1 to the 128-bit little-endian integer in x; expects minus_one = low qword -1, high qword 0 (as built in load_ctr_16way); tmp is clobbered */ \ | ||
47 | vpcmpeqq minus_one, x, tmp; /* low qword of tmp = all ones when the low qword of x is -1, i.e. the increment will wrap */ \ | ||
48 | vpsubq minus_one, x, x; /* subtracting -1:0 adds 1 to the low qword and leaves the high qword alone */ \ | ||
49 | vpslldq $8, tmp, tmp; /* shift the wrap flag from the low qword up into the high qword */ \ | ||
50 | vpsubq tmp, x, x; /* high qword += 1 when the low qword wrapped */ | ||
51 | |||
52 | #define add2_le128(x, minus_one, minus_two, tmp1, tmp2) /* add 2 to each 128-bit little-endian integer in x; expects minus_one = -1:0 and minus_two = -2:0 per lane (as built in load_ctr_16way); tmp1 and tmp2 are clobbered */ \ | ||
53 | vpcmpeqq minus_one, x, tmp1; /* low qword == -1 ? */ \ | ||
54 | vpcmpeqq minus_two, x, tmp2; /* low qword == -2 ? */ \ | ||
55 | vpsubq minus_two, x, x; /* subtracting -2:0 adds 2 to the low qword only */ \ | ||
56 | vpor tmp2, tmp1, tmp1; /* adding 2 wraps the low qword in either of the two cases */ \ | ||
57 | vpslldq $8, tmp1, tmp1; /* move the wrap flag into the high qword position */ \ | ||
58 | vpsubq tmp1, x, x; /* propagate the carry into the high qword */ | ||
59 | |||
60 | #define load_ctr_16way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t0x, t1, \ | ||
61 | t1x, t2, t2x, t3, t3x, t4, t5) /* build 16 consecutive counter blocks in x0..x7 starting at the little-endian counter stored at iv, then write counter+16 back to iv; tNx is used as the xmm alias of tN; all t registers are clobbered */ \ | ||
62 | vpcmpeqd t0, t0, t0; /* t0 = all ones */ \ | ||
63 | vpsrldq $8, t0, t0; /* ab: -1:0 ; cd: -1:0 */ \ | ||
64 | vpaddq t0, t0, t4; /* ab: -2:0 ; cd: -2:0 */\ | ||
65 | \ | ||
66 | /* load IV and byteswap */ \ | ||
67 | vmovdqu (iv), t2x; \ | ||
68 | vmovdqa t2x, t3x; /* keep counter 0 in t3x */ \ | ||
69 | inc_le128(t2x, t0x, t1x); /* t2x = counter 1 */ \ | ||
70 | vbroadcasti128 bswap, t1; /* t1 = the 16-byte shuffle mask at bswap, replicated into both lanes */ \ | ||
71 | vinserti128 $1, t2x, t3, t2; /* ab: le0 ; cd: le1 */ \ | ||
72 | vpshufb t1, t2, x0; /* x0 = counters 0 and 1 with their bytes reordered by the mask */ \ | ||
73 | \ | ||
74 | /* construct IVs */ \ | ||
75 | add2_le128(t2, t0, t4, t3, t5); /* ab: le2 ; cd: le3 */ \ | ||
76 | vpshufb t1, t2, x1; \ | ||
77 | add2_le128(t2, t0, t4, t3, t5); \ | ||
78 | vpshufb t1, t2, x2; \ | ||
79 | add2_le128(t2, t0, t4, t3, t5); \ | ||
80 | vpshufb t1, t2, x3; \ | ||
81 | add2_le128(t2, t0, t4, t3, t5); \ | ||
82 | vpshufb t1, t2, x4; \ | ||
83 | add2_le128(t2, t0, t4, t3, t5); \ | ||
84 | vpshufb t1, t2, x5; \ | ||
85 | add2_le128(t2, t0, t4, t3, t5); \ | ||
86 | vpshufb t1, t2, x6; \ | ||
87 | add2_le128(t2, t0, t4, t3, t5); \ | ||
88 | vpshufb t1, t2, x7; /* t2 now holds counters 14 and 15 */ \ | ||
89 | vextracti128 $1, t2, t2x; /* t2x = high lane of t2 (counter 15) */ \ | ||
90 | inc_le128(t2x, t0x, t3x); /* counter 16 */ \ | ||
91 | vmovdqu t2x, (iv); /* store the next counter, still little endian, back to iv */ | ||
92 | |||
93 | #define store_ctr_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) /* xor x0..x7 with the 256 bytes at src and store the result to dst */ \ | ||
94 | vpxor (0*32)(src), x0, x0; \ | ||
95 | vpxor (1*32)(src), x1, x1; \ | ||
96 | vpxor (2*32)(src), x2, x2; \ | ||
97 | vpxor (3*32)(src), x3, x3; \ | ||
98 | vpxor (4*32)(src), x4, x4; \ | ||
99 | vpxor (5*32)(src), x5, x5; \ | ||
100 | vpxor (6*32)(src), x6, x6; \ | ||
101 | vpxor (7*32)(src), x7, x7; \ | ||
102 | store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | ||
103 | |||
104 | #define gf128mul_x_ble(iv, mask, tmp) /* shift the 128-bit value in iv left by one bit and xor in mask wherever a top bit was shifted out (one doubling step of the XTS tweak); tmp is clobbered */ \ | ||
105 | vpsrad $31, iv, tmp; /* every dword of tmp = sign bit of the matching dword of iv, replicated */ \ | ||
106 | vpaddq iv, iv, iv; /* each 64-bit half << 1; the bits shifted out of each half are lost here */ \ | ||
107 | vpshufd $0x13, tmp, tmp; /* dword 0 <- sign of dword 3 (bit 127), dword 2 <- sign of dword 1 (bit 63) */ \ | ||
108 | vpand mask, tmp, tmp; /* keep only the mask bits for the carries that occurred */ \ | ||
109 | vpxor tmp, iv, iv; /* fold the carries back in */ | ||
110 | |||
111 | #define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) /* two doubling steps of gf128mul_x_ble fused into one: iv is shifted left by two bits per 64-bit half and both rounds of carries are folded in; tmp0 and tmp1 are clobbered */ \ | ||
112 | vpsrad $31, iv, tmp0; /* sign bits of the original value (carries of the first doubling) */ \ | ||
113 | vpaddq iv, iv, tmp1; /* tmp1 = value after one shift, used only to find the second set of carries */ \ | ||
114 | vpsllq $2, iv, iv; /* each 64-bit half << 2 */ \ | ||
115 | vpshufd $0x13, tmp0, tmp0; /* same dword routing as in gf128mul_x_ble */ \ | ||
116 | vpsrad $31, tmp1, tmp1; /* sign bits of the once-shifted value (carries of the second doubling) */ \ | ||
117 | vpand mask2, tmp0, tmp0; /* first-doubling carries use mask2 */ \ | ||
118 | vpshufd $0x13, tmp1, tmp1; \ | ||
119 | vpxor tmp0, iv, iv; \ | ||
120 | vpand mask1, tmp1, tmp1; /* second-doubling carries use mask1 */ \ | ||
121 | vpxor tmp1, iv, iv; | ||
122 | |||
123 | #define load_xts_16way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, \ | ||
124 | tivx, t0, t0x, t1, t1x, t2, t2x, t3, \ | ||
125 | xts_gf128mul_and_shl1_mask_0, \ | ||
126 | xts_gf128mul_and_shl1_mask_1) /* derive 16 successive tweaks from the one stored at iv, save them in dst, set x0..x7 = src xor tweak, and write the 17th tweak back to iv; tiv and all t registers are clobbered */ \ | ||
127 | vbroadcasti128 xts_gf128mul_and_shl1_mask_0, t1; /* t1 = single-doubling mask in both lanes */ \ | ||
128 | \ | ||
129 | /* load IV and construct second IV */ \ | ||
130 | vmovdqu (iv), tivx; \ | ||
131 | vmovdqa tivx, t0x; /* t0x = tweak 0 */ \ | ||
132 | gf128mul_x_ble(tivx, t1x, t2x); /* tivx = tweak 1 */ \ | ||
133 | vbroadcasti128 xts_gf128mul_and_shl1_mask_1, t2; /* t2 = mask for the first carry of the double step */ \ | ||
134 | vinserti128 $1, tivx, t0, tiv; /* tiv = tweak 0 (low lane), tweak 1 (high lane) */ \ | ||
135 | vpxor (0*32)(src), tiv, x0; \ | ||
136 | vmovdqu tiv, (0*32)(dst); /* dst temporarily holds the tweaks; store_xts_16way reads them back */ \ | ||
137 | \ | ||
138 | /* construct and store IVs, also xor with source */ \ | ||
139 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); /* advance both lanes by two tweak steps */ \ | ||
140 | vpxor (1*32)(src), tiv, x1; \ | ||
141 | vmovdqu tiv, (1*32)(dst); \ | ||
142 | \ | ||
143 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
144 | vpxor (2*32)(src), tiv, x2; \ | ||
145 | vmovdqu tiv, (2*32)(dst); \ | ||
146 | \ | ||
147 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
148 | vpxor (3*32)(src), tiv, x3; \ | ||
149 | vmovdqu tiv, (3*32)(dst); \ | ||
150 | \ | ||
151 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
152 | vpxor (4*32)(src), tiv, x4; \ | ||
153 | vmovdqu tiv, (4*32)(dst); \ | ||
154 | \ | ||
155 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
156 | vpxor (5*32)(src), tiv, x5; \ | ||
157 | vmovdqu tiv, (5*32)(dst); \ | ||
158 | \ | ||
159 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
160 | vpxor (6*32)(src), tiv, x6; \ | ||
161 | vmovdqu tiv, (6*32)(dst); \ | ||
162 | \ | ||
163 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
164 | vpxor (7*32)(src), tiv, x7; \ | ||
165 | vmovdqu tiv, (7*32)(dst); \ | ||
166 | \ | ||
167 | vextracti128 $1, tiv, tivx; /* tivx = tweak 15 (high lane) */ \ | ||
168 | gf128mul_x_ble(tivx, t1x, t2x); /* tweak 16 */ \ | ||
169 | vmovdqu tivx, (iv); /* hand the next tweak back to the caller through iv */ | ||
170 | |||
171 | #define store_xts_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) /* xor x0..x7 with the 256 bytes currently at dst (load_xts_16way leaves the tweaks there) and store the result over them */ \ | ||
172 | vpxor (0*32)(dst), x0, x0; \ | ||
173 | vpxor (1*32)(dst), x1, x1; \ | ||
174 | vpxor (2*32)(dst), x2, x2; \ | ||
175 | vpxor (3*32)(dst), x3, x3; \ | ||
176 | vpxor (4*32)(dst), x4, x4; \ | ||
177 | vpxor (5*32)(dst), x5, x5; \ | ||
178 | vpxor (6*32)(dst), x6, x6; \ | ||
179 | vpxor (7*32)(dst), x7, x7; \ | ||
180 | store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | ||
diff --git a/arch/x86/crypto/twofish-avx2-asm_64.S b/arch/x86/crypto/twofish-avx2-asm_64.S new file mode 100644 index 000000000000..e1a83b9cd389 --- /dev/null +++ b/arch/x86/crypto/twofish-avx2-asm_64.S | |||
@@ -0,0 +1,600 @@ | |||
1 | /* | ||
2 | * x86_64/AVX2 assembler optimized version of Twofish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | #include "glue_helper-asm-avx2.S" | ||
15 | |||
16 | .file "twofish-avx2-asm_64.S" | ||
17 | |||
18 | .section .rodata /* the tables below are only ever read, so keep them out of writable .data */ | ||
19 | .align 16 | ||
20 | |||
21 | .Lvpshufb_mask0: /* NOTE(review): not referenced anywhere in the visible part of this file */ | ||
22 | .long 0x80808000 | ||
23 | .long 0x80808004 | ||
24 | .long 0x80808008 | ||
25 | .long 0x8080800c | ||
26 | |||
27 | .Lbswap128_mask: /* vpshufb control that reverses the 16 bytes of a lane; used by twofish_ctr_16way */ | ||
28 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
29 | .Lxts_gf128mul_and_shl1_mask_0: /* carry masks for one tweak doubling: 0x87 into byte 0, 1 into byte 8 */ | ||
30 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
31 | .Lxts_gf128mul_and_shl1_mask_1: /* the same masks shifted left by one bit, for the first carry of a double step */ | ||
32 | .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 | ||
33 | |||
34 | .text | ||
35 | |||
36 | /* structure of crypto context: byte offsets from CTX (NOTE(review): presumably mirrors struct twofish_ctx - confirm against crypto/twofish.h) */ | ||
37 | #define s0 0 /* first lookup table; tables are 1024 bytes apart and indexed with 4-byte entries in g16 */ | ||
38 | #define s1 1024 | ||
39 | #define s2 2048 | ||
40 | #define s3 3072 | ||
41 | #define w 4096 /* eight 32-bit words read by the inpack/outunpack macros */ | ||
42 | #define k 4128 /* 32-bit words added in the round macros, two per round */ | ||
43 | |||
44 | /* register macros */ | ||
45 | #define CTX %rdi /* context pointer, first argument of every entry point */ | ||
46 | |||
47 | #define RS0 CTX /* s0 sits at offset 0, so the context pointer doubles as its base */ | ||
48 | #define RS1 %r8 | ||
49 | #define RS2 %r9 | ||
50 | #define RS3 %r10 | ||
51 | #define RK %r11 /* base of the k words */ | ||
52 | #define RW %rax /* base of the w words */ | ||
53 | #define RROUND %r12 /* index into k, scaled by 8; the public entry points push/pop %r12 around its use */ | ||
54 | #define RROUNDd %r12d | ||
55 | |||
56 | #define RA0 %ymm8 /* block data: words A..D for the first eight blocks (suffix 0) ... */ | ||
57 | #define RB0 %ymm9 | ||
58 | #define RC0 %ymm10 | ||
59 | #define RD0 %ymm11 | ||
60 | #define RA1 %ymm12 /* ... and for the second eight blocks (suffix 1) */ | ||
61 | #define RB1 %ymm13 | ||
62 | #define RC1 %ymm14 | ||
63 | #define RD1 %ymm15 | ||
64 | |||
65 | /* temp regs */ | ||
66 | #define RX0 %ymm0 /* RX/RY receive the g1/g2 results for halves 0 and 1 */ | ||
67 | #define RY0 %ymm1 | ||
68 | #define RX1 %ymm2 | ||
69 | #define RY1 %ymm3 | ||
70 | #define RT0 %ymm4 | ||
71 | #define RIDX %ymm5 /* gather indices */ | ||
72 | |||
73 | #define RX0x %xmm0 /* 128-bit aliases of the temp regs above */ | ||
74 | #define RY0x %xmm1 | ||
75 | #define RX1x %xmm2 | ||
76 | #define RY1x %xmm3 | ||
77 | #define RT0x %xmm4 | ||
78 | |||
79 | /* vpgatherdd mask and '-1' */ | ||
80 | #define RNOT %ymm6 | ||
81 | |||
82 | /* byte mask, (-1 >> 24) */ | ||
83 | #define RBYTE %ymm7 | ||
84 | |||
85 | /********************************************************************** | ||
86 | 16-way AVX2 twofish | ||
87 | **********************************************************************/ | ||
88 | #define init_round_constants() /* set up RNOT/RBYTE and the table, key and whitening base pointers from CTX; clobbers %rax, %r8-%r11, %ymm6, %ymm7 */ \ | ||
89 | vpcmpeqd RNOT, RNOT, RNOT; /* RNOT = all ones */ \ | ||
90 | vpsrld $24, RNOT, RBYTE; /* RBYTE = 0x000000ff in every dword */ \ | ||
91 | leaq k(CTX), RK; \ | ||
92 | leaq w(CTX), RW; \ | ||
93 | leaq s1(CTX), RS1; \ | ||
94 | leaq s2(CTX), RS2; \ | ||
95 | leaq s3(CTX), RS3; /* last line: no trailing backslash, so the macro ends here rather than on the following row */ | ||
96 | |||
97 | #define g16(ab, rs0, rs1, rs2, rs3, xy) /* per 32-bit element: xy = rs0[byte 0] ^ rs1[byte 1] ^ rs2[byte 2] ^ rs3[byte 3] of ab, done for both halves (register suffix 0 and 1); clobbers RIDX and RT0, leaves RNOT all ones */ \ | ||
98 | vpand RBYTE, ab ## 0, RIDX; /* index = lowest byte of each dword */ \ | ||
99 | vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; /* table lookup, 4-byte entries */ \ | ||
100 | vpcmpeqd RNOT, RNOT, RNOT; /* refill the gather mask: vpgatherdd clears its mask operand, so it is reset after every gather */ \ | ||
101 | \ | ||
102 | vpand RBYTE, ab ## 1, RIDX; \ | ||
103 | vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \ | ||
104 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
105 | \ | ||
106 | vpsrld $8, ab ## 0, RIDX; /* second byte */ \ | ||
107 | vpand RBYTE, RIDX, RIDX; \ | ||
108 | vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ | ||
109 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
110 | vpxor RT0, xy ## 0, xy ## 0; \ | ||
111 | \ | ||
112 | vpsrld $8, ab ## 1, RIDX; \ | ||
113 | vpand RBYTE, RIDX, RIDX; \ | ||
114 | vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ | ||
115 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
116 | vpxor RT0, xy ## 1, xy ## 1; \ | ||
117 | \ | ||
118 | vpsrld $16, ab ## 0, RIDX; /* third byte */ \ | ||
119 | vpand RBYTE, RIDX, RIDX; \ | ||
120 | vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ | ||
121 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
122 | vpxor RT0, xy ## 0, xy ## 0; \ | ||
123 | \ | ||
124 | vpsrld $16, ab ## 1, RIDX; \ | ||
125 | vpand RBYTE, RIDX, RIDX; \ | ||
126 | vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ | ||
127 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
128 | vpxor RT0, xy ## 1, xy ## 1; \ | ||
129 | \ | ||
130 | vpsrld $24, ab ## 0, RIDX; /* top byte: the logical shift already leaves only 8 bits, no vpand needed */ \ | ||
131 | vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ | ||
132 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
133 | vpxor RT0, xy ## 0, xy ## 0; \ | ||
134 | \ | ||
135 | vpsrld $24, ab ## 1, RIDX; \ | ||
136 | vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ | ||
137 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
138 | vpxor RT0, xy ## 1, xy ## 1; | ||
139 | |||
140 | #define g1_16(a, x) /* g16 with the tables in their natural order: byte 0 -> s0, byte 1 -> s1, byte 2 -> s2, byte 3 -> s3 */ \ | ||
141 | g16(a, RS0, RS1, RS2, RS3, x); | ||
142 | |||
143 | #define g2_16(b, y) /* g16 with the tables rotated by one: byte 0 -> s1, byte 1 -> s2, byte 2 -> s3, byte 3 -> s0 */ \ | ||
144 | g16(b, RS1, RS2, RS3, RS0, y); | ||
145 | |||
146 | #define encrypt_round_end16(a, b, c, d, nk) /* finish one encryption round for both halves: mix RX/RY, add the two key words at RK + RROUND*8 + nk, then d ^= y and c = ror32(c ^ x, 1); a and b are not used; clobbers RX0/1, RY0/1, RT0 */ \ | ||
147 | vpaddd RY0, RX0, RX0; /* x += y */ \ | ||
148 | vpaddd RX0, RY0, RY0; /* y += x, i.e. y = x_old + 2*y_old */ \ | ||
149 | vpbroadcastd nk(RK,RROUND,8), RT0; /* first key word of this round, replicated to all elements */ \ | ||
150 | vpaddd RT0, RX0, RX0; \ | ||
151 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; /* second key word */ \ | ||
152 | vpaddd RT0, RY0, RY0; \ | ||
153 | \ | ||
154 | vpxor RY0, d ## 0, d ## 0; /* d was already rotated left by one by the previous round macro */ \ | ||
155 | \ | ||
156 | vpxor RX0, c ## 0, c ## 0; \ | ||
157 | vpsrld $1, c ## 0, RT0; /* rotate right by 1 built from two shifts and an or */ \ | ||
158 | vpslld $31, c ## 0, c ## 0; \ | ||
159 | vpor RT0, c ## 0, c ## 0; \ | ||
160 | \ | ||
161 | vpaddd RY1, RX1, RX1; /* same sequence for the second half */ \ | ||
162 | vpaddd RX1, RY1, RY1; \ | ||
163 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
164 | vpaddd RT0, RX1, RX1; \ | ||
165 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
166 | vpaddd RT0, RY1, RY1; \ | ||
167 | \ | ||
168 | vpxor RY1, d ## 1, d ## 1; \ | ||
169 | \ | ||
170 | vpxor RX1, c ## 1, c ## 1; \ | ||
171 | vpsrld $1, c ## 1, RT0; \ | ||
172 | vpslld $31, c ## 1, c ## 1; \ | ||
173 | vpor RT0, c ## 1, c ## 1; /* last line: no trailing backslash, matching decrypt_round_end16 */ | ||
174 | |||
175 | #define encrypt_round16(a, b, c, d, nk) /* one encryption round on both halves; also rotates b left by one because b is passed as d to the following round (see the argument order in the cycle macros) */ \ | ||
176 | g2_16(b, RY); /* RY0/RY1 = g2(b), computed before b is rotated */ \ | ||
177 | \ | ||
178 | vpslld $1, b ## 0, RT0; /* b = rol32(b, 1) */ \ | ||
179 | vpsrld $31, b ## 0, b ## 0; \ | ||
180 | vpor RT0, b ## 0, b ## 0; \ | ||
181 | \ | ||
182 | vpslld $1, b ## 1, RT0; \ | ||
183 | vpsrld $31, b ## 1, b ## 1; \ | ||
184 | vpor RT0, b ## 1, b ## 1; \ | ||
185 | \ | ||
186 | g1_16(a, RX); /* RX0/RX1 = g1(a) */ \ | ||
187 | \ | ||
188 | encrypt_round_end16(a, b, c, d, nk); | ||
189 | |||
190 | #define encrypt_round_first16(a, b, c, d, nk) /* first round: no earlier round has pre-rotated d, so rotate it left by one here and then run a normal round */ \ | ||
191 | vpslld $1, d ## 0, RT0; /* d = rol32(d, 1) */ \ | ||
192 | vpsrld $31, d ## 0, d ## 0; \ | ||
193 | vpor RT0, d ## 0, d ## 0; \ | ||
194 | \ | ||
195 | vpslld $1, d ## 1, RT0; \ | ||
196 | vpsrld $31, d ## 1, d ## 1; \ | ||
197 | vpor RT0, d ## 1, d ## 1; \ | ||
198 | \ | ||
199 | encrypt_round16(a, b, c, d, nk); | ||
200 | |||
201 | #define encrypt_round_last16(a, b, c, d, nk) /* last round: same as encrypt_round16 but without rotating b, since no round follows */ \ | ||
202 | g2_16(b, RY); \ | ||
203 | \ | ||
204 | g1_16(a, RX); \ | ||
205 | \ | ||
206 | encrypt_round_end16(a, b, c, d, nk); | ||
207 | |||
208 | #define decrypt_round_end16(a, b, c, d, nk) /* finish one decryption round for both halves: mix RX/RY, add the two key words at RK + RROUND*8 + nk, then c ^= x and d = ror32(d ^ y, 1); a and b are not used; clobbers RX0/1, RY0/1, RT0 */ \ | ||
209 | vpaddd RY0, RX0, RX0; /* x += y */ \ | ||
210 | vpaddd RX0, RY0, RY0; /* y += x, i.e. y = x_old + 2*y_old */ \ | ||
211 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
212 | vpaddd RT0, RX0, RX0; \ | ||
213 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
214 | vpaddd RT0, RY0, RY0; \ | ||
215 | \ | ||
216 | vpxor RX0, c ## 0, c ## 0; /* c was already rotated left by one by the previous round macro */ \ | ||
217 | \ | ||
218 | vpxor RY0, d ## 0, d ## 0; \ | ||
219 | vpsrld $1, d ## 0, RT0; /* rotate right by 1 */ \ | ||
220 | vpslld $31, d ## 0, d ## 0; \ | ||
221 | vpor RT0, d ## 0, d ## 0; \ | ||
222 | \ | ||
223 | vpaddd RY1, RX1, RX1; /* same sequence for the second half */ \ | ||
224 | vpaddd RX1, RY1, RY1; \ | ||
225 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
226 | vpaddd RT0, RX1, RX1; \ | ||
227 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
228 | vpaddd RT0, RY1, RY1; \ | ||
229 | \ | ||
230 | vpxor RX1, c ## 1, c ## 1; \ | ||
231 | \ | ||
232 | vpxor RY1, d ## 1, d ## 1; \ | ||
233 | vpsrld $1, d ## 1, RT0; \ | ||
234 | vpslld $31, d ## 1, d ## 1; \ | ||
235 | vpor RT0, d ## 1, d ## 1; | ||
236 | |||
237 | #define decrypt_round16(a, b, c, d, nk) /* one decryption round on both halves; also rotates a left by one because a is passed as c to the following round (see the argument order in the cycle macros) */ \ | ||
238 | g1_16(a, RX); /* RX0/RX1 = g1(a), computed before a is rotated */ \ | ||
239 | \ | ||
240 | vpslld $1, a ## 0, RT0; /* a = rol32(a, 1) */ \ | ||
241 | vpsrld $31, a ## 0, a ## 0; \ | ||
242 | vpor RT0, a ## 0, a ## 0; \ | ||
243 | \ | ||
244 | vpslld $1, a ## 1, RT0; \ | ||
245 | vpsrld $31, a ## 1, a ## 1; \ | ||
246 | vpor RT0, a ## 1, a ## 1; \ | ||
247 | \ | ||
248 | g2_16(b, RY); /* RY0/RY1 = g2(b) */ \ | ||
249 | \ | ||
250 | decrypt_round_end16(a, b, c, d, nk); | ||
251 | |||
252 | #define decrypt_round_first16(a, b, c, d, nk) /* first decryption round: no earlier round has pre-rotated c, so rotate it left by one here and then run a normal round */ \ | ||
253 | vpslld $1, c ## 0, RT0; /* c = rol32(c, 1) */ \ | ||
254 | vpsrld $31, c ## 0, c ## 0; \ | ||
255 | vpor RT0, c ## 0, c ## 0; \ | ||
256 | \ | ||
257 | vpslld $1, c ## 1, RT0; \ | ||
258 | vpsrld $31, c ## 1, c ## 1; \ | ||
259 | vpor RT0, c ## 1, c ## 1; \ | ||
260 | \ | ||
261 | decrypt_round16(a, b, c, d, nk) | ||
262 | |||
263 | #define decrypt_round_last16(a, b, c, d, nk) /* last decryption round: same as decrypt_round16 but without rotating a, since no round follows */ \ | ||
264 | g1_16(a, RX); \ | ||
265 | \ | ||
266 | g2_16(b, RY); \ | ||
267 | \ | ||
268 | decrypt_round_end16(a, b, c, d, nk); | ||
269 | |||
270 | #define encrypt_cycle16() /* two rounds: the second one swaps the (A,B) and (C,D) roles and uses the key words 8 bytes further on */ \ | ||
271 | encrypt_round16(RA, RB, RC, RD, 0); \ | ||
272 | encrypt_round16(RC, RD, RA, RB, 8); | ||
273 | |||
274 | #define encrypt_cycle_first16() /* opening cycle: its first round also performs the initial rotation of D */ \ | ||
275 | encrypt_round_first16(RA, RB, RC, RD, 0); \ | ||
276 | encrypt_round16(RC, RD, RA, RB, 8); | ||
277 | |||
278 | #define encrypt_cycle_last16() /* closing cycle: its second round skips the rotation meant for a following round */ \ | ||
279 | encrypt_round16(RA, RB, RC, RD, 0); \ | ||
280 | encrypt_round_last16(RC, RD, RA, RB, 8); | ||
281 | |||
282 | #define decrypt_cycle16(n) /* two rounds in the reverse order of encrypt_cycle16: key offset 8 first, then 0; the parameter n is not used in the body */ \ | ||
283 | decrypt_round16(RC, RD, RA, RB, 8); \ | ||
284 | decrypt_round16(RA, RB, RC, RD, 0); | ||
285 | |||
286 | #define decrypt_cycle_first16(n) /* opening decryption cycle: its first round also performs the initial rotation; n is not used */ \ | ||
287 | decrypt_round_first16(RC, RD, RA, RB, 8); \ | ||
288 | decrypt_round16(RA, RB, RC, RD, 0); | ||
289 | |||
290 | #define decrypt_cycle_last16(n) /* closing decryption cycle: its second round skips the rotation meant for a following round; n is not used */ \ | ||
291 | decrypt_round16(RC, RD, RA, RB, 8); \ | ||
292 | decrypt_round_last16(RA, RB, RC, RD, 0); | ||
293 | |||
294 | #define transpose_4x4(x0,x1,x2,x3,t1,t2) /* transpose the 4x4 matrix of 32-bit words formed by x0..x3, in place, using dword and qword unpacks; t1 and t2 are clobbered */ \ | ||
295 | vpunpckhdq x1, x0, t2; /* interleave the upper dwords of x0 and x1 */ \ | ||
296 | vpunpckldq x1, x0, x0; /* interleave the lower dwords of x0 and x1 */ \ | ||
297 | \ | ||
298 | vpunpckldq x3, x2, t1; /* same for x2 and x3 */ \ | ||
299 | vpunpckhdq x3, x2, x2; \ | ||
300 | \ | ||
301 | vpunpckhqdq t1, x0, x1; /* combine the 64-bit pairs into the final rows */ \ | ||
302 | vpunpcklqdq t1, x0, x0; \ | ||
303 | \ | ||
304 | vpunpckhqdq x2, t2, x3; \ | ||
305 | vpunpcklqdq x2, t2, x2; | ||
306 | |||
307 | #define read_blocks8(offs,a,b,c,d) /* regroup loaded blocks so that a, b, c, d each hold one word position of every block; offs is not used; clobbers RX0 and RY0 */ \ | ||
308 | transpose_4x4(a, b, c, d, RX0, RY0); | ||
309 | |||
310 | #define write_blocks8(offs,a,b,c,d) /* inverse regrouping before the results are stored (the transpose is its own inverse); offs is not used; clobbers RX0 and RY0 */ \ | ||
311 | transpose_4x4(a, b, c, d, RX0, RY0); | ||
312 | |||
313 | #define inpack_enc8(a,b,c,d) /* xor a, b, c, d with the 32-bit words 0..3 at RW, each replicated to every element; clobbers RT0 */ \ | ||
314 | vpbroadcastd 4*0(RW), RT0; \ | ||
315 | vpxor RT0, a, a; \ | ||
316 | \ | ||
317 | vpbroadcastd 4*1(RW), RT0; \ | ||
318 | vpxor RT0, b, b; \ | ||
319 | \ | ||
320 | vpbroadcastd 4*2(RW), RT0; \ | ||
321 | vpxor RT0, c, c; \ | ||
322 | \ | ||
323 | vpbroadcastd 4*3(RW), RT0; \ | ||
324 | vpxor RT0, d, d; | ||
325 | |||
326 | #define outunpack_enc8(a,b,c,d) /* swap the (a,b) and (c,d) pairs while xoring with words 4..7 at RW: a = c ^ w4, b = d ^ w5, c = a ^ w6, d = b ^ w7; clobbers RX0, RY0, RT0 */ \ | ||
327 | vpbroadcastd 4*4(RW), RX0; \ | ||
328 | vpbroadcastd 4*5(RW), RY0; \ | ||
329 | vpxor RX0, c, RX0; /* new a, parked in RX0 until the old a has been consumed */ \ | ||
330 | vpxor RY0, d, RY0; /* new b, parked in RY0 */ \ | ||
331 | \ | ||
332 | vpbroadcastd 4*6(RW), RT0; \ | ||
333 | vpxor RT0, a, c; \ | ||
334 | vpbroadcastd 4*7(RW), RT0; \ | ||
335 | vpxor RT0, b, d; \ | ||
336 | \ | ||
337 | vmovdqa RX0, a; \ | ||
338 | vmovdqa RY0, b; | ||
339 | |||
340 | #define inpack_dec8(a,b,c,d) /* inverse of outunpack_enc8: a = c ^ w6, b = d ^ w7, c = a ^ w4, d = b ^ w5 (w = 32-bit words at RW); clobbers RX0, RY0, RT0 */ \ | ||
341 | vpbroadcastd 4*4(RW), RX0; \ | ||
342 | vpbroadcastd 4*5(RW), RY0; \ | ||
343 | vpxor RX0, a, RX0; /* new c, parked in RX0 until the old c has been consumed */ \ | ||
344 | vpxor RY0, b, RY0; /* new d, parked in RY0 */ \ | ||
345 | \ | ||
346 | vpbroadcastd 4*6(RW), RT0; \ | ||
347 | vpxor RT0, c, a; \ | ||
348 | vpbroadcastd 4*7(RW), RT0; \ | ||
349 | vpxor RT0, d, b; \ | ||
350 | \ | ||
351 | vmovdqa RX0, c; \ | ||
352 | vmovdqa RY0, d; | ||
353 | |||
354 | #define outunpack_dec8(a,b,c,d) /* xor a, b, c, d with the 32-bit words 0..3 at RW (same operation as inpack_enc8); clobbers RT0 */ \ | ||
355 | vpbroadcastd 4*0(RW), RT0; \ | ||
356 | vpxor RT0, a, a; \ | ||
357 | \ | ||
358 | vpbroadcastd 4*1(RW), RT0; \ | ||
359 | vpxor RT0, b, b; \ | ||
360 | \ | ||
361 | vpbroadcastd 4*2(RW), RT0; \ | ||
362 | vpxor RT0, c, c; \ | ||
363 | \ | ||
364 | vpbroadcastd 4*3(RW), RT0; \ | ||
365 | vpxor RT0, d, d; | ||
366 | |||
367 | #define read_blocks16(a,b,c,d) /* apply read_blocks8 to both register halves (suffix 0 and suffix 1) */ \ | ||
368 | read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
369 | read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); | ||
370 | |||
371 | #define write_blocks16(a,b,c,d) /* apply write_blocks8 to both register halves */ \ | ||
372 | write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
373 | write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); | ||
374 | |||
375 | #define xor_blocks16(a,b,c,d) /* NOTE(review): expands to xor_blocks8, which is not defined in the visible part of this file, and nothing visible invokes this macro - looks like a leftover */ \ | ||
376 | xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
377 | xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1); | ||
378 | |||
379 | #define inpack_enc16(a,b,c,d) /* inpack_enc8 on both register halves */ \ | ||
380 | inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
381 | inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); | ||
382 | |||
383 | #define outunpack_enc16(a,b,c,d) /* outunpack_enc8 on both register halves */ \ | ||
384 | outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
385 | outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1); | ||
386 | |||
387 | #define inpack_dec16(a,b,c,d) /* inpack_dec8 on both register halves */ \ | ||
388 | inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
389 | inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); | ||
390 | |||
391 | #define outunpack_dec16(a,b,c,d) /* outunpack_dec8 on both register halves */ \ | ||
392 | outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \ | ||
393 | outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1); | ||
394 | |||
395 | .align 8 | ||
396 | __twofish_enc_blk16: | ||
397 | /* Encrypt 16 blocks held in registers (internal helper, reached by call from the entry points below). input: | ||
398 | * %rdi: ctx, CTX | ||
399 | * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext | ||
400 | * output: | ||
401 | * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext | ||
402 | * clobbers: %rax, %r8-%r11, %r12 (not saved here; callers push/pop it), %ymm0-%ymm7, flags */ | ||
403 | init_round_constants(); | ||
404 | |||
405 | read_blocks16(RA, RB, RC, RD); | ||
406 | inpack_enc16(RA, RB, RC, RD); | ||
407 | |||
408 | xorl RROUNDd, RROUNDd; /* RROUND = 0: the first cycle uses the key words at offset 0 */ | ||
409 | encrypt_cycle_first16(); | ||
410 | movl $2, RROUNDd; /* RROUND advances by 2 per cycle (2 rounds x 8 bytes, index scaled by 8) */ | ||
411 | |||
412 | .align 4 | ||
413 | .L__enc_loop: | ||
414 | encrypt_cycle16(); /* middle cycles: RROUND = 2, 4, ..., 12 */ | ||
415 | |||
416 | addl $2, RROUNDd; | ||
417 | cmpl $14, RROUNDd; | ||
418 | jne .L__enc_loop; | ||
419 | |||
420 | encrypt_cycle_last16(); /* RROUND = 14: eighth and final cycle */ | ||
421 | |||
422 | outunpack_enc16(RA, RB, RC, RD); | ||
423 | write_blocks16(RA, RB, RC, RD); | ||
424 | |||
425 | ret; | ||
426 | ENDPROC(__twofish_enc_blk16) | ||
427 | |||
428 | .align 8 | ||
429 | __twofish_dec_blk16: | ||
430 | /* Decrypt 16 blocks held in registers (internal helper, reached by call from the entry points below). input: | ||
431 | * %rdi: ctx, CTX | ||
432 | * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext | ||
433 | * output: | ||
434 | * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext | ||
435 | * clobbers: %rax, %r8-%r11, %r12 (not saved here; callers push/pop it), %ymm0-%ymm7, flags */ | ||
436 | init_round_constants(); | ||
437 | |||
438 | read_blocks16(RA, RB, RC, RD); | ||
439 | inpack_dec16(RA, RB, RC, RD); | ||
440 | |||
441 | movl $14, RROUNDd; /* decryption walks the key words backwards, starting with the last cycle */ | ||
442 | decrypt_cycle_first16(); | ||
443 | movl $12, RROUNDd; | ||
444 | |||
445 | .align 4 | ||
446 | .L__dec_loop: | ||
447 | decrypt_cycle16(); /* middle cycles: RROUND = 12, 10, ..., 2 */ | ||
448 | |||
449 | addl $-2, RROUNDd; /* also sets ZF once RROUND reaches 0 */ | ||
450 | jnz .L__dec_loop; | ||
451 | |||
452 | decrypt_cycle_last16(); /* RROUND = 0: final cycle */ | ||
453 | |||
454 | outunpack_dec16(RA, RB, RC, RD); | ||
455 | write_blocks16(RA, RB, RC, RD); | ||
456 | |||
457 | ret; | ||
458 | ENDPROC(__twofish_dec_blk16) | ||
459 | |||
460 | ENTRY(twofish_ecb_enc_16way) | ||
461 | /* ECB-encrypt 256 bytes (16 blocks) from src to dst. input: | ||
462 | * %rdi: ctx, CTX | ||
463 | * %rsi: dst | ||
464 | * %rdx: src | ||
465 | */ | ||
466 | |||
467 | vzeroupper; | ||
468 | pushq %r12; /* callee-saved; the block helper uses it as RROUND */ | ||
469 | |||
470 | load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
471 | |||
472 | call __twofish_enc_blk16; | ||
473 | |||
474 | store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
475 | |||
476 | popq %r12; | ||
477 | vzeroupper; /* clear the upper ymm halves before returning to the caller */ | ||
478 | |||
479 | ret; | ||
480 | ENDPROC(twofish_ecb_enc_16way) | ||
481 | |||
482 | ENTRY(twofish_ecb_dec_16way) | ||
483 | /* ECB-decrypt 256 bytes (16 blocks) from src to dst. input: | ||
484 | * %rdi: ctx, CTX | ||
485 | * %rsi: dst | ||
486 | * %rdx: src | ||
487 | */ | ||
488 | |||
489 | vzeroupper; | ||
490 | pushq %r12; /* callee-saved; the block helper uses it as RROUND */ | ||
491 | |||
492 | load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
493 | |||
494 | call __twofish_dec_blk16; | ||
495 | |||
496 | store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
497 | |||
498 | popq %r12; | ||
499 | vzeroupper; /* clear the upper ymm halves before returning to the caller */ | ||
500 | |||
501 | ret; | ||
502 | ENDPROC(twofish_ecb_dec_16way) | ||
503 | |||
504 | ENTRY(twofish_cbc_dec_16way) | ||
505 | /* CBC-decrypt 16 blocks: decrypt, then xor blocks 1..15 with src blocks 0..14. input: | ||
506 | * %rdi: ctx, CTX | ||
507 | * %rsi: dst | ||
508 | * %rdx: src (read again after decryption by store_cbc_16way; NOTE(review): block 0 is not xored with an IV here - presumably done by the caller, confirm) | ||
509 | */ | ||
510 | |||
511 | vzeroupper; | ||
512 | pushq %r12; /* callee-saved; the block helper uses it as RROUND */ | ||
513 | |||
514 | load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
515 | |||
516 | call __twofish_dec_blk16; | ||
517 | |||
518 | store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1, | ||
519 | RX0); | ||
520 | |||
521 | popq %r12; | ||
522 | vzeroupper; | ||
523 | |||
524 | ret; | ||
525 | ENDPROC(twofish_cbc_dec_16way) | ||
526 | |||
527 | ENTRY(twofish_ctr_16way) | ||
528 | /* CTR mode over 16 blocks: encrypt 16 consecutive counter values and xor them with src. input: | ||
529 | * %rdi: ctx, CTX | ||
530 | * %rsi: dst (16 blocks) | ||
531 | * %rdx: src (16 blocks) | ||
532 | * %rcx: iv (little endian, 128bit); updated in place to the counter following the 16 used here | ||
533 | */ | ||
534 | |||
535 | vzeroupper; | ||
536 | pushq %r12; /* callee-saved; the block helper uses it as RROUND */ | ||
537 | |||
538 | load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1, | ||
539 | RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT, | ||
540 | RBYTE); /* RNOT and RBYTE serve as scratch here; the block helper re-initialises them */ | ||
541 | |||
542 | call __twofish_enc_blk16; | ||
543 | |||
544 | store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
545 | |||
546 | popq %r12; | ||
547 | vzeroupper; | ||
548 | |||
549 | ret; | ||
550 | ENDPROC(twofish_ctr_16way) | ||
551 | |||
552 | .align 8 | ||
553 | twofish_xts_crypt_16way: | ||
554 | /* Shared XTS body, reached by jmp from twofish_xts_enc_16way / twofish_xts_dec_16way. input: | ||
555 | * %rdi: ctx, CTX | ||
556 | * %rsi: dst (16 blocks); also used as temporary storage for the 16 tweaks | ||
557 | * %rdx: src (16 blocks) | ||
558 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)); updated in place to the tweak following the 16 used here | ||
559 | * %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16 | ||
560 | */ | ||
561 | |||
562 | vzeroupper; | ||
563 | pushq %r12; /* callee-saved; the block helper uses it as RROUND */ | ||
564 | |||
565 | load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, | ||
566 | RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT, | ||
567 | .Lxts_gf128mul_and_shl1_mask_0, | ||
568 | .Lxts_gf128mul_and_shl1_mask_1); | ||
569 | |||
570 | call *%r8; /* the helper reloads %r8 as RS1 only after control has transferred */ | ||
571 | |||
572 | store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1); | ||
573 | |||
574 | popq %r12; | ||
575 | vzeroupper; | ||
576 | |||
577 | ret; | ||
578 | ENDPROC(twofish_xts_crypt_16way) | ||
579 | |||
580 | ENTRY(twofish_xts_enc_16way) | ||
581 | /* XTS-encrypt 16 blocks: select the encryption helper and tail-jump into the shared body. input: | ||
582 | * %rdi: ctx, CTX | ||
583 | * %rsi: dst (16 blocks) | ||
584 | * %rdx: src (16 blocks) | ||
585 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
586 | */ | ||
587 | leaq __twofish_enc_blk16(%rip), %r8; /* RIP-relative: no absolute address relocation needed */ | ||
588 | jmp twofish_xts_crypt_16way; /* tail call; the shared body returns to our caller */ | ||
589 | ENDPROC(twofish_xts_enc_16way) | ||
590 | |||
591 | ENTRY(twofish_xts_dec_16way) | ||
592 | /* XTS-decrypt 16 blocks: select the decryption helper and tail-jump into the shared body. input: | ||
593 | * %rdi: ctx, CTX | ||
594 | * %rsi: dst (16 blocks) | ||
595 | * %rdx: src (16 blocks) | ||
596 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
597 | */ | ||
598 | leaq __twofish_dec_blk16(%rip), %r8; /* RIP-relative: no absolute address relocation needed */ | ||
599 | jmp twofish_xts_crypt_16way; /* tail call; the shared body returns to our caller */ | ||
600 | ENDPROC(twofish_xts_dec_16way) | ||
diff --git a/arch/x86/crypto/twofish_avx2_glue.c b/arch/x86/crypto/twofish_avx2_glue.c new file mode 100644 index 000000000000..ce33b5be64ee --- /dev/null +++ b/arch/x86/crypto/twofish_avx2_glue.c | |||
@@ -0,0 +1,584 @@ | |||
1 | /* | ||
2 | * Glue Code for x86_64/AVX2 assembler optimized version of Twofish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/crypto.h> | ||
16 | #include <linux/err.h> | ||
17 | #include <crypto/algapi.h> | ||
18 | #include <crypto/ctr.h> | ||
19 | #include <crypto/twofish.h> | ||
20 | #include <crypto/lrw.h> | ||
21 | #include <crypto/xts.h> | ||
22 | #include <asm/xcr.h> | ||
23 | #include <asm/xsave.h> | ||
24 | #include <asm/crypto/twofish.h> | ||
25 | #include <asm/crypto/ablk_helper.h> | ||
26 | #include <asm/crypto/glue_helper.h> | ||
27 | #include <crypto/scatterwalk.h> | ||
28 | |||
29 | #define TF_AVX2_PARALLEL_BLOCKS 16 | ||
30 | |||
31 | /* 16-way AVX2 parallel cipher functions */ | ||
32 | asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst, | ||
33 | const u8 *src); | ||
34 | asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst, | ||
35 | const u8 *src); | ||
36 | asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src); | ||
37 | |||
38 | asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src, | ||
39 | le128 *iv); | ||
40 | |||
41 | asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst, | ||
42 | const u8 *src, le128 *iv); | ||
43 | asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst, | ||
44 | const u8 *src, le128 *iv); | ||
45 | |||
46 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
47 | const u8 *src) | ||
48 | { | ||
49 | __twofish_enc_blk_3way(ctx, dst, src, false); | ||
50 | } | ||
51 | |||
52 | static const struct common_glue_ctx twofish_enc = { | ||
53 | .num_funcs = 4, | ||
54 | .fpu_blocks_limit = 8, | ||
55 | |||
56 | .funcs = { { | ||
57 | .num_blocks = 16, | ||
58 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) } | ||
59 | }, { | ||
60 | .num_blocks = 8, | ||
61 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) } | ||
62 | }, { | ||
63 | .num_blocks = 3, | ||
64 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
65 | }, { | ||
66 | .num_blocks = 1, | ||
67 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
68 | } } | ||
69 | }; | ||
70 | |||
71 | static const struct common_glue_ctx twofish_ctr = { | ||
72 | .num_funcs = 4, | ||
73 | .fpu_blocks_limit = 8, | ||
74 | |||
75 | .funcs = { { | ||
76 | .num_blocks = 16, | ||
77 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) } | ||
78 | }, { | ||
79 | .num_blocks = 8, | ||
80 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) } | ||
81 | }, { | ||
82 | .num_blocks = 3, | ||
83 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } | ||
84 | }, { | ||
85 | .num_blocks = 1, | ||
86 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } | ||
87 | } } | ||
88 | }; | ||
89 | |||
90 | static const struct common_glue_ctx twofish_enc_xts = { | ||
91 | .num_funcs = 3, | ||
92 | .fpu_blocks_limit = 8, | ||
93 | |||
94 | .funcs = { { | ||
95 | .num_blocks = 16, | ||
96 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) } | ||
97 | }, { | ||
98 | .num_blocks = 8, | ||
99 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) } | ||
100 | }, { | ||
101 | .num_blocks = 1, | ||
102 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) } | ||
103 | } } | ||
104 | }; | ||
105 | |||
106 | static const struct common_glue_ctx twofish_dec = { | ||
107 | .num_funcs = 4, | ||
108 | .fpu_blocks_limit = 8, | ||
109 | |||
110 | .funcs = { { | ||
111 | .num_blocks = 16, | ||
112 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) } | ||
113 | }, { | ||
114 | .num_blocks = 8, | ||
115 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) } | ||
116 | }, { | ||
117 | .num_blocks = 3, | ||
118 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
119 | }, { | ||
120 | .num_blocks = 1, | ||
121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
122 | } } | ||
123 | }; | ||
124 | |||
125 | static const struct common_glue_ctx twofish_dec_cbc = { | ||
126 | .num_funcs = 4, | ||
127 | .fpu_blocks_limit = 8, | ||
128 | |||
129 | .funcs = { { | ||
130 | .num_blocks = 16, | ||
131 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) } | ||
132 | }, { | ||
133 | .num_blocks = 8, | ||
134 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) } | ||
135 | }, { | ||
136 | .num_blocks = 3, | ||
137 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
138 | }, { | ||
139 | .num_blocks = 1, | ||
140 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
141 | } } | ||
142 | }; | ||
143 | |||
144 | static const struct common_glue_ctx twofish_dec_xts = { | ||
145 | .num_funcs = 3, | ||
146 | .fpu_blocks_limit = 8, | ||
147 | |||
148 | .funcs = { { | ||
149 | .num_blocks = 16, | ||
150 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) } | ||
151 | }, { | ||
152 | .num_blocks = 8, | ||
153 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) } | ||
154 | }, { | ||
155 | .num_blocks = 1, | ||
156 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) } | ||
157 | } } | ||
158 | }; | ||
159 | |||
160 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
161 | struct scatterlist *src, unsigned int nbytes) | ||
162 | { | ||
163 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); | ||
164 | } | ||
165 | |||
166 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
167 | struct scatterlist *src, unsigned int nbytes) | ||
168 | { | ||
169 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); | ||
170 | } | ||
171 | |||
172 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
173 | struct scatterlist *src, unsigned int nbytes) | ||
174 | { | ||
175 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, | ||
176 | dst, src, nbytes); | ||
177 | } | ||
178 | |||
179 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
180 | struct scatterlist *src, unsigned int nbytes) | ||
181 | { | ||
182 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, | ||
183 | nbytes); | ||
184 | } | ||
185 | |||
186 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
187 | struct scatterlist *src, unsigned int nbytes) | ||
188 | { | ||
189 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); | ||
190 | } | ||
191 | |||
192 | static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
193 | { | ||
194 | /* since reusing AVX functions, starts using FPU at 8 parallel blocks */ | ||
195 | return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes); | ||
196 | } | ||
197 | |||
/*
 * Counterpart of twofish_fpu_begin(): hands the tracked fpu_enabled state to
 * glue_fpu_end().
 */
static inline void twofish_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
202 | |||
/* Private state handed to encrypt_callback()/decrypt_callback() by the LRW code. */
struct crypt_priv {
	/* cipher context passed to the twofish_* block routines */
	struct twofish_ctx *ctx;
	/* result of the last twofish_fpu_begin(); consumed by twofish_fpu_end() */
	bool fpu_enabled;
};
207 | |||
208 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
209 | { | ||
210 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
211 | struct crypt_priv *ctx = priv; | ||
212 | int i; | ||
213 | |||
214 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
215 | |||
216 | while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { | ||
217 | twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst); | ||
218 | srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
219 | nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
220 | } | ||
221 | |||
222 | while (nbytes >= 8 * bsize) { | ||
223 | twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); | ||
224 | srcdst += bsize * 8; | ||
225 | nbytes -= bsize * 8; | ||
226 | } | ||
227 | |||
228 | while (nbytes >= 3 * bsize) { | ||
229 | twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); | ||
230 | srcdst += bsize * 3; | ||
231 | nbytes -= bsize * 3; | ||
232 | } | ||
233 | |||
234 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
235 | twofish_enc_blk(ctx->ctx, srcdst, srcdst); | ||
236 | } | ||
237 | |||
238 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
239 | { | ||
240 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
241 | struct crypt_priv *ctx = priv; | ||
242 | int i; | ||
243 | |||
244 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
245 | |||
246 | while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { | ||
247 | twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst); | ||
248 | srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
249 | nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
250 | } | ||
251 | |||
252 | while (nbytes >= 8 * bsize) { | ||
253 | twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); | ||
254 | srcdst += bsize * 8; | ||
255 | nbytes -= bsize * 8; | ||
256 | } | ||
257 | |||
258 | while (nbytes >= 3 * bsize) { | ||
259 | twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); | ||
260 | srcdst += bsize * 3; | ||
261 | nbytes -= bsize * 3; | ||
262 | } | ||
263 | |||
264 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
265 | twofish_dec_blk(ctx->ctx, srcdst, srcdst); | ||
266 | } | ||
267 | |||
268 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
269 | struct scatterlist *src, unsigned int nbytes) | ||
270 | { | ||
271 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
272 | be128 buf[TF_AVX2_PARALLEL_BLOCKS]; | ||
273 | struct crypt_priv crypt_ctx = { | ||
274 | .ctx = &ctx->twofish_ctx, | ||
275 | .fpu_enabled = false, | ||
276 | }; | ||
277 | struct lrw_crypt_req req = { | ||
278 | .tbuf = buf, | ||
279 | .tbuflen = sizeof(buf), | ||
280 | |||
281 | .table_ctx = &ctx->lrw_table, | ||
282 | .crypt_ctx = &crypt_ctx, | ||
283 | .crypt_fn = encrypt_callback, | ||
284 | }; | ||
285 | int ret; | ||
286 | |||
287 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
288 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
289 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
290 | |||
291 | return ret; | ||
292 | } | ||
293 | |||
294 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
295 | struct scatterlist *src, unsigned int nbytes) | ||
296 | { | ||
297 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
298 | be128 buf[TF_AVX2_PARALLEL_BLOCKS]; | ||
299 | struct crypt_priv crypt_ctx = { | ||
300 | .ctx = &ctx->twofish_ctx, | ||
301 | .fpu_enabled = false, | ||
302 | }; | ||
303 | struct lrw_crypt_req req = { | ||
304 | .tbuf = buf, | ||
305 | .tbuflen = sizeof(buf), | ||
306 | |||
307 | .table_ctx = &ctx->lrw_table, | ||
308 | .crypt_ctx = &crypt_ctx, | ||
309 | .crypt_fn = decrypt_callback, | ||
310 | }; | ||
311 | int ret; | ||
312 | |||
313 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
314 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
315 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
316 | |||
317 | return ret; | ||
318 | } | ||
319 | |||
320 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
321 | struct scatterlist *src, unsigned int nbytes) | ||
322 | { | ||
323 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
324 | |||
325 | return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes, | ||
326 | XTS_TWEAK_CAST(twofish_enc_blk), | ||
327 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
328 | } | ||
329 | |||
330 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
331 | struct scatterlist *src, unsigned int nbytes) | ||
332 | { | ||
333 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
334 | |||
335 | return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes, | ||
336 | XTS_TWEAK_CAST(twofish_enc_blk), | ||
337 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
338 | } | ||
339 | |||
/*
 * Algorithms registered by this module.
 *
 * Entries 0-4 are synchronous blkcipher implementations with "__"-prefixed
 * names and priority 0; their handlers are the functions defined in this
 * file.  Entries 5-9 carry the public names ("ecb(twofish)", ...) with
 * priority 500 and are asynchronous ablkcipher algorithms built on the
 * ablk_* helpers.
 * NOTE(review): presumably ablk_init() binds each public entry to its
 * "__driver-*" counterpart -- confirm in the ablk helper.
 */
static struct crypto_alg tf_algs[10] = { {
	/* internal ECB */
	.cra_name = "__ecb-twofish-avx2",
	.cra_driver_name = "__driver-ecb-twofish-avx2",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = TF_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct twofish_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE,
			.max_keysize = TF_MAX_KEY_SIZE,
			.setkey = twofish_setkey,
			.encrypt = ecb_encrypt,
			.decrypt = ecb_decrypt,
		},
	},
}, {
	/* internal CBC */
	.cra_name = "__cbc-twofish-avx2",
	.cra_driver_name = "__driver-cbc-twofish-avx2",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = TF_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct twofish_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE,
			.max_keysize = TF_MAX_KEY_SIZE,
			.setkey = twofish_setkey,
			.encrypt = cbc_encrypt,
			.decrypt = cbc_decrypt,
		},
	},
}, {
	/* internal CTR: block size 1, same handler for both directions */
	.cra_name = "__ctr-twofish-avx2",
	.cra_driver_name = "__driver-ctr-twofish-avx2",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = 1,
	.cra_ctxsize = sizeof(struct twofish_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE,
			.max_keysize = TF_MAX_KEY_SIZE,
			.ivsize = TF_BLOCK_SIZE,
			.setkey = twofish_setkey,
			.encrypt = ctr_crypt,
			.decrypt = ctr_crypt,
		},
	},
}, {
	/* internal LRW: key carries one extra block, context needs an exit hook */
	.cra_name = "__lrw-twofish-avx2",
	.cra_driver_name = "__driver-lrw-twofish-avx2",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = TF_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct twofish_lrw_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_exit = lrw_twofish_exit_tfm,
	.cra_u = {
		.blkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE +
				       TF_BLOCK_SIZE,
			.max_keysize = TF_MAX_KEY_SIZE +
				       TF_BLOCK_SIZE,
			.ivsize = TF_BLOCK_SIZE,
			.setkey = lrw_twofish_setkey,
			.encrypt = lrw_encrypt,
			.decrypt = lrw_decrypt,
		},
	},
}, {
	/* internal XTS: key size is doubled (tweak key + data key) */
	.cra_name = "__xts-twofish-avx2",
	.cra_driver_name = "__driver-xts-twofish-avx2",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = TF_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct twofish_xts_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE * 2,
			.max_keysize = TF_MAX_KEY_SIZE * 2,
			.ivsize = TF_BLOCK_SIZE,
			.setkey = xts_twofish_setkey,
			.encrypt = xts_encrypt,
			.decrypt = xts_decrypt,
		},
	},
}, {
	/* public async ECB */
	.cra_name = "ecb(twofish)",
	.cra_driver_name = "ecb-twofish-avx2",
	.cra_priority = 500,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = TF_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE,
			.max_keysize = TF_MAX_KEY_SIZE,
			.setkey = ablk_set_key,
			.encrypt = ablk_encrypt,
			.decrypt = ablk_decrypt,
		},
	},
}, {
	/*
	 * public async CBC; encryption uses __ablk_encrypt rather than
	 * ablk_encrypt.
	 * NOTE(review): presumably because CBC encryption is serial and
	 * cannot use the parallel routines -- confirm in the ablk helper.
	 */
	.cra_name = "cbc(twofish)",
	.cra_driver_name = "cbc-twofish-avx2",
	.cra_priority = 500,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = TF_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE,
			.max_keysize = TF_MAX_KEY_SIZE,
			.ivsize = TF_BLOCK_SIZE,
			.setkey = ablk_set_key,
			.encrypt = __ablk_encrypt,
			.decrypt = ablk_decrypt,
		},
	},
}, {
	/* public async CTR: ablk_encrypt serves both directions */
	.cra_name = "ctr(twofish)",
	.cra_driver_name = "ctr-twofish-avx2",
	.cra_priority = 500,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = 1,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE,
			.max_keysize = TF_MAX_KEY_SIZE,
			.ivsize = TF_BLOCK_SIZE,
			.setkey = ablk_set_key,
			.encrypt = ablk_encrypt,
			.decrypt = ablk_encrypt,
			.geniv = "chainiv",
		},
	},
}, {
	/* public async LRW */
	.cra_name = "lrw(twofish)",
	.cra_driver_name = "lrw-twofish-avx2",
	.cra_priority = 500,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = TF_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE +
				       TF_BLOCK_SIZE,
			.max_keysize = TF_MAX_KEY_SIZE +
				       TF_BLOCK_SIZE,
			.ivsize = TF_BLOCK_SIZE,
			.setkey = ablk_set_key,
			.encrypt = ablk_encrypt,
			.decrypt = ablk_decrypt,
		},
	},
}, {
	/* public async XTS */
	.cra_name = "xts(twofish)",
	.cra_driver_name = "xts-twofish-avx2",
	.cra_priority = 500,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = TF_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = TF_MIN_KEY_SIZE * 2,
			.max_keysize = TF_MAX_KEY_SIZE * 2,
			.ivsize = TF_BLOCK_SIZE,
			.setkey = ablk_set_key,
			.encrypt = ablk_encrypt,
			.decrypt = ablk_decrypt,
		},
	},
} };
554 | |||
555 | static int __init init(void) | ||
556 | { | ||
557 | u64 xcr0; | ||
558 | |||
559 | if (!cpu_has_avx2 || !cpu_has_osxsave) { | ||
560 | pr_info("AVX2 instructions are not detected.\n"); | ||
561 | return -ENODEV; | ||
562 | } | ||
563 | |||
564 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
565 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
566 | pr_info("AVX2 detected but unusable.\n"); | ||
567 | return -ENODEV; | ||
568 | } | ||
569 | |||
570 | return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); | ||
571 | } | ||
572 | |||
/* Module exit: unregisters every algorithm that init() registered. */
static void __exit fini(void)
{
	crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
577 | |||
578 | module_init(init); | ||
579 | module_exit(fini); | ||
580 | |||
581 | MODULE_LICENSE("GPL"); | ||
582 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized"); | ||
583 | MODULE_ALIAS("twofish"); | ||
584 | MODULE_ALIAS("twofish-asm"); | ||
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index a62ba541884e..2047a562f6b3 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -50,18 +50,26 @@ | |||
50 | /* 8-way parallel cipher functions */ | 50 | /* 8-way parallel cipher functions */ |
51 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, | 51 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, |
52 | const u8 *src); | 52 | const u8 *src); |
53 | EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way); | ||
54 | |||
53 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, | 55 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, |
54 | const u8 *src); | 56 | const u8 *src); |
57 | EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way); | ||
55 | 58 | ||
56 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, | 59 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, |
57 | const u8 *src); | 60 | const u8 *src); |
61 | EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way); | ||
62 | |||
58 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, | 63 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, |
59 | const u8 *src, le128 *iv); | 64 | const u8 *src, le128 *iv); |
65 | EXPORT_SYMBOL_GPL(twofish_ctr_8way); | ||
60 | 66 | ||
61 | asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, | 67 | asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, |
62 | const u8 *src, le128 *iv); | 68 | const u8 *src, le128 *iv); |
69 | EXPORT_SYMBOL_GPL(twofish_xts_enc_8way); | ||
63 | asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, | 70 | asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, |
64 | const u8 *src, le128 *iv); | 71 | const u8 *src, le128 *iv); |
72 | EXPORT_SYMBOL_GPL(twofish_xts_dec_8way); | ||
65 | 73 | ||
66 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 74 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
67 | const u8 *src) | 75 | const u8 *src) |
@@ -69,17 +77,19 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | |||
69 | __twofish_enc_blk_3way(ctx, dst, src, false); | 77 | __twofish_enc_blk_3way(ctx, dst, src, false); |
70 | } | 78 | } |
71 | 79 | ||
72 | static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 80 | void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) |
73 | { | 81 | { |
74 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | 82 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, |
75 | GLUE_FUNC_CAST(twofish_enc_blk)); | 83 | GLUE_FUNC_CAST(twofish_enc_blk)); |
76 | } | 84 | } |
85 | EXPORT_SYMBOL_GPL(twofish_xts_enc); | ||
77 | 86 | ||
78 | static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 87 | void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) |
79 | { | 88 | { |
80 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | 89 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, |
81 | GLUE_FUNC_CAST(twofish_dec_blk)); | 90 | GLUE_FUNC_CAST(twofish_dec_blk)); |
82 | } | 91 | } |
92 | EXPORT_SYMBOL_GPL(twofish_xts_dec); | ||
83 | 93 | ||
84 | 94 | ||
85 | static const struct common_glue_ctx twofish_enc = { | 95 | static const struct common_glue_ctx twofish_enc = { |
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 878c51ceebb5..e655c6029b45 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h | |||
@@ -28,6 +28,20 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | |||
28 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 28 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
29 | const u8 *src); | 29 | const u8 *src); |
30 | 30 | ||
31 | /* 8-way parallel cipher functions */ | ||
32 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, | ||
33 | const u8 *src); | ||
34 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
35 | const u8 *src); | ||
36 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
37 | const u8 *src); | ||
38 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, | ||
39 | const u8 *src, le128 *iv); | ||
40 | asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, | ||
41 | const u8 *src, le128 *iv); | ||
42 | asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
43 | const u8 *src, le128 *iv); | ||
44 | |||
31 | /* helpers from twofish_x86_64-3way module */ | 45 | /* helpers from twofish_x86_64-3way module */ |
32 | extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); | 46 | extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); |
33 | extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, | 47 | extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, |
@@ -43,4 +57,8 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); | |||
43 | extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | 57 | extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
44 | unsigned int keylen); | 58 | unsigned int keylen); |
45 | 59 | ||
60 | /* helpers from twofish-avx module */ | ||
61 | extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
62 | extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
63 | |||
46 | #endif /* ASM_X86_TWOFISH_H */ | 64 | #endif /* ASM_X86_TWOFISH_H */ |
diff --git a/crypto/Kconfig b/crypto/Kconfig index 6b9564f91168..1ba48ddd4da1 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
@@ -1250,6 +1250,30 @@ config CRYPTO_TWOFISH_AVX_X86_64 | |||
1250 | See also: | 1250 | See also: |
1251 | <http://www.schneier.com/twofish.html> | 1251 | <http://www.schneier.com/twofish.html> |
1252 | 1252 | ||
1253 | config CRYPTO_TWOFISH_AVX2_X86_64 | ||
1254 | tristate "Twofish cipher algorithm (x86_64/AVX2)" | ||
1255 | depends on X86 && 64BIT | ||
1256 | select CRYPTO_ALGAPI | ||
1257 | select CRYPTO_CRYPTD | ||
1258 | select CRYPTO_ABLK_HELPER_X86 | ||
1259 | select CRYPTO_GLUE_HELPER_X86 | ||
1260 | select CRYPTO_TWOFISH_COMMON | ||
1261 | select CRYPTO_TWOFISH_X86_64 | ||
1262 | select CRYPTO_TWOFISH_X86_64_3WAY | ||
1263 | select CRYPTO_TWOFISH_AVX_X86_64 | ||
1264 | select CRYPTO_LRW | ||
1265 | select CRYPTO_XTS | ||
1266 | help | ||
1267 | Twofish cipher algorithm (x86_64/AVX2). | ||
1268 | |||
1269 | Twofish was submitted as an AES (Advanced Encryption Standard) | ||
1270 | candidate cipher by researchers at CounterPane Systems. It is a | ||
1271 | 16 round block cipher supporting key sizes of 128, 192, and 256 | ||
1272 | bits. | ||
1273 | |||
1274 | See also: | ||
1275 | <http://www.schneier.com/twofish.html> | ||
1276 | |||
1253 | comment "Compression" | 1277 | comment "Compression" |
1254 | 1278 | ||
1255 | config CRYPTO_DEFLATE | 1279 | config CRYPTO_DEFLATE |
diff --git a/crypto/testmgr.c b/crypto/testmgr.c index f3effb42531e..fea7841dd6f3 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c | |||
@@ -1651,6 +1651,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1651 | .alg = "__cbc-twofish-avx", | 1651 | .alg = "__cbc-twofish-avx", |
1652 | .test = alg_test_null, | 1652 | .test = alg_test_null, |
1653 | }, { | 1653 | }, { |
1654 | .alg = "__cbc-twofish-avx2", | ||
1655 | .test = alg_test_null, | ||
1656 | }, { | ||
1654 | .alg = "__driver-cbc-aes-aesni", | 1657 | .alg = "__driver-cbc-aes-aesni", |
1655 | .test = alg_test_null, | 1658 | .test = alg_test_null, |
1656 | .fips_allowed = 1, | 1659 | .fips_allowed = 1, |
@@ -1676,6 +1679,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1676 | .alg = "__driver-cbc-twofish-avx", | 1679 | .alg = "__driver-cbc-twofish-avx", |
1677 | .test = alg_test_null, | 1680 | .test = alg_test_null, |
1678 | }, { | 1681 | }, { |
1682 | .alg = "__driver-cbc-twofish-avx2", | ||
1683 | .test = alg_test_null, | ||
1684 | }, { | ||
1679 | .alg = "__driver-ecb-aes-aesni", | 1685 | .alg = "__driver-ecb-aes-aesni", |
1680 | .test = alg_test_null, | 1686 | .test = alg_test_null, |
1681 | .fips_allowed = 1, | 1687 | .fips_allowed = 1, |
@@ -1701,6 +1707,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1701 | .alg = "__driver-ecb-twofish-avx", | 1707 | .alg = "__driver-ecb-twofish-avx", |
1702 | .test = alg_test_null, | 1708 | .test = alg_test_null, |
1703 | }, { | 1709 | }, { |
1710 | .alg = "__driver-ecb-twofish-avx2", | ||
1711 | .test = alg_test_null, | ||
1712 | }, { | ||
1704 | .alg = "__ghash-pclmulqdqni", | 1713 | .alg = "__ghash-pclmulqdqni", |
1705 | .test = alg_test_null, | 1714 | .test = alg_test_null, |
1706 | .fips_allowed = 1, | 1715 | .fips_allowed = 1, |
@@ -1985,6 +1994,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1985 | .alg = "cryptd(__driver-ecb-twofish-avx)", | 1994 | .alg = "cryptd(__driver-ecb-twofish-avx)", |
1986 | .test = alg_test_null, | 1995 | .test = alg_test_null, |
1987 | }, { | 1996 | }, { |
1997 | .alg = "cryptd(__driver-ecb-twofish-avx2)", | ||
1998 | .test = alg_test_null, | ||
1999 | }, { | ||
1988 | .alg = "cryptd(__driver-gcm-aes-aesni)", | 2000 | .alg = "cryptd(__driver-gcm-aes-aesni)", |
1989 | .test = alg_test_null, | 2001 | .test = alg_test_null, |
1990 | .fips_allowed = 1, | 2002 | .fips_allowed = 1, |