aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2013-04-13 06:46:45 -0400
committer Herbert Xu <herbert@gondor.apana.org.au> 2013-04-25 09:09:04 -0400
commit 604880107010a1e5794552d184cd5471ea31b973 (patch)
tree ed37e3b3e8454f758daab88a2fb9cb5f043ca8ad /arch/x86/crypto
parent ad8b7c3e92868dd86c54d9d5321000bbb4096f0d (diff)
crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher
This patch adds an AVX2/x86-64 implementation of the Blowfish cipher that requires 32 parallel blocks of input (256 bytes). Table look-ups are performed with the vpgatherdd instruction directly from vector registers and thus should be faster than earlier implementations.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- arch/x86/crypto/Makefile 11
-rw-r--r-- arch/x86/crypto/blowfish-avx2-asm_64.S 449
-rw-r--r-- arch/x86/crypto/blowfish_avx2_glue.c 585
-rw-r--r-- arch/x86/crypto/blowfish_glue.c 32
4 files changed, 1053 insertions, 24 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 03cd7313ad4b..28464ef6fa52 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -3,6 +3,8 @@
3# 3#
4 4
5avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) 5avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
6avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
7 $(comma)4)$(comma)%ymm2,yes,no)
6 8
7obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o 9obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
8obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o 10obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -38,6 +40,11 @@ ifeq ($(avx_supported),yes)
38 obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o 40 obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
39endif 41endif
40 42
43# These modules require assembler to support AVX2.
44ifeq ($(avx2_supported),yes)
45 obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
46endif
47
41aes-i586-y := aes-i586-asm_32.o aes_glue.o 48aes-i586-y := aes-i586-asm_32.o aes_glue.o
42twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o 49twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
43salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o 50salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
@@ -62,6 +69,10 @@ ifeq ($(avx_supported),yes)
62 serpent_avx_glue.o 69 serpent_avx_glue.o
63endif 70endif
64 71
72ifeq ($(avx2_supported),yes)
73 blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
74endif
75
65aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o 76aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
66ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o 77ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
67sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o 78sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S
new file mode 100644
index 000000000000..784452e0d05d
--- /dev/null
+++ b/arch/x86/crypto/blowfish-avx2-asm_64.S
@@ -0,0 +1,449 @@
1/*
2 * x86_64/AVX2 assembler optimized version of Blowfish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/linkage.h>
14
15.file "blowfish-avx2-asm_64.S"
16
17.data
18.align 32
19
/*
 * Eight dword offsets spaced 64 bytes apart.
 * NOTE(review): nothing in the visible part of this file references
 * .Lprefetch_mask - confirm whether it is still needed.
 */
20.Lprefetch_mask:
21.long 0*64
22.long 1*64
23.long 2*64
24.long 3*64
25.long 4*64
26.long 5*64
27.long 6*64
28.long 7*64
29
/*
 * vpshufb control used by read_block/write_block. Stored little-endian,
 * each .long selects source bytes 3,2,1,0 of one dword, i.e. it byte-swaps
 * every 32-bit word of a 128-bit lane.
 */
30.Lbswap32_mask:
31.long 0x00010203
32.long 0x04050607
33.long 0x08090a0b
34.long 0x0c0d0e0f
35
/* vpshufb control that reverses all 16 bytes of a 128-bit lane (CTR mode). */
36.Lbswap128_mask:
37 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/*
 * vpshufb control that byte-reverses the low quadword of a lane and
 * replicates the result into both quadwords (CTR mode IV load).
 */
38.Lbswap_iv_mask:
39 .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
40
41.text
42/* structure of crypto context */
/*
 * Byte offsets from CTX as used by the code below:
 *   p       offset 0, (16 + 2) 32-bit subkey words
 *   s0..s3  four consecutive tables of 256 32-bit words placed after p
 */
43#define p 0
44#define s0 ((16 + 2) * 4)
45#define s1 ((16 + 2 + (1 * 256)) * 4)
46#define s2 ((16 + 2 + (2 * 256)) * 4)
47#define s3 ((16 + 2 + (3 * 256)) * 4)
48
49/* register macros */
50#define CTX %rdi
51#define RIO %rdx
52
53#define RS0 %rax
54#define RS1 %r8
55#define RS2 %r9
56#define RS3 %r10
57
58#define RLOOP %r11
59#define RLOOPd %r11d
60
61#define RXr0 %ymm8
62#define RXr1 %ymm9
63#define RXr2 %ymm10
64#define RXr3 %ymm11
65#define RXl0 %ymm12
66#define RXl1 %ymm13
67#define RXl2 %ymm14
68#define RXl3 %ymm15
69
70/* temp regs */
71#define RT0 %ymm0
72#define RT0x %xmm0
73#define RT1 %ymm1
74#define RT1x %xmm1
75#define RIDX0 %ymm2
76#define RIDX1 %ymm3
77#define RIDX1x %xmm3
78#define RIDX2 %ymm4
79#define RIDX3 %ymm5
80
81/* vpgatherdd mask and '-1' */
82#define RNOT %ymm6
83
84/* byte mask, (-1 >> 24) */
85#define RBYTE %ymm7
86
87/***********************************************************************
88 * 32-way AVX2 blowfish
89 ***********************************************************************/
/*
 * F(xl, xr): for every 32-bit lane,
 *   xr ^= ((S0[a] + S1[b]) ^ S2[c]) + S3[d]
 * where a..d are the four bytes of the matching xl lane (a = bits 31..24,
 * d = bits 7..0) and S0..S3 are the tables addressed by RS0..RS3.
 *
 * vpgatherdd clears its mask register as it completes, so all-ones masks
 * are rebuilt with vpcmpeqd between gathers. RIDX0 and RIDX1 are recycled
 * as the masks of the third and fourth gathers once their index values
 * have been consumed. RNOT is all-ones again when the macro ends.
 * Clobbers RT0, RT1 and RIDX0..RIDX3.
 */
90#define F(xl, xr) \
91 vpsrld $24, xl, RIDX0; \
92 vpsrld $16, xl, RIDX1; \
93 vpsrld $8, xl, RIDX2; \
94 vpand RBYTE, RIDX1, RIDX1; \
95 vpand RBYTE, RIDX2, RIDX2; \
96 vpand RBYTE, xl, RIDX3; \
97 \
98 vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \
99 vpcmpeqd RNOT, RNOT, RNOT; \
100 vpcmpeqd RIDX0, RIDX0, RIDX0; \
101 \
102 vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \
103 vpcmpeqd RIDX1, RIDX1, RIDX1; \
104 vpaddd RT0, RT1, RT0; \
105 \
106 vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \
107 vpxor RT0, RT1, RT0; \
108 \
109 vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \
110 vpcmpeqd RNOT, RNOT, RNOT; \
111 vpaddd RT0, RT1, RT0; \
112 \
113 vpxor RT0, xr, xr;
114
/*
 * add_roundkey(xl, nmem): broadcast the 32-bit word at nmem to every lane
 * of RT0 and XOR it into the four registers xl0..xl3. Clobbers RT0.
 */
115#define add_roundkey(xl, nmem) \
116 vpbroadcastd nmem, RT0; \
117 vpxor RT0, xl ## 0, xl ## 0; \
118 vpxor RT0, xl ## 1, xl ## 1; \
119 vpxor RT0, xl ## 2, xl ## 2; \
120 vpxor RT0, xl ## 3, xl ## 3;
121
/*
 * round_enc(): two Feistel rounds for encryption over all four register
 * pairs. RLOOP indexes the 32-bit subkey array at p: word RLOOP is XORed
 * into RXr before F(RXl, RXr), word RLOOP + 1 into RXl before F(RXr, RXl).
 */
122#define round_enc() \
123 add_roundkey(RXr, p(CTX,RLOOP,4)); \
124 F(RXl0, RXr0); \
125 F(RXl1, RXr1); \
126 F(RXl2, RXr2); \
127 F(RXl3, RXr3); \
128 \
129 add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
130 F(RXr0, RXl0); \
131 F(RXr1, RXl1); \
132 F(RXr2, RXl2); \
133 F(RXr3, RXl3);
134
/*
 * round_dec(): two Feistel rounds for decryption. Same shape as
 * round_enc() but the subkeys are taken in descending order: word
 * RLOOP + 2 is XORed into RXr first, then word RLOOP + 1 into RXl.
 */
135#define round_dec() \
136 add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \
137 F(RXl0, RXr0); \
138 F(RXl1, RXr1); \
139 F(RXl2, RXr2); \
140 F(RXl3, RXr3); \
141 \
142 add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
143 F(RXr0, RXl0); \
144 F(RXr1, RXl1); \
145 F(RXr2, RXl2); \
146 F(RXr3, RXl3);
147
/*
 * init_round_constants(): set up the values F() expects to be live:
 *   RNOT      all-ones (gather mask)
 *   RS0..RS3  addresses of the four lookup tables inside the context
 *   RBYTE     0x000000ff in every dword (all-ones shifted right by 24)
 */
148#define init_round_constants() \
149 vpcmpeqd RNOT, RNOT, RNOT; \
150 leaq s0(CTX), RS0; \
151 leaq s1(CTX), RS1; \
152 leaq s2(CTX), RS2; \
153 leaq s3(CTX), RS3; \
154 vpsrld $24, RNOT, RBYTE;
155
/*
 * transpose_2x2(x0, x1, t0): within each 128-bit lane, collect the
 * even-numbered dwords of x0 and x1 into x0 and the odd-numbered dwords
 * into x1 (the order of the dwords inside the lane is permuted).
 * t0 is scratch.
 */
156#define transpose_2x2(x0, x1, t0) \
157 vpunpckldq x0, x1, t0; \
158 vpunpckhdq x0, x1, x1; \
159 \
160 vpunpcklqdq t0, x1, x0; \
161 vpunpckhqdq t0, x1, x1;
162
/*
 * read_block(xl, xr): convert freshly loaded data into working form.
 * Every dword is byte-swapped (.Lbswap32_mask), then each xl/xr pair is
 * transposed so that xl holds the first dword of every 8-byte block and
 * xr the second. Clobbers RT0 and RT1.
 */
163#define read_block(xl, xr) \
164 vbroadcasti128 .Lbswap32_mask, RT1; \
165 \
166 vpshufb RT1, xl ## 0, xl ## 0; \
167 vpshufb RT1, xr ## 0, xr ## 0; \
168 vpshufb RT1, xl ## 1, xl ## 1; \
169 vpshufb RT1, xr ## 1, xr ## 1; \
170 vpshufb RT1, xl ## 2, xl ## 2; \
171 vpshufb RT1, xr ## 2, xr ## 2; \
172 vpshufb RT1, xl ## 3, xl ## 3; \
173 vpshufb RT1, xr ## 3, xr ## 3; \
174 \
175 transpose_2x2(xl ## 0, xr ## 0, RT0); \
176 transpose_2x2(xl ## 1, xr ## 1, RT0); \
177 transpose_2x2(xl ## 2, xr ## 2, RT0); \
178 transpose_2x2(xl ## 3, xr ## 3, RT0);
179
/*
 * write_block(xl, xr): counterpart of read_block(). The register pairs
 * are transposed back into per-block dword pairs first, then every dword
 * is byte-swapped. Clobbers RT0 and RT1.
 */
180#define write_block(xl, xr) \
181 vbroadcasti128 .Lbswap32_mask, RT1; \
182 \
183 transpose_2x2(xl ## 0, xr ## 0, RT0); \
184 transpose_2x2(xl ## 1, xr ## 1, RT0); \
185 transpose_2x2(xl ## 2, xr ## 2, RT0); \
186 transpose_2x2(xl ## 3, xr ## 3, RT0); \
187 \
188 vpshufb RT1, xl ## 0, xl ## 0; \
189 vpshufb RT1, xr ## 0, xr ## 0; \
190 vpshufb RT1, xl ## 1, xl ## 1; \
191 vpshufb RT1, xr ## 1, xr ## 1; \
192 vpshufb RT1, xl ## 2, xl ## 2; \
193 vpshufb RT1, xr ## 2, xr ## 2; \
194 vpshufb RT1, xl ## 3, xl ## 3; \
195 vpshufb RT1, xr ## 3, xr ## 3;
196
197.align 8
198__blowfish_enc_blk32:
199 /* input:
200 * %rdi: ctx, CTX
201 * RXl0..3, RXr0..3: plaintext
202 * output:
203 * RXl0..3, RXr0..3: ciphertext (RXl <=> RXr swapped)
 *
 * Clobbers RS0..RS3 (%rax, %r8..%r10), RLOOP (%r11) and %ymm0..%ymm7.
204 */
205 init_round_constants();
206
207 read_block(RXl, RXr);
208
 /* subkey word 0 goes in up front; the loop then uses words 1..16 */
209 movl $1, RLOOPd;
210 add_roundkey(RXl, p+4*(0)(CTX));
211
212.align 4
213.L__enc_loop:
214 round_enc();
215
 /* two subkey words per iteration: RLOOP = 1, 3, ..., 15 */
216 leal 2(RLOOPd), RLOOPd;
217 cmpl $17, RLOOPd;
218 jne .L__enc_loop;
219
 /* final subkey word 17 */
220 add_roundkey(RXr, p+4*(17)(CTX));
221
222 write_block(RXl, RXr);
223
224 ret;
225ENDPROC(__blowfish_enc_blk32)
226
227.align 8
228__blowfish_dec_blk32:
229 /* input:
230 * %rdi: ctx, CTX
231 * RXl0..3, RXr0..3: ciphertext
232 * output:
233 * RXl0..3, RXr0..3: plaintext (RXl <=> RXr swapped)
 *
 * Clobbers RS0..RS3 (%rax, %r8..%r10), RLOOP (%r11) and %ymm0..%ymm7.
234 */
235 init_round_constants();
236
237 read_block(RXl, RXr);
238
 /* subkey word 17 goes in up front; the loop then uses words 16..1 */
239 movl $14, RLOOPd;
240 add_roundkey(RXl, p+4*(17)(CTX));
241
242.align 4
243.L__dec_loop:
244 round_dec();
245
 /* RLOOP = 14, 12, ..., 0; leave once it becomes negative */
246 addl $-2, RLOOPd;
247 jns .L__dec_loop;
248
 /* final subkey word 0 */
249 add_roundkey(RXr, p+4*(0)(CTX));
250
251 write_block(RXl, RXr);
252
253 ret;
254ENDPROC(__blowfish_dec_blk32)
255
256ENTRY(blowfish_ecb_enc_32way)
257 /* input:
258 * %rdi: ctx, CTX
259 * %rsi: dst
260 * %rdx: src
 *
 * Encrypts 256 bytes (eight 32-byte vectors) from src into dst.
261 */
262
263 vzeroupper;
264
 /* unaligned loads: src needs no particular alignment */
265 vmovdqu 0*32(%rdx), RXl0;
266 vmovdqu 1*32(%rdx), RXr0;
267 vmovdqu 2*32(%rdx), RXl1;
268 vmovdqu 3*32(%rdx), RXr1;
269 vmovdqu 4*32(%rdx), RXl2;
270 vmovdqu 5*32(%rdx), RXr2;
271 vmovdqu 6*32(%rdx), RXl3;
272 vmovdqu 7*32(%rdx), RXr3;
273
274 call __blowfish_enc_blk32;
275
 /* results come back with RXl/RXr swapped, so RXr is stored first */
276 vmovdqu RXr0, 0*32(%rsi);
277 vmovdqu RXl0, 1*32(%rsi);
278 vmovdqu RXr1, 2*32(%rsi);
279 vmovdqu RXl1, 3*32(%rsi);
280 vmovdqu RXr2, 4*32(%rsi);
281 vmovdqu RXl2, 5*32(%rsi);
282 vmovdqu RXr3, 6*32(%rsi);
283 vmovdqu RXl3, 7*32(%rsi);
284
285 vzeroupper;
286
287 ret;
288ENDPROC(blowfish_ecb_enc_32way)
289
290ENTRY(blowfish_ecb_dec_32way)
291 /* input:
292 * %rdi: ctx, CTX
293 * %rsi: dst
294 * %rdx: src
 *
 * Decrypts 256 bytes (eight 32-byte vectors) from src into dst.
295 */
296
297 vzeroupper;
298
 /* unaligned loads: src needs no particular alignment */
299 vmovdqu 0*32(%rdx), RXl0;
300 vmovdqu 1*32(%rdx), RXr0;
301 vmovdqu 2*32(%rdx), RXl1;
302 vmovdqu 3*32(%rdx), RXr1;
303 vmovdqu 4*32(%rdx), RXl2;
304 vmovdqu 5*32(%rdx), RXr2;
305 vmovdqu 6*32(%rdx), RXl3;
306 vmovdqu 7*32(%rdx), RXr3;
307
308 call __blowfish_dec_blk32;
309
 /* results come back with RXl/RXr swapped, so RXr is stored first */
310 vmovdqu RXr0, 0*32(%rsi);
311 vmovdqu RXl0, 1*32(%rsi);
312 vmovdqu RXr1, 2*32(%rsi);
313 vmovdqu RXl1, 3*32(%rsi);
314 vmovdqu RXr2, 4*32(%rsi);
315 vmovdqu RXl2, 5*32(%rsi);
316 vmovdqu RXr3, 6*32(%rsi);
317 vmovdqu RXl3, 7*32(%rsi);
318
319 vzeroupper;
320
321 ret;
322ENDPROC(blowfish_ecb_dec_32way)
323
324ENTRY(blowfish_cbc_dec_32way)
325 /* input:
326 * %rdi: ctx, CTX
327 * %rsi: dst
328 * %rdx: src
 *
 * Decrypts 32 blocks and XORs blocks 1..31 with the preceding
 * ciphertext block taken from src. Block 0 is not chained here; the
 * glue code (__cbc_decrypt) XORs it with the previous block or the IV.
 * All of src is read before any store to dst.
329 */
330
331 vzeroupper;
332
333 vmovdqu 0*32(%rdx), RXl0;
334 vmovdqu 1*32(%rdx), RXr0;
335 vmovdqu 2*32(%rdx), RXl1;
336 vmovdqu 3*32(%rdx), RXr1;
337 vmovdqu 4*32(%rdx), RXl2;
338 vmovdqu 5*32(%rdx), RXr2;
339 vmovdqu 6*32(%rdx), RXl3;
340 vmovdqu 7*32(%rdx), RXr3;
341
342 call __blowfish_dec_blk32;
343
344 /* xor with src */
 /* RT0 = { 0, src block 0, src block 1, src block 2 } for blocks 0..3 */
345 vmovq (%rdx), RT0x;
346 vpshufd $0x4f, RT0x, RT0x;
347 vinserti128 $1, 8(%rdx), RT0, RT0;
348 vpxor RT0, RXr0, RXr0;
 /* the remaining vectors use src shifted back by one 8-byte block */
349 vpxor 0*32+24(%rdx), RXl0, RXl0;
350 vpxor 1*32+24(%rdx), RXr1, RXr1;
351 vpxor 2*32+24(%rdx), RXl1, RXl1;
352 vpxor 3*32+24(%rdx), RXr2, RXr2;
353 vpxor 4*32+24(%rdx), RXl2, RXl2;
354 vpxor 5*32+24(%rdx), RXr3, RXr3;
355 vpxor 6*32+24(%rdx), RXl3, RXl3;
356
357 vmovdqu RXr0, (0*32)(%rsi);
358 vmovdqu RXl0, (1*32)(%rsi);
359 vmovdqu RXr1, (2*32)(%rsi);
360 vmovdqu RXl1, (3*32)(%rsi);
361 vmovdqu RXr2, (4*32)(%rsi);
362 vmovdqu RXl2, (5*32)(%rsi);
363 vmovdqu RXr3, (6*32)(%rsi);
364 vmovdqu RXl3, (7*32)(%rsi);
365
366 vzeroupper;
367
368 ret;
369ENDPROC(blowfish_cbc_dec_32way)
370
371ENTRY(blowfish_ctr_32way)
372 /* input:
373 * %rdi: ctx, CTX
374 * %rsi: dst
375 * %rdx: src
376 * %rcx: iv (big endian, 64bit)
 *
 * Builds 32 consecutive counter blocks starting at the value stored at
 * (%rcx), writes counter + 32 back to (%rcx), encrypts the counters and
 * stores src XOR keystream to dst (256 bytes).
377 */
378
379 vzeroupper;
380
 /* build the per-quadword increments as negative constants */
381 vpcmpeqd RT0, RT0, RT0;
382 vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */
383
384 vpcmpeqd RT1x, RT1x, RT1x;
385 vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */
386 vpxor RIDX0, RIDX0, RIDX0;
387 vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */
388
389 vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */
390
391 vpcmpeqd RT1, RT1, RT1;
392 vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */
393 vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */
394
395 vbroadcasti128 .Lbswap_iv_mask, RIDX0;
396 vbroadcasti128 .Lbswap128_mask, RIDX1;
397
398 /* load IV and byteswap */
399 vmovq (%rcx), RT1x;
400 vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */
401 vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */
402
403 /* construct IVs */
 /* subtracting the negative constants adds 1/0/3/2, then 4 per step */
404 vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */
405 vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */
406 vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */
407 vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */
408 vpsubq RIDX2, RT1, RT1;
409 vpshufb RIDX1, RT1, RXl1;
410 vpsubq RIDX2, RT1, RT1;
411 vpshufb RIDX1, RT1, RXr1;
412 vpsubq RIDX2, RT1, RT1;
413 vpshufb RIDX1, RT1, RXl2;
414 vpsubq RIDX2, RT1, RT1;
415 vpshufb RIDX1, RT1, RXr2;
416 vpsubq RIDX2, RT1, RT1;
417 vpshufb RIDX1, RT1, RXl3;
418 vpsubq RIDX2, RT1, RT1;
419 vpshufb RIDX1, RT1, RXr3;
420
421 /* store last IV */
422 vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */
423 vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */
424 vmovq RT1x, (%rcx);
425
426 call __blowfish_enc_blk32;
427
428 /* dst = src ^ iv */
429 vpxor 0*32(%rdx), RXr0, RXr0;
430 vpxor 1*32(%rdx), RXl0, RXl0;
431 vpxor 2*32(%rdx), RXr1, RXr1;
432 vpxor 3*32(%rdx), RXl1, RXl1;
433 vpxor 4*32(%rdx), RXr2, RXr2;
434 vpxor 5*32(%rdx), RXl2, RXl2;
435 vpxor 6*32(%rdx), RXr3, RXr3;
436 vpxor 7*32(%rdx), RXl3, RXl3;
437 vmovdqu RXr0, (0*32)(%rsi);
438 vmovdqu RXl0, (1*32)(%rsi);
439 vmovdqu RXr1, (2*32)(%rsi);
440 vmovdqu RXl1, (3*32)(%rsi);
441 vmovdqu RXr2, (4*32)(%rsi);
442 vmovdqu RXl2, (5*32)(%rsi);
443 vmovdqu RXr3, (6*32)(%rsi);
444 vmovdqu RXl3, (7*32)(%rsi);
445
446 vzeroupper;
447
448 ret;
449ENDPROC(blowfish_ctr_32way)
diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c
new file mode 100644
index 000000000000..4417e9aea78d
--- /dev/null
+++ b/arch/x86/crypto/blowfish_avx2_glue.c
@@ -0,0 +1,585 @@
1/*
2 * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8 * CTR part based on code (crypto/ctr.c) by:
9 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/types.h>
25#include <linux/crypto.h>
26#include <linux/err.h>
27#include <crypto/algapi.h>
28#include <crypto/blowfish.h>
29#include <crypto/cryptd.h>
30#include <crypto/ctr.h>
31#include <asm/i387.h>
32#include <asm/xcr.h>
33#include <asm/xsave.h>
34#include <asm/crypto/blowfish.h>
35#include <asm/crypto/ablk_helper.h>
36#include <crypto/scatterwalk.h>
37
38#define BF_AVX2_PARALLEL_BLOCKS 32
39
40/* 32-way AVX2 parallel cipher functions */
41asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst,
42 const u8 *src);
43asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst,
44 const u8 *src);
45asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst,
46 const u8 *src);
47asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src,
48 __be64 *iv);
49
50static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes)
51{
52 if (fpu_enabled)
53 return true;
54
55 /* FPU is only used when chunk to be processed is large enough, so
56 * do not enable FPU until it is necessary.
57 */
58 if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS)
59 return false;
60
61 kernel_fpu_begin();
62 return true;
63}
64
/*
 * Leave the kernel FPU section if bf_fpu_begin() entered one.
 *
 * @fpu_enabled: state returned by the last bf_fpu_begin() call
 */
static inline void bf_fpu_end(bool fpu_enabled)
{
	if (!fpu_enabled)
		return;

	kernel_fpu_end();
}
70
71static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
72 bool enc)
73{
74 bool fpu_enabled = false;
75 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
76 const unsigned int bsize = BF_BLOCK_SIZE;
77 unsigned int nbytes;
78 int err;
79
80 err = blkcipher_walk_virt(desc, walk);
81 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
82
83 while ((nbytes = walk->nbytes)) {
84 u8 *wsrc = walk->src.virt.addr;
85 u8 *wdst = walk->dst.virt.addr;
86
87 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
88
89 /* Process multi-block AVX2 batch */
90 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
91 do {
92 if (enc)
93 blowfish_ecb_enc_32way(ctx, wdst, wsrc);
94 else
95 blowfish_ecb_dec_32way(ctx, wdst, wsrc);
96
97 wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS;
98 wdst += bsize * BF_AVX2_PARALLEL_BLOCKS;
99 nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
100 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
101
102 if (nbytes < bsize)
103 goto done;
104 }
105
106 /* Process multi-block batch */
107 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
108 do {
109 if (enc)
110 blowfish_enc_blk_4way(ctx, wdst, wsrc);
111 else
112 blowfish_dec_blk_4way(ctx, wdst, wsrc);
113
114 wsrc += bsize * BF_PARALLEL_BLOCKS;
115 wdst += bsize * BF_PARALLEL_BLOCKS;
116 nbytes -= bsize * BF_PARALLEL_BLOCKS;
117 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
118
119 if (nbytes < bsize)
120 goto done;
121 }
122
123 /* Handle leftovers */
124 do {
125 if (enc)
126 blowfish_enc_blk(ctx, wdst, wsrc);
127 else
128 blowfish_dec_blk(ctx, wdst, wsrc);
129
130 wsrc += bsize;
131 wdst += bsize;
132 nbytes -= bsize;
133 } while (nbytes >= bsize);
134
135done:
136 err = blkcipher_walk_done(desc, walk, nbytes);
137 }
138
139 bf_fpu_end(fpu_enabled);
140 return err;
141}
142
143static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
144 struct scatterlist *src, unsigned int nbytes)
145{
146 struct blkcipher_walk walk;
147
148 blkcipher_walk_init(&walk, dst, src, nbytes);
149 return ecb_crypt(desc, &walk, true);
150}
151
152static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
153 struct scatterlist *src, unsigned int nbytes)
154{
155 struct blkcipher_walk walk;
156
157 blkcipher_walk_init(&walk, dst, src, nbytes);
158 return ecb_crypt(desc, &walk, false);
159}
160
161static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
162 struct blkcipher_walk *walk)
163{
164 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
165 unsigned int bsize = BF_BLOCK_SIZE;
166 unsigned int nbytes = walk->nbytes;
167 u64 *src = (u64 *)walk->src.virt.addr;
168 u64 *dst = (u64 *)walk->dst.virt.addr;
169 u64 *iv = (u64 *)walk->iv;
170
171 do {
172 *dst = *src ^ *iv;
173 blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
174 iv = dst;
175
176 src += 1;
177 dst += 1;
178 nbytes -= bsize;
179 } while (nbytes >= bsize);
180
181 *(u64 *)walk->iv = *iv;
182 return nbytes;
183}
184
185static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
186 struct scatterlist *src, unsigned int nbytes)
187{
188 struct blkcipher_walk walk;
189 int err;
190
191 blkcipher_walk_init(&walk, dst, src, nbytes);
192 err = blkcipher_walk_virt(desc, &walk);
193
194 while ((nbytes = walk.nbytes)) {
195 nbytes = __cbc_encrypt(desc, &walk);
196 err = blkcipher_walk_done(desc, &walk, nbytes);
197 }
198
199 return err;
200}
201
202static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
203 struct blkcipher_walk *walk)
204{
205 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
206 const unsigned int bsize = BF_BLOCK_SIZE;
207 unsigned int nbytes = walk->nbytes;
208 u64 *src = (u64 *)walk->src.virt.addr;
209 u64 *dst = (u64 *)walk->dst.virt.addr;
210 u64 last_iv;
211 int i;
212
213 /* Start of the last block. */
214 src += nbytes / bsize - 1;
215 dst += nbytes / bsize - 1;
216
217 last_iv = *src;
218
219 /* Process multi-block AVX2 batch */
220 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
221 do {
222 nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1);
223 src -= BF_AVX2_PARALLEL_BLOCKS - 1;
224 dst -= BF_AVX2_PARALLEL_BLOCKS - 1;
225
226 blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src);
227
228 nbytes -= bsize;
229 if (nbytes < bsize)
230 goto done;
231
232 *dst ^= *(src - 1);
233 src -= 1;
234 dst -= 1;
235 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
236
237 if (nbytes < bsize)
238 goto done;
239 }
240
241 /* Process multi-block batch */
242 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
243 u64 ivs[BF_PARALLEL_BLOCKS - 1];
244
245 do {
246 nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1);
247 src -= BF_PARALLEL_BLOCKS - 1;
248 dst -= BF_PARALLEL_BLOCKS - 1;
249
250 for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
251 ivs[i] = src[i];
252
253 blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
254
255 for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
256 dst[i + 1] ^= ivs[i];
257
258 nbytes -= bsize;
259 if (nbytes < bsize)
260 goto done;
261
262 *dst ^= *(src - 1);
263 src -= 1;
264 dst -= 1;
265 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
266
267 if (nbytes < bsize)
268 goto done;
269 }
270
271 /* Handle leftovers */
272 for (;;) {
273 blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
274
275 nbytes -= bsize;
276 if (nbytes < bsize)
277 break;
278
279 *dst ^= *(src - 1);
280 src -= 1;
281 dst -= 1;
282 }
283
284done:
285 *dst ^= *(u64 *)walk->iv;
286 *(u64 *)walk->iv = last_iv;
287
288 return nbytes;
289}
290
291static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
292 struct scatterlist *src, unsigned int nbytes)
293{
294 bool fpu_enabled = false;
295 struct blkcipher_walk walk;
296 int err;
297
298 blkcipher_walk_init(&walk, dst, src, nbytes);
299 err = blkcipher_walk_virt(desc, &walk);
300 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
301
302 while ((nbytes = walk.nbytes)) {
303 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
304 nbytes = __cbc_decrypt(desc, &walk);
305 err = blkcipher_walk_done(desc, &walk, nbytes);
306 }
307
308 bf_fpu_end(fpu_enabled);
309 return err;
310}
311
312static void ctr_crypt_final(struct blkcipher_desc *desc,
313 struct blkcipher_walk *walk)
314{
315 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
316 u8 *ctrblk = walk->iv;
317 u8 keystream[BF_BLOCK_SIZE];
318 u8 *src = walk->src.virt.addr;
319 u8 *dst = walk->dst.virt.addr;
320 unsigned int nbytes = walk->nbytes;
321
322 blowfish_enc_blk(ctx, keystream, ctrblk);
323 crypto_xor(keystream, src, nbytes);
324 memcpy(dst, keystream, nbytes);
325
326 crypto_inc(ctrblk, BF_BLOCK_SIZE);
327}
328
329static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
330 struct blkcipher_walk *walk)
331{
332 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
333 unsigned int bsize = BF_BLOCK_SIZE;
334 unsigned int nbytes = walk->nbytes;
335 u64 *src = (u64 *)walk->src.virt.addr;
336 u64 *dst = (u64 *)walk->dst.virt.addr;
337 int i;
338
339 /* Process multi-block AVX2 batch */
340 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
341 do {
342 blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src,
343 (__be64 *)walk->iv);
344
345 src += BF_AVX2_PARALLEL_BLOCKS;
346 dst += BF_AVX2_PARALLEL_BLOCKS;
347 nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
348 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
349
350 if (nbytes < bsize)
351 goto done;
352 }
353
354 /* Process four block batch */
355 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
356 __be64 ctrblocks[BF_PARALLEL_BLOCKS];
357 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
358
359 do {
360 /* create ctrblks for parallel encrypt */
361 for (i = 0; i < BF_PARALLEL_BLOCKS; i++) {
362 if (dst != src)
363 dst[i] = src[i];
364
365 ctrblocks[i] = cpu_to_be64(ctrblk++);
366 }
367
368 blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
369 (u8 *)ctrblocks);
370
371 src += BF_PARALLEL_BLOCKS;
372 dst += BF_PARALLEL_BLOCKS;
373 nbytes -= bsize * BF_PARALLEL_BLOCKS;
374 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
375
376 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
377
378 if (nbytes < bsize)
379 goto done;
380 }
381
382 /* Handle leftovers */
383 do {
384 u64 ctrblk;
385
386 if (dst != src)
387 *dst = *src;
388
389 ctrblk = *(u64 *)walk->iv;
390 be64_add_cpu((__be64 *)walk->iv, 1);
391
392 blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
393
394 src += 1;
395 dst += 1;
396 } while ((nbytes -= bsize) >= bsize);
397
398done:
399 return nbytes;
400}
401
402static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
403 struct scatterlist *src, unsigned int nbytes)
404{
405 bool fpu_enabled = false;
406 struct blkcipher_walk walk;
407 int err;
408
409 blkcipher_walk_init(&walk, dst, src, nbytes);
410 err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
411 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
412
413 while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
414 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
415 nbytes = __ctr_crypt(desc, &walk);
416 err = blkcipher_walk_done(desc, &walk, nbytes);
417 }
418
419 bf_fpu_end(fpu_enabled);
420
421 if (walk.nbytes) {
422 ctr_crypt_final(desc, &walk);
423 err = blkcipher_walk_done(desc, &walk, 0);
424 }
425
426 return err;
427}
428
429static struct crypto_alg bf_algs[6] = { {
430 .cra_name = "__ecb-blowfish-avx2",
431 .cra_driver_name = "__driver-ecb-blowfish-avx2",
432 .cra_priority = 0,
433 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
434 .cra_blocksize = BF_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct bf_ctx),
436 .cra_alignmask = 0,
437 .cra_type = &crypto_blkcipher_type,
438 .cra_module = THIS_MODULE,
439 .cra_u = {
440 .blkcipher = {
441 .min_keysize = BF_MIN_KEY_SIZE,
442 .max_keysize = BF_MAX_KEY_SIZE,
443 .setkey = blowfish_setkey,
444 .encrypt = ecb_encrypt,
445 .decrypt = ecb_decrypt,
446 },
447 },
448}, {
449 .cra_name = "__cbc-blowfish-avx2",
450 .cra_driver_name = "__driver-cbc-blowfish-avx2",
451 .cra_priority = 0,
452 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
453 .cra_blocksize = BF_BLOCK_SIZE,
454 .cra_ctxsize = sizeof(struct bf_ctx),
455 .cra_alignmask = 0,
456 .cra_type = &crypto_blkcipher_type,
457 .cra_module = THIS_MODULE,
458 .cra_u = {
459 .blkcipher = {
460 .min_keysize = BF_MIN_KEY_SIZE,
461 .max_keysize = BF_MAX_KEY_SIZE,
462 .setkey = blowfish_setkey,
463 .encrypt = cbc_encrypt,
464 .decrypt = cbc_decrypt,
465 },
466 },
467}, {
468 .cra_name = "__ctr-blowfish-avx2",
469 .cra_driver_name = "__driver-ctr-blowfish-avx2",
470 .cra_priority = 0,
471 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
472 .cra_blocksize = 1,
473 .cra_ctxsize = sizeof(struct bf_ctx),
474 .cra_alignmask = 0,
475 .cra_type = &crypto_blkcipher_type,
476 .cra_module = THIS_MODULE,
477 .cra_u = {
478 .blkcipher = {
479 .min_keysize = BF_MIN_KEY_SIZE,
480 .max_keysize = BF_MAX_KEY_SIZE,
481 .ivsize = BF_BLOCK_SIZE,
482 .setkey = blowfish_setkey,
483 .encrypt = ctr_crypt,
484 .decrypt = ctr_crypt,
485 },
486 },
487}, {
488 .cra_name = "ecb(blowfish)",
489 .cra_driver_name = "ecb-blowfish-avx2",
490 .cra_priority = 400,
491 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
492 .cra_blocksize = BF_BLOCK_SIZE,
493 .cra_ctxsize = sizeof(struct async_helper_ctx),
494 .cra_alignmask = 0,
495 .cra_type = &crypto_ablkcipher_type,
496 .cra_module = THIS_MODULE,
497 .cra_init = ablk_init,
498 .cra_exit = ablk_exit,
499 .cra_u = {
500 .ablkcipher = {
501 .min_keysize = BF_MIN_KEY_SIZE,
502 .max_keysize = BF_MAX_KEY_SIZE,
503 .setkey = ablk_set_key,
504 .encrypt = ablk_encrypt,
505 .decrypt = ablk_decrypt,
506 },
507 },
508}, {
509 .cra_name = "cbc(blowfish)",
510 .cra_driver_name = "cbc-blowfish-avx2",
511 .cra_priority = 400,
512 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
513 .cra_blocksize = BF_BLOCK_SIZE,
514 .cra_ctxsize = sizeof(struct async_helper_ctx),
515 .cra_alignmask = 0,
516 .cra_type = &crypto_ablkcipher_type,
517 .cra_module = THIS_MODULE,
518 .cra_init = ablk_init,
519 .cra_exit = ablk_exit,
520 .cra_u = {
521 .ablkcipher = {
522 .min_keysize = BF_MIN_KEY_SIZE,
523 .max_keysize = BF_MAX_KEY_SIZE,
524 .ivsize = BF_BLOCK_SIZE,
525 .setkey = ablk_set_key,
526 .encrypt = __ablk_encrypt,
527 .decrypt = ablk_decrypt,
528 },
529 },
530}, {
531 .cra_name = "ctr(blowfish)",
532 .cra_driver_name = "ctr-blowfish-avx2",
533 .cra_priority = 400,
534 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
535 .cra_blocksize = 1,
536 .cra_ctxsize = sizeof(struct async_helper_ctx),
537 .cra_alignmask = 0,
538 .cra_type = &crypto_ablkcipher_type,
539 .cra_module = THIS_MODULE,
540 .cra_init = ablk_init,
541 .cra_exit = ablk_exit,
542 .cra_u = {
543 .ablkcipher = {
544 .min_keysize = BF_MIN_KEY_SIZE,
545 .max_keysize = BF_MAX_KEY_SIZE,
546 .ivsize = BF_BLOCK_SIZE,
547 .setkey = ablk_set_key,
548 .encrypt = ablk_encrypt,
549 .decrypt = ablk_encrypt,
550 .geniv = "chainiv",
551 },
552 },
553} };
554
555
556static int __init init(void)
557{
558 u64 xcr0;
559
560 if (!cpu_has_avx2 || !cpu_has_osxsave) {
561 pr_info("AVX2 instructions are not detected.\n");
562 return -ENODEV;
563 }
564
565 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
566 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
567 pr_info("AVX detected but unusable.\n");
568 return -ENODEV;
569 }
570
571 return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
572}
573
/* Module exit: unregister every algorithm that init() registered. */
574static void __exit fini(void)
575{
576 crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
577}
578
579module_init(init);
580module_exit(fini);
581
582MODULE_LICENSE("GPL");
583MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
584MODULE_ALIAS("blowfish");
585MODULE_ALIAS("blowfish-asm");
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 50ec333b70e6..3548d76dbaa9 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Glue Code for assembler optimized version of Blowfish 2 * Glue Code for assembler optimized version of Blowfish
3 * 3 *
4 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 4 * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 * 5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: 6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> 7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@@ -32,40 +32,24 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/types.h> 33#include <linux/types.h>
34#include <crypto/algapi.h> 34#include <crypto/algapi.h>
35#include <asm/crypto/blowfish.h>
35 36
36/* regular block cipher functions */ 37/* regular block cipher functions */
37asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, 38asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
38 bool xor); 39 bool xor);
40EXPORT_SYMBOL_GPL(__blowfish_enc_blk);
41
39asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); 42asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
43EXPORT_SYMBOL_GPL(blowfish_dec_blk);
40 44
41/* 4-way parallel cipher functions */ 45/* 4-way parallel cipher functions */
42asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, 46asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
43 const u8 *src, bool xor); 47 const u8 *src, bool xor);
48EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way);
49
44asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, 50asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
45 const u8 *src); 51 const u8 *src);
46 52EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way);
47static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
48{
49 __blowfish_enc_blk(ctx, dst, src, false);
50}
51
52static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
53 const u8 *src)
54{
55 __blowfish_enc_blk(ctx, dst, src, true);
56}
57
58static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
59 const u8 *src)
60{
61 __blowfish_enc_blk_4way(ctx, dst, src, false);
62}
63
64static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
65 const u8 *src)
66{
67 __blowfish_enc_blk_4way(ctx, dst, src, true);
68}
69 53
70static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) 54static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
71{ 55{