diff options
| -rw-r--r-- | arch/x86/crypto/Makefile | 4 | ||||
| -rw-r--r-- | arch/x86/crypto/blowfish-avx2-asm_64.S | 449 | ||||
| -rw-r--r-- | arch/x86/crypto/blowfish_avx2_glue.c | 585 | ||||
| -rw-r--r-- | arch/x86/crypto/blowfish_glue.c | 32 | ||||
| -rw-r--r-- | arch/x86/include/asm/crypto/blowfish.h | 43 | ||||
| -rw-r--r-- | crypto/Kconfig | 18 | ||||
| -rw-r--r-- | crypto/testmgr.c | 12 |
7 files changed, 24 insertions, 1119 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 94cb151adc1d..9ce341839f4a 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
| @@ -3,8 +3,6 @@ | |||
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) | 5 | avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) |
| 6 | avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ | ||
| 7 | $(comma)4)$(comma)%ymm2,yes,no) | ||
| 8 | 6 | ||
| 9 | obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o | 7 | obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o |
| 10 | obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o | 8 | obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o |
| @@ -43,7 +41,6 @@ endif | |||
| 43 | 41 | ||
| 44 | # These modules require assembler to support AVX2. | 42 | # These modules require assembler to support AVX2. |
| 45 | ifeq ($(avx2_supported),yes) | 43 | ifeq ($(avx2_supported),yes) |
| 46 | obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o | ||
| 47 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o | 44 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o |
| 48 | obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o | 45 | obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o |
| 49 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o | 46 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o |
| @@ -74,7 +71,6 @@ ifeq ($(avx_supported),yes) | |||
| 74 | endif | 71 | endif |
| 75 | 72 | ||
| 76 | ifeq ($(avx2_supported),yes) | 73 | ifeq ($(avx2_supported),yes) |
| 77 | blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o | ||
| 78 | camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o | 74 | camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o |
| 79 | serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o | 75 | serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o |
| 80 | twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o | 76 | twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o |
diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S deleted file mode 100644 index 784452e0d05d..000000000000 --- a/arch/x86/crypto/blowfish-avx2-asm_64.S +++ /dev/null | |||
| @@ -1,449 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * x86_64/AVX2 assembler optimized version of Blowfish | ||
| 3 | * | ||
| 4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/linkage.h> | ||
| 14 | |||
| 15 | .file "blowfish-avx2-asm_64.S" | ||
| 16 | |||
| 17 | .data | ||
| 18 | .align 32 | ||
| 19 | |||
| 20 | .Lprefetch_mask: /* eight dwords 0, 64, ..., 448; no code in this file references this label */ | ||
| 21 | .long 0*64 | ||
| 22 | .long 1*64 | ||
| 23 | .long 2*64 | ||
| 24 | .long 3*64 | ||
| 25 | .long 4*64 | ||
| 26 | .long 5*64 | ||
| 27 | .long 6*64 | ||
| 28 | .long 7*64 | ||
| 29 | |||
| 30 | .Lbswap32_mask: /* vpshufb control: reverses the byte order inside each 32-bit word */ | ||
| 31 | .long 0x00010203 | ||
| 32 | .long 0x04050607 | ||
| 33 | .long 0x08090a0b | ||
| 34 | .long 0x0c0d0e0f | ||
| 35 | |||
| 36 | .Lbswap128_mask: /* vpshufb control: reverses all 16 bytes of a 128-bit lane */ | ||
| 37 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
| 38 | .Lbswap_iv_mask: /* vpshufb control: puts the byte-reversed low qword into both qwords of a lane */ | ||
| 39 | .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 | ||
| 40 | |||
| 41 | .text | ||
| 42 | /* structure of crypto context */ | ||
| 43 | #define p 0 /* byte offset of the (16 + 2) 32-bit round-key words read by add_roundkey() */ | ||
| 44 | #define s0 ((16 + 2) * 4) /* s0..s3: byte offsets of four consecutive tables of 256 32-bit words */ | ||
| 45 | #define s1 ((16 + 2 + (1 * 256)) * 4) | ||
| 46 | #define s2 ((16 + 2 + (2 * 256)) * 4) | ||
| 47 | #define s3 ((16 + 2 + (3 * 256)) * 4) | ||
| 48 | |||
| 49 | /* register macros */ | ||
| 50 | #define CTX %rdi /* context pointer (first argument of every routine below) */ | ||
| 51 | #define RIO %rdx /* not used by name in this file; the routines spell out %rdx (src) directly */ | ||
| 52 | |||
| 53 | #define RS0 %rax /* RS0..RS3: table base addresses, loaded by init_round_constants() */ | ||
| 54 | #define RS1 %r8 | ||
| 55 | #define RS2 %r9 | ||
| 56 | #define RS3 %r10 | ||
| 57 | |||
| 58 | #define RLOOP %r11 /* round-key word index used by the round loops */ | ||
| 59 | #define RLOOPd %r11d | ||
| 60 | |||
| 61 | #define RXr0 %ymm8 /* RXr0..3 and RXl0..3: the eight 32-byte data registers */ | ||
| 62 | #define RXr1 %ymm9 | ||
| 63 | #define RXr2 %ymm10 | ||
| 64 | #define RXr3 %ymm11 | ||
| 65 | #define RXl0 %ymm12 | ||
| 66 | #define RXl1 %ymm13 | ||
| 67 | #define RXl2 %ymm14 | ||
| 68 | #define RXl3 %ymm15 | ||
| 69 | |||
| 70 | /* temp regs */ | ||
| 71 | #define RT0 %ymm0 | ||
| 72 | #define RT0x %xmm0 | ||
| 73 | #define RT1 %ymm1 | ||
| 74 | #define RT1x %xmm1 | ||
| 75 | #define RIDX0 %ymm2 /* RIDX0..3: gather indices in F(); RIDX0/RIDX1 double as gather masks there */ | ||
| 76 | #define RIDX1 %ymm3 | ||
| 77 | #define RIDX1x %xmm3 | ||
| 78 | #define RIDX2 %ymm4 | ||
| 79 | #define RIDX3 %ymm5 | ||
| 80 | |||
| 81 | /* vpgatherdd mask and '-1' */ | ||
| 82 | #define RNOT %ymm6 | ||
| 83 | |||
| 84 | /* byte mask, (-1 >> 24) */ | ||
| 85 | #define RBYTE %ymm7 | ||
| 86 | |||
| 87 | /*********************************************************************** | ||
| 88 | * 32-way AVX2 blowfish | ||
| 89 | ***********************************************************************/ | ||
| 90 | #define F(xl, xr) /* per 32-bit lane: xr ^= ((RS0[xl >> 24] + RS1[(xl >> 16) & 0xff]) ^ RS2[(xl >> 8) & 0xff]) + RS3[xl & 0xff]; expects RNOT all-ones on entry and leaves it all-ones */ \ | ||
| 91 | vpsrld $24, xl, RIDX0; \ | ||
| 92 | vpsrld $16, xl, RIDX1; \ | ||
| 93 | vpsrld $8, xl, RIDX2; \ | ||
| 94 | vpand RBYTE, RIDX1, RIDX1; \ | ||
| 95 | vpand RBYTE, RIDX2, RIDX2; \ | ||
| 96 | vpand RBYTE, xl, RIDX3; \ | ||
| 97 | \ | ||
| 98 | vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \ | ||
| 99 | vpcmpeqd RNOT, RNOT, RNOT; /* refill the mask: vpgatherdd clears its mask register */ \ | ||
| 100 | vpcmpeqd RIDX0, RIDX0, RIDX0; /* RIDX0 is free now; reuse it as the mask of the third gather */ \ | ||
| 101 | \ | ||
| 102 | vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \ | ||
| 103 | vpcmpeqd RIDX1, RIDX1, RIDX1; /* likewise the mask of the fourth gather */ \ | ||
| 104 | vpaddd RT0, RT1, RT0; \ | ||
| 105 | \ | ||
| 106 | vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \ | ||
| 107 | vpxor RT0, RT1, RT0; \ | ||
| 108 | \ | ||
| 109 | vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \ | ||
| 110 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
| 111 | vpaddd RT0, RT1, RT0; \ | ||
| 112 | \ | ||
| 113 | vpxor RT0, xr, xr; | ||
| 114 | |||
| 115 | #define add_roundkey(xl, nmem) /* broadcast the 32-bit word at nmem to every lane (via RT0) and xor it into xl0..xl3 */ \ | ||
| 116 | vpbroadcastd nmem, RT0; \ | ||
| 117 | vpxor RT0, xl ## 0, xl ## 0; \ | ||
| 118 | vpxor RT0, xl ## 1, xl ## 1; \ | ||
| 119 | vpxor RT0, xl ## 2, xl ## 2; \ | ||
| 120 | vpxor RT0, xl ## 3, xl ## 3; | ||
| 121 | |||
| 122 | #define round_enc() /* two rounds: key word RLOOP into RXr*, RXr* ^= F(RXl*); then key word RLOOP+1 into RXl*, RXl* ^= F(RXr*) */ \ | ||
| 123 | add_roundkey(RXr, p(CTX,RLOOP,4)); \ | ||
| 124 | F(RXl0, RXr0); \ | ||
| 125 | F(RXl1, RXr1); \ | ||
| 126 | F(RXl2, RXr2); \ | ||
| 127 | F(RXl3, RXr3); \ | ||
| 128 | \ | ||
| 129 | add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ | ||
| 130 | F(RXr0, RXl0); \ | ||
| 131 | F(RXr1, RXl1); \ | ||
| 132 | F(RXr2, RXl2); \ | ||
| 133 | F(RXr3, RXl3); | ||
| 134 | |||
| 135 | #define round_dec() /* two rounds in reverse key order: key word RLOOP+2 into RXr*, RXr* ^= F(RXl*); then key word RLOOP+1 into RXl*, RXl* ^= F(RXr*) */ \ | ||
| 136 | add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \ | ||
| 137 | F(RXl0, RXr0); \ | ||
| 138 | F(RXl1, RXr1); \ | ||
| 139 | F(RXl2, RXr2); \ | ||
| 140 | F(RXl3, RXr3); \ | ||
| 141 | \ | ||
| 142 | add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ | ||
| 143 | F(RXr0, RXl0); \ | ||
| 144 | F(RXr1, RXl1); \ | ||
| 145 | F(RXr2, RXl2); \ | ||
| 146 | F(RXr3, RXl3); | ||
| 147 | |||
| 148 | #define init_round_constants() /* RNOT = all ones, RS0..RS3 = CTX + s0..s3, RBYTE = 0x000000ff in every dword */ \ | ||
| 149 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
| 150 | leaq s0(CTX), RS0; \ | ||
| 151 | leaq s1(CTX), RS1; \ | ||
| 152 | leaq s2(CTX), RS2; \ | ||
| 153 | leaq s3(CTX), RS3; \ | ||
| 154 | vpsrld $24, RNOT, RBYTE; | ||
| 155 | |||
| 156 | #define transpose_2x2(x0, x1, t0) /* regroups the dwords of the x0/x1 pair with unpack instructions; rewrites both x0 and x1 and clobbers t0. NOTE(review): exact lane mapping not spelled out here - confirm before relying on it */ \ | ||
| 157 | vpunpckldq x0, x1, t0; \ | ||
| 158 | vpunpckhdq x0, x1, x1; \ | ||
| 159 | \ | ||
| 160 | vpunpcklqdq t0, x1, x0; \ | ||
| 161 | vpunpckhqdq t0, x1, x1; | ||
| 162 | |||
| 163 | #define read_block(xl, xr) /* byte-swap every dword of the eight data registers (.Lbswap32_mask in RT1), then transpose_2x2 each xl/xr pair; clobbers RT0 and RT1 */ \ | ||
| 164 | vbroadcasti128 .Lbswap32_mask, RT1; \ | ||
| 165 | \ | ||
| 166 | vpshufb RT1, xl ## 0, xl ## 0; \ | ||
| 167 | vpshufb RT1, xr ## 0, xr ## 0; \ | ||
| 168 | vpshufb RT1, xl ## 1, xl ## 1; \ | ||
| 169 | vpshufb RT1, xr ## 1, xr ## 1; \ | ||
| 170 | vpshufb RT1, xl ## 2, xl ## 2; \ | ||
| 171 | vpshufb RT1, xr ## 2, xr ## 2; \ | ||
| 172 | vpshufb RT1, xl ## 3, xl ## 3; \ | ||
| 173 | vpshufb RT1, xr ## 3, xr ## 3; \ | ||
| 174 | \ | ||
| 175 | transpose_2x2(xl ## 0, xr ## 0, RT0); \ | ||
| 176 | transpose_2x2(xl ## 1, xr ## 1, RT0); \ | ||
| 177 | transpose_2x2(xl ## 2, xr ## 2, RT0); \ | ||
| 178 | transpose_2x2(xl ## 3, xr ## 3, RT0); | ||
| 179 | |||
| 180 | #define write_block(xl, xr) /* the steps of read_block in the opposite order: transpose_2x2 each xl/xr pair, then byte-swap every dword; clobbers RT0 and RT1 */ \ | ||
| 181 | vbroadcasti128 .Lbswap32_mask, RT1; \ | ||
| 182 | \ | ||
| 183 | transpose_2x2(xl ## 0, xr ## 0, RT0); \ | ||
| 184 | transpose_2x2(xl ## 1, xr ## 1, RT0); \ | ||
| 185 | transpose_2x2(xl ## 2, xr ## 2, RT0); \ | ||
| 186 | transpose_2x2(xl ## 3, xr ## 3, RT0); \ | ||
| 187 | \ | ||
| 188 | vpshufb RT1, xl ## 0, xl ## 0; \ | ||
| 189 | vpshufb RT1, xr ## 0, xr ## 0; \ | ||
| 190 | vpshufb RT1, xl ## 1, xl ## 1; \ | ||
| 191 | vpshufb RT1, xr ## 1, xr ## 1; \ | ||
| 192 | vpshufb RT1, xl ## 2, xl ## 2; \ | ||
| 193 | vpshufb RT1, xr ## 2, xr ## 2; \ | ||
| 194 | vpshufb RT1, xl ## 3, xl ## 3; \ | ||
| 195 | vpshufb RT1, xr ## 3, xr ## 3; | ||
| 196 | |||
| 197 | .align 8 | ||
| 198 | __blowfish_enc_blk32: | ||
| 199 | /* input: | ||
| 200 | * %rdi: ctx, CTX | ||
| 201 | * RXl0..3, RXr0..3: plaintext | ||
| 202 | * output: | ||
| 203 | * RXl0..3, RXr0..3: ciphertext (RXl <=> RXr swapped) | ||
| 204 | */ | ||
| 205 | init_round_constants(); | ||
| 206 | |||
| 207 | read_block(RXl, RXr); | ||
| 208 | |||
| 209 | movl $1, RLOOPd; /* round_enc() uses key words RLOOP and RLOOP+1 */ | ||
| 210 | add_roundkey(RXl, p+4*(0)(CTX)); /* key word 0 is applied before the loop */ | ||
| 211 | |||
| 212 | .align 4 | ||
| 213 | .L__enc_loop: | ||
| 214 | round_enc(); | ||
| 215 | |||
| 216 | leal 2(RLOOPd), RLOOPd; | ||
| 217 | cmpl $17, RLOOPd; /* loop body runs for RLOOP = 1, 3, ..., 15 (key words 1..16) */ | ||
| 218 | jne .L__enc_loop; | ||
| 219 | |||
| 220 | add_roundkey(RXr, p+4*(17)(CTX)); /* final key word 17 */ | ||
| 221 | |||
| 222 | write_block(RXl, RXr); | ||
| 223 | |||
| 224 | ret; | ||
| 225 | ENDPROC(__blowfish_enc_blk32) | ||
| 226 | |||
| 227 | .align 8 | ||
| 228 | __blowfish_dec_blk32: | ||
| 229 | /* input: | ||
| 230 | * %rdi: ctx, CTX | ||
| 231 | * RXl0..3, RXr0..3: ciphertext | ||
| 232 | * output: | ||
| 233 | * RXl0..3, RXr0..3: plaintext (RXl <=> RXr swapped) | ||
| 234 | */ | ||
| 235 | init_round_constants(); | ||
| 236 | |||
| 237 | read_block(RXl, RXr); | ||
| 238 | |||
| 239 | movl $14, RLOOPd; /* round_dec() uses key words RLOOP+2 and RLOOP+1 */ | ||
| 240 | add_roundkey(RXl, p+4*(17)(CTX)); /* key word 17 is applied before the loop */ | ||
| 241 | |||
| 242 | .align 4 | ||
| 243 | .L__dec_loop: | ||
| 244 | round_dec(); | ||
| 245 | |||
| 246 | addl $-2, RLOOPd; | ||
| 247 | jns .L__dec_loop; /* loop body runs for RLOOP = 14, 12, ..., 0 (key words 16..1); exits once RLOOP goes negative */ | ||
| 248 | |||
| 249 | add_roundkey(RXr, p+4*(0)(CTX)); /* final key word 0 */ | ||
| 250 | |||
| 251 | write_block(RXl, RXr); | ||
| 252 | |||
| 253 | ret; | ||
| 254 | ENDPROC(__blowfish_dec_blk32) | ||
| 255 | |||
| 256 | ENTRY(blowfish_ecb_enc_32way) | ||
| 257 | /* input: | ||
| 258 | * %rdi: ctx, CTX | ||
| 259 | * %rsi: dst | ||
| 260 | * %rdx: src | ||
| 261 | */ | ||
| 262 | |||
| 263 | vzeroupper; | ||
| 264 | |||
| 265 | vmovdqu 0*32(%rdx), RXl0; /* load 8 x 32 bytes from src, alternating RXl/RXr */ | ||
| 266 | vmovdqu 1*32(%rdx), RXr0; | ||
| 267 | vmovdqu 2*32(%rdx), RXl1; | ||
| 268 | vmovdqu 3*32(%rdx), RXr1; | ||
| 269 | vmovdqu 4*32(%rdx), RXl2; | ||
| 270 | vmovdqu 5*32(%rdx), RXr2; | ||
| 271 | vmovdqu 6*32(%rdx), RXl3; | ||
| 272 | vmovdqu 7*32(%rdx), RXr3; | ||
| 273 | |||
| 274 | call __blowfish_enc_blk32; | ||
| 275 | |||
| 276 | vmovdqu RXr0, 0*32(%rsi); /* stored with RXr/RXl swapped relative to the loads, matching the helper's output convention */ | ||
| 277 | vmovdqu RXl0, 1*32(%rsi); | ||
| 278 | vmovdqu RXr1, 2*32(%rsi); | ||
| 279 | vmovdqu RXl1, 3*32(%rsi); | ||
| 280 | vmovdqu RXr2, 4*32(%rsi); | ||
| 281 | vmovdqu RXl2, 5*32(%rsi); | ||
| 282 | vmovdqu RXr3, 6*32(%rsi); | ||
| 283 | vmovdqu RXl3, 7*32(%rsi); | ||
| 284 | |||
| 285 | vzeroupper; | ||
| 286 | |||
| 287 | ret; | ||
| 288 | ENDPROC(blowfish_ecb_enc_32way) | ||
| 289 | |||
| 290 | ENTRY(blowfish_ecb_dec_32way) | ||
| 291 | /* input: | ||
| 292 | * %rdi: ctx, CTX | ||
| 293 | * %rsi: dst | ||
| 294 | * %rdx: src | ||
| 295 | */ | ||
| 296 | |||
| 297 | vzeroupper; | ||
| 298 | |||
| 299 | vmovdqu 0*32(%rdx), RXl0; /* load 8 x 32 bytes from src, alternating RXl/RXr */ | ||
| 300 | vmovdqu 1*32(%rdx), RXr0; | ||
| 301 | vmovdqu 2*32(%rdx), RXl1; | ||
| 302 | vmovdqu 3*32(%rdx), RXr1; | ||
| 303 | vmovdqu 4*32(%rdx), RXl2; | ||
| 304 | vmovdqu 5*32(%rdx), RXr2; | ||
| 305 | vmovdqu 6*32(%rdx), RXl3; | ||
| 306 | vmovdqu 7*32(%rdx), RXr3; | ||
| 307 | |||
| 308 | call __blowfish_dec_blk32; | ||
| 309 | |||
| 310 | vmovdqu RXr0, 0*32(%rsi); /* stored with RXr/RXl swapped relative to the loads, matching the helper's output convention */ | ||
| 311 | vmovdqu RXl0, 1*32(%rsi); | ||
| 312 | vmovdqu RXr1, 2*32(%rsi); | ||
| 313 | vmovdqu RXl1, 3*32(%rsi); | ||
| 314 | vmovdqu RXr2, 4*32(%rsi); | ||
| 315 | vmovdqu RXl2, 5*32(%rsi); | ||
| 316 | vmovdqu RXr3, 6*32(%rsi); | ||
| 317 | vmovdqu RXl3, 7*32(%rsi); | ||
| 318 | |||
| 319 | vzeroupper; | ||
| 320 | |||
| 321 | ret; | ||
| 322 | ENDPROC(blowfish_ecb_dec_32way) | ||
| 323 | |||
| 324 | ENTRY(blowfish_cbc_dec_32way) | ||
| 325 | /* input: | ||
| 326 | * %rdi: ctx, CTX | ||
| 327 | * %rsi: dst | ||
| 328 | * %rdx: src | ||
| 329 | */ | ||
| 330 | |||
| 331 | vzeroupper; | ||
| 332 | |||
| 333 | vmovdqu 0*32(%rdx), RXl0; | ||
| 334 | vmovdqu 1*32(%rdx), RXr0; | ||
| 335 | vmovdqu 2*32(%rdx), RXl1; | ||
| 336 | vmovdqu 3*32(%rdx), RXr1; | ||
| 337 | vmovdqu 4*32(%rdx), RXl2; | ||
| 338 | vmovdqu 5*32(%rdx), RXr2; | ||
| 339 | vmovdqu 6*32(%rdx), RXl3; | ||
| 340 | vmovdqu 7*32(%rdx), RXr3; | ||
| 341 | |||
| 342 | call __blowfish_dec_blk32; | ||
| 343 | |||
| 344 | /* xor with src */ | ||
| 345 | vmovq (%rdx), RT0x; /* low qword = first 8 source bytes, rest zero */ | ||
| 346 | vpshufd $0x4f, RT0x, RT0x; /* move them to the high qword; low qword becomes zero */ | ||
| 347 | vinserti128 $1, 8(%rdx), RT0, RT0; /* RT0 = { 0, src qword 0, src qword 1, src qword 2 } */ | ||
| 348 | vpxor RT0, RXr0, RXr0; /* so the very first output qword is xored with zero: chaining it is left to the caller */ | ||
| 349 | vpxor 0*32+24(%rdx), RXl0, RXl0; /* every later register is xored with src shifted back by 8 bytes */ | ||
| 350 | vpxor 1*32+24(%rdx), RXr1, RXr1; | ||
| 351 | vpxor 2*32+24(%rdx), RXl1, RXl1; | ||
| 352 | vpxor 3*32+24(%rdx), RXr2, RXr2; | ||
| 353 | vpxor 4*32+24(%rdx), RXl2, RXl2; | ||
| 354 | vpxor 5*32+24(%rdx), RXr3, RXr3; | ||
| 355 | vpxor 6*32+24(%rdx), RXl3, RXl3; | ||
| 356 | |||
| 357 | vmovdqu RXr0, (0*32)(%rsi); /* all reads of src above happen before the first store to dst */ | ||
| 358 | vmovdqu RXl0, (1*32)(%rsi); | ||
| 359 | vmovdqu RXr1, (2*32)(%rsi); | ||
| 360 | vmovdqu RXl1, (3*32)(%rsi); | ||
| 361 | vmovdqu RXr2, (4*32)(%rsi); | ||
| 362 | vmovdqu RXl2, (5*32)(%rsi); | ||
| 363 | vmovdqu RXr3, (6*32)(%rsi); | ||
| 364 | vmovdqu RXl3, (7*32)(%rsi); | ||
| 365 | |||
| 366 | vzeroupper; | ||
| 367 | |||
| 368 | ret; | ||
| 369 | ENDPROC(blowfish_cbc_dec_32way) | ||
| 370 | |||
| 371 | ENTRY(blowfish_ctr_32way) | ||
| 372 | /* input: | ||
| 373 | * %rdi: ctx, CTX | ||
| 374 | * %rsi: dst | ||
| 375 | * %rdx: src | ||
| 376 | * %rcx: iv (big endian, 64bit) | ||
| 377 | */ | ||
| 378 | |||
| 379 | vzeroupper; | ||
| 380 | |||
| 381 | vpcmpeqd RT0, RT0, RT0; | ||
| 382 | vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */ | ||
| 383 | |||
| 384 | vpcmpeqd RT1x, RT1x, RT1x; | ||
| 385 | vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */ | ||
| 386 | vpxor RIDX0, RIDX0, RIDX0; | ||
| 387 | vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */ | ||
| 388 | |||
| 389 | vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */ | ||
| 390 | |||
| 391 | vpcmpeqd RT1, RT1, RT1; | ||
| 392 | vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */ | ||
| 393 | vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */ | ||
| 394 | |||
| 395 | vbroadcasti128 .Lbswap_iv_mask, RIDX0; /* RIDX0 and RIDX1 now hold the two vpshufb byte-swap controls */ | ||
| 396 | vbroadcasti128 .Lbswap128_mask, RIDX1; | ||
| 397 | |||
| 398 | /* load IV and byteswap */ | ||
| 399 | vmovq (%rcx), RT1x; | ||
| 400 | vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */ | ||
| 401 | vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */ | ||
| 402 | |||
| 403 | /* construct IVs */ | ||
| 404 | vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */ | ||
| 405 | vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */ | ||
| 406 | vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */ | ||
| 407 | vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */ | ||
| 408 | vpsubq RIDX2, RT1, RT1; /* each further step subtracts -4, i.e. advances all four counters by 4 */ | ||
| 409 | vpshufb RIDX1, RT1, RXl1; | ||
| 410 | vpsubq RIDX2, RT1, RT1; | ||
| 411 | vpshufb RIDX1, RT1, RXr1; | ||
| 412 | vpsubq RIDX2, RT1, RT1; | ||
| 413 | vpshufb RIDX1, RT1, RXl2; | ||
| 414 | vpsubq RIDX2, RT1, RT1; | ||
| 415 | vpshufb RIDX1, RT1, RXr2; | ||
| 416 | vpsubq RIDX2, RT1, RT1; | ||
| 417 | vpshufb RIDX1, RT1, RXl3; | ||
| 418 | vpsubq RIDX2, RT1, RT1; | ||
| 419 | vpshufb RIDX1, RT1, RXr3; | ||
| 420 | |||
| 421 | /* store last IV */ | ||
| 422 | vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */ | ||
| 423 | vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */ | ||
| 424 | vmovq RT1x, (%rcx); /* write the counter advanced by 32 back through the iv pointer */ | ||
| 425 | |||
| 426 | call __blowfish_enc_blk32; /* encrypt the 32 counter blocks */ | ||
| 427 | |||
| 428 | /* dst = src ^ iv */ | ||
| 429 | vpxor 0*32(%rdx), RXr0, RXr0; | ||
| 430 | vpxor 1*32(%rdx), RXl0, RXl0; | ||
| 431 | vpxor 2*32(%rdx), RXr1, RXr1; | ||
| 432 | vpxor 3*32(%rdx), RXl1, RXl1; | ||
| 433 | vpxor 4*32(%rdx), RXr2, RXr2; | ||
| 434 | vpxor 5*32(%rdx), RXl2, RXl2; | ||
| 435 | vpxor 6*32(%rdx), RXr3, RXr3; | ||
| 436 | vpxor 7*32(%rdx), RXl3, RXl3; | ||
| 437 | vmovdqu RXr0, (0*32)(%rsi); | ||
| 438 | vmovdqu RXl0, (1*32)(%rsi); | ||
| 439 | vmovdqu RXr1, (2*32)(%rsi); | ||
| 440 | vmovdqu RXl1, (3*32)(%rsi); | ||
| 441 | vmovdqu RXr2, (4*32)(%rsi); | ||
| 442 | vmovdqu RXl2, (5*32)(%rsi); | ||
| 443 | vmovdqu RXr3, (6*32)(%rsi); | ||
| 444 | vmovdqu RXl3, (7*32)(%rsi); | ||
| 445 | |||
| 446 | vzeroupper; | ||
| 447 | |||
| 448 | ret; | ||
| 449 | ENDPROC(blowfish_ctr_32way) | ||
diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c deleted file mode 100644 index 4417e9aea78d..000000000000 --- a/arch/x86/crypto/blowfish_avx2_glue.c +++ /dev/null | |||
| @@ -1,585 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish | ||
| 3 | * | ||
| 4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
| 5 | * | ||
| 6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
| 7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
| 8 | * CTR part based on code (crypto/ctr.c) by: | ||
| 9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or modify | ||
| 12 | * it under the terms of the GNU General Public License as published by | ||
| 13 | * the Free Software Foundation; either version 2 of the License, or | ||
| 14 | * (at your option) any later version. | ||
| 15 | * | ||
| 16 | * This program is distributed in the hope that it will be useful, | ||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 19 | * GNU General Public License for more details. | ||
| 20 | * | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/module.h> | ||
| 24 | #include <linux/types.h> | ||
| 25 | #include <linux/crypto.h> | ||
| 26 | #include <linux/err.h> | ||
| 27 | #include <crypto/algapi.h> | ||
| 28 | #include <crypto/blowfish.h> | ||
| 29 | #include <crypto/cryptd.h> | ||
| 30 | #include <crypto/ctr.h> | ||
| 31 | #include <asm/i387.h> | ||
| 32 | #include <asm/xcr.h> | ||
| 33 | #include <asm/xsave.h> | ||
| 34 | #include <asm/crypto/blowfish.h> | ||
| 35 | #include <asm/crypto/ablk_helper.h> | ||
| 36 | #include <crypto/scatterwalk.h> | ||
| 37 | |||
| 38 | #define BF_AVX2_PARALLEL_BLOCKS 32 | ||
| 39 | |||
| 40 | /* 32-way AVX2 parallel cipher functions */ | ||
| 41 | asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst, | ||
| 42 | const u8 *src); | ||
| 43 | asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst, | ||
| 44 | const u8 *src); | ||
| 45 | asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst, | ||
| 46 | const u8 *src); | ||
| 47 | asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src, | ||
| 48 | __be64 *iv); | ||
| 49 | |||
| 50 | static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
| 51 | { /* Returns whether an FPU section is active after the call: an already-active one is kept; otherwise kernel_fpu_begin() is called only when nbytes holds at least one full batch of BF_AVX2_PARALLEL_BLOCKS blocks. */ | ||
| 52 | if (fpu_enabled) | ||
| 53 | return true; | ||
| 54 | |||
| 55 | /* FPU is only used when chunk to be processed is large enough, so | ||
| 56 | * do not enable FPU until it is necessary. | ||
| 57 | */ | ||
| 58 | if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS) | ||
| 59 | return false; | ||
| 60 | |||
| 61 | kernel_fpu_begin(); | ||
| 62 | return true; | ||
| 63 | } | ||
| 64 | |||
| 65 | static inline void bf_fpu_end(bool fpu_enabled) | ||
| 66 | { /* Calls kernel_fpu_end() only if fpu_enabled says a section was started. */ | ||
| 67 | if (fpu_enabled) | ||
| 68 | kernel_fpu_end(); | ||
| 69 | } | ||
| 70 | |||
| 71 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
| 72 | bool enc) | ||
| 73 | { /* Walks the request chunk by chunk and encrypts (enc true) or decrypts each chunk with the 32-way, then the 4-way, then the single-block routine. Returns the result of the last blkcipher_walk_* call. */ | ||
| 74 | bool fpu_enabled = false; | ||
| 75 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
| 76 | const unsigned int bsize = BF_BLOCK_SIZE; | ||
| 77 | unsigned int nbytes; | ||
| 78 | int err; | ||
| 79 | |||
| 80 | err = blkcipher_walk_virt(desc, walk); | ||
| 81 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; /* NOTE(review): presumably so the walk cannot sleep while the FPU section is held - confirm */ | ||
| 82 | |||
| 83 | while ((nbytes = walk->nbytes)) { | ||
| 84 | u8 *wsrc = walk->src.virt.addr; | ||
| 85 | u8 *wdst = walk->dst.virt.addr; | ||
| 86 | |||
| 87 | fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); /* stays enabled for later chunks once started */ | ||
| 88 | |||
| 89 | /* Process multi-block AVX2 batch */ | ||
| 90 | if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { | ||
| 91 | do { | ||
| 92 | if (enc) | ||
| 93 | blowfish_ecb_enc_32way(ctx, wdst, wsrc); | ||
| 94 | else | ||
| 95 | blowfish_ecb_dec_32way(ctx, wdst, wsrc); | ||
| 96 | |||
| 97 | wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
| 98 | wdst += bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
| 99 | nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
| 100 | } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); | ||
| 101 | |||
| 102 | if (nbytes < bsize) /* no whole block left in this chunk */ | ||
| 103 | goto done; | ||
| 104 | } | ||
| 105 | |||
| 106 | /* Process multi-block batch */ | ||
| 107 | if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { | ||
| 108 | do { | ||
| 109 | if (enc) | ||
| 110 | blowfish_enc_blk_4way(ctx, wdst, wsrc); | ||
| 111 | else | ||
| 112 | blowfish_dec_blk_4way(ctx, wdst, wsrc); | ||
| 113 | |||
| 114 | wsrc += bsize * BF_PARALLEL_BLOCKS; | ||
| 115 | wdst += bsize * BF_PARALLEL_BLOCKS; | ||
| 116 | nbytes -= bsize * BF_PARALLEL_BLOCKS; | ||
| 117 | } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); | ||
| 118 | |||
| 119 | if (nbytes < bsize) | ||
| 120 | goto done; | ||
| 121 | } | ||
| 122 | |||
| 123 | /* Handle leftovers */ | ||
| 124 | do { | ||
| 125 | if (enc) | ||
| 126 | blowfish_enc_blk(ctx, wdst, wsrc); | ||
| 127 | else | ||
| 128 | blowfish_dec_blk(ctx, wdst, wsrc); | ||
| 129 | |||
| 130 | wsrc += bsize; | ||
| 131 | wdst += bsize; | ||
| 132 | nbytes -= bsize; | ||
| 133 | } while (nbytes >= bsize); | ||
| 134 | |||
| 135 | done: | ||
| 136 | err = blkcipher_walk_done(desc, walk, nbytes); /* nbytes is what this pass left unprocessed */ | ||
| 137 | } | ||
| 138 | |||
| 139 | bf_fpu_end(fpu_enabled); | ||
| 140 | return err; | ||
| 141 | } | ||
| 142 | |||
| 143 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
| 144 | struct scatterlist *src, unsigned int nbytes) | ||
| 145 | { /* Initialises a walk over dst/src for nbytes and runs ecb_crypt() in encrypt mode. */ | ||
| 146 | struct blkcipher_walk walk; | ||
| 147 | |||
| 148 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
| 149 | return ecb_crypt(desc, &walk, true); | ||
| 150 | } | ||
| 151 | |||
| 152 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
| 153 | struct scatterlist *src, unsigned int nbytes) | ||
| 154 | { /* Initialises a walk over dst/src for nbytes and runs ecb_crypt() in decrypt mode. */ | ||
| 155 | struct blkcipher_walk walk; | ||
| 156 | |||
| 157 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
| 158 | return ecb_crypt(desc, &walk, false); | ||
| 159 | } | ||
| 160 | |||
| 161 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | ||
| 162 | struct blkcipher_walk *walk) | ||
| 163 | { /* CBC-encrypts the whole blocks of the current walk chunk one block at a time and returns the number of bytes left over. */ | ||
| 164 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
| 165 | unsigned int bsize = BF_BLOCK_SIZE; | ||
| 166 | unsigned int nbytes = walk->nbytes; | ||
| 167 | u64 *src = (u64 *)walk->src.virt.addr; | ||
| 168 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
| 169 | u64 *iv = (u64 *)walk->iv; | ||
| 170 | |||
| 171 | do { | ||
| 172 | *dst = *src ^ *iv; /* xor the input block with the previous output block (walk->iv for the first one) */ | ||
| 173 | blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
| 174 | iv = dst; /* the block just written chains into the next one */ | ||
| 175 | |||
| 176 | src += 1; | ||
| 177 | dst += 1; | ||
| 178 | nbytes -= bsize; | ||
| 179 | } while (nbytes >= bsize); | ||
| 180 | |||
| 181 | *(u64 *)walk->iv = *iv; /* hand the last output block to the next chunk as its IV */ | ||
| 182 | return nbytes; | ||
| 183 | } | ||
| 184 | |||
| 185 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
| 186 | struct scatterlist *src, unsigned int nbytes) | ||
| 187 | { /* Walks the request and CBC-encrypts each chunk with __cbc_encrypt(); this path never starts an FPU section. */ | ||
| 188 | struct blkcipher_walk walk; | ||
| 189 | int err; | ||
| 190 | |||
| 191 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
| 192 | err = blkcipher_walk_virt(desc, &walk); | ||
| 193 | |||
| 194 | while ((nbytes = walk.nbytes)) { | ||
| 195 | nbytes = __cbc_encrypt(desc, &walk); | ||
| 196 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
| 197 | } | ||
| 198 | |||
| 199 | return err; | ||
| 200 | } | ||
| 201 | |||
| 202 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | ||
| 203 | struct blkcipher_walk *walk) | ||
| 204 | { /* CBC-decrypts the whole blocks of the current walk chunk, working from the last block back to the first (32-way, then 4-way, then single blocks), and returns the number of bytes left over. */ | ||
| 205 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
| 206 | const unsigned int bsize = BF_BLOCK_SIZE; | ||
| 207 | unsigned int nbytes = walk->nbytes; | ||
| 208 | u64 *src = (u64 *)walk->src.virt.addr; | ||
| 209 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
| 210 | u64 last_iv; | ||
| 211 | int i; | ||
| 212 | |||
| 213 | /* Start of the last block. */ | ||
| 214 | src += nbytes / bsize - 1; | ||
| 215 | dst += nbytes / bsize - 1; | ||
| 216 | |||
| 217 | last_iv = *src; /* becomes walk->iv at done; NOTE(review): presumably read before any output is written because dst may alias src - confirm */ | ||
| 218 | |||
| 219 | /* Process multi-block AVX2 batch */ | ||
| 220 | if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { | ||
| 221 | do { | ||
| 222 | nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1); | ||
| 223 | src -= BF_AVX2_PARALLEL_BLOCKS - 1; /* step back to the first block of this batch */ | ||
| 224 | dst -= BF_AVX2_PARALLEL_BLOCKS - 1; | ||
| 225 | |||
| 226 | blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src); /* chains blocks within the batch; the batch's first block is chained below */ | ||
| 227 | |||
| 228 | nbytes -= bsize; | ||
| 229 | if (nbytes < bsize) /* batch started at the chunk's first block: chain it with walk->iv at done */ | ||
| 230 | goto done; | ||
| 231 | |||
| 232 | *dst ^= *(src - 1); /* chain the batch's first block with the preceding input block */ | ||
| 233 | src -= 1; | ||
| 234 | dst -= 1; | ||
| 235 | } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); | ||
| 236 | |||
| 237 | if (nbytes < bsize) | ||
| 238 | goto done; | ||
| 239 | } | ||
| 240 | |||
| 241 | /* Process multi-block batch */ | ||
| 242 | if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { | ||
| 243 | u64 ivs[BF_PARALLEL_BLOCKS - 1]; | ||
| 244 | |||
| 245 | do { | ||
| 246 | nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1); | ||
| 247 | src -= BF_PARALLEL_BLOCKS - 1; | ||
| 248 | dst -= BF_PARALLEL_BLOCKS - 1; | ||
| 249 | |||
| 250 | for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) | ||
| 251 | ivs[i] = src[i]; /* copy the chaining inputs before the 4-way call writes dst */ | ||
| 252 | |||
| 253 | blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); | ||
| 254 | |||
| 255 | for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) | ||
| 256 | dst[i + 1] ^= ivs[i]; | ||
| 257 | |||
| 258 | nbytes -= bsize; | ||
| 259 | if (nbytes < bsize) | ||
| 260 | goto done; | ||
| 261 | |||
| 262 | *dst ^= *(src - 1); | ||
| 263 | src -= 1; | ||
| 264 | dst -= 1; | ||
| 265 | } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); | ||
| 266 | |||
| 267 | if (nbytes < bsize) | ||
| 268 | goto done; | ||
| 269 | } | ||
| 270 | |||
| 271 | /* Handle leftovers */ | ||
| 272 | for (;;) { | ||
| 273 | blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); | ||
| 274 | |||
| 275 | nbytes -= bsize; | ||
| 276 | if (nbytes < bsize) | ||
| 277 | break; | ||
| 278 | |||
| 279 | *dst ^= *(src - 1); | ||
| 280 | src -= 1; | ||
| 281 | dst -= 1; | ||
| 282 | } | ||
| 283 | |||
| 284 | done: | ||
| 285 | *dst ^= *(u64 *)walk->iv; /* dst is at the chunk's first block here: chain it with the incoming IV */ | ||
| 286 | *(u64 *)walk->iv = last_iv; | ||
| 287 | |||
| 288 | return nbytes; | ||
| 289 | } | ||
| 290 | |||
| 291 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
| 292 | struct scatterlist *src, unsigned int nbytes) | ||
| 293 | { /* Walks the request and CBC-decrypts each chunk with __cbc_decrypt(), starting an FPU section via bf_fpu_begin() once a chunk is large enough and ending it after the walk. */ | ||
| 294 | bool fpu_enabled = false; | ||
| 295 | struct blkcipher_walk walk; | ||
| 296 | int err; | ||
| 297 | |||
| 298 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
| 299 | err = blkcipher_walk_virt(desc, &walk); | ||
| 300 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; /* NOTE(review): presumably so the walk cannot sleep while the FPU section is held - confirm */ | ||
| 301 | |||
| 302 | while ((nbytes = walk.nbytes)) { | ||
| 303 | fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); | ||
| 304 | nbytes = __cbc_decrypt(desc, &walk); | ||
| 305 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
| 306 | } | ||
| 307 | |||
| 308 | bf_fpu_end(fpu_enabled); | ||
| 309 | return err; | ||
| 310 | } | ||
| 311 | |||
| 312 | static void ctr_crypt_final(struct blkcipher_desc *desc, | ||
| 313 | struct blkcipher_walk *walk) | ||
| 314 | { /* Processes the trailing walk->nbytes bytes: encrypts the counter block into a keystream block, xors src into it, copies the result to dst, then increments the counter. */ | ||
| 315 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
| 316 | u8 *ctrblk = walk->iv; | ||
| 317 | u8 keystream[BF_BLOCK_SIZE]; | ||
| 318 | u8 *src = walk->src.virt.addr; | ||
| 319 | u8 *dst = walk->dst.virt.addr; | ||
| 320 | unsigned int nbytes = walk->nbytes; | ||
| 321 | |||
| 322 | blowfish_enc_blk(ctx, keystream, ctrblk); | ||
| 323 | crypto_xor(keystream, src, nbytes); /* only nbytes of the keystream block are used */ | ||
| 324 | memcpy(dst, keystream, nbytes); | ||
| 325 | |||
| 326 | crypto_inc(ctrblk, BF_BLOCK_SIZE); | ||
| 327 | } | ||
| 328 | |||
| 329 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
| 330 | struct blkcipher_walk *walk) | ||
| 331 | { /* CTR-processes the whole blocks of the current walk chunk (32-way, then 4-way, then one block at a time), advancing the big-endian 64-bit counter in walk->iv, and returns the number of bytes left over. */ | ||
| 332 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
| 333 | unsigned int bsize = BF_BLOCK_SIZE; | ||
| 334 | unsigned int nbytes = walk->nbytes; | ||
| 335 | u64 *src = (u64 *)walk->src.virt.addr; | ||
| 336 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
| 337 | int i; | ||
| 338 | |||
| 339 | /* Process multi-block AVX2 batch */ | ||
| 340 | if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { | ||
| 341 | do { | ||
| 342 | blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src, | ||
| 343 | (__be64 *)walk->iv); /* the routine advances the counter through this pointer itself */ | ||
| 344 | |||
| 345 | src += BF_AVX2_PARALLEL_BLOCKS; | ||
| 346 | dst += BF_AVX2_PARALLEL_BLOCKS; | ||
| 347 | nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
| 348 | } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); | ||
| 349 | |||
| 350 | if (nbytes < bsize) | ||
| 351 | goto done; | ||
| 352 | } | ||
| 353 | |||
| 354 | /* Process four block batch */ | ||
| 355 | if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { | ||
| 356 | __be64 ctrblocks[BF_PARALLEL_BLOCKS]; | ||
| 357 | u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); /* counter kept in CPU byte order while batches are built */ | ||
| 358 | |||
| 359 | do { | ||
| 360 | /* create ctrblks for parallel encrypt */ | ||
| 361 | for (i = 0; i < BF_PARALLEL_BLOCKS; i++) { | ||
| 362 | if (dst != src) | ||
| 363 | dst[i] = src[i]; /* NOTE(review): presumably the xor routine below combines the keystream into dst in place - confirm against blowfish_enc_blk_xor_4way */ | ||
| 364 | |||
| 365 | ctrblocks[i] = cpu_to_be64(ctrblk++); | ||
| 366 | } | ||
| 367 | |||
| 368 | blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, | ||
| 369 | (u8 *)ctrblocks); | ||
| 370 | |||
| 371 | src += BF_PARALLEL_BLOCKS; | ||
| 372 | dst += BF_PARALLEL_BLOCKS; | ||
| 373 | nbytes -= bsize * BF_PARALLEL_BLOCKS; | ||
| 374 | } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); | ||
| 375 | |||
| 376 | *(__be64 *)walk->iv = cpu_to_be64(ctrblk); /* write the advanced counter back */ | ||
| 377 | |||
| 378 | if (nbytes < bsize) | ||
| 379 | goto done; | ||
| 380 | } | ||
| 381 | |||
| 382 | /* Handle leftovers */ | ||
| 383 | do { | ||
| 384 | u64 ctrblk; | ||
| 385 | |||
| 386 | if (dst != src) | ||
| 387 | *dst = *src; | ||
| 388 | |||
| 389 | ctrblk = *(u64 *)walk->iv; /* raw copy of the counter block, taken before it is incremented */ | ||
| 390 | be64_add_cpu((__be64 *)walk->iv, 1); | ||
| 391 | |||
| 392 | blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); | ||
| 393 | |||
| 394 | src += 1; | ||
| 395 | dst += 1; | ||
| 396 | } while ((nbytes -= bsize) >= bsize); | ||
| 397 | |||
| 398 | done: | ||
| 399 | return nbytes; | ||
| 400 | } | ||
| 401 | |||
| 402 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
| 403 | struct scatterlist *src, unsigned int nbytes) | ||
| 404 | { | ||
| 405 | bool fpu_enabled = false; | ||
| 406 | struct blkcipher_walk walk; | ||
| 407 | int err; | ||
| 408 | |||
| 409 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
| 410 | err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); | ||
| 411 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
| 412 | |||
| 413 | while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { | ||
| 414 | fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); | ||
| 415 | nbytes = __ctr_crypt(desc, &walk); | ||
| 416 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
| 417 | } | ||
| 418 | |||
| 419 | bf_fpu_end(fpu_enabled); | ||
| 420 | |||
| 421 | if (walk.nbytes) { | ||
| 422 | ctr_crypt_final(desc, &walk); | ||
| 423 | err = blkcipher_walk_done(desc, &walk, 0); | ||
| 424 | } | ||
| 425 | |||
| 426 | return err; | ||
| 427 | } | ||
| 428 | |||
| 429 | static struct crypto_alg bf_algs[6] = { { | ||
| 430 | .cra_name = "__ecb-blowfish-avx2", | ||
| 431 | .cra_driver_name = "__driver-ecb-blowfish-avx2", | ||
| 432 | .cra_priority = 0, | ||
| 433 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
| 434 | .cra_blocksize = BF_BLOCK_SIZE, | ||
| 435 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
| 436 | .cra_alignmask = 0, | ||
| 437 | .cra_type = &crypto_blkcipher_type, | ||
| 438 | .cra_module = THIS_MODULE, | ||
| 439 | .cra_u = { | ||
| 440 | .blkcipher = { | ||
| 441 | .min_keysize = BF_MIN_KEY_SIZE, | ||
| 442 | .max_keysize = BF_MAX_KEY_SIZE, | ||
| 443 | .setkey = blowfish_setkey, | ||
| 444 | .encrypt = ecb_encrypt, | ||
| 445 | .decrypt = ecb_decrypt, | ||
| 446 | }, | ||
| 447 | }, | ||
| 448 | }, { | ||
| 449 | .cra_name = "__cbc-blowfish-avx2", | ||
| 450 | .cra_driver_name = "__driver-cbc-blowfish-avx2", | ||
| 451 | .cra_priority = 0, | ||
| 452 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
| 453 | .cra_blocksize = BF_BLOCK_SIZE, | ||
| 454 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
| 455 | .cra_alignmask = 0, | ||
| 456 | .cra_type = &crypto_blkcipher_type, | ||
| 457 | .cra_module = THIS_MODULE, | ||
| 458 | .cra_u = { | ||
| 459 | .blkcipher = { | ||
| 460 | .min_keysize = BF_MIN_KEY_SIZE, | ||
| 461 | .max_keysize = BF_MAX_KEY_SIZE, | ||
| 462 | .setkey = blowfish_setkey, | ||
| 463 | .encrypt = cbc_encrypt, | ||
| 464 | .decrypt = cbc_decrypt, | ||
| 465 | }, | ||
| 466 | }, | ||
| 467 | }, { | ||
| 468 | .cra_name = "__ctr-blowfish-avx2", | ||
| 469 | .cra_driver_name = "__driver-ctr-blowfish-avx2", | ||
| 470 | .cra_priority = 0, | ||
| 471 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
| 472 | .cra_blocksize = 1, | ||
| 473 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
| 474 | .cra_alignmask = 0, | ||
| 475 | .cra_type = &crypto_blkcipher_type, | ||
| 476 | .cra_module = THIS_MODULE, | ||
| 477 | .cra_u = { | ||
| 478 | .blkcipher = { | ||
| 479 | .min_keysize = BF_MIN_KEY_SIZE, | ||
| 480 | .max_keysize = BF_MAX_KEY_SIZE, | ||
| 481 | .ivsize = BF_BLOCK_SIZE, | ||
| 482 | .setkey = blowfish_setkey, | ||
| 483 | .encrypt = ctr_crypt, | ||
| 484 | .decrypt = ctr_crypt, | ||
| 485 | }, | ||
| 486 | }, | ||
| 487 | }, { | ||
| 488 | .cra_name = "ecb(blowfish)", | ||
| 489 | .cra_driver_name = "ecb-blowfish-avx2", | ||
| 490 | .cra_priority = 400, | ||
| 491 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
| 492 | .cra_blocksize = BF_BLOCK_SIZE, | ||
| 493 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
| 494 | .cra_alignmask = 0, | ||
| 495 | .cra_type = &crypto_ablkcipher_type, | ||
| 496 | .cra_module = THIS_MODULE, | ||
| 497 | .cra_init = ablk_init, | ||
| 498 | .cra_exit = ablk_exit, | ||
| 499 | .cra_u = { | ||
| 500 | .ablkcipher = { | ||
| 501 | .min_keysize = BF_MIN_KEY_SIZE, | ||
| 502 | .max_keysize = BF_MAX_KEY_SIZE, | ||
| 503 | .setkey = ablk_set_key, | ||
| 504 | .encrypt = ablk_encrypt, | ||
| 505 | .decrypt = ablk_decrypt, | ||
| 506 | }, | ||
| 507 | }, | ||
| 508 | }, { | ||
| 509 | .cra_name = "cbc(blowfish)", | ||
| 510 | .cra_driver_name = "cbc-blowfish-avx2", | ||
| 511 | .cra_priority = 400, | ||
| 512 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
| 513 | .cra_blocksize = BF_BLOCK_SIZE, | ||
| 514 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
| 515 | .cra_alignmask = 0, | ||
| 516 | .cra_type = &crypto_ablkcipher_type, | ||
| 517 | .cra_module = THIS_MODULE, | ||
| 518 | .cra_init = ablk_init, | ||
| 519 | .cra_exit = ablk_exit, | ||
| 520 | .cra_u = { | ||
| 521 | .ablkcipher = { | ||
| 522 | .min_keysize = BF_MIN_KEY_SIZE, | ||
| 523 | .max_keysize = BF_MAX_KEY_SIZE, | ||
| 524 | .ivsize = BF_BLOCK_SIZE, | ||
| 525 | .setkey = ablk_set_key, | ||
| 526 | .encrypt = __ablk_encrypt, | ||
| 527 | .decrypt = ablk_decrypt, | ||
| 528 | }, | ||
| 529 | }, | ||
| 530 | }, { | ||
| 531 | .cra_name = "ctr(blowfish)", | ||
| 532 | .cra_driver_name = "ctr-blowfish-avx2", | ||
| 533 | .cra_priority = 400, | ||
| 534 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
| 535 | .cra_blocksize = 1, | ||
| 536 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
| 537 | .cra_alignmask = 0, | ||
| 538 | .cra_type = &crypto_ablkcipher_type, | ||
| 539 | .cra_module = THIS_MODULE, | ||
| 540 | .cra_init = ablk_init, | ||
| 541 | .cra_exit = ablk_exit, | ||
| 542 | .cra_u = { | ||
| 543 | .ablkcipher = { | ||
| 544 | .min_keysize = BF_MIN_KEY_SIZE, | ||
| 545 | .max_keysize = BF_MAX_KEY_SIZE, | ||
| 546 | .ivsize = BF_BLOCK_SIZE, | ||
| 547 | .setkey = ablk_set_key, | ||
| 548 | .encrypt = ablk_encrypt, | ||
| 549 | .decrypt = ablk_encrypt, | ||
| 550 | .geniv = "chainiv", | ||
| 551 | }, | ||
| 552 | }, | ||
| 553 | } }; | ||
| 554 | |||
| 555 | |||
| 556 | static int __init init(void) | ||
| 557 | { | ||
| 558 | u64 xcr0; | ||
| 559 | |||
| 560 | if (!cpu_has_avx2 || !cpu_has_osxsave) { | ||
| 561 | pr_info("AVX2 instructions are not detected.\n"); | ||
| 562 | return -ENODEV; | ||
| 563 | } | ||
| 564 | |||
| 565 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
| 566 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
| 567 | pr_info("AVX detected but unusable.\n"); | ||
| 568 | return -ENODEV; | ||
| 569 | } | ||
| 570 | |||
| 571 | return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); | ||
| 572 | } | ||
| 573 | |||
| 574 | static void __exit fini(void) | ||
| 575 | { | ||
| 576 | crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); | ||
| 577 | } | ||
| 578 | |||
| 579 | module_init(init); | ||
| 580 | module_exit(fini); | ||
| 581 | |||
| 582 | MODULE_LICENSE("GPL"); | ||
| 583 | MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized"); | ||
| 584 | MODULE_ALIAS("blowfish"); | ||
| 585 | MODULE_ALIAS("blowfish-asm"); | ||
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 3548d76dbaa9..50ec333b70e6 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Glue Code for assembler optimized version of Blowfish | 2 | * Glue Code for assembler optimized version of Blowfish |
| 3 | * | 3 | * |
| 4 | * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | 4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> |
| 5 | * | 5 | * |
| 6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | 6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: |
| 7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | 7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> |
| @@ -32,24 +32,40 @@ | |||
| 32 | #include <linux/module.h> | 32 | #include <linux/module.h> |
| 33 | #include <linux/types.h> | 33 | #include <linux/types.h> |
| 34 | #include <crypto/algapi.h> | 34 | #include <crypto/algapi.h> |
| 35 | #include <asm/crypto/blowfish.h> | ||
| 36 | 35 | ||
| 37 | /* regular block cipher functions */ | 36 | /* regular block cipher functions */ |
| 38 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, | 37 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, |
| 39 | bool xor); | 38 | bool xor); |
| 40 | EXPORT_SYMBOL_GPL(__blowfish_enc_blk); | ||
| 41 | |||
| 42 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); | 39 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); |
| 43 | EXPORT_SYMBOL_GPL(blowfish_dec_blk); | ||
| 44 | 40 | ||
| 45 | /* 4-way parallel cipher functions */ | 41 | /* 4-way parallel cipher functions */ |
| 46 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | 42 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, |
| 47 | const u8 *src, bool xor); | 43 | const u8 *src, bool xor); |
| 48 | EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way); | ||
| 49 | |||
| 50 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, | 44 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, |
| 51 | const u8 *src); | 45 | const u8 *src); |
| 52 | EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way); | 46 | |
| 47 | static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) | ||
| 48 | { | ||
| 49 | __blowfish_enc_blk(ctx, dst, src, false); | ||
| 50 | } | ||
| 51 | |||
| 52 | static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, | ||
| 53 | const u8 *src) | ||
| 54 | { | ||
| 55 | __blowfish_enc_blk(ctx, dst, src, true); | ||
| 56 | } | ||
| 57 | |||
| 58 | static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
| 59 | const u8 *src) | ||
| 60 | { | ||
| 61 | __blowfish_enc_blk_4way(ctx, dst, src, false); | ||
| 62 | } | ||
| 63 | |||
| 64 | static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, | ||
| 65 | const u8 *src) | ||
| 66 | { | ||
| 67 | __blowfish_enc_blk_4way(ctx, dst, src, true); | ||
| 68 | } | ||
| 53 | 69 | ||
| 54 | static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | 70 | static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) |
| 55 | { | 71 | { |
diff --git a/arch/x86/include/asm/crypto/blowfish.h b/arch/x86/include/asm/crypto/blowfish.h deleted file mode 100644 index f097b2face10..000000000000 --- a/arch/x86/include/asm/crypto/blowfish.h +++ /dev/null | |||
| @@ -1,43 +0,0 @@ | |||
| 1 | #ifndef ASM_X86_BLOWFISH_H | ||
| 2 | #define ASM_X86_BLOWFISH_H | ||
| 3 | |||
| 4 | #include <linux/crypto.h> | ||
| 5 | #include <crypto/blowfish.h> | ||
| 6 | |||
| 7 | #define BF_PARALLEL_BLOCKS 4 | ||
| 8 | |||
| 9 | /* regular block cipher functions */ | ||
| 10 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, | ||
| 11 | bool xor); | ||
| 12 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); | ||
| 13 | |||
| 14 | /* 4-way parallel cipher functions */ | ||
| 15 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
| 16 | const u8 *src, bool xor); | ||
| 17 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
| 18 | const u8 *src); | ||
| 19 | |||
| 20 | static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) | ||
| 21 | { | ||
| 22 | __blowfish_enc_blk(ctx, dst, src, false); | ||
| 23 | } | ||
| 24 | |||
| 25 | static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, | ||
| 26 | const u8 *src) | ||
| 27 | { | ||
| 28 | __blowfish_enc_blk(ctx, dst, src, true); | ||
| 29 | } | ||
| 30 | |||
| 31 | static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
| 32 | const u8 *src) | ||
| 33 | { | ||
| 34 | __blowfish_enc_blk_4way(ctx, dst, src, false); | ||
| 35 | } | ||
| 36 | |||
| 37 | static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, | ||
| 38 | const u8 *src) | ||
| 39 | { | ||
| 40 | __blowfish_enc_blk_4way(ctx, dst, src, true); | ||
| 41 | } | ||
| 42 | |||
| 43 | #endif | ||
diff --git a/crypto/Kconfig b/crypto/Kconfig index d1ca6312d798..4ef0ee715171 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
| @@ -839,24 +839,6 @@ config CRYPTO_BLOWFISH_X86_64 | |||
| 839 | See also: | 839 | See also: |
| 840 | <http://www.schneier.com/blowfish.html> | 840 | <http://www.schneier.com/blowfish.html> |
| 841 | 841 | ||
| 842 | config CRYPTO_BLOWFISH_AVX2_X86_64 | ||
| 843 | tristate "Blowfish cipher algorithm (x86_64/AVX2)" | ||
| 844 | depends on X86 && 64BIT | ||
| 845 | select CRYPTO_ALGAPI | ||
| 846 | select CRYPTO_CRYPTD | ||
| 847 | select CRYPTO_ABLK_HELPER_X86 | ||
| 848 | select CRYPTO_BLOWFISH_COMMON | ||
| 849 | select CRYPTO_BLOWFISH_X86_64 | ||
| 850 | help | ||
| 851 | Blowfish cipher algorithm (x86_64/AVX2), by Bruce Schneier. | ||
| 852 | |||
| 853 | This is a variable key length cipher which can use keys from 32 | ||
| 854 | bits to 448 bits in length. It's fast, simple and specifically | ||
| 855 | designed for use on "large microprocessors". | ||
| 856 | |||
| 857 | See also: | ||
| 858 | <http://www.schneier.com/blowfish.html> | ||
| 859 | |||
| 860 | config CRYPTO_CAMELLIA | 842 | config CRYPTO_CAMELLIA |
| 861 | tristate "Camellia cipher algorithms" | 843 | tristate "Camellia cipher algorithms" |
| 862 | depends on CRYPTO | 844 | depends on CRYPTO |
diff --git a/crypto/testmgr.c b/crypto/testmgr.c index f19a392ade78..27f111876523 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c | |||
| @@ -1661,9 +1661,6 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
| 1661 | .test = alg_test_null, | 1661 | .test = alg_test_null, |
| 1662 | .fips_allowed = 1, | 1662 | .fips_allowed = 1, |
| 1663 | }, { | 1663 | }, { |
| 1664 | .alg = "__driver-cbc-blowfish-avx2", | ||
| 1665 | .test = alg_test_null, | ||
| 1666 | }, { | ||
| 1667 | .alg = "__driver-cbc-camellia-aesni", | 1664 | .alg = "__driver-cbc-camellia-aesni", |
| 1668 | .test = alg_test_null, | 1665 | .test = alg_test_null, |
| 1669 | }, { | 1666 | }, { |
| @@ -1695,9 +1692,6 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
| 1695 | .test = alg_test_null, | 1692 | .test = alg_test_null, |
| 1696 | .fips_allowed = 1, | 1693 | .fips_allowed = 1, |
| 1697 | }, { | 1694 | }, { |
| 1698 | .alg = "__driver-ecb-blowfish-avx2", | ||
| 1699 | .test = alg_test_null, | ||
| 1700 | }, { | ||
| 1701 | .alg = "__driver-ecb-camellia-aesni", | 1695 | .alg = "__driver-ecb-camellia-aesni", |
| 1702 | .test = alg_test_null, | 1696 | .test = alg_test_null, |
| 1703 | }, { | 1697 | }, { |
| @@ -1988,9 +1982,6 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
| 1988 | .test = alg_test_null, | 1982 | .test = alg_test_null, |
| 1989 | .fips_allowed = 1, | 1983 | .fips_allowed = 1, |
| 1990 | }, { | 1984 | }, { |
| 1991 | .alg = "cryptd(__driver-cbc-blowfish-avx2)", | ||
| 1992 | .test = alg_test_null, | ||
| 1993 | }, { | ||
| 1994 | .alg = "cryptd(__driver-cbc-camellia-aesni)", | 1985 | .alg = "cryptd(__driver-cbc-camellia-aesni)", |
| 1995 | .test = alg_test_null, | 1986 | .test = alg_test_null, |
| 1996 | }, { | 1987 | }, { |
| @@ -2004,9 +1995,6 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
| 2004 | .test = alg_test_null, | 1995 | .test = alg_test_null, |
| 2005 | .fips_allowed = 1, | 1996 | .fips_allowed = 1, |
| 2006 | }, { | 1997 | }, { |
| 2007 | .alg = "cryptd(__driver-ecb-blowfish-avx2)", | ||
| 2008 | .test = alg_test_null, | ||
| 2009 | }, { | ||
| 2010 | .alg = "cryptd(__driver-ecb-camellia-aesni)", | 1998 | .alg = "cryptd(__driver-ecb-camellia-aesni)", |
| 2011 | .test = alg_test_null, | 1999 | .test = alg_test_null, |
| 2012 | }, { | 2000 | }, { |
