diff options
| -rw-r--r-- | arch/x86/crypto/camellia-aesni-avx-asm_64.S | 38 | ||||
| -rw-r--r-- | arch/x86/crypto/camellia-x86_64-asm_64.S | 50 |
2 files changed, 36 insertions, 52 deletions
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 2306d2e4816f..cfc163469c71 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S | |||
| @@ -15,6 +15,8 @@ | |||
| 15 | * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz | 15 | * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz |
| 16 | */ | 16 | */ |
| 17 | 17 | ||
| 18 | #include <linux/linkage.h> | ||
| 19 | |||
| 18 | #define CAMELLIA_TABLE_BYTE_LEN 272 | 20 | #define CAMELLIA_TABLE_BYTE_LEN 272 |
| 19 | 21 | ||
| 20 | /* struct camellia_ctx: */ | 22 | /* struct camellia_ctx: */ |
| @@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: | |||
| 190 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, | 192 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, |
| 191 | %rcx, (%r9)); | 193 | %rcx, (%r9)); |
| 192 | ret; | 194 | ret; |
| 195 | ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) | ||
| 193 | 196 | ||
| 194 | .align 8 | 197 | .align 8 |
| 195 | roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: | 198 | roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: |
| @@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: | |||
| 197 | %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, | 200 | %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, |
| 198 | %rax, (%r9)); | 201 | %rax, (%r9)); |
| 199 | ret; | 202 | ret; |
| 203 | ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | ||
| 200 | 204 | ||
| 201 | /* | 205 | /* |
| 202 | * IN/OUT: | 206 | * IN/OUT: |
| @@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: | |||
| 709 | .text | 713 | .text |
| 710 | 714 | ||
| 711 | .align 8 | 715 | .align 8 |
| 712 | .type __camellia_enc_blk16,@function; | ||
| 713 | |||
| 714 | __camellia_enc_blk16: | 716 | __camellia_enc_blk16: |
| 715 | /* input: | 717 | /* input: |
| 716 | * %rdi: ctx, CTX | 718 | * %rdi: ctx, CTX |
| @@ -793,10 +795,9 @@ __camellia_enc_blk16: | |||
| 793 | %xmm15, %rax, %rcx, 24); | 795 | %xmm15, %rax, %rcx, 24); |
| 794 | 796 | ||
| 795 | jmp .Lenc_done; | 797 | jmp .Lenc_done; |
| 798 | ENDPROC(__camellia_enc_blk16) | ||
| 796 | 799 | ||
| 797 | .align 8 | 800 | .align 8 |
| 798 | .type __camellia_dec_blk16,@function; | ||
| 799 | |||
| 800 | __camellia_dec_blk16: | 801 | __camellia_dec_blk16: |
| 801 | /* input: | 802 | /* input: |
| 802 | * %rdi: ctx, CTX | 803 | * %rdi: ctx, CTX |
| @@ -877,12 +878,9 @@ __camellia_dec_blk16: | |||
| 877 | ((key_table + (24) * 8) + 4)(CTX)); | 878 | ((key_table + (24) * 8) + 4)(CTX)); |
| 878 | 879 | ||
| 879 | jmp .Ldec_max24; | 880 | jmp .Ldec_max24; |
| 881 | ENDPROC(__camellia_dec_blk16) | ||
| 880 | 882 | ||
| 881 | .align 8 | 883 | ENTRY(camellia_ecb_enc_16way) |
| 882 | .global camellia_ecb_enc_16way | ||
| 883 | .type camellia_ecb_enc_16way,@function; | ||
| 884 | |||
| 885 | camellia_ecb_enc_16way: | ||
| 886 | /* input: | 884 | /* input: |
| 887 | * %rdi: ctx, CTX | 885 | * %rdi: ctx, CTX |
| 888 | * %rsi: dst (16 blocks) | 886 | * %rsi: dst (16 blocks) |
| @@ -903,12 +901,9 @@ camellia_ecb_enc_16way: | |||
| 903 | %xmm8, %rsi); | 901 | %xmm8, %rsi); |
| 904 | 902 | ||
| 905 | ret; | 903 | ret; |
| 904 | ENDPROC(camellia_ecb_enc_16way) | ||
| 906 | 905 | ||
| 907 | .align 8 | 906 | ENTRY(camellia_ecb_dec_16way) |
| 908 | .global camellia_ecb_dec_16way | ||
| 909 | .type camellia_ecb_dec_16way,@function; | ||
| 910 | |||
| 911 | camellia_ecb_dec_16way: | ||
| 912 | /* input: | 907 | /* input: |
| 913 | * %rdi: ctx, CTX | 908 | * %rdi: ctx, CTX |
| 914 | * %rsi: dst (16 blocks) | 909 | * %rsi: dst (16 blocks) |
| @@ -934,12 +929,9 @@ camellia_ecb_dec_16way: | |||
| 934 | %xmm8, %rsi); | 929 | %xmm8, %rsi); |
| 935 | 930 | ||
| 936 | ret; | 931 | ret; |
| 932 | ENDPROC(camellia_ecb_dec_16way) | ||
| 937 | 933 | ||
| 938 | .align 8 | 934 | ENTRY(camellia_cbc_dec_16way) |
| 939 | .global camellia_cbc_dec_16way | ||
| 940 | .type camellia_cbc_dec_16way,@function; | ||
| 941 | |||
| 942 | camellia_cbc_dec_16way: | ||
| 943 | /* input: | 935 | /* input: |
| 944 | * %rdi: ctx, CTX | 936 | * %rdi: ctx, CTX |
| 945 | * %rsi: dst (16 blocks) | 937 | * %rsi: dst (16 blocks) |
| @@ -986,6 +978,7 @@ camellia_cbc_dec_16way: | |||
| 986 | %xmm8, %rsi); | 978 | %xmm8, %rsi); |
| 987 | 979 | ||
| 988 | ret; | 980 | ret; |
| 981 | ENDPROC(camellia_cbc_dec_16way) | ||
| 989 | 982 | ||
| 990 | #define inc_le128(x, minus_one, tmp) \ | 983 | #define inc_le128(x, minus_one, tmp) \ |
| 991 | vpcmpeqq minus_one, x, tmp; \ | 984 | vpcmpeqq minus_one, x, tmp; \ |
| @@ -993,11 +986,7 @@ camellia_cbc_dec_16way: | |||
| 993 | vpslldq $8, tmp, tmp; \ | 986 | vpslldq $8, tmp, tmp; \ |
| 994 | vpsubq tmp, x, x; | 987 | vpsubq tmp, x, x; |
| 995 | 988 | ||
| 996 | .align 8 | 989 | ENTRY(camellia_ctr_16way) |
| 997 | .global camellia_ctr_16way | ||
| 998 | .type camellia_ctr_16way,@function; | ||
| 999 | |||
| 1000 | camellia_ctr_16way: | ||
| 1001 | /* input: | 990 | /* input: |
| 1002 | * %rdi: ctx, CTX | 991 | * %rdi: ctx, CTX |
| 1003 | * %rsi: dst (16 blocks) | 992 | * %rsi: dst (16 blocks) |
| @@ -1100,3 +1089,4 @@ camellia_ctr_16way: | |||
| 1100 | %xmm8, %rsi); | 1089 | %xmm8, %rsi); |
| 1101 | 1090 | ||
| 1102 | ret; | 1091 | ret; |
| 1092 | ENDPROC(camellia_ctr_16way) | ||
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 0b3374335fdc..310319c601ed 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S | |||
| @@ -20,6 +20,8 @@ | |||
| 20 | * | 20 | * |
| 21 | */ | 21 | */ |
| 22 | 22 | ||
| 23 | #include <linux/linkage.h> | ||
| 24 | |||
| 23 | .file "camellia-x86_64-asm_64.S" | 25 | .file "camellia-x86_64-asm_64.S" |
| 24 | .text | 26 | .text |
| 25 | 27 | ||
| @@ -188,10 +190,7 @@ | |||
| 188 | bswapq RAB0; \ | 190 | bswapq RAB0; \ |
| 189 | movq RAB0, 4*2(RIO); | 191 | movq RAB0, 4*2(RIO); |
| 190 | 192 | ||
| 191 | .global __camellia_enc_blk; | 193 | ENTRY(__camellia_enc_blk) |
| 192 | .type __camellia_enc_blk,@function; | ||
| 193 | |||
| 194 | __camellia_enc_blk: | ||
| 195 | /* input: | 194 | /* input: |
| 196 | * %rdi: ctx, CTX | 195 | * %rdi: ctx, CTX |
| 197 | * %rsi: dst | 196 | * %rsi: dst |
| @@ -214,33 +213,31 @@ __camellia_enc_blk: | |||
| 214 | movl $24, RT1d; /* max */ | 213 | movl $24, RT1d; /* max */ |
| 215 | 214 | ||
| 216 | cmpb $16, key_length(CTX); | 215 | cmpb $16, key_length(CTX); |
| 217 | je __enc_done; | 216 | je .L__enc_done; |
| 218 | 217 | ||
| 219 | enc_fls(24); | 218 | enc_fls(24); |
| 220 | enc_rounds(24); | 219 | enc_rounds(24); |
| 221 | movl $32, RT1d; /* max */ | 220 | movl $32, RT1d; /* max */ |
| 222 | 221 | ||
| 223 | __enc_done: | 222 | .L__enc_done: |
| 224 | testb RXORbl, RXORbl; | 223 | testb RXORbl, RXORbl; |
| 225 | movq RDST, RIO; | 224 | movq RDST, RIO; |
| 226 | 225 | ||
| 227 | jnz __enc_xor; | 226 | jnz .L__enc_xor; |
| 228 | 227 | ||
| 229 | enc_outunpack(mov, RT1); | 228 | enc_outunpack(mov, RT1); |
| 230 | 229 | ||
| 231 | movq RRBP, %rbp; | 230 | movq RRBP, %rbp; |
| 232 | ret; | 231 | ret; |
| 233 | 232 | ||
| 234 | __enc_xor: | 233 | .L__enc_xor: |
| 235 | enc_outunpack(xor, RT1); | 234 | enc_outunpack(xor, RT1); |
| 236 | 235 | ||
| 237 | movq RRBP, %rbp; | 236 | movq RRBP, %rbp; |
| 238 | ret; | 237 | ret; |
| 238 | ENDPROC(__camellia_enc_blk) | ||
| 239 | 239 | ||
| 240 | .global camellia_dec_blk; | 240 | ENTRY(camellia_dec_blk) |
| 241 | .type camellia_dec_blk,@function; | ||
| 242 | |||
| 243 | camellia_dec_blk: | ||
| 244 | /* input: | 241 | /* input: |
| 245 | * %rdi: ctx, CTX | 242 | * %rdi: ctx, CTX |
| 246 | * %rsi: dst | 243 | * %rsi: dst |
| @@ -258,12 +255,12 @@ camellia_dec_blk: | |||
| 258 | dec_inpack(RT2); | 255 | dec_inpack(RT2); |
| 259 | 256 | ||
| 260 | cmpb $24, RT2bl; | 257 | cmpb $24, RT2bl; |
| 261 | je __dec_rounds16; | 258 | je .L__dec_rounds16; |
| 262 | 259 | ||
| 263 | dec_rounds(24); | 260 | dec_rounds(24); |
| 264 | dec_fls(24); | 261 | dec_fls(24); |
| 265 | 262 | ||
| 266 | __dec_rounds16: | 263 | .L__dec_rounds16: |
| 267 | dec_rounds(16); | 264 | dec_rounds(16); |
| 268 | dec_fls(16); | 265 | dec_fls(16); |
| 269 | dec_rounds(8); | 266 | dec_rounds(8); |
| @@ -276,6 +273,7 @@ __dec_rounds16: | |||
| 276 | 273 | ||
| 277 | movq RRBP, %rbp; | 274 | movq RRBP, %rbp; |
| 278 | ret; | 275 | ret; |
| 276 | ENDPROC(camellia_dec_blk) | ||
| 279 | 277 | ||
| 280 | /********************************************************************** | 278 | /********************************************************************** |
| 281 | 2-way camellia | 279 | 2-way camellia |
| @@ -426,10 +424,7 @@ __dec_rounds16: | |||
| 426 | bswapq RAB1; \ | 424 | bswapq RAB1; \ |
| 427 | movq RAB1, 12*2(RIO); | 425 | movq RAB1, 12*2(RIO); |
| 428 | 426 | ||
| 429 | .global __camellia_enc_blk_2way; | 427 | ENTRY(__camellia_enc_blk_2way) |
| 430 | .type __camellia_enc_blk_2way,@function; | ||
| 431 | |||
| 432 | __camellia_enc_blk_2way: | ||
| 433 | /* input: | 428 | /* input: |
| 434 | * %rdi: ctx, CTX | 429 | * %rdi: ctx, CTX |
| 435 | * %rsi: dst | 430 | * %rsi: dst |
| @@ -453,16 +448,16 @@ __camellia_enc_blk_2way: | |||
| 453 | movl $24, RT2d; /* max */ | 448 | movl $24, RT2d; /* max */ |
| 454 | 449 | ||
| 455 | cmpb $16, key_length(CTX); | 450 | cmpb $16, key_length(CTX); |
| 456 | je __enc2_done; | 451 | je .L__enc2_done; |
| 457 | 452 | ||
| 458 | enc_fls2(24); | 453 | enc_fls2(24); |
| 459 | enc_rounds2(24); | 454 | enc_rounds2(24); |
| 460 | movl $32, RT2d; /* max */ | 455 | movl $32, RT2d; /* max */ |
| 461 | 456 | ||
| 462 | __enc2_done: | 457 | .L__enc2_done: |
| 463 | test RXORbl, RXORbl; | 458 | test RXORbl, RXORbl; |
| 464 | movq RDST, RIO; | 459 | movq RDST, RIO; |
| 465 | jnz __enc2_xor; | 460 | jnz .L__enc2_xor; |
| 466 | 461 | ||
| 467 | enc_outunpack2(mov, RT2); | 462 | enc_outunpack2(mov, RT2); |
| 468 | 463 | ||
| @@ -470,17 +465,15 @@ __enc2_done: | |||
| 470 | popq %rbx; | 465 | popq %rbx; |
| 471 | ret; | 466 | ret; |
| 472 | 467 | ||
| 473 | __enc2_xor: | 468 | .L__enc2_xor: |
| 474 | enc_outunpack2(xor, RT2); | 469 | enc_outunpack2(xor, RT2); |
| 475 | 470 | ||
| 476 | movq RRBP, %rbp; | 471 | movq RRBP, %rbp; |
| 477 | popq %rbx; | 472 | popq %rbx; |
| 478 | ret; | 473 | ret; |
| 474 | ENDPROC(__camellia_enc_blk_2way) | ||
| 479 | 475 | ||
| 480 | .global camellia_dec_blk_2way; | 476 | ENTRY(camellia_dec_blk_2way) |
| 481 | .type camellia_dec_blk_2way,@function; | ||
| 482 | |||
| 483 | camellia_dec_blk_2way: | ||
| 484 | /* input: | 477 | /* input: |
| 485 | * %rdi: ctx, CTX | 478 | * %rdi: ctx, CTX |
| 486 | * %rsi: dst | 479 | * %rsi: dst |
| @@ -499,12 +492,12 @@ camellia_dec_blk_2way: | |||
| 499 | dec_inpack2(RT2); | 492 | dec_inpack2(RT2); |
| 500 | 493 | ||
| 501 | cmpb $24, RT2bl; | 494 | cmpb $24, RT2bl; |
| 502 | je __dec2_rounds16; | 495 | je .L__dec2_rounds16; |
| 503 | 496 | ||
| 504 | dec_rounds2(24); | 497 | dec_rounds2(24); |
| 505 | dec_fls2(24); | 498 | dec_fls2(24); |
| 506 | 499 | ||
| 507 | __dec2_rounds16: | 500 | .L__dec2_rounds16: |
| 508 | dec_rounds2(16); | 501 | dec_rounds2(16); |
| 509 | dec_fls2(16); | 502 | dec_fls2(16); |
| 510 | dec_rounds2(8); | 503 | dec_rounds2(8); |
| @@ -518,3 +511,4 @@ __dec2_rounds16: | |||
| 518 | movq RRBP, %rbp; | 511 | movq RRBP, %rbp; |
| 519 | movq RXOR, %rbx; | 512 | movq RXOR, %rbx; |
| 520 | ret; | 513 | ret; |
| 514 | ENDPROC(camellia_dec_blk_2way) | ||
