author		Josh Poimboeuf <jpoimboe@redhat.com>	2017-09-18 15:42:02 -0400
committer	Herbert Xu <herbert@gondor.apana.org.au>	2017-09-20 05:42:32 -0400
commit		4b15606664a2f8d7c4f0092fb0305fe1c7c65b7b
tree		207a3d51441bf2376b8f084f154db23adfc98847
parent		b46c9d717645529417ca9045cfdbf59f84922573
crypto: x86/cast5 - Fix RBP usage
Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R15 instead of RBP.  R15 can't be used as the RID1 register because
of x86 instruction encoding limitations.  So use R15 for CTX and RDI for
RID1.  This means that CTX is no longer an implicit function argument.
Instead it needs to be explicitly copied from RDI.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
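The "encoding limitations" above are a general x86-64 rule: the legacy
high-byte registers (%ah, %bh, %ch, %dh) cannot appear in any instruction
that carries a REX prefix, and the extended registers %r8..%r15 always
require one.  RID1 is presumably loaded via movzbl from high-byte sources
in this file's S-box lookup macros, so it has to live in a REX-free
register such as %rdi.  A minimal illustration (not part of the patch):

	movzbl	%bh, %edi	/* fine: encodable without a REX prefix */
	movzbl	%bh, %r15d	/* rejected by the assembler: %r15d forces
				 * a REX prefix, which makes %bh
				 * unaddressable */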
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 47 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 17 deletions(-)
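Every hunk below follows the same shape: save the callee-saved %r15 on
entry, copy the context pointer out of %rdi (freeing %rdi to serve as
RID1), and restore %r15 on exit.  A condensed sketch of that pattern,
with a hypothetical label standing in for the real entry points:

ENTRY(example_16way)
	FRAME_BEGIN
	pushq %r15;		/* %r15 is callee-saved, so preserve it */

	movq %rdi, CTX;		/* CTX is now %r15; %rdi is free for RID1 */
	/* ... encryption/decryption body may clobber %rdi ... */

	popq %r15;		/* restore before returning */
	FRAME_END
	ret;
ENDPROC(example_16way)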
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b4a8806234ea..86107c961bb4 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
 /**********************************************************************
   16-way AVX cast5
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 
 #define RL1 %xmm0
 #define RR1 %xmm1
@@ -70,8 +70,8 @@
 
 #define RTMP %xmm15
 
-#define RID1  %rbp
-#define RID1d %ebp
+#define RID1  %rdi
+#define RID1d %edi
 #define RID2  %rsi
 #define RID2d %esi
 
@@ -226,7 +226,7 @@
 .align 16
 __cast5_enc_blk16:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RL1: blocks 1 and 2
 	 *	RR1: blocks 3 and 4
 	 *	RL2: blocks 5 and 6
@@ -246,9 +246,11 @@ __cast5_enc_blk16:
 	 *	RR4: encrypted blocks 15 and 16
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -283,7 +285,7 @@ __cast5_enc_blk16:
 
 .L__skip_enc:
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16)
 .align 16
 __cast5_dec_blk16:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RL1: encrypted blocks 1 and 2
 	 *	RR1: encrypted blocks 3 and 4
 	 *	RL2: encrypted blocks 5 and 6
@@ -318,9 +320,11 @@ __cast5_dec_blk16:
 	 *	RR4: decrypted blocks 15 and 16
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -356,7 +360,7 @@ __cast5_dec_blk16:
 
 	vmovdqa .Lbswap_mask, RKM;
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
 	outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16)
 
 ENTRY(cast5_ecb_enc_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_enc_16way)
 
 ENTRY(cast5_ecb_dec_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 
 	FRAME_BEGIN
+	pushq %r15;
+
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_dec_16way)
 
 ENTRY(cast5_cbc_dec_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast5_cbc_dec_16way)
 
 ENTRY(cast5_ctr_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 *	%rcx: iv (big endian, 64bit)
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast5_ctr_16way)
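For contrast, the frame-pointer convention the old code was breaking:
with CONFIG_FRAME_POINTER, each function keeps %rbp pointing at a
saved-%rbp/return-address pair so an unwinder can walk the stack from
any interrupt point.  A minimal sketch of a conforming prologue and
epilogue (illustrative, not from the kernel):

example_func:
	pushq %rbp		/* save caller's frame pointer */
	movq %rsp, %rbp		/* (%rbp) -> saved rbp, 8(%rbp) -> return addr */
	/* ... body must leave %rbp alone, unlike the old cast5 code ... */
	popq %rbp		/* restore caller's frame */
	ret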