diff options
| -rw-r--r-- | arch/arm64/crypto/chacha-neon-core.S | 20 |
1 file changed, 18 insertions, 2 deletions
diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
index 021bb9e9784b..706c4e10e9e2 100644
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
| |||
| @@ -158,8 +158,8 @@ ENTRY(hchacha_block_neon) | |||
| 158 | mov w3, w2 | 158 | mov w3, w2 |
| 159 | bl chacha_permute | 159 | bl chacha_permute |
| 160 | 160 | ||
| 161 | st1 {v0.16b}, [x1], #16 | 161 | st1 {v0.4s}, [x1], #16 |
| 162 | st1 {v3.16b}, [x1] | 162 | st1 {v3.4s}, [x1] |
| 163 | 163 | ||
| 164 | ldp x29, x30, [sp], #16 | 164 | ldp x29, x30, [sp], #16 |
| 165 | ret | 165 | ret |
| @@ -532,6 +532,10 @@ ENTRY(chacha_4block_xor_neon) | |||
| 532 | add v3.4s, v3.4s, v19.4s | 532 | add v3.4s, v3.4s, v19.4s |
| 533 | add a2, a2, w8 | 533 | add a2, a2, w8 |
| 534 | add a3, a3, w9 | 534 | add a3, a3, w9 |
| 535 | CPU_BE( rev a0, a0 ) | ||
| 536 | CPU_BE( rev a1, a1 ) | ||
| 537 | CPU_BE( rev a2, a2 ) | ||
| 538 | CPU_BE( rev a3, a3 ) | ||
| 535 | 539 | ||
| 536 | ld4r {v24.4s-v27.4s}, [x0], #16 | 540 | ld4r {v24.4s-v27.4s}, [x0], #16 |
| 537 | ld4r {v28.4s-v31.4s}, [x0] | 541 | ld4r {v28.4s-v31.4s}, [x0] |
| @@ -552,6 +556,10 @@ ENTRY(chacha_4block_xor_neon) | |||
| 552 | add v7.4s, v7.4s, v23.4s | 556 | add v7.4s, v7.4s, v23.4s |
| 553 | add a6, a6, w8 | 557 | add a6, a6, w8 |
| 554 | add a7, a7, w9 | 558 | add a7, a7, w9 |
| 559 | CPU_BE( rev a4, a4 ) | ||
| 560 | CPU_BE( rev a5, a5 ) | ||
| 561 | CPU_BE( rev a6, a6 ) | ||
| 562 | CPU_BE( rev a7, a7 ) | ||
| 555 | 563 | ||
| 556 | // x8[0-3] += s2[0] | 564 | // x8[0-3] += s2[0] |
| 557 | // x9[0-3] += s2[1] | 565 | // x9[0-3] += s2[1] |
| @@ -569,6 +577,10 @@ ENTRY(chacha_4block_xor_neon) | |||
| 569 | add v11.4s, v11.4s, v27.4s | 577 | add v11.4s, v11.4s, v27.4s |
| 570 | add a10, a10, w8 | 578 | add a10, a10, w8 |
| 571 | add a11, a11, w9 | 579 | add a11, a11, w9 |
| 580 | CPU_BE( rev a8, a8 ) | ||
| 581 | CPU_BE( rev a9, a9 ) | ||
| 582 | CPU_BE( rev a10, a10 ) | ||
| 583 | CPU_BE( rev a11, a11 ) | ||
| 572 | 584 | ||
| 573 | // x12[0-3] += s3[0] | 585 | // x12[0-3] += s3[0] |
| 574 | // x13[0-3] += s3[1] | 586 | // x13[0-3] += s3[1] |
| @@ -586,6 +598,10 @@ ENTRY(chacha_4block_xor_neon) | |||
| 586 | add v15.4s, v15.4s, v31.4s | 598 | add v15.4s, v15.4s, v31.4s |
| 587 | add a14, a14, w8 | 599 | add a14, a14, w8 |
| 588 | add a15, a15, w9 | 600 | add a15, a15, w9 |
| 601 | CPU_BE( rev a12, a12 ) | ||
| 602 | CPU_BE( rev a13, a13 ) | ||
| 603 | CPU_BE( rev a14, a14 ) | ||
| 604 | CPU_BE( rev a15, a15 ) | ||
| 589 | 605 | ||
| 590 | // interleave 32-bit words in state n, n+1 | 606 | // interleave 32-bit words in state n, n+1 |
| 591 | ldp w6, w7, [x2], #64 | 607 | ldp w6, w7, [x2], #64 |
