aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm64
diff options
context:
space:
mode:
author: Ard Biesheuvel <ard.biesheuvel@linaro.org> 2015-03-17 14:05:13 -0400
committer: Will Deacon <will.deacon@arm.com> 2015-03-19 06:43:57 -0400
commit: 4a97abd44329bf7b9c57f020224da5f823c9c9ea (patch)
tree: 7c22535e94706459719f71071113c57897de4bad /arch/arm64
parent: b63dbef93f91d56cb4385fdd8d1765201d451136 (diff)
arm64/crypto: issue aese/aesmc instructions in pairs
This changes the AES core transform implementations to issue aese/aesmc
(and aesd/aesimc) in pairs. This enables a micro-architectural optimization
in recent Cortex-A5x cores that improves performance by 50-90%.

Measured performance in cycles per byte (Cortex-A57):

                CBC enc         CBC dec         CTR
  before        3.64            1.34            1.32
  after         1.95            0.85            0.93

Note that this results in a ~5% performance decrease for older cores.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm64')
-rw-r--r--  arch/arm64/crypto/aes-ce-ccm-core.S  12
-rw-r--r--  arch/arm64/crypto/aes-ce.S           10
2 files changed, 9 insertions, 13 deletions
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 432e4841cd81..a2a7fbcacc14 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final)
1010: mov v4.16b, v3.16b 1010: mov v4.16b, v3.16b
1021: ld1 {v5.2d}, [x2], #16 /* load next round key */ 1021: ld1 {v5.2d}, [x2], #16 /* load next round key */
103 aese v0.16b, v4.16b 103 aese v0.16b, v4.16b
104 aese v1.16b, v4.16b
105 aesmc v0.16b, v0.16b 104 aesmc v0.16b, v0.16b
105 aese v1.16b, v4.16b
106 aesmc v1.16b, v1.16b 106 aesmc v1.16b, v1.16b
1072: ld1 {v3.2d}, [x2], #16 /* load next round key */ 1072: ld1 {v3.2d}, [x2], #16 /* load next round key */
108 aese v0.16b, v5.16b 108 aese v0.16b, v5.16b
109 aese v1.16b, v5.16b
110 aesmc v0.16b, v0.16b 109 aesmc v0.16b, v0.16b
110 aese v1.16b, v5.16b
111 aesmc v1.16b, v1.16b 111 aesmc v1.16b, v1.16b
1123: ld1 {v4.2d}, [x2], #16 /* load next round key */ 1123: ld1 {v4.2d}, [x2], #16 /* load next round key */
113 subs w3, w3, #3 113 subs w3, w3, #3
114 aese v0.16b, v3.16b 114 aese v0.16b, v3.16b
115 aese v1.16b, v3.16b
116 aesmc v0.16b, v0.16b 115 aesmc v0.16b, v0.16b
116 aese v1.16b, v3.16b
117 aesmc v1.16b, v1.16b 117 aesmc v1.16b, v1.16b
118 bpl 1b 118 bpl 1b
119 aese v0.16b, v4.16b 119 aese v0.16b, v4.16b
@@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final)
146 ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ 146 ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
1472: /* inner loop: 3 rounds, 2x interleaved */ 1472: /* inner loop: 3 rounds, 2x interleaved */
148 aese v0.16b, v4.16b 148 aese v0.16b, v4.16b
149 aese v1.16b, v4.16b
150 aesmc v0.16b, v0.16b 149 aesmc v0.16b, v0.16b
150 aese v1.16b, v4.16b
151 aesmc v1.16b, v1.16b 151 aesmc v1.16b, v1.16b
1523: ld1 {v3.2d}, [x10], #16 /* load next round key */ 1523: ld1 {v3.2d}, [x10], #16 /* load next round key */
153 aese v0.16b, v5.16b 153 aese v0.16b, v5.16b
154 aese v1.16b, v5.16b
155 aesmc v0.16b, v0.16b 154 aesmc v0.16b, v0.16b
155 aese v1.16b, v5.16b
156 aesmc v1.16b, v1.16b 156 aesmc v1.16b, v1.16b
1574: ld1 {v4.2d}, [x10], #16 /* load next round key */ 1574: ld1 {v4.2d}, [x10], #16 /* load next round key */
158 subs w7, w7, #3 158 subs w7, w7, #3
159 aese v0.16b, v3.16b 159 aese v0.16b, v3.16b
160 aese v1.16b, v3.16b
161 aesmc v0.16b, v0.16b 160 aesmc v0.16b, v0.16b
161 aese v1.16b, v3.16b
162 aesmc v1.16b, v1.16b 162 aesmc v1.16b, v1.16b
163 ld1 {v5.2d}, [x10], #16 /* load next round key */ 163 ld1 {v5.2d}, [x10], #16 /* load next round key */
164 bpl 2b 164 bpl 2b
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 685a18f731eb..78f3cfe92c08 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -45,18 +45,14 @@
45 45
46 .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 46 .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
47 aes\de \i0\().16b, \k\().16b 47 aes\de \i0\().16b, \k\().16b
48 .ifnb \i1
49 aes\de \i1\().16b, \k\().16b
50 .ifnb \i3
51 aes\de \i2\().16b, \k\().16b
52 aes\de \i3\().16b, \k\().16b
53 .endif
54 .endif
55 aes\mc \i0\().16b, \i0\().16b 48 aes\mc \i0\().16b, \i0\().16b
56 .ifnb \i1 49 .ifnb \i1
50 aes\de \i1\().16b, \k\().16b
57 aes\mc \i1\().16b, \i1\().16b 51 aes\mc \i1\().16b, \i1\().16b
58 .ifnb \i3 52 .ifnb \i3
53 aes\de \i2\().16b, \k\().16b
59 aes\mc \i2\().16b, \i2\().16b 54 aes\mc \i2\().16b, \i2\().16b
55 aes\de \i3\().16b, \k\().16b
60 aes\mc \i3\().16b, \i3\().16b 56 aes\mc \i3\().16b, \i3\().16b
61 .endif 57 .endif
62 .endif 58 .endif