diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2015-03-17 14:05:13 -0400 |
---|---|---|
committer | Will Deacon <will.deacon@arm.com> | 2015-03-19 06:43:57 -0400 |
commit | 4a97abd44329bf7b9c57f020224da5f823c9c9ea (patch) | |
tree | 7c22535e94706459719f71071113c57897de4bad /arch/arm64 | |
parent | b63dbef93f91d56cb4385fdd8d1765201d451136 (diff) |
arm64/crypto: issue aese/aesmc instructions in pairs
This changes the AES core transform implementations to issue aese/aesmc
(and aesd/aesimc) in pairs. This enables a micro-architectural optimization
in recent Cortex-A5x cores that improves performance by 50-90%.
Measured performance in cycles per byte (Cortex-A57):

|        | CBC enc | CBC dec | CTR  |
|--------|---------|---------|------|
| before | 3.64    | 1.34    | 1.32 |
| after  | 1.95    | 0.85    | 0.93 |

Note that this results in a ~5% performance decrease for older cores.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/crypto/aes-ce-ccm-core.S | 12 | ||||
-rw-r--r-- | arch/arm64/crypto/aes-ce.S | 10 |
2 files changed, 9 insertions, 13 deletions
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 432e4841cd81..a2a7fbcacc14 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S | |||
@@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final) | |||
101 | 0: mov v4.16b, v3.16b | 101 | 0: mov v4.16b, v3.16b |
102 | 1: ld1 {v5.2d}, [x2], #16 /* load next round key */ | 102 | 1: ld1 {v5.2d}, [x2], #16 /* load next round key */ |
103 | aese v0.16b, v4.16b | 103 | aese v0.16b, v4.16b |
104 | aese v1.16b, v4.16b | ||
105 | aesmc v0.16b, v0.16b | 104 | aesmc v0.16b, v0.16b |
105 | aese v1.16b, v4.16b | ||
106 | aesmc v1.16b, v1.16b | 106 | aesmc v1.16b, v1.16b |
107 | 2: ld1 {v3.2d}, [x2], #16 /* load next round key */ | 107 | 2: ld1 {v3.2d}, [x2], #16 /* load next round key */ |
108 | aese v0.16b, v5.16b | 108 | aese v0.16b, v5.16b |
109 | aese v1.16b, v5.16b | ||
110 | aesmc v0.16b, v0.16b | 109 | aesmc v0.16b, v0.16b |
110 | aese v1.16b, v5.16b | ||
111 | aesmc v1.16b, v1.16b | 111 | aesmc v1.16b, v1.16b |
112 | 3: ld1 {v4.2d}, [x2], #16 /* load next round key */ | 112 | 3: ld1 {v4.2d}, [x2], #16 /* load next round key */ |
113 | subs w3, w3, #3 | 113 | subs w3, w3, #3 |
114 | aese v0.16b, v3.16b | 114 | aese v0.16b, v3.16b |
115 | aese v1.16b, v3.16b | ||
116 | aesmc v0.16b, v0.16b | 115 | aesmc v0.16b, v0.16b |
116 | aese v1.16b, v3.16b | ||
117 | aesmc v1.16b, v1.16b | 117 | aesmc v1.16b, v1.16b |
118 | bpl 1b | 118 | bpl 1b |
119 | aese v0.16b, v4.16b | 119 | aese v0.16b, v4.16b |
@@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final) | |||
146 | ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ | 146 | ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ |
147 | 2: /* inner loop: 3 rounds, 2x interleaved */ | 147 | 2: /* inner loop: 3 rounds, 2x interleaved */ |
148 | aese v0.16b, v4.16b | 148 | aese v0.16b, v4.16b |
149 | aese v1.16b, v4.16b | ||
150 | aesmc v0.16b, v0.16b | 149 | aesmc v0.16b, v0.16b |
150 | aese v1.16b, v4.16b | ||
151 | aesmc v1.16b, v1.16b | 151 | aesmc v1.16b, v1.16b |
152 | 3: ld1 {v3.2d}, [x10], #16 /* load next round key */ | 152 | 3: ld1 {v3.2d}, [x10], #16 /* load next round key */ |
153 | aese v0.16b, v5.16b | 153 | aese v0.16b, v5.16b |
154 | aese v1.16b, v5.16b | ||
155 | aesmc v0.16b, v0.16b | 154 | aesmc v0.16b, v0.16b |
155 | aese v1.16b, v5.16b | ||
156 | aesmc v1.16b, v1.16b | 156 | aesmc v1.16b, v1.16b |
157 | 4: ld1 {v4.2d}, [x10], #16 /* load next round key */ | 157 | 4: ld1 {v4.2d}, [x10], #16 /* load next round key */ |
158 | subs w7, w7, #3 | 158 | subs w7, w7, #3 |
159 | aese v0.16b, v3.16b | 159 | aese v0.16b, v3.16b |
160 | aese v1.16b, v3.16b | ||
161 | aesmc v0.16b, v0.16b | 160 | aesmc v0.16b, v0.16b |
161 | aese v1.16b, v3.16b | ||
162 | aesmc v1.16b, v1.16b | 162 | aesmc v1.16b, v1.16b |
163 | ld1 {v5.2d}, [x10], #16 /* load next round key */ | 163 | ld1 {v5.2d}, [x10], #16 /* load next round key */ |
164 | bpl 2b | 164 | bpl 2b |
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 685a18f731eb..78f3cfe92c08 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S | |||
@@ -45,18 +45,14 @@ | |||
45 | 45 | ||
46 | .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 | 46 | .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 |
47 | aes\de \i0\().16b, \k\().16b | 47 | aes\de \i0\().16b, \k\().16b |
48 | .ifnb \i1 | ||
49 | aes\de \i1\().16b, \k\().16b | ||
50 | .ifnb \i3 | ||
51 | aes\de \i2\().16b, \k\().16b | ||
52 | aes\de \i3\().16b, \k\().16b | ||
53 | .endif | ||
54 | .endif | ||
55 | aes\mc \i0\().16b, \i0\().16b | 48 | aes\mc \i0\().16b, \i0\().16b |
56 | .ifnb \i1 | 49 | .ifnb \i1 |
50 | aes\de \i1\().16b, \k\().16b | ||
57 | aes\mc \i1\().16b, \i1\().16b | 51 | aes\mc \i1\().16b, \i1\().16b |
58 | .ifnb \i3 | 52 | .ifnb \i3 |
53 | aes\de \i2\().16b, \k\().16b | ||
59 | aes\mc \i2\().16b, \i2\().16b | 54 | aes\mc \i2\().16b, \i2\().16b |
55 | aes\de \i3\().16b, \k\().16b | ||
60 | aes\mc \i3\().16b, \i3\().16b | 56 | aes\mc \i3\().16b, \i3\().16b |
61 | .endif | 57 | .endif |
62 | .endif | 58 | .endif |