author		Ard Biesheuvel <ard.biesheuvel@linaro.org>	2016-10-11 14:15:18 -0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2017-01-12 05:39:34 -0500
commit		39b7e1c2fdda93d632c38b457f865dcacc8fa01a (patch)
tree		fbb13f81602f0c570b0a6001499c7ca498b32b51 /arch/arm64
parent		d018dc9540f729699f6f96802e67c1dac1a4769d (diff)
crypto: arm64/aes-neon - fix for big endian
commit a2c435cc99862fd3d165e1b66bf48ac72c839c62 upstream.

The AES implementation using pure NEON instructions relies on the
generic AES key schedule generation routines, which store the round
keys as arrays of 32-bit quantities in memory using native endianness.
This means we should refer to these round keys using 4x4 loads rather
than 16x1 loads.

In addition, the ShiftRows tables are loaded using a single scalar
load, which is also affected by endianness, so emit these tables in
the correct order depending on whether we are building for big endian
or not.

Fixes: 49788fe2a128 ("arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
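To make the round-key point concrete, here is a minimal user-space C
sketch (not part of the patch; purely illustrative) of why a 16x1 byte
load of native-endian 32-bit round keys is endian-sensitive, while a
4x4 load is not:

	/*
	 * Illustration only: the generic key schedule stores round keys
	 * as native-endian u32 values, so reinterpreting them as raw
	 * bytes gives different results on LE and BE hosts.
	 */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		uint32_t rk[4] = { 0x03020100, 0x07060504,
				   0x0b0a0908, 0x0f0e0d0c };
		uint8_t bytes[16];

		/* A 16x1 byte load (ld1 {v15.16b}) sees the raw in-memory
		 * byte order. */
		memcpy(bytes, rk, sizeof(bytes));
		for (int i = 0; i < 16; i++)
			printf("%02x ", bytes[i]);
		printf("\n");
		/*
		 * Little endian prints 00 01 02 03 ..., big endian prints
		 * 03 02 01 00 ... - the per-word byte order flips.  A 4x4
		 * load (ld1 {v15.4s}) instead loads four 32-bit elements
		 * in native order, so each u32 lands in the vector
		 * unchanged on either endianness.
		 */
		return 0;
	}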
Diffstat (limited to 'arch/arm64')
-rw-r--r--	arch/arm64/crypto/aes-neon.S	25
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index b93170e1cc93..85f07ead7c5c 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -9,6 +9,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #define AES_ENTRY(func)		ENTRY(neon_ ## func)
 #define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
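(The added <asm/assembler.h> include provides the CPU_LE()/CPU_BE()
assembler macros used for the endian-specific table rows further down.)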
@@ -83,13 +84,13 @@
 	.endm
 
 	.macro		do_block, enc, in, rounds, rk, rkp, i
-	ld1		{v15.16b}, [\rk]
+	ld1		{v15.4s}, [\rk]
 	add		\rkp, \rk, #16
 	mov		\i, \rounds
 1111:	eor		\in\().16b, \in\().16b, v15.16b	/* ^round key */
 	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
 	sub_bytes	\in
-	ld1		{v15.16b}, [\rkp], #16
+	ld1		{v15.4s}, [\rkp], #16
 	subs		\i, \i, #1
 	beq		2222f
 	.if		\enc == 1
@@ -229,7 +230,7 @@
 	.endm
 
 	.macro		do_block_2x, enc, in0, in1 rounds, rk, rkp, i
-	ld1		{v15.16b}, [\rk]
+	ld1		{v15.4s}, [\rk]
 	add		\rkp, \rk, #16
 	mov		\i, \rounds
 1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
@@ -237,7 +238,7 @@
 	sub_bytes_2x	\in0, \in1
 	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
 	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
-	ld1		{v15.16b}, [\rkp], #16
+	ld1		{v15.4s}, [\rkp], #16
 	subs		\i, \i, #1
 	beq		2222f
 	.if		\enc == 1
@@ -254,7 +255,7 @@
 	.endm
 
 	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
-	ld1		{v15.16b}, [\rk]
+	ld1		{v15.4s}, [\rk]
 	add		\rkp, \rk, #16
 	mov		\i, \rounds
 1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
@@ -266,7 +267,7 @@
 	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
 	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
 	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
-	ld1		{v15.16b}, [\rkp], #16
+	ld1		{v15.4s}, [\rkp], #16
 	subs		\i, \i, #1
 	beq		2222f
 	.if		\enc == 1
@@ -306,12 +307,16 @@
 	.text
 	.align		4
 .LForward_ShiftRows:
-	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
-	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+CPU_LE(	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3	)
+CPU_LE(	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb	)
+CPU_BE(	.byte		0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8	)
+CPU_BE(	.byte		0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0	)
 
 .LReverse_ShiftRows:
-	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
-	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+CPU_LE(	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb	)
+CPU_LE(	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3	)
+CPU_BE(	.byte		0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8	)
+CPU_BE(	.byte		0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0	)
 
 .LForward_Sbox:
 	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
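The CPU_BE( ) table rows above are the CPU_LE( ) rows in reverse byte
order, because the table is fetched with one wide scalar load.  A rough
user-space analogue of that effect (hypothetical, illustration only):

	/*
	 * A single wide scalar load interprets memory as one large
	 * integer, so on a big-endian host the bytes land in the
	 * register in the opposite order from a little-endian host.
	 */
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const uint8_t table[8] = { 0x0, 0x5, 0xa, 0xf,
					   0x4, 0x9, 0xe, 0x3 };
		uint64_t reg;

		memcpy(&reg, table, sizeof(reg));	/* one scalar 64-bit load */

		/* Show which table byte ended up in the register's low byte. */
		printf("low byte: %#x\n", (unsigned)(reg & 0xff));
		/*
		 * Little endian: low byte is table[0] (0x0).
		 * Big endian:    low byte is table[7] (0x3).
		 * Emitting the table back to front for CPU_BE( ) compensates,
		 * so the register contents match on both endiannesses.
		 */
		return 0;
	}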