author	Ard Biesheuvel <ard.biesheuvel@linaro.org>	2019-08-19 10:15:00 -0400
committer	Herbert Xu <herbert@gondor.apana.org.au>	2019-08-30 04:05:27 -0400
commit	389139b34f407da7c09bc26c4d943f52742a6d42 (patch)
tree	9a9f96abb48b42e1640aae289448f6bd70d0d6d8 /crypto/aegis128-neon-inner.c
parent	309b77e0f8bfa3126ff12949173e6d45801a968b (diff)
crypto: arm64/aegis128 - use explicit vector load for permute vectors
When building the new aegis128 NEON code in big endian mode, Clang
complains about the const uint8x16_t permute vectors in the following
way:

  crypto/aegis128-neon-inner.c:58:40: warning: vector initializers are
  not compatible with NEON intrinsics in big endian mode
  [-Wnonportable-vector-initialization]
  static const uint8x16_t shift_rows = {
                                        ^
  crypto/aegis128-neon-inner.c:58:40: note: consider using vld1q_u8()
  to initialize a vector from memory, or vcombine_u8(vcreate_u8(),
  vcreate_u8()) to initialize from integer constants

Since the same issue applies to the uint8x16x4_t loads of the AES Sbox,
update those references as well. However, since GCC does not implement
the vld1q_u8_x4() intrinsic, switch from IS_ENABLED() to a preprocessor
conditional to conditionally include this code.

Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
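[Editor's note] For context, a minimal standalone sketch (not part of the
patch; the byte values are the shift_rows table from the diff below, and
the shift_rows_permute() wrapper name is ours) contrasting the two
initialization styles the warning is about. A const uint8x16_t literal
fills vector lanes directly, and that lane mapping is what Clang flags as
non-portable under big endian; keeping the table as plain uint8_t bytes
and loading it with vld1q_u8() is well defined either way:

  #include <arm_neon.h>

  /* Non-portable: a vector-literal initializer is what Clang rejects
   * in big endian mode (-Wnonportable-vector-initialization). */
  static const uint8x16_t shift_rows_literal = {
  	0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
  	0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
  };

  /* Portable: store the permutation as plain bytes in memory ... */
  static const uint8_t shift_rows[] = {
  	0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
  	0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
  };

  /* ... and load them explicitly; vld1q_u8() has the same byte-to-lane
   * semantics on little and big endian targets. */
  static inline uint8x16_t shift_rows_permute(uint8x16_t w)
  {
  	return vqtbl1q_u8(w, vld1q_u8(shift_rows));
  }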
Diffstat (limited to 'crypto/aegis128-neon-inner.c')
 crypto/aegis128-neon-inner.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index ed55568afd1b..f05310ca22aa 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -26,7 +26,7 @@ struct aegis128_state {
 	uint8x16_t v[5];
 };
 
-extern const uint8x16x4_t crypto_aes_sbox[];
+extern const uint8_t crypto_aes_sbox[];
 
 static struct aegis128_state aegis128_load_state_neon(const void *state)
 {
@@ -55,39 +55,39 @@ uint8x16_t aegis_aes_round(uint8x16_t w)
 
 #ifdef CONFIG_ARM64
 	if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
-		static const uint8x16_t shift_rows = {
+		static const uint8_t shift_rows[] = {
 			0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
 			0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
 		};
-		static const uint8x16_t ror32by8 = {
+		static const uint8_t ror32by8[] = {
 			0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
 			0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
 		};
 		uint8x16_t v;
 
 		// shift rows
-		w = vqtbl1q_u8(w, shift_rows);
+		w = vqtbl1q_u8(w, vld1q_u8(shift_rows));
 
 		// sub bytes
-		if (!IS_ENABLED(CONFIG_CC_IS_GCC)) {
-			v = vqtbl4q_u8(crypto_aes_sbox[0], w);
-			v = vqtbx4q_u8(v, crypto_aes_sbox[1], w - 0x40);
-			v = vqtbx4q_u8(v, crypto_aes_sbox[2], w - 0x80);
-			v = vqtbx4q_u8(v, crypto_aes_sbox[3], w - 0xc0);
-		} else {
-			asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
-			w -= 0x40;
-			asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
-			w -= 0x40;
-			asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
-			w -= 0x40;
-			asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
-		}
+#ifndef CONFIG_CC_IS_GCC
+		v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), w);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x40), w - 0x40);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x80), w - 0x80);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0xc0), w - 0xc0);
+#else
+		asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
+#endif
 
 		// mix columns
 		w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
 		w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
-		w ^= vqtbl1q_u8(v ^ w, ror32by8);
+		w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8));
 
 		return w;
 	}
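
[Editor's note] On the IS_ENABLED() to preprocessor switch above:
IS_ENABLED(CONFIG_CC_IS_GCC) only yields a compile-time-constant
condition, so the dead branch is discarded after the whole statement has
been parsed and type-checked, and GCC would still trip over the
vld1q_u8_x4() call it does not implement. An #ifndef removes the call
before the compiler ever sees it. A minimal sketch of the resulting shape
(the sub_bytes_sketch() wrapper name is ours; the intrinsic and asm
sequences mirror the patch, and the asm path assumes the Sbox has already
been preloaded into v16-v31, as the rest of the file arranges):

  #include <arm_neon.h>

  extern const uint8_t crypto_aes_sbox[];	/* 256-byte AES Sbox */

  static inline uint8x16_t sub_bytes_sketch(uint8x16_t w)
  {
  	uint8x16_t v;

  #ifndef CONFIG_CC_IS_GCC
  	/* Four 64-byte table lookups cover the whole Sbox: TBL for the
  	 * first quarter, TBX to merge in the remaining three. */
  	v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), w);
  	v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x40), w - 0x40);
  	v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x80), w - 0x80);
  	v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0xc0), w - 0xc0);
  #else
  	/* GCC has no vld1q_u8_x4(); use TBL/TBX on preloaded v16-v31. */
  	asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
  	w -= 0x40;
  	asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
  	w -= 0x40;
  	asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
  	w -= 0x40;
  	asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
  #endif
  	return v;
  }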