commit 389139b34f407da7c09bc26c4d943f52742a6d42 (patch)
tree 9a9f96abb48b42e1640aae289448f6bd70d0d6d8 /crypto/aegis128-neon-inner.c
parent 309b77e0f8bfa3126ff12949173e6d45801a968b (diff)
author Ard Biesheuvel <ard.biesheuvel@linaro.org> 2019-08-19 10:15:00 -0400
committer Herbert Xu <herbert@gondor.apana.org.au> 2019-08-30 04:05:27 -0400
crypto: arm64/aegis128 - use explicit vector load for permute vectors
When building the new aegis128 NEON code in big endian mode, Clang
complains about the const uint8x16_t permute vectors in the following
way:
  crypto/aegis128-neon-inner.c:58:40: warning: vector initializers are not
  compatible with NEON intrinsics in big endian mode
  [-Wnonportable-vector-initialization]
  static const uint8x16_t shift_rows = {
                                        ^
  crypto/aegis128-neon-inner.c:58:40: note: consider using vld1q_u8() to
  initialize a vector from memory, or vcombine_u8(vcreate_u8(), vcreate_u8())
  to initialize from integer constants
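To make the failure mode concrete, here is a minimal sketch of the
nonportable pattern next to the vld1q_u8() form the note suggests (the
shift_rows values are the ones from the patch below; the do_shift_rows()
wrapper is only illustrative):

  #include <arm_neon.h>

  /*
   * Nonportable: the lane order of a NEON vector initializer is
   * endian-dependent, which is what Clang's
   * -Wnonportable-vector-initialization warns about in big endian mode.
   */
  static const uint8x16_t shift_rows_vec = {
  	0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
  	0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
  };

  /*
   * Portable: keep the constant as a plain byte array and load it with
   * vld1q_u8(), whose lane order is well defined on either endianness.
   */
  static const uint8_t shift_rows[16] = {
  	0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
  	0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
  };

  static uint8x16_t do_shift_rows(uint8x16_t w)
  {
  	return vqtbl1q_u8(w, vld1q_u8(shift_rows));
  }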
Since the same issue applies to the uint8x16x4_t loads of the AES Sbox,
update those references as well. However, since GCC does not implement
the vld1q_u8_x4() intrinsic, switch from IS_ENABLED() to a preprocessor
conditional to conditionally include this code.
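The preprocessor is needed rather than IS_ENABLED() because IS_ENABLED()
only folds to a constant after parsing: both arms of the if must still
compile, so GCC would reject the vld1q_u8_x4() call even in the dead
branch. A condensed sketch, assuming the kernel's CONFIG_CC_IS_GCC symbol
(the sub_bytes_first() helper is hypothetical and covers only the first
0x40 S-box bytes for brevity):

  #include <arm_neon.h>

  extern const uint8_t crypto_aes_sbox[];	/* 256-byte AES S-box */

  static uint8x16_t sub_bytes_first(uint8x16_t w)
  {
  	uint8x16_t v;

  #ifndef CONFIG_CC_IS_GCC
  	/* Clang: vld1q_u8_x4() loads four q-registers for the TBL lookup. */
  	v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), w);
  #else
  	/*
  	 * GCC: no vld1q_u8_x4(), so fall back to inline asm; the caller is
  	 * assumed to have preloaded the S-box into v16-v19.
  	 */
  	asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
  #endif
  	return v;
  }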
Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'crypto/aegis128-neon-inner.c')
 crypto/aegis128-neon-inner.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index ed55568afd1b..f05310ca22aa 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -26,7 +26,7 @@ struct aegis128_state {
 	uint8x16_t v[5];
 };
 
-extern const uint8x16x4_t crypto_aes_sbox[];
+extern const uint8_t crypto_aes_sbox[];
 
 static struct aegis128_state aegis128_load_state_neon(const void *state)
 {
@@ -55,39 +55,39 @@ uint8x16_t aegis_aes_round(uint8x16_t w)
 
 #ifdef CONFIG_ARM64
 	if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
-		static const uint8x16_t shift_rows = {
+		static const uint8_t shift_rows[] = {
 			0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
 			0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
 		};
-		static const uint8x16_t ror32by8 = {
+		static const uint8_t ror32by8[] = {
 			0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
 			0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
 		};
 		uint8x16_t v;
 
 		// shift rows
-		w = vqtbl1q_u8(w, shift_rows);
+		w = vqtbl1q_u8(w, vld1q_u8(shift_rows));
 
 		// sub bytes
-		if (!IS_ENABLED(CONFIG_CC_IS_GCC)) {
-			v = vqtbl4q_u8(crypto_aes_sbox[0], w);
-			v = vqtbx4q_u8(v, crypto_aes_sbox[1], w - 0x40);
-			v = vqtbx4q_u8(v, crypto_aes_sbox[2], w - 0x80);
-			v = vqtbx4q_u8(v, crypto_aes_sbox[3], w - 0xc0);
-		} else {
-			asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
-			w -= 0x40;
-			asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
-			w -= 0x40;
-			asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
-			w -= 0x40;
-			asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
-		}
+#ifndef CONFIG_CC_IS_GCC
+		v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), w);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x40), w - 0x40);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x80), w - 0x80);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0xc0), w - 0xc0);
+#else
+		asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
+#endif
 
 		// mix columns
 		w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
 		w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
-		w ^= vqtbl1q_u8(v ^ w, ror32by8);
+		w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8));
 
 		return w;
 	}