commit 389139b34f407da7c09bc26c4d943f52742a6d42 (patch)
tree 9a9f96abb48b42e1640aae289448f6bd70d0d6d8 /crypto/aegis128-neon-inner.c
parent 309b77e0f8bfa3126ff12949173e6d45801a968b (diff)
author Ard Biesheuvel <ard.biesheuvel@linaro.org> 2019-08-19 10:15:00 -0400
committer Herbert Xu <herbert@gondor.apana.org.au> 2019-08-30 04:05:27 -0400
crypto: arm64/aegis128 - use explicit vector load for permute vectors
When building the new aegis128 NEON code in big endian mode, Clang
complains about the const uint8x16_t permute vectors in the following
way:
  crypto/aegis128-neon-inner.c:58:40: warning: vector initializers are not
  compatible with NEON intrinsics in big endian mode
  [-Wnonportable-vector-initialization]
  static const uint8x16_t shift_rows = {
                                        ^
  crypto/aegis128-neon-inner.c:58:40: note: consider using vld1q_u8() to
  initialize a vector from memory, or vcombine_u8(vcreate_u8(), vcreate_u8())
  to initialize from integer constants
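To make the failure mode concrete, here is a minimal sketch of the
nonportable pattern next to the vld1q_u8() form the note suggests (the
shift_rows values are the ones from the patch below; the do_shift_rows()
wrapper is only illustrative):

  #include <arm_neon.h>

  /*
   * Nonportable: the lane order of a NEON vector initializer is
   * endian-dependent, which is what Clang's
   * -Wnonportable-vector-initialization warns about in big endian mode.
   */
  static const uint8x16_t shift_rows_vec = {
  	0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
  	0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
  };

  /*
   * Portable: keep the constant as a plain byte array and load it with
   * vld1q_u8(), whose lane order is well defined on either endianness.
   */
  static const uint8_t shift_rows[16] = {
  	0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
  	0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
  };

  static uint8x16_t do_shift_rows(uint8x16_t w)
  {
  	return vqtbl1q_u8(w, vld1q_u8(shift_rows));
  }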
Since the same issue applies to the uint8x16x4_t loads of the AES Sbox,
update those references as well. However, since GCC does not implement
the vld1q_u8_x4() intrinsic, switch from IS_ENABLED() to a preprocessor
conditional to conditionally include this code.
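The preprocessor is needed rather than IS_ENABLED() because IS_ENABLED()
only folds to a constant after parsing: both arms of the if must still
compile, so GCC would reject the vld1q_u8_x4() call even in the dead
branch. A condensed sketch, assuming the kernel's CONFIG_CC_IS_GCC symbol
(the sub_bytes_first() helper is hypothetical and covers only the first
0x40 S-box bytes for brevity):

  #include <arm_neon.h>

  extern const uint8_t crypto_aes_sbox[];	/* 256-byte AES S-box */

  static uint8x16_t sub_bytes_first(uint8x16_t w)
  {
  	uint8x16_t v;

  #ifndef CONFIG_CC_IS_GCC
  	/* Clang: vld1q_u8_x4() loads four q-registers for the TBL lookup. */
  	v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), w);
  #else
  	/*
  	 * GCC: no vld1q_u8_x4(), so fall back to inline asm; the caller is
  	 * assumed to have preloaded the S-box into v16-v19.
  	 */
  	asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
  #endif
  	return v;
  }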
Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'crypto/aegis128-neon-inner.c')
 crypto/aegis128-neon-inner.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index ed55568afd1b..f05310ca22aa 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -26,7 +26,7 @@ struct aegis128_state {
 	uint8x16_t v[5];
 };
 
-extern const uint8x16x4_t crypto_aes_sbox[];
+extern const uint8_t crypto_aes_sbox[];
 
 static struct aegis128_state aegis128_load_state_neon(const void *state)
 {
@@ -55,39 +55,39 @@ uint8x16_t aegis_aes_round(uint8x16_t w)
 
 #ifdef CONFIG_ARM64
 	if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
-		static const uint8x16_t shift_rows = {
+		static const uint8_t shift_rows[] = {
 			0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
 			0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
 		};
-		static const uint8x16_t ror32by8 = {
+		static const uint8_t ror32by8[] = {
 			0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
 			0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
 		};
 		uint8x16_t v;
 
 		// shift rows
-		w = vqtbl1q_u8(w, shift_rows);
+		w = vqtbl1q_u8(w, vld1q_u8(shift_rows));
 
 		// sub bytes
-		if (!IS_ENABLED(CONFIG_CC_IS_GCC)) {
-			v = vqtbl4q_u8(crypto_aes_sbox[0], w);
-			v = vqtbx4q_u8(v, crypto_aes_sbox[1], w - 0x40);
-			v = vqtbx4q_u8(v, crypto_aes_sbox[2], w - 0x80);
-			v = vqtbx4q_u8(v, crypto_aes_sbox[3], w - 0xc0);
-		} else {
-			asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
-			w -= 0x40;
-			asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
-			w -= 0x40;
-			asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
-			w -= 0x40;
-			asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
-		}
+#ifndef CONFIG_CC_IS_GCC
+		v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), w);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x40), w - 0x40);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x80), w - 0x80);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0xc0), w - 0xc0);
+#else
+		asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
+#endif
 
 		// mix columns
 		w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
 		w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
-		w ^= vqtbl1q_u8(v ^ w, ror32by8);
+		w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8));
 
 		return w;
 	}