author	Ard Biesheuvel <ard.biesheuvel@linaro.org>	2017-02-05 05:06:12 -0500
committer	Herbert Xu <herbert@gondor.apana.org.au>	2017-02-11 04:52:28 -0500
commit	db91af0fbe20474cec33263e28d15f5e6b45ebc9 (patch)
tree	22ff7970897903b0418cef78a2aeb59907f5b554 /crypto/algapi.c
parent	7d6e9105026788c497f0ab32fa16c82f4ab5ff61 (diff)
crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic
Instead of unconditionally forcing 4 byte alignment for all generic chaining modes that rely on crypto_xor() or crypto_inc() (which may result in unnecessary copying of data when the underlying hardware can perform unaligned accesses efficiently), make those functions deal with unaligned input explicitly, but only if the Kconfig symbol HAVE_EFFICIENT_UNALIGNED_ACCESS is set. This will allow us to drop the alignmasks from the CBC, CMAC, CTR, CTS, PCBC and SEQIV drivers.

For crypto_inc(), this simply involves making the 4-byte stride conditional on HAVE_EFFICIENT_UNALIGNED_ACCESS being set, given that it typically operates on 16 byte buffers.

For crypto_xor(), an algorithm is implemented that simply runs through the input using the largest strides possible if unaligned accesses are allowed. If they are not, an optimal sequence of memory accesses is emitted that takes the relative alignment of the input buffers into account, e.g., if the relative misalignment of dst and src is 4 bytes, the entire xor operation will be completed using 4 byte loads and stores (modulo unaligned bits at the start and end).

Note that all expressions involving relalign are simply eliminated by the compiler when HAVE_EFFICIENT_UNALIGNED_ACCESS is defined.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
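As an illustration of the stride selection described above (not part of the patch): the relative misalignment d is the XOR of the two buffer addresses masked to the word size, and its lowest set bit gives the largest stride that can serve both pointers once dst has been advanced to that alignment. A minimal user-space C sketch, with hypothetical addresses and a lowest_set_bit() helper standing in for the kernel's 1 << __ffs(d):

#include <stdio.h>

/* Isolate the least significant set bit of x (x != 0); equivalent to
 * the kernel's 1 << __ffs(x) for the values that occur here. */
static unsigned long lowest_set_bit(unsigned long x)
{
	return x & -x;
}

int main(void)
{
	/* Hypothetical buffer addresses, chosen only for illustration. */
	unsigned long dst = 0x1004;	/* 4-byte aligned */
	unsigned long src = 0x2008;	/* 8-byte aligned */
	unsigned long size = sizeof(unsigned long);

	/* Address bits below the word size on which dst and src differ. */
	unsigned long d = (dst ^ src) & (size - 1);

	/*
	 * Largest stride usable for both pointers: once dst is advanced
	 * to a multiple of relalign, src lands on a multiple of relalign
	 * as well, because the two addresses agree on all lower bits.
	 */
	unsigned long relalign = d ? lowest_set_bit(d) : size;

	printf("relative alignment: %lu bytes\n", relalign);	/* prints 4 */
	return 0;
}

With relalign == 4 as computed here, the new __crypto_xor() below skips the u64 loop (relalign & 7 is nonzero) and completes the work with u32 loads and stores, which is exactly the behaviour the commit message describes.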
Diffstat (limited to 'crypto/algapi.c')
-rw-r--r--	crypto/algapi.c	68
1 file changed, 50 insertions, 18 deletions
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 1fad2a6b3bbb..6b52e8f0b95f 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -962,34 +962,66 @@ void crypto_inc(u8 *a, unsigned int size)
 	__be32 *b = (__be32 *)(a + size);
 	u32 c;
 
-	for (; size >= 4; size -= 4) {
-		c = be32_to_cpu(*--b) + 1;
-		*b = cpu_to_be32(c);
-		if (c)
-			return;
-	}
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+	    !((unsigned long)b & (__alignof__(*b) - 1)))
+		for (; size >= 4; size -= 4) {
+			c = be32_to_cpu(*--b) + 1;
+			*b = cpu_to_be32(c);
+			if (c)
+				return;
+		}
 
 	crypto_inc_byte(a, size);
 }
 EXPORT_SYMBOL_GPL(crypto_inc);
 
-static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
+void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
 {
-	for (; size; size--)
-		*a++ ^= *b++;
-}
+	int relalign = 0;
+
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+		int size = sizeof(unsigned long);
+		int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
+
+		relalign = d ? 1 << __ffs(d) : size;
+
+		/*
+		 * If we care about alignment, process as many bytes as
+		 * needed to advance dst and src to values whose alignments
+		 * equal their relative alignment. This will allow us to
+		 * process the remainder of the input using optimal strides.
+		 */
+		while (((unsigned long)dst & (relalign - 1)) && len > 0) {
+			*dst++ ^= *src++;
+			len--;
+		}
+	}
 
-void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
-{
-	u32 *a = (u32 *)dst;
-	u32 *b = (u32 *)src;
+	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
+		*(u64 *)dst ^= *(u64 *)src;
+		dst += 8;
+		src += 8;
+		len -= 8;
+	}
 
-	for (; size >= 4; size -= 4)
-		*a++ ^= *b++;
+	while (len >= 4 && !(relalign & 3)) {
+		*(u32 *)dst ^= *(u32 *)src;
+		dst += 4;
+		src += 4;
+		len -= 4;
+	}
+
+	while (len >= 2 && !(relalign & 1)) {
+		*(u16 *)dst ^= *(u16 *)src;
+		dst += 2;
+		src += 2;
+		len -= 2;
+	}
 
-	crypto_xor_byte((u8 *)a, (u8 *)b, size);
+	while (len--)
+		*dst++ ^= *src++;
 }
-EXPORT_SYMBOL_GPL(crypto_xor);
+EXPORT_SYMBOL_GPL(__crypto_xor);
 
 unsigned int crypto_alg_extsize(struct crypto_alg *alg)
 {