author		Ard Biesheuvel <ard.biesheuvel@linaro.org>	2017-02-05 05:06:12 -0500
committer	Herbert Xu <herbert@gondor.apana.org.au>	2017-02-11 04:52:28 -0500
commit		db91af0fbe20474cec33263e28d15f5e6b45ebc9 (patch)
tree		22ff7970897903b0418cef78a2aeb59907f5b554
parent		7d6e9105026788c497f0ab32fa16c82f4ab5ff61 (diff)
crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic
Instead of unconditionally forcing 4 byte alignment for all generic chaining modes that rely on crypto_xor() or crypto_inc() (which may result in unnecessary copying of data when the underlying hardware can perform unaligned accesses efficiently), make those functions deal with unaligned input explicitly, but only if the Kconfig symbol HAVE_EFFICIENT_UNALIGNED_ACCESS is set. This will allow us to drop the alignmasks from the CBC, CMAC, CTR, CTS, PCBC and SEQIV drivers.

For crypto_inc(), this simply involves making the 4-byte stride conditional on HAVE_EFFICIENT_UNALIGNED_ACCESS being set, given that it typically operates on 16 byte buffers.

For crypto_xor(), an algorithm is implemented that simply runs through the input using the largest strides possible if unaligned accesses are allowed. If they are not, an optimal sequence of memory accesses is emitted that takes the relative alignment of the input buffers into account, e.g., if the relative misalignment of dst and src is 4 bytes, the entire xor operation will be completed using 4 byte loads and stores (modulo unaligned bits at the start and end).

Note that all expressions involving misalign are simply eliminated by the compiler when HAVE_EFFICIENT_UNALIGNED_ACCESS is defined.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
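The stride-selection trick described above can be illustrated in isolation. Below is a user-space sketch, not part of the patch: the helper name max_common_stride is hypothetical, and __builtin_ctzl stands in for the kernel's __ffs(). It shows how the largest safe access size falls out of the XOR of the two pointers.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Hypothetical stand-alone helper: given dst and src, return the largest
 * power-of-two stride (up to sizeof(unsigned long)) that keeps both
 * pointers naturally aligned once dst has been advanced to a multiple of
 * that stride.  This mirrors the relalign computation in __crypto_xor():
 * the lowest set bit of the relative misalignment caps the usable stride.
 */
static size_t max_common_stride(const void *dst, const void *src)
{
	size_t size = sizeof(unsigned long);
	size_t d = ((uintptr_t)dst ^ (uintptr_t)src) & (size - 1);

	return d ? (size_t)1 << __builtin_ctzl(d) : size;
}

int main(void)
{
	unsigned char buf[32];

	/* dst and src differ by 4 bytes -> 4-byte loads/stores are safe */
	printf("%zu\n", max_common_stride(buf + 4, buf + 8));	/* 4 */
	/* dst and src differ by 1 byte  -> byte-at-a-time only */
	printf("%zu\n", max_common_stride(buf + 1, buf + 2));	/* 1 */
	/* same low bits -> full word stride */
	printf("%zu\n", max_common_stride(buf, buf + 16));	/* 8 on LP64 */
	return 0;
}

With dst and src offset by 4 bytes, the whole operation proceeds in 4-byte loads and stores, exactly as the example in the commit message describes.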
-rw-r--r--	crypto/algapi.c		| 68
-rw-r--r--	crypto/cbc.c		|  3
-rw-r--r--	crypto/cmac.c		|  3
-rw-r--r--	crypto/ctr.c		|  2
-rw-r--r--	crypto/cts.c		|  3
-rw-r--r--	crypto/pcbc.c		|  3
-rw-r--r--	crypto/seqiv.c		|  2
-rw-r--r--	include/crypto/algapi.h	| 20
8 files changed, 70 insertions(+), 34 deletions(-)
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 1fad2a6b3bbb..6b52e8f0b95f 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -962,34 +962,66 @@ void crypto_inc(u8 *a, unsigned int size)
 	__be32 *b = (__be32 *)(a + size);
 	u32 c;
 
-	for (; size >= 4; size -= 4) {
-		c = be32_to_cpu(*--b) + 1;
-		*b = cpu_to_be32(c);
-		if (c)
-			return;
-	}
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+	    !((unsigned long)b & (__alignof__(*b) - 1)))
+		for (; size >= 4; size -= 4) {
+			c = be32_to_cpu(*--b) + 1;
+			*b = cpu_to_be32(c);
+			if (c)
+				return;
+		}
 
 	crypto_inc_byte(a, size);
 }
 EXPORT_SYMBOL_GPL(crypto_inc);
 
-static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
+void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
 {
-	for (; size; size--)
-		*a++ ^= *b++;
-}
+	int relalign = 0;
+
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+		int size = sizeof(unsigned long);
+		int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
+
+		relalign = d ? 1 << __ffs(d) : size;
+
+		/*
+		 * If we care about alignment, process as many bytes as
+		 * needed to advance dst and src to values whose alignments
+		 * equal their relative alignment. This will allow us to
+		 * process the remainder of the input using optimal strides.
+		 */
+		while (((unsigned long)dst & (relalign - 1)) && len > 0) {
+			*dst++ ^= *src++;
+			len--;
+		}
+	}
 
-void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
-{
-	u32 *a = (u32 *)dst;
-	u32 *b = (u32 *)src;
+	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
+		*(u64 *)dst ^= *(u64 *)src;
+		dst += 8;
+		src += 8;
+		len -= 8;
+	}
 
-	for (; size >= 4; size -= 4)
-		*a++ ^= *b++;
+	while (len >= 4 && !(relalign & 3)) {
+		*(u32 *)dst ^= *(u32 *)src;
+		dst += 4;
+		src += 4;
+		len -= 4;
+	}
+
+	while (len >= 2 && !(relalign & 1)) {
+		*(u16 *)dst ^= *(u16 *)src;
+		dst += 2;
+		src += 2;
+		len -= 2;
+	}
 
-	crypto_xor_byte((u8 *)a, (u8 *)b, size);
+	while (len--)
+		*dst++ ^= *src++;
 }
-EXPORT_SYMBOL_GPL(crypto_xor);
+EXPORT_SYMBOL_GPL(__crypto_xor);
 
 unsigned int crypto_alg_extsize(struct crypto_alg *alg)
 {
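For readers unfamiliar with crypto_inc(): it treats the buffer as a single big-endian integer (e.g. a 16-byte CTR counter) and adds one to it; the fast path above simply does that four bytes at a time when alignment permits, falling back to crypto_inc_byte() otherwise. A minimal byte-wise reference of the same semantics (user-space sketch, hypothetical helper name, not part of the patch):

#include <stddef.h>

/*
 * Reference version of what crypto_inc() computes: increment a big-endian
 * counter of `size` bytes, with the carry propagating from the last byte
 * towards the first.
 */
static void counter_inc_bytewise(unsigned char *a, size_t size)
{
	while (size--) {
		if (++a[size] != 0)	/* no carry out of this byte: done */
			break;
	}
}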
diff --git a/crypto/cbc.c b/crypto/cbc.c
index 68f751a41a84..bc160a3186dc 100644
--- a/crypto/cbc.c
+++ b/crypto/cbc.c
@@ -145,9 +145,6 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
 	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
-	/* We access the data as u32s when xoring. */
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
 	inst->alg.ivsize = alg->cra_blocksize;
 	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
 	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
diff --git a/crypto/cmac.c b/crypto/cmac.c
index 04080dca8f0c..16301f52858c 100644
--- a/crypto/cmac.c
+++ b/crypto/cmac.c
@@ -260,8 +260,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		goto out_free_inst;
 
-	/* We access the data as u32s when xoring. */
-	alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
+	alignmask = alg->cra_alignmask;
 	inst->alg.base.cra_alignmask = alignmask;
 	inst->alg.base.cra_priority = alg->cra_priority;
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
diff --git a/crypto/ctr.c b/crypto/ctr.c
index a9a7a44f2783..a4f4a8983169 100644
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
@@ -209,7 +209,7 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb)
 	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
 	inst->alg.cra_priority = alg->cra_priority;
 	inst->alg.cra_blocksize = 1;
-	inst->alg.cra_alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
+	inst->alg.cra_alignmask = alg->cra_alignmask;
 	inst->alg.cra_type = &crypto_blkcipher_type;
 
 	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
diff --git a/crypto/cts.c b/crypto/cts.c
index a1335d6c35fb..243f591dc409 100644
--- a/crypto/cts.c
+++ b/crypto/cts.c
@@ -374,9 +374,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
 	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
 
-	/* We access the data as u32s when xoring. */
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
 	inst->alg.ivsize = alg->base.cra_blocksize;
 	inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
 	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
diff --git a/crypto/pcbc.c b/crypto/pcbc.c
index 11d248673ad4..29dd2b4a3b85 100644
--- a/crypto/pcbc.c
+++ b/crypto/pcbc.c
@@ -260,9 +260,6 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
 	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
-	/* We access the data as u32s when xoring. */
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
 	inst->alg.ivsize = alg->cra_blocksize;
 	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
 	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
diff --git a/crypto/seqiv.c b/crypto/seqiv.c
index c7049231861f..570b7d1aa0ca 100644
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
@@ -153,8 +153,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
 	spawn = aead_instance_ctx(inst);
 	alg = crypto_spawn_aead_alg(spawn);
 
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 404e9558e879..ebe4ded0c55d 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -191,9 +191,25 @@ static inline unsigned int crypto_queue_len(struct crypto_queue *queue)
 	return queue->qlen;
 }
 
-/* These functions require the input/output to be aligned as u32. */
 void crypto_inc(u8 *a, unsigned int size);
-void crypto_xor(u8 *dst, const u8 *src, unsigned int size);
+void __crypto_xor(u8 *dst, const u8 *src, unsigned int size);
+
+static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
+{
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+	    __builtin_constant_p(size) &&
+	    (size % sizeof(unsigned long)) == 0) {
+		unsigned long *d = (unsigned long *)dst;
+		unsigned long *s = (unsigned long *)src;
+
+		while (size > 0) {
+			*d++ ^= *s++;
+			size -= sizeof(unsigned long);
+		}
+	} else {
+		__crypto_xor(dst, src, size);
+	}
+}
 
 int blkcipher_walk_done(struct blkcipher_desc *desc,
 			struct blkcipher_walk *walk, int err);
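How a caller benefits from the new inline wrapper is easiest to see with a constant length. A hedged illustration follows; the surrounding function and any names other than crypto_xor()/crypto_inc() are hypothetical and not taken from the patch.

#include <crypto/algapi.h>

/*
 * Illustrative only: a chaining-mode style helper XORs one 16-byte block of
 * keystream into the data and bumps the big-endian counter.  Because the
 * length is a compile-time constant, the inline crypto_xor() above can take
 * its open-coded word-XOR path when unaligned accesses are cheap.
 */
static void ctr_xor_one_block(u8 *dst, const u8 *keystream, u8 *counter)
{
	crypto_xor(dst, keystream, 16);	/* constant size: fast inline path */
	crypto_inc(counter, 16);	/* increment the big-endian counter */
}

On a 64-bit architecture with CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, the constant size of 16 lets the wrapper's loop reduce to two 8-byte XORs; otherwise the call falls through to __crypto_xor(), which resolves the alignment at run time.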