author    Ondrej Mosnacek <omosnace@redhat.com>    2018-09-13 04:51:33 -0400
committer Herbert Xu <herbert@gondor.apana.org.au>    2018-09-21 01:24:52 -0400
commit    c778f96bf3471b870caa2b9282f08f176a416f88 (patch)
tree      85d8367951116ab1135f90bfeafcb9de05964b04 /crypto/lrw.c
parent    dc6d6d5a586fdd6876e18092d5363d48f26b7738 (diff)
crypto: lrw - Optimize tweak computation
This patch rewrites the tweak computation to a slightly simpler method
that performs fewer bswaps. Based on performance measurements, the new
code seems to provide slightly better performance than the old one.

PERFORMANCE MEASUREMENTS (x86_64)
Performed using: https://gitlab.com/omos/linux-crypto-bench
Crypto driver used: lrw(ecb-aes-aesni)

Before:
       ALGORITHM   KEY (b)   DATA (B)   TIME ENC (ns)   TIME DEC (ns)
        lrw(aes)       256         64             204             286
        lrw(aes)       320         64             227             203
        lrw(aes)       384         64             208             204
        lrw(aes)       256        512             441             439
        lrw(aes)       320        512             456             455
        lrw(aes)       384        512             469             483
        lrw(aes)       256       4096            2136            2190
        lrw(aes)       320       4096            2161            2213
        lrw(aes)       384       4096            2295            2369
        lrw(aes)       256      16384            7692            7868
        lrw(aes)       320      16384            8230            8691
        lrw(aes)       384      16384            8971            8813
        lrw(aes)       256      32768           15336           15560
        lrw(aes)       320      32768           16410           16346
        lrw(aes)       384      32768           18023           17465

After:
       ALGORITHM   KEY (b)   DATA (B)   TIME ENC (ns)   TIME DEC (ns)
        lrw(aes)       256         64             200             203
        lrw(aes)       320         64             202             204
        lrw(aes)       384         64             204             205
        lrw(aes)       256        512             415             415
        lrw(aes)       320        512             432             440
        lrw(aes)       384        512             449             451
        lrw(aes)       256       4096            1838            1995
        lrw(aes)       320       4096            2123            1980
        lrw(aes)       384       4096            2100            2119
        lrw(aes)       256      16384            7183            6954
        lrw(aes)       320      16384            7844            7631
        lrw(aes)       384      16384            8256            8126
        lrw(aes)       256      32768           14772           14484
        lrw(aes)       320      32768           15281           15431
        lrw(aes)       384      32768           16469           16293

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
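The update rule the driver relies on (the "optimization discussed in the
specification" referenced in the diff below) is that incrementing the block
counter by one flips exactly its trailing '1' bits plus the lowest '0' bit, so
consecutive tweaks differ by a single precomputed multiple from the mulinc
table, indexed by the counter's trailing-ones count. A minimal userspace sketch
of that index calculation, for illustration only (plain C, not kernel code;
trailing_ones() is a made-up helper):

	#include <stdint.h>
	#include <stdio.h>

	/* count the trailing '1' bits of x, i.e. the mulinc index that
	 * get_index128()/next_index() compute for the counter value x */
	static int trailing_ones(uint64_t x)
	{
		int n = 0;

		while (x & 1) {
			x >>= 1;
			n++;
		}
		return n;
	}

	int main(void)
	{
		/* blocks 0..7 select indices 0,1,0,2,0,1,0,3 */
		for (uint64_t i = 0; i < 8; i++)
			printf("block %llu -> mulinc index %d\n",
			       (unsigned long long)i, trailing_ones(i));
		return 0;
	}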
Diffstat (limited to 'crypto/lrw.c')
-rw-r--r--    crypto/lrw.c    61
1 file changed, 37 insertions(+), 24 deletions(-)
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 5504d1325a56..7377b5b486fd 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -120,27 +120,28 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
 	return 0;
 }
 
-static inline void inc(be128 *iv)
-{
-	be64_add_cpu(&iv->b, 1);
-	if (!iv->b)
-		be64_add_cpu(&iv->a, 1);
-}
-
-/* this returns the number of consequative 1 bits starting
- * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
-static inline int get_index128(be128 *block)
+/*
+ * Returns the number of trailing '1' bits in the words of the counter, which is
+ * represented by 4 32-bit words, arranged from least to most significant.
+ * At the same time, increments the counter by one.
+ *
+ * For example:
+ *
+ * u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };
+ * int i = next_index(&counter);
+ * // i == 33, counter == { 0x0, 0x2, 0x0, 0x0 }
+ */
+static int next_index(u32 *counter)
 {
-	int x;
-	__be32 *p = (__be32 *) block;
+	int i, res = 0;
 
-	for (p += 3, x = 0; x < 128; p--, x += 32) {
-		u32 val = be32_to_cpup(p);
-
-		if (!~val)
-			continue;
-
-		return x + ffz(val);
-	}
+	for (i = 0; i < 4; i++) {
+		if (counter[i] + 1 != 0) {
+			res += ffz(counter[i]++);
+			break;
+		}
+		counter[i] = 0;
+		res += 32;
+	}
 
 	/*
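The new helper's documented example can be verified with a small userspace
sketch. This is a stand-in, not kernel code: ffz() is emulated with GCC's
__builtin_ctz, and the function tail after the loop falls outside the hunk
shown above, so the sketch simply returns the accumulated count, which
suffices for this input:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t u32;

	/* userspace stand-in for the kernel's ffz(): index of the
	 * lowest zero bit (defined here only when ~x != 0) */
	static int ffz(u32 x)
	{
		return __builtin_ctz(~x);
	}

	/* loop body copied from next_index() in the hunk above */
	static int next_index(u32 *counter)
	{
		int i, res = 0;

		for (i = 0; i < 4; i++) {
			if (counter[i] + 1 != 0) {
				res += ffz(counter[i]++);
				break;
			}
			counter[i] = 0;
			res += 32;
		}
		/* all-ones rollover handling omitted (outside the hunk) */
		return res;
	}

	int main(void)
	{
		/* the example from the function's comment */
		u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };
		int i = next_index(counter);

		assert(i == 33);
		assert(counter[0] == 0x0 && counter[1] == 0x2);
		printf("i == %d, counter == { 0x%X, 0x%X, 0x%X, 0x%X }\n",
		       i, counter[0], counter[1], counter[2], counter[3]);
		return 0;
	}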
@@ -214,8 +215,9 @@ static int pre_crypt(struct skcipher_request *req)
 	struct scatterlist *sg;
 	unsigned cryptlen;
 	unsigned offset;
-	be128 *iv;
 	bool more;
+	__be32 *iv;
+	u32 counter[4];
 	int err;
 
 	subreq = &rctx->subreq;
@@ -230,7 +232,12 @@ static int pre_crypt(struct skcipher_request *req)
 				   cryptlen, req->iv);
 
 	err = skcipher_walk_virt(&w, subreq, false);
-	iv = w.iv;
+	iv = (__be32 *)w.iv;
+
+	counter[0] = be32_to_cpu(iv[3]);
+	counter[1] = be32_to_cpu(iv[2]);
+	counter[2] = be32_to_cpu(iv[1]);
+	counter[3] = be32_to_cpu(iv[0]);
 
 	while (w.nbytes) {
 		unsigned int avail = w.nbytes;
@@ -247,10 +254,16 @@ static int pre_crypt(struct skcipher_request *req)
 			/* T <- I*Key2, using the optimization
 			 * discussed in the specification */
 			be128_xor(&rctx->t, &rctx->t,
-				  &ctx->mulinc[get_index128(iv)]);
-			inc(iv);
+				  &ctx->mulinc[next_index(counter)]);
 		} while ((avail -= bs) >= bs);
 
+		if (w.nbytes == w.total) {
+			iv[0] = cpu_to_be32(counter[3]);
+			iv[1] = cpu_to_be32(counter[2]);
+			iv[2] = cpu_to_be32(counter[1]);
+			iv[3] = cpu_to_be32(counter[0]);
+		}
+
 		err = skcipher_walk_done(&w, avail);
 	}
 
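This endianness choreography is where the saved bswaps come from: the 16-byte
IV stores its most significant 32-bit word first, so loading it into the
least-significant-first counter[] reverses the word order once up front, every
per-block increment then runs in native integer arithmetic, and the IV is
rewritten only after the walk has consumed all data (w.nbytes == w.total),
rather than byte-swapping on every block as the old inc()/get_index128() pair
did. A sketch of the round trip, assuming a little-endian host where
be32_to_cpu()/cpu_to_be32() reduce to plain byte swaps:

	#include <stdint.h>
	#include <stdio.h>

	/* little-endian-host stand-ins for the kernel byte-order helpers */
	static uint32_t be32_to_cpu(uint32_t v) { return __builtin_bswap32(v); }
	static uint32_t cpu_to_be32(uint32_t v) { return __builtin_bswap32(v); }

	int main(void)
	{
		/* 128-bit IV as four big-endian words, most significant first */
		uint32_t iv[4] = { cpu_to_be32(0x0), cpu_to_be32(0x0),
				   cpu_to_be32(0x0), cpu_to_be32(0xFF) };
		uint32_t counter[4];
		int i;

		/* load once: reverse word order, convert to host endianness */
		for (i = 0; i < 4; i++)
			counter[i] = be32_to_cpu(iv[3 - i]);

		/* per-block work happens in native form (real carry
		 * propagation is done by next_index()) */
		counter[0]++;

		/* store once, after the final chunk of the walk */
		for (i = 0; i < 4; i++)
			iv[3 - i] = cpu_to_be32(counter[i]);

		printf("low IV word is now 0x%08X\n", be32_to_cpu(iv[3]));
		return 0;
	}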
@@ -548,7 +561,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_priority = alg->base.cra_priority;
 	inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
 	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
-				       (__alignof__(u64) - 1);
+				       (__alignof__(__be32) - 1);
 
 	inst->alg.ivsize = LRW_BLOCK_SIZE;
 	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
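The alignmask relaxation in this last hunk follows from the code change: the
IV used to be incremented via be64_add_cpu() on its 64-bit halves, so the
template demanded u64 alignment, whereas the rewritten code only ever accesses
the IV as 32-bit words. To illustrate what an alignmask means, here is a small
sketch (my own example, not kernel code) of the kernel's align-up idiom; on
common ABIs __alignof__(__be32) is 4, giving a mask of 3:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* template requirement from the hunk: __alignof__(__be32) - 1 */
		unsigned long template_mask = 4 - 1;
		/* hypothetical inner cipher with no alignment requirement */
		unsigned long inner_mask = 0;
		/* OR-ing masks of the form 2^k - 1 keeps the stricter one */
		unsigned long mask = inner_mask | template_mask;

		uintptr_t addr = 0x1002;
		/* round up so (aligned & mask) == 0, the kernel ALIGN() idiom */
		uintptr_t aligned = (addr + mask) & ~mask;

		printf("mask=%lu: 0x%lx rounds up to 0x%lx\n",
		       mask, (unsigned long)addr, (unsigned long)aligned);
		return 0;
	}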