author     Huang Ying <ying.huang@intel.com>        2009-02-19 01:42:19 -0500
committer  Herbert Xu <herbert@gondor.apana.org.au> 2009-02-19 01:42:19 -0500
commit     254eff771441f4ee7aa9cf770a6e4820492c9dab (patch)
tree       aa657717e9c52ce7941179c11da78f4173f333c5 /crypto
parent     25c38d3fb92fc23af7730a1601bc20af8216ae44 (diff)
crypto: cryptd - Per-CPU thread implementation based on kcrypto_wq
The original cryptd thread implementation has a scalability issue; this
patch solves it with a per-CPU thread implementation based on kcrypto_wq.

struct cryptd_queue is defined as a per-CPU queue, holding one struct
cryptd_cpu_queue for each CPU. Within struct cryptd_cpu_queue, a struct
crypto_queue holds all requests for that CPU, and a struct work_struct is
used to run them.

Testing based on dm-crypt on an Intel Core 2 E6400 (two cores) machine
shows a 19.2% performance gain. The test script is as follows:

-------------------- script begin ---------------------------
#!/bin/sh

dmc_create() {
	# Create a crypt device using dmsetup
	dmsetup create $2 --table "0 `blockdev --getsize $1` crypt cbc(aes-asm)?cryptd?plain:plain babebabebabebabebabebabebabebabe 0 $1 0"
}

dmsetup remove crypt0
dmsetup remove crypt1

dd if=/dev/zero of=/dev/ram0 bs=1M count=4 >& /dev/null
dd if=/dev/zero of=/dev/ram1 bs=1M count=4 >& /dev/null

dmc_create /dev/ram0 crypt0
dmc_create /dev/ram1 crypt1

cat >tr.sh <<EOF
#!/bin/sh

for n in \$(seq 10); do
	dd if=/dev/dm-0 of=/dev/null >& /dev/null &
	dd if=/dev/dm-1 of=/dev/null >& /dev/null &
done

wait
EOF

for n in $(seq 10); do
	/usr/bin/time sh tr.sh
done

rm tr.sh
-------------------- script end ---------------------------

The separator in the dm-crypt cipher parameter is changed from "-" to
"?", because "-" also appears in some cipher driver names, and cryptd
needs the cipher driver name rather than the cipher name.

The test results on the same Intel Core 2 E6400 (two cores) machine are
as follows.

Without the patch:

-----------------wo begin --------------------------
0.04user 0.38system 0:00.39elapsed 107%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6566minor)pagefaults 0swaps
0.07user 0.35system 0:00.35elapsed 121%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6567minor)pagefaults 0swaps
0.06user 0.34system 0:00.30elapsed 135%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.05user 0.37system 0:00.36elapsed 119%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6607minor)pagefaults 0swaps
0.06user 0.36system 0:00.35elapsed 120%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.05user 0.37system 0:00.31elapsed 136%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6594minor)pagefaults 0swaps
0.04user 0.34system 0:00.30elapsed 126%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6597minor)pagefaults 0swaps
0.06user 0.32system 0:00.31elapsed 125%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6571minor)pagefaults 0swaps
0.06user 0.34system 0:00.31elapsed 134%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6581minor)pagefaults 0swaps
0.05user 0.38system 0:00.31elapsed 138%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6600minor)pagefaults 0swaps
-----------------wo end --------------------------

With the patch:

------------------w begin --------------------------
0.02user 0.31system 0:00.24elapsed 141%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6554minor)pagefaults 0swaps
0.05user 0.34system 0:00.31elapsed 127%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6606minor)pagefaults 0swaps
0.07user 0.33system 0:00.26elapsed 155%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6559minor)pagefaults 0swaps
0.07user 0.32system 0:00.26elapsed 151%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.05user 0.34system 0:00.26elapsed 150%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6603minor)pagefaults 0swaps
0.03user 0.36system 0:00.31elapsed 124%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.04user 0.35system 0:00.26elapsed 147%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6586minor)pagefaults 0swaps
0.03user 0.37system 0:00.27elapsed 146%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6562minor)pagefaults 0swaps
0.04user 0.36system 0:00.26elapsed 154%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6594minor)pagefaults 0swaps
0.04user 0.35system 0:00.26elapsed 154%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+6557minor)pagefaults 0swaps
------------------w end --------------------------

The median elapsed time is:

	without patch: 0.31 s
	with patch:    0.26 s

The performance gain is about (0.31-0.26)/0.26 = 0.192, i.e. 19.2%.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
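For readers unfamiliar with the cryptd template: the dm-crypt cipher spec
used above, "cbc(aes-asm)?cryptd?plain", corresponds to a
"cryptd(cbc(aes-asm))" instance in the crypto API. The sketch below shows
one plausible way to drive such an instance directly through the
2.6.29-era ablkcipher interface. It is illustrative only, not part of
this patch; the demo_* names, the all-zero key/IV, and the
completion-based wait are assumptions of the example.

-------------------- example begin ---------------------------
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/completion.h>

/* Final completion callback; cryptd invokes it from workqueue context. */
static void demo_complete(struct crypto_async_request *req, int err)
{
	if (err == -EINPROGRESS)
		return;			/* left the backlog, still running */
	complete(req->data);		/* wake the waiter below */
}

static int demo_encrypt_one_block(void)
{
	struct crypto_ablkcipher *tfm;
	struct ablkcipher_request *req;
	struct scatterlist sg;
	DECLARE_COMPLETION_ONSTACK(done);
	u8 key[16] = { 0 }, iv[16] = { 0 }, buf[16] = { 0 };
	int err;

	/* Ask for the asynchronous cryptd wrapper around cbc(aes-asm). */
	tfm = crypto_alloc_ablkcipher("cryptd(cbc(aes-asm))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_ablkcipher_setkey(tfm, key, sizeof(key));
	if (err)
		goto out_free_tfm;

	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, sizeof(buf));
	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					demo_complete, &done);
	ablkcipher_request_set_crypt(req, &sg, &sg, sizeof(buf), iv);

	/* The request is queued on the local CPU and runs asynchronously. */
	err = crypto_ablkcipher_encrypt(req);
	if (err == -EINPROGRESS || err == -EBUSY) {
		wait_for_completion(&done);
		err = 0;
	}

	ablkcipher_request_free(req);
out_free_tfm:
	crypto_free_ablkcipher(tfm);
	return err;
}
-------------------- example end ----------------------------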
Diffstat (limited to 'crypto')
-rw-r--r--  crypto/Kconfig   |   1
-rw-r--r--  crypto/cryptd.c  | 220
2 files changed, 104 insertions(+), 117 deletions(-)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 420b630a17cf..24c31efde882 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -114,6 +114,7 @@ config CRYPTO_CRYPTD
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_HASH
 	select CRYPTO_MANAGER
+	select CRYPTO_WORKQUEUE
 	help
 	  This is a generic software asynchronous crypto daemon that
 	  converts an arbitrary synchronous software crypto algorithm
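The new CRYPTO_WORKQUEUE option selects the shared crypto workqueue,
kcrypto_wq, onto which the per-CPU work items below are queued. That
provider lives in crypto/crypto_wq.c (added by a companion patch, not
shown here); a minimal sketch of what it amounts to, assuming the
2.6.29-era workqueue API, is:

-------------------- sketch begin ---------------------------
/* Sketch of the kcrypto_wq provider; the real code is crypto/crypto_wq.c. */
#include <linux/workqueue.h>
#include <linux/module.h>

struct workqueue_struct *kcrypto_wq;
EXPORT_SYMBOL_GPL(kcrypto_wq);

static int __init crypto_wq_init(void)
{
	/* create_workqueue() spawns one worker thread per CPU here. */
	kcrypto_wq = create_workqueue("crypto");
	if (!kcrypto_wq)
		return -ENOMEM;
	return 0;
}

static void __exit crypto_wq_exit(void)
{
	destroy_workqueue(kcrypto_wq);
}

module_init(crypto_wq_init);
module_exit(crypto_wq_exit);
MODULE_LICENSE("GPL");
-------------------- sketch end -----------------------------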
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 93b98c525b3a..d14b22658d7a 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -13,30 +13,30 @@
 #include <crypto/algapi.h>
 #include <crypto/internal/hash.h>
 #include <crypto/cryptd.h>
+#include <crypto/crypto_wq.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/kthread.h>
 #include <linux/list.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/spinlock.h>
 
-#define CRYPTD_MAX_QLEN 100
+#define CRYPTD_MAX_CPU_QLEN 100
 
-struct cryptd_state {
-	spinlock_t lock;
-	struct mutex mutex;
+struct cryptd_cpu_queue {
 	struct crypto_queue queue;
-	struct task_struct *task;
+	struct work_struct work;
+};
+
+struct cryptd_queue {
+	struct cryptd_cpu_queue *cpu_queue;
 };
 
 struct cryptd_instance_ctx {
 	struct crypto_spawn spawn;
-	struct cryptd_state *state;
+	struct cryptd_queue *queue;
 };
 
 struct cryptd_blkcipher_ctx {
@@ -55,11 +55,85 @@ struct cryptd_hash_request_ctx {
 	crypto_completion_t complete;
 };
 
-static inline struct cryptd_state *cryptd_get_state(struct crypto_tfm *tfm)
+static void cryptd_queue_worker(struct work_struct *work);
+
+static int cryptd_init_queue(struct cryptd_queue *queue,
+			     unsigned int max_cpu_qlen)
+{
+	int cpu;
+	struct cryptd_cpu_queue *cpu_queue;
+
+	queue->cpu_queue = alloc_percpu(struct cryptd_cpu_queue);
+	if (!queue->cpu_queue)
+		return -ENOMEM;
+	for_each_possible_cpu(cpu) {
+		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
+		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
+		INIT_WORK(&cpu_queue->work, cryptd_queue_worker);
+	}
+	return 0;
+}
+
+static void cryptd_fini_queue(struct cryptd_queue *queue)
+{
+	int cpu;
+	struct cryptd_cpu_queue *cpu_queue;
+
+	for_each_possible_cpu(cpu) {
+		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
+		BUG_ON(cpu_queue->queue.qlen);
+	}
+	free_percpu(queue->cpu_queue);
+}
+
+static int cryptd_enqueue_request(struct cryptd_queue *queue,
+				  struct crypto_async_request *request)
+{
+	int cpu, err;
+	struct cryptd_cpu_queue *cpu_queue;
+
+	cpu = get_cpu();
+	cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
+	err = crypto_enqueue_request(&cpu_queue->queue, request);
+	queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
+	put_cpu();
+
+	return err;
+}
+
+/* Called in workqueue context, do one real cryption work (via
+ * req->complete) and reschedule itself if there are more work to
+ * do. */
+static void cryptd_queue_worker(struct work_struct *work)
+{
+	struct cryptd_cpu_queue *cpu_queue;
+	struct crypto_async_request *req, *backlog;
+
+	cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
+	/* Only handle one request at a time to avoid hogging crypto
+	 * workqueue. preempt_disable/enable is used to prevent
+	 * being preempted by cryptd_enqueue_request() */
+	preempt_disable();
+	backlog = crypto_get_backlog(&cpu_queue->queue);
+	req = crypto_dequeue_request(&cpu_queue->queue);
+	preempt_enable();
+
+	if (!req)
+		return;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+	req->complete(req, 0);
+
+	if (cpu_queue->queue.qlen)
+		queue_work(kcrypto_wq, &cpu_queue->work);
+}
+
+static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm)
 {
 	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
 	struct cryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
-	return ictx->state;
+	return ictx->queue;
 }
 
 static int cryptd_blkcipher_setkey(struct crypto_ablkcipher *parent,
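Two design points in the hunk above are worth spelling out.
cryptd_enqueue_request() always queues onto the queue of the CPU it runs
on (get_cpu()/queue_work_on()), so the worker and the enqueuer for a given
queue share a CPU, and preempt_disable()/preempt_enable() is enough to
keep them from interleaving with no lock needed. And
cryptd_queue_worker() completes exactly one request per invocation,
re-queueing itself while requests remain, so a burst of crypto work
cannot hog kcrypto_wq. A stripped-down sketch of that self-requeueing
pattern, outside any crypto context (the demo_* names are hypothetical,
and a spinlock stands in for the patch's preempt-based exclusion, since a
generic example cannot rely on per-CPU placement):

-------------------- sketch begin ---------------------------
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct demo_ctx {
	struct list_head items;		/* pending items */
	spinlock_t lock;
	struct work_struct work;
};

static void demo_worker(struct work_struct *work)
{
	struct demo_ctx *ctx = container_of(work, struct demo_ctx, work);
	struct list_head *item = NULL;
	bool more;

	spin_lock_bh(&ctx->lock);
	if (!list_empty(&ctx->items)) {
		item = ctx->items.next;
		list_del(item);
	}
	more = !list_empty(&ctx->items);
	spin_unlock_bh(&ctx->lock);

	if (!item)
		return;

	/* ... process exactly one item here ... */

	if (more)
		schedule_work(&ctx->work);	/* yield, come back for the rest */
}
-------------------- sketch end -----------------------------

Handling one item and re-queueing gives other work queued on the same
per-CPU worker thread a chance to run between requests.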
@@ -131,19 +205,13 @@ static int cryptd_blkcipher_enqueue(struct ablkcipher_request *req,
 {
 	struct cryptd_blkcipher_request_ctx *rctx = ablkcipher_request_ctx(req);
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct cryptd_state *state =
-		cryptd_get_state(crypto_ablkcipher_tfm(tfm));
-	int err;
+	struct cryptd_queue *queue;
 
+	queue = cryptd_get_queue(crypto_ablkcipher_tfm(tfm));
 	rctx->complete = req->base.complete;
 	req->base.complete = complete;
 
-	spin_lock_bh(&state->lock);
-	err = ablkcipher_enqueue_request(&state->queue, req);
-	spin_unlock_bh(&state->lock);
-
-	wake_up_process(state->task);
-	return err;
+	return cryptd_enqueue_request(queue, &req->base);
 }
 
 static int cryptd_blkcipher_encrypt_enqueue(struct ablkcipher_request *req)
@@ -177,21 +245,12 @@ static int cryptd_blkcipher_init_tfm(struct crypto_tfm *tfm)
 static void cryptd_blkcipher_exit_tfm(struct crypto_tfm *tfm)
 {
 	struct cryptd_blkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_state *state = cryptd_get_state(tfm);
-	int active;
-
-	mutex_lock(&state->mutex);
-	active = ablkcipher_tfm_in_queue(&state->queue,
-					 __crypto_ablkcipher_cast(tfm));
-	mutex_unlock(&state->mutex);
-
-	BUG_ON(active);
 
 	crypto_free_blkcipher(ctx->child);
 }
 
 static struct crypto_instance *cryptd_alloc_instance(struct crypto_alg *alg,
-						     struct cryptd_state *state)
+						     struct cryptd_queue *queue)
 {
 	struct crypto_instance *inst;
 	struct cryptd_instance_ctx *ctx;
@@ -214,7 +273,7 @@ static struct crypto_instance *cryptd_alloc_instance(struct crypto_alg *alg,
 	if (err)
 		goto out_free_inst;
 
-	ctx->state = state;
+	ctx->queue = queue;
 
 	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
 
@@ -232,7 +291,7 @@ out_free_inst:
 }
 
 static struct crypto_instance *cryptd_alloc_blkcipher(
-	struct rtattr **tb, struct cryptd_state *state)
+	struct rtattr **tb, struct cryptd_queue *queue)
 {
 	struct crypto_instance *inst;
 	struct crypto_alg *alg;
@@ -242,7 +301,7 @@ static struct crypto_instance *cryptd_alloc_blkcipher(
 	if (IS_ERR(alg))
 		return ERR_CAST(alg);
 
-	inst = cryptd_alloc_instance(alg, state);
+	inst = cryptd_alloc_instance(alg, queue);
 	if (IS_ERR(inst))
 		goto out_put_alg;
 
@@ -290,15 +349,6 @@ static int cryptd_hash_init_tfm(struct crypto_tfm *tfm)
 static void cryptd_hash_exit_tfm(struct crypto_tfm *tfm)
 {
 	struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_state *state = cryptd_get_state(tfm);
-	int active;
-
-	mutex_lock(&state->mutex);
-	active = ahash_tfm_in_queue(&state->queue,
-				    __crypto_ahash_cast(tfm));
-	mutex_unlock(&state->mutex);
-
-	BUG_ON(active);
 
 	crypto_free_hash(ctx->child);
 }
@@ -324,19 +374,13 @@ static int cryptd_hash_enqueue(struct ahash_request *req,
 {
 	struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cryptd_state *state =
-		cryptd_get_state(crypto_ahash_tfm(tfm));
-	int err;
+	struct cryptd_queue *queue =
+		cryptd_get_queue(crypto_ahash_tfm(tfm));
 
 	rctx->complete = req->base.complete;
 	req->base.complete = complete;
 
-	spin_lock_bh(&state->lock);
-	err = ahash_enqueue_request(&state->queue, req);
-	spin_unlock_bh(&state->lock);
-
-	wake_up_process(state->task);
-	return err;
+	return cryptd_enqueue_request(queue, &req->base);
 }
 
 static void cryptd_hash_init(struct crypto_async_request *req_async, int err)
@@ -469,7 +513,7 @@ static int cryptd_hash_digest_enqueue(struct ahash_request *req)
 }
 
 static struct crypto_instance *cryptd_alloc_hash(
-	struct rtattr **tb, struct cryptd_state *state)
+	struct rtattr **tb, struct cryptd_queue *queue)
 {
 	struct crypto_instance *inst;
 	struct crypto_alg *alg;
@@ -479,7 +523,7 @@ static struct crypto_instance *cryptd_alloc_hash(
 	if (IS_ERR(alg))
 		return ERR_PTR(PTR_ERR(alg));
 
-	inst = cryptd_alloc_instance(alg, state);
+	inst = cryptd_alloc_instance(alg, queue);
 	if (IS_ERR(inst))
 		goto out_put_alg;
 
@@ -503,7 +547,7 @@ out_put_alg:
 	return inst;
 }
 
-static struct cryptd_state state;
+static struct cryptd_queue queue;
 
 static struct crypto_instance *cryptd_alloc(struct rtattr **tb)
 {
@@ -515,9 +559,9 @@ static struct crypto_instance *cryptd_alloc(struct rtattr **tb)
 
 	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_BLKCIPHER:
-		return cryptd_alloc_blkcipher(tb, &state);
+		return cryptd_alloc_blkcipher(tb, &queue);
 	case CRYPTO_ALG_TYPE_DIGEST:
-		return cryptd_alloc_hash(tb, &state);
+		return cryptd_alloc_hash(tb, &queue);
 	}
 
 	return ERR_PTR(-EINVAL);
@@ -572,82 +616,24 @@ void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm)
 }
 EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher);
 
-static inline int cryptd_create_thread(struct cryptd_state *state,
-				       int (*fn)(void *data), const char *name)
-{
-	spin_lock_init(&state->lock);
-	mutex_init(&state->mutex);
-	crypto_init_queue(&state->queue, CRYPTD_MAX_QLEN);
-
-	state->task = kthread_run(fn, state, name);
-	if (IS_ERR(state->task))
-		return PTR_ERR(state->task);
-
-	return 0;
-}
-
-static inline void cryptd_stop_thread(struct cryptd_state *state)
-{
-	BUG_ON(state->queue.qlen);
-	kthread_stop(state->task);
-}
-
-static int cryptd_thread(void *data)
-{
-	struct cryptd_state *state = data;
-	int stop;
-
-	current->flags |= PF_NOFREEZE;
-
-	do {
-		struct crypto_async_request *req, *backlog;
-
-		mutex_lock(&state->mutex);
-		__set_current_state(TASK_INTERRUPTIBLE);
-
-		spin_lock_bh(&state->lock);
-		backlog = crypto_get_backlog(&state->queue);
-		req = crypto_dequeue_request(&state->queue);
-		spin_unlock_bh(&state->lock);
-
-		stop = kthread_should_stop();
-
-		if (stop || req) {
-			__set_current_state(TASK_RUNNING);
-			if (req) {
-				if (backlog)
-					backlog->complete(backlog,
-							  -EINPROGRESS);
-				req->complete(req, 0);
-			}
-		}
-
-		mutex_unlock(&state->mutex);
-
-		schedule();
-	} while (!stop);
-
-	return 0;
-}
-
 static int __init cryptd_init(void)
 {
 	int err;
 
-	err = cryptd_create_thread(&state, cryptd_thread, "cryptd");
+	err = cryptd_init_queue(&queue, CRYPTD_MAX_CPU_QLEN);
 	if (err)
 		return err;
 
 	err = crypto_register_template(&cryptd_tmpl);
 	if (err)
-		kthread_stop(state.task);
+		cryptd_fini_queue(&queue);
 
 	return err;
 }
 
 static void __exit cryptd_exit(void)
 {
-	cryptd_stop_thread(&state);
+	cryptd_fini_queue(&queue);
 	crypto_unregister_template(&cryptd_tmpl);
 }
 