author      Vivek Goyal <vgoyal@redhat.com>    2012-03-08 13:53:56 -0500
committer   Jens Axboe <axboe@kernel.dk>       2012-03-20 07:45:37 -0400
commit      1cd9e039fc258f91fe38b97b3c622b13a3b8a795 (patch)
tree        908f9747124b24ea4413025ab9294bd7d026b4f8 /block
parent      671058fb2a2aac4e70f01b316b06bc59b98bd138 (diff)
blkcg: alloc per cpu stats from worker thread in a delayed manner
The current per cpu stat allocation assumes the GFP_KERNEL allocation flag,
but in the IO path there are times when we want GFP_NOIO semantics. As there
is no way to pass allocation flags to alloc_percpu(), this patch delays the
allocation of the stats to a worker thread; a simplified sketch of this
deferral pattern follows the sign-offs below.
v2 -> Tejun suggested the following changes; the patch was updated accordingly.
- Move the location of alloc_node in the structure.
- Shorten the names of some of the fields.
- Reduce the scope of locking of alloc_list_lock.
- Simplify stat_alloc_fn() by allocating stats for all
  policies in one go and then assigning these to a group.
v3 -> Andrew suggested putting some comments in the code and raised
concerns about retrying the allocation indefinitely in case of allocation
failure. The logic now sleeps for 10ms before retrying, which should take
care of non-preemptible UP kernels.
v4 -> Tejun had more suggestions.
- Drop list_for_each_entry_all().
- Use queue_delayed_work() instead of msleep().
- Some cleanups related to more compact code.
v5 -> Tejun suggested more cleanups leading to more compact code.
tj: - Relocated pcpu_stats into blkio_stat_alloc_fn().
    - Minor comment update.
    - This also fixes a suspicious RCU usage warning caused by invoking
      cgroup_path() from blkg_alloc() without holding the RCU read lock.
      Now that blkg_alloc() doesn't require a sleepable context, the RCU
      read lock taken in blkg_lookup_create() is held throughout
      blkg_alloc().
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
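
As a rough, standalone sketch of the deferral pattern used here (not part of
the patch; the identifiers grp, pending_stats, pending_stats_lock and
grp_queue_stats_alloc() are illustrative only, and the patch itself allocates
one per-cpu area per blkio policy and queues the work on system_nrt_wq rather
than system_wq):

/*
 * Illustrative sketch only -- simplified from the pattern in this patch.
 * A caller that cannot sleep parks its group on a pending list and kicks
 * a delayed work item; the worker, running in process context where
 * alloc_percpu()'s implicit GFP_KERNEL is allowed, performs the
 * allocation and retries later if it fails.
 */
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct grp_stats_cpu {
        u64 sectors;                               /* stand-in for real counters */
};

struct grp {
        struct grp_stats_cpu __percpu *stats_cpu;  /* NULL until the worker runs */
        struct list_head pending_node;
};

static DEFINE_SPINLOCK(pending_stats_lock);
static LIST_HEAD(pending_stats);

static void grp_stats_alloc_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(grp_stats_alloc_work, grp_stats_alloc_fn);

/* Hot path (possibly atomic): just queue the group and kick the worker. */
static void grp_queue_stats_alloc(struct grp *grp)
{
        unsigned long flags;

        spin_lock_irqsave(&pending_stats_lock, flags);
        list_add(&grp->pending_node, &pending_stats);
        queue_delayed_work(system_wq, &grp_stats_alloc_work, 0);
        spin_unlock_irqrestore(&pending_stats_lock, flags);
}

/* Worker: process context, so a sleeping per-cpu allocation is fine here. */
static void grp_stats_alloc_fn(struct work_struct *work)
{
        struct grp_stats_cpu __percpu *stats;
        struct grp *grp;
        bool more = false;

        stats = alloc_percpu(struct grp_stats_cpu);
        if (!stats) {
                /* Allocation failed: retry in 10ms instead of looping. */
                queue_delayed_work(system_wq, to_delayed_work(work),
                                   msecs_to_jiffies(10));
                return;
        }

        spin_lock_irq(&pending_stats_lock);
        if (!list_empty(&pending_stats)) {
                grp = list_first_entry(&pending_stats, struct grp,
                                       pending_node);
                if (!grp->stats_cpu)
                        swap(grp->stats_cpu, stats);   /* hand off ownership */
                list_del_init(&grp->pending_node);
                more = !list_empty(&pending_stats);
        }
        spin_unlock_irq(&pending_stats_lock);

        /* Free the spare area if it was not handed off (NULL is a no-op). */
        free_percpu(stats);

        /* More groups are waiting: reschedule immediately for the next one. */
        if (more)
                queue_delayed_work(system_wq, to_delayed_work(work), 0);
}

The point of the split is that the hot path only links the group onto a list
under a spinlock, while the sleeping alloc_percpu() call happens exclusively
in worker (process) context, which reschedules itself after 10ms instead of
busy-looping when memory is tight.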
Diffstat (limited to 'block')
-rw-r--r--   block/blk-cgroup.c   129
-rw-r--r--   block/blk-cgroup.h     2
2 files changed, 91 insertions, 40 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ee962f327ba5..622fb4143226 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -30,6 +30,13 @@ static LIST_HEAD(blkio_list);
 static DEFINE_MUTEX(all_q_mutex);
 static LIST_HEAD(all_q_list);
 
+/* List of groups pending per cpu stats allocation */
+static DEFINE_SPINLOCK(alloc_list_lock);
+static LIST_HEAD(alloc_list);
+
+static void blkio_stat_alloc_fn(struct work_struct *);
+static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn);
+
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
 
@@ -391,6 +398,10 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
         struct blkio_group_stats_cpu *stats_cpu;
         unsigned long flags;
 
+        /* If per cpu stats are not allocated yet, don't do any accounting. */
+        if (pd->stats_cpu == NULL)
+                return;
+
         /*
          * Disabling interrupts to provide mutual exclusion between two
          * writes on same cpu. It probably is not needed for 64bit. Not
@@ -443,6 +454,10 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
         struct blkio_group_stats_cpu *stats_cpu;
         unsigned long flags;
 
+        /* If per cpu stats are not allocated yet, don't do any accounting. */
+        if (pd->stats_cpu == NULL)
+                return;
+
         /*
          * Disabling interrupts to provide mutual exclusion between two
          * writes on same cpu. It probably is not needed for 64bit. Not
@@ -460,6 +475,60 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 
+/*
+ * Worker for allocating per cpu stat for blk groups. This is scheduled on
+ * the system_nrt_wq once there are some groups on the alloc_list waiting
+ * for allocation.
+ */
+static void blkio_stat_alloc_fn(struct work_struct *work)
+{
+        static void *pcpu_stats[BLKIO_NR_POLICIES];
+        struct delayed_work *dwork = to_delayed_work(work);
+        struct blkio_group *blkg;
+        int i;
+        bool empty = false;
+
+alloc_stats:
+        for (i = 0; i < BLKIO_NR_POLICIES; i++) {
+                if (pcpu_stats[i] != NULL)
+                        continue;
+
+                pcpu_stats[i] = alloc_percpu(struct blkio_group_stats_cpu);
+
+                /* Allocation failed. Try again after some time. */
+                if (pcpu_stats[i] == NULL) {
+                        queue_delayed_work(system_nrt_wq, dwork,
+                                           msecs_to_jiffies(10));
+                        return;
+                }
+        }
+
+        spin_lock_irq(&blkio_list_lock);
+        spin_lock(&alloc_list_lock);
+
+        /* cgroup got deleted or queue exited. */
+        if (!list_empty(&alloc_list)) {
+                blkg = list_first_entry(&alloc_list, struct blkio_group,
+                                        alloc_node);
+                for (i = 0; i < BLKIO_NR_POLICIES; i++) {
+                        struct blkg_policy_data *pd = blkg->pd[i];
+
+                        if (blkio_policy[i] && pd && !pd->stats_cpu)
+                                swap(pd->stats_cpu, pcpu_stats[i]);
+                }
+
+                list_del_init(&blkg->alloc_node);
+        }
+
+        empty = list_empty(&alloc_list);
+
+        spin_unlock(&alloc_list_lock);
+        spin_unlock_irq(&blkio_list_lock);
+
+        if (!empty)
+                goto alloc_stats;
+}
+
 /**
  * blkg_free - free a blkg
  * @blkg: blkg to free
@@ -491,9 +560,6 @@ static void blkg_free(struct blkio_group *blkg)
  * @q: request_queue the new blkg is associated with
  *
  * Allocate a new blkg assocating @blkcg and @q.
- *
- * FIXME: Should be called with queue locked but currently isn't due to
- * percpu stat breakage.
  */
 static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
                                       struct request_queue *q)
@@ -509,6 +575,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
         spin_lock_init(&blkg->stats_lock);
         blkg->q = q;
         INIT_LIST_HEAD(&blkg->q_node);
+        INIT_LIST_HEAD(&blkg->alloc_node);
         blkg->blkcg = blkcg;
         blkg->refcnt = 1;
         cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
@@ -530,13 +597,6 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 
                 blkg->pd[i] = pd;
                 pd->blkg = blkg;
-
-                /* broken, read comment in the callsite */
-                pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
-                if (!pd->stats_cpu) {
-                        blkg_free(blkg);
-                        return NULL;
-                }
         }
 
         /* invoke per-policy init */
@@ -556,7 +616,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
                                        bool for_root)
         __releases(q->queue_lock) __acquires(q->queue_lock)
 {
-        struct blkio_group *blkg, *new_blkg;
+        struct blkio_group *blkg;
 
         WARN_ON_ONCE(!rcu_read_lock_held());
         lockdep_assert_held(q->queue_lock);
@@ -580,48 +640,27 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 
         /*
          * Allocate and initialize.
-         *
-         * FIXME: The following is broken. Percpu memory allocation
-         * requires %GFP_KERNEL context and can't be performed from IO
-         * path. Allocation here should inherently be atomic and the
-         * following lock dancing can be removed once the broken percpu
-         * allocation is fixed.
          */
-        spin_unlock_irq(q->queue_lock);
-        rcu_read_unlock();
-
-        new_blkg = blkg_alloc(blkcg, q);
-
-        rcu_read_lock();
-        spin_lock_irq(q->queue_lock);
-
-        /* did bypass get turned on inbetween? */
-        if (unlikely(blk_queue_bypass(q)) && !for_root) {
-                blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
-                goto out;
-        }
-
-        /* did someone beat us to it? */
-        blkg = blkg_lookup(blkcg, q);
-        if (unlikely(blkg))
-                goto out;
+        blkg = blkg_alloc(blkcg, q);
 
         /* did alloc fail? */
-        if (unlikely(!new_blkg)) {
+        if (unlikely(!blkg)) {
                 blkg = ERR_PTR(-ENOMEM);
                 goto out;
         }
 
         /* insert */
         spin_lock(&blkcg->lock);
-        swap(blkg, new_blkg);
-
         hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
         list_add(&blkg->q_node, &q->blkg_list);
-
         spin_unlock(&blkcg->lock);
+
+        spin_lock(&alloc_list_lock);
+        list_add(&blkg->alloc_node, &alloc_list);
+        /* Queue per cpu stat allocation from worker thread. */
+        queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0);
+        spin_unlock(&alloc_list_lock);
 out:
-        blkg_free(new_blkg);
         return blkg;
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
@@ -654,6 +693,10 @@ static void blkg_destroy(struct blkio_group *blkg)
         list_del_init(&blkg->q_node);
         hlist_del_init_rcu(&blkg->blkcg_node);
 
+        spin_lock(&alloc_list_lock);
+        list_del_init(&blkg->alloc_node);
+        spin_unlock(&alloc_list_lock);
+
         /*
          * Put the reference taken at the time of creation so that when all
          * queues are gone, group can be destroyed.
@@ -752,6 +795,9 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
         struct blkg_policy_data *pd = blkg->pd[plid];
         struct blkio_group_stats_cpu *stats_cpu;
         int i, j, k;
+
+        if (pd->stats_cpu == NULL)
+                return;
         /*
          * Note: On 64 bit arch this should not be an issue. This has the
          * possibility of returning some inconsistent value on 32bit arch
@@ -883,6 +929,9 @@ static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid,
         struct blkio_group_stats_cpu *stats_cpu;
         u64 val = 0, tval;
 
+        if (pd->stats_cpu == NULL)
+                return val;
+
         for_each_possible_cpu(cpu) {
                 unsigned int start;
                 stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 98cd8533378f..1de32fe0e2af 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -190,6 +190,8 @@ struct blkio_group {
         spinlock_t stats_lock;
         struct blkg_policy_data *pd[BLKIO_NR_POLICIES];
 
+        /* List of blkg waiting for per cpu stats memory to be allocated */
+        struct list_head alloc_node;
         struct rcu_head rcu_head;
 };
 