aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorVivek Goyal <vgoyal@redhat.com>2012-03-08 13:53:56 -0500
committerJens Axboe <axboe@kernel.dk>2012-03-20 07:45:37 -0400
commit1cd9e039fc258f91fe38b97b3c622b13a3b8a795 (patch)
tree908f9747124b24ea4413025ab9294bd7d026b4f8 /block
parent671058fb2a2aac4e70f01b316b06bc59b98bd138 (diff)
blkcg: alloc per cpu stats from worker thread in a delayed manner
Current per cpu stat allocation assumes the GFP_KERNEL allocation flag. But in the IO path there are times when we want GFP_NOIO semantics. As there is no way to pass the allocation flags to alloc_percpu(), this patch delays the allocation of stats using a worker thread. v2 -> Tejun suggested the following changes. Changed the patch accordingly. - move alloc_node location in structure - reduce the size of names of some of the fields - Reduce the scope of locking of alloc_list_lock - Simplified stat_alloc_fn() by allocating stats for all policies in one go and then assigning these to a group. v3 -> Andrew suggested to put some comments in the code. Also raised concerns about trying to allocate infinitely in case of allocation failure. I have changed the logic to sleep for 10ms before retrying. That should take care of non-preemptible UP kernels. v4 -> Tejun had more suggestions. - drop list_for_each_entry_all() - instead of msleep() use queue_delayed_work() - Some cleanups related to more compact coding. v5 -> Tejun suggested more cleanups leading to more compact code. tj: - Relocated pcpu_stats into blkio_stat_alloc_fn(). - Minor comment update. - This also fixes suspicious RCU usage warning caused by invoking cgroup_path() from blkg_alloc() without holding RCU read lock. Now that blkg_alloc() doesn't require sleepable context, RCU read lock from blkg_lookup_create() is maintained throughout blkg_alloc(). Signed-off-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--block/blk-cgroup.c129
-rw-r--r--block/blk-cgroup.h2
2 files changed, 91 insertions, 40 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ee962f327ba5..622fb4143226 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -30,6 +30,13 @@ static LIST_HEAD(blkio_list);
30static DEFINE_MUTEX(all_q_mutex); 30static DEFINE_MUTEX(all_q_mutex);
31static LIST_HEAD(all_q_list); 31static LIST_HEAD(all_q_list);
32 32
33/* List of groups pending per cpu stats allocation */
34static DEFINE_SPINLOCK(alloc_list_lock);
35static LIST_HEAD(alloc_list);
36
37static void blkio_stat_alloc_fn(struct work_struct *);
38static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn);
39
33struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; 40struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
34EXPORT_SYMBOL_GPL(blkio_root_cgroup); 41EXPORT_SYMBOL_GPL(blkio_root_cgroup);
35 42
@@ -391,6 +398,10 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
391 struct blkio_group_stats_cpu *stats_cpu; 398 struct blkio_group_stats_cpu *stats_cpu;
392 unsigned long flags; 399 unsigned long flags;
393 400
401 /* If per cpu stats are not allocated yet, don't do any accounting. */
402 if (pd->stats_cpu == NULL)
403 return;
404
394 /* 405 /*
395 * Disabling interrupts to provide mutual exclusion between two 406 * Disabling interrupts to provide mutual exclusion between two
396 * writes on same cpu. It probably is not needed for 64bit. Not 407 * writes on same cpu. It probably is not needed for 64bit. Not
@@ -443,6 +454,10 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
443 struct blkio_group_stats_cpu *stats_cpu; 454 struct blkio_group_stats_cpu *stats_cpu;
444 unsigned long flags; 455 unsigned long flags;
445 456
457 /* If per cpu stats are not allocated yet, don't do any accounting. */
458 if (pd->stats_cpu == NULL)
459 return;
460
446 /* 461 /*
447 * Disabling interrupts to provide mutual exclusion between two 462 * Disabling interrupts to provide mutual exclusion between two
448 * writes on same cpu. It probably is not needed for 64bit. Not 463 * writes on same cpu. It probably is not needed for 64bit. Not
@@ -460,6 +475,60 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
460} 475}
461EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); 476EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
462 477
478/*
479 * Worker that allocates per cpu stats for blk groups. It is scheduled on
480 * the system_nrt_wq once there are some groups on the alloc_list waiting
481 * for allocation, and handles one group per pass until the list is empty.
482 */
483static void blkio_stat_alloc_fn(struct work_struct *work)
484{
485 static void *pcpu_stats[BLKIO_NR_POLICIES]; /* static: unused buffers are kept across invocations */
486 struct delayed_work *dwork = to_delayed_work(work);
487 struct blkio_group *blkg;
488 int i;
489 bool empty = false;
490
491alloc_stats:
492 for (i = 0; i < BLKIO_NR_POLICIES; i++) {
493 if (pcpu_stats[i] != NULL)
494 continue;
495
496 pcpu_stats[i] = alloc_percpu(struct blkio_group_stats_cpu);
497
498 /* Allocation failed. Requeue this work to try again after 10ms. */
499 if (pcpu_stats[i] == NULL) {
500 queue_delayed_work(system_nrt_wq, dwork,
501 msecs_to_jiffies(10));
502 return;
503 }
504 }
505
506 spin_lock_irq(&blkio_list_lock);
507 spin_lock(&alloc_list_lock);
508
509 /* The list may be empty by now: cgroup got deleted or queue exited. */
510 if (!list_empty(&alloc_list)) {
511 blkg = list_first_entry(&alloc_list, struct blkio_group,
512 alloc_node);
513 for (i = 0; i < BLKIO_NR_POLICIES; i++) {
514 struct blkg_policy_data *pd = blkg->pd[i];
515
516 if (blkio_policy[i] && pd && !pd->stats_cpu)
517 swap(pd->stats_cpu, pcpu_stats[i]); /* pd takes the buffer; pcpu_stats[i] becomes NULL */
518 }
519
520 list_del_init(&blkg->alloc_node);
521 }
522
523 empty = list_empty(&alloc_list);
524
525 spin_unlock(&alloc_list_lock);
526 spin_unlock_irq(&blkio_list_lock);
527
528 if (!empty) /* more groups pending: refill consumed slots and repeat */
529 goto alloc_stats;
530}
531
463/** 532/**
464 * blkg_free - free a blkg 533 * blkg_free - free a blkg
465 * @blkg: blkg to free 534 * @blkg: blkg to free
@@ -491,9 +560,6 @@ static void blkg_free(struct blkio_group *blkg)
491 * @q: request_queue the new blkg is associated with 560 * @q: request_queue the new blkg is associated with
492 * 561 *
493 * Allocate a new blkg associating @blkcg and @q. 562 * Allocate a new blkg associating @blkcg and @q.
494 *
495 * FIXME: Should be called with queue locked but currently isn't due to
496 * percpu stat breakage.
497 */ 563 */
498static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg, 564static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
499 struct request_queue *q) 565 struct request_queue *q)
@@ -509,6 +575,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
509 spin_lock_init(&blkg->stats_lock); 575 spin_lock_init(&blkg->stats_lock);
510 blkg->q = q; 576 blkg->q = q;
511 INIT_LIST_HEAD(&blkg->q_node); 577 INIT_LIST_HEAD(&blkg->q_node);
578 INIT_LIST_HEAD(&blkg->alloc_node);
512 blkg->blkcg = blkcg; 579 blkg->blkcg = blkcg;
513 blkg->refcnt = 1; 580 blkg->refcnt = 1;
514 cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); 581 cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
@@ -530,13 +597,6 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
530 597
531 blkg->pd[i] = pd; 598 blkg->pd[i] = pd;
532 pd->blkg = blkg; 599 pd->blkg = blkg;
533
534 /* broken, read comment in the callsite */
535 pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
536 if (!pd->stats_cpu) {
537 blkg_free(blkg);
538 return NULL;
539 }
540 } 600 }
541 601
542 /* invoke per-policy init */ 602 /* invoke per-policy init */
@@ -556,7 +616,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
556 bool for_root) 616 bool for_root)
557 __releases(q->queue_lock) __acquires(q->queue_lock) 617 __releases(q->queue_lock) __acquires(q->queue_lock)
558{ 618{
559 struct blkio_group *blkg, *new_blkg; 619 struct blkio_group *blkg;
560 620
561 WARN_ON_ONCE(!rcu_read_lock_held()); 621 WARN_ON_ONCE(!rcu_read_lock_held());
562 lockdep_assert_held(q->queue_lock); 622 lockdep_assert_held(q->queue_lock);
@@ -580,48 +640,27 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
580 640
581 /* 641 /*
582 * Allocate and initialize. 642 * Allocate and initialize.
583 *
584 * FIXME: The following is broken. Percpu memory allocation
585 * requires %GFP_KERNEL context and can't be performed from IO
586 * path. Allocation here should inherently be atomic and the
587 * following lock dancing can be removed once the broken percpu
588 * allocation is fixed.
589 */ 643 */
590 spin_unlock_irq(q->queue_lock); 644 blkg = blkg_alloc(blkcg, q);
591 rcu_read_unlock();
592
593 new_blkg = blkg_alloc(blkcg, q);
594
595 rcu_read_lock();
596 spin_lock_irq(q->queue_lock);
597
598 /* did bypass get turned on inbetween? */
599 if (unlikely(blk_queue_bypass(q)) && !for_root) {
600 blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
601 goto out;
602 }
603
604 /* did someone beat us to it? */
605 blkg = blkg_lookup(blkcg, q);
606 if (unlikely(blkg))
607 goto out;
608 645
609 /* did alloc fail? */ 646 /* did alloc fail? */
610 if (unlikely(!new_blkg)) { 647 if (unlikely(!blkg)) {
611 blkg = ERR_PTR(-ENOMEM); 648 blkg = ERR_PTR(-ENOMEM);
612 goto out; 649 goto out;
613 } 650 }
614 651
615 /* insert */ 652 /* insert */
616 spin_lock(&blkcg->lock); 653 spin_lock(&blkcg->lock);
617 swap(blkg, new_blkg);
618
619 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); 654 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
620 list_add(&blkg->q_node, &q->blkg_list); 655 list_add(&blkg->q_node, &q->blkg_list);
621
622 spin_unlock(&blkcg->lock); 656 spin_unlock(&blkcg->lock);
657
658 spin_lock(&alloc_list_lock);
659 list_add(&blkg->alloc_node, &alloc_list);
660 /* Queue per cpu stat allocation from worker thread. */
661 queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0);
662 spin_unlock(&alloc_list_lock);
623out: 663out:
624 blkg_free(new_blkg);
625 return blkg; 664 return blkg;
626} 665}
627EXPORT_SYMBOL_GPL(blkg_lookup_create); 666EXPORT_SYMBOL_GPL(blkg_lookup_create);
@@ -654,6 +693,10 @@ static void blkg_destroy(struct blkio_group *blkg)
654 list_del_init(&blkg->q_node); 693 list_del_init(&blkg->q_node);
655 hlist_del_init_rcu(&blkg->blkcg_node); 694 hlist_del_init_rcu(&blkg->blkcg_node);
656 695
696 spin_lock(&alloc_list_lock);
697 list_del_init(&blkg->alloc_node);
698 spin_unlock(&alloc_list_lock);
699
657 /* 700 /*
658 * Put the reference taken at the time of creation so that when all 701 * Put the reference taken at the time of creation so that when all
659 * queues are gone, group can be destroyed. 702 * queues are gone, group can be destroyed.
@@ -752,6 +795,9 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
752 struct blkg_policy_data *pd = blkg->pd[plid]; 795 struct blkg_policy_data *pd = blkg->pd[plid];
753 struct blkio_group_stats_cpu *stats_cpu; 796 struct blkio_group_stats_cpu *stats_cpu;
754 int i, j, k; 797 int i, j, k;
798
799 if (pd->stats_cpu == NULL)
800 return;
755 /* 801 /*
756 * Note: On 64 bit arch this should not be an issue. This has the 802 * Note: On 64 bit arch this should not be an issue. This has the
757 * possibility of returning some inconsistent value on 32bit arch 803 * possibility of returning some inconsistent value on 32bit arch
@@ -883,6 +929,9 @@ static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid,
883 struct blkio_group_stats_cpu *stats_cpu; 929 struct blkio_group_stats_cpu *stats_cpu;
884 u64 val = 0, tval; 930 u64 val = 0, tval;
885 931
932 if (pd->stats_cpu == NULL)
933 return val;
934
886 for_each_possible_cpu(cpu) { 935 for_each_possible_cpu(cpu) {
887 unsigned int start; 936 unsigned int start;
888 stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu); 937 stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 98cd8533378f..1de32fe0e2af 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -190,6 +190,8 @@ struct blkio_group {
190 spinlock_t stats_lock; 190 spinlock_t stats_lock;
191 struct blkg_policy_data *pd[BLKIO_NR_POLICIES]; 191 struct blkg_policy_data *pd[BLKIO_NR_POLICIES];
192 192
193 /* List of blkg waiting for per cpu stats memory to be allocated */
194 struct list_head alloc_node;
193 struct rcu_head rcu_head; 195 struct rcu_head rcu_head;
194}; 196};
195 197