author		Tejun Heo <tj@kernel.org>	2012-03-05 16:15:21 -0500
committer	Jens Axboe <axboe@kernel.dk>	2012-03-06 15:27:24 -0500
commit		9f13ef678efd977487fc0c2e489f17c9a8c67a3e (patch)
tree		e58a2dd153ad24b2ea173d5dfb575c507e1f7589 /block
parent		e8989fae38d9831c72b20375a206a919ca468c52 (diff)
blkcg: use double locking instead of RCU for blkg synchronization
blkgs are chained from both blkcgs and request_queues and thus subjected
to two locks - blkcg->lock and q->queue_lock.  As both blkcg and q can
go away anytime, locking during removal is tricky.  It's currently
solved by wrapping removal inside RCU, which makes the synchronization
complex.  There are three locks to worry about - the outer RCU, q lock
and blkcg lock, and it leads to nasty subtle complications like
conditional synchronize_rcu() on queue exit paths.

For all other paths, blkcg lock is naturally nested inside q lock and
the only exception is blkcg removal path, which is a very cold path and
can be implemented as clumsy but conceptually-simple reverse double
lock dancing.

This patch updates blkg removal path such that blkgs are removed while
holding both q and blkcg locks, which is trivial for request queue exit
path - blkg_destroy_all().  The blkcg removal path,
blkiocg_pre_destroy(), implements reverse double lock dancing
essentially identical to ioc_release_fn().

This simplifies blkg locking - no half-dead blkgs to worry about.  Now
unnecessary RCU annotations will be removed by the next patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
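To make the locking order concrete, here is a minimal userspace sketch
of the trylock-and-back-off pattern the message calls reverse double
lock dancing, with POSIX mutexes standing in for the kernel spinlocks.
blkg_list_empty() and destroy_first_blkg() are hypothetical stand-ins
for hlist_empty() and blkg_destroy(), not kernel API.

/* Build with: cc -pthread dance.c */
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER; /* inner lock in normal nesting */
static pthread_mutex_t blkcg_lock = PTHREAD_MUTEX_INITIALIZER; /* outer lock in normal nesting */

static int nr_blkgs = 3;	/* pretend three blkgs are chained off the blkcg */

static bool blkg_list_empty(void)	/* hypothetical stand-in for hlist_empty() */
{
	return nr_blkgs == 0;
}

static void destroy_first_blkg(void)	/* hypothetical stand-in for blkg_destroy() */
{
	nr_blkgs--;
}

/*
 * Reverse double lock dancing: the normal nesting is queue_lock ->
 * blkcg_lock, but this path already holds blkcg_lock, so queue_lock
 * may only be acquired with a trylock.  On failure, drop everything
 * and retry instead of blocking, which would risk an ABBA deadlock.
 */
static void pre_destroy_dance(void)
{
	pthread_mutex_lock(&blkcg_lock);

	while (!blkg_list_empty()) {
		if (pthread_mutex_trylock(&queue_lock) == 0) {
			/* got both locks in the "wrong" order - safe to destroy */
			destroy_first_blkg();
			pthread_mutex_unlock(&queue_lock);
		} else {
			/* lost the race: back off completely and retry */
			pthread_mutex_unlock(&blkcg_lock);
			sched_yield();	/* userspace analogue of cpu_relax() */
			pthread_mutex_lock(&blkcg_lock);
		}
	}

	pthread_mutex_unlock(&blkcg_lock);
}

int main(void)
{
	pre_destroy_dance();
	printf("blkgs remaining: %d\n", nr_blkgs);	/* prints 0 */
	return 0;
}

The invariant that makes this safe is that queue_lock is never blocked
on while blkcg_lock is held, so this path cannot deadlock against the
normal queue_lock -> blkcg_lock nesting used everywhere else; forward
progress relies on the retry eventually winning, which is acceptable
because cgroup removal is a very cold path.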
Diffstat (limited to 'block')
-rw-r--r--	block/blk-cgroup.c	136
-rw-r--r--	block/blk-cgroup.h	4
-rw-r--r--	block/cfq.h	10
3 files changed, 51 insertions, 99 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index cad5f15cf49b..e9e3b038c702 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -620,32 +620,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
-static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
-{
-	hlist_del_init_rcu(&blkg->blkcg_node);
-}
-
-/*
- * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
- * indicating that blk_group was unhashed by the time we got to it.
- */
-int blkiocg_del_blkio_group(struct blkio_group *blkg)
-{
-	struct blkio_cgroup *blkcg = blkg->blkcg;
-	unsigned long flags;
-	int ret = 1;
-
-	spin_lock_irqsave(&blkcg->lock, flags);
-	if (!hlist_unhashed(&blkg->blkcg_node)) {
-		__blkiocg_del_blkio_group(blkg);
-		ret = 0;
-	}
-	spin_unlock_irqrestore(&blkcg->lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
-
 /* called under rcu_read_lock(). */
 struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
 				struct request_queue *q)
@@ -663,12 +637,16 @@ EXPORT_SYMBOL_GPL(blkg_lookup);
 static void blkg_destroy(struct blkio_group *blkg)
 {
 	struct request_queue *q = blkg->q;
+	struct blkio_cgroup *blkcg = blkg->blkcg;
 
 	lockdep_assert_held(q->queue_lock);
+	lockdep_assert_held(&blkcg->lock);
 
 	/* Something wrong if we are trying to remove same group twice */
 	WARN_ON_ONCE(list_empty(&blkg->q_node));
+	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
 	list_del_init(&blkg->q_node);
+	hlist_del_init_rcu(&blkg->blkcg_node);
 
 	WARN_ON_ONCE(q->nr_blkgs <= 0);
 	q->nr_blkgs--;
@@ -713,45 +691,33 @@ void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
 }
 EXPORT_SYMBOL_GPL(update_root_blkg_pd);
 
+/**
+ * blkg_destroy_all - destroy all blkgs associated with a request_queue
+ * @q: request_queue of interest
+ * @destroy_root: whether to destroy root blkg or not
+ *
+ * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
+ * destroyed; otherwise, root blkg is left alone.
+ */
 void blkg_destroy_all(struct request_queue *q, bool destroy_root)
 {
 	struct blkio_group *blkg, *n;
 
-	while (true) {
-		bool done = true;
-
-		spin_lock_irq(q->queue_lock);
-
-		list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
-			/* skip root? */
-			if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
-				continue;
-
-			/*
-			 * If cgroup removal path got to blk_group first
-			 * and removed it from cgroup list, then it will
-			 * take care of destroying cfqg also.
-			 */
-			if (!blkiocg_del_blkio_group(blkg))
-				blkg_destroy(blkg);
-			else
-				done = false;
-		}
+	spin_lock_irq(q->queue_lock);
 
-		spin_unlock_irq(q->queue_lock);
+	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
+		struct blkio_cgroup *blkcg = blkg->blkcg;
 
-		/*
-		 * Group list may not be empty if we raced cgroup removal
-		 * and lost.  cgroup removal is guaranteed to make forward
-		 * progress and retrying after a while is enough.  This
-		 * ugliness is scheduled to be removed after locking
-		 * update.
-		 */
-		if (done)
-			break;
+		/* skip root? */
+		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
+			continue;
 
-		msleep(10);	/* just some random duration I like */
+		spin_lock(&blkcg->lock);
+		blkg_destroy(blkg);
+		spin_unlock(&blkcg->lock);
 	}
+
+	spin_unlock_irq(q->queue_lock);
 }
 EXPORT_SYMBOL_GPL(blkg_destroy_all);
 
@@ -1600,45 +1566,45 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 					ARRAY_SIZE(blkio_files));
 }
 
+/**
+ * blkiocg_pre_destroy - cgroup pre_destroy callback
+ * @subsys: cgroup subsys
+ * @cgroup: cgroup of interest
+ *
+ * This function is called when @cgroup is about to go away and responsible
+ * for shooting down all blkgs associated with @cgroup.  blkgs should be
+ * removed while holding both q and blkcg locks.  As blkcg lock is nested
+ * inside q lock, this function performs reverse double lock dancing.
+ *
+ * This is the blkcg counterpart of ioc_release_fn().
+ */
 static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
 			       struct cgroup *cgroup)
 {
 	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
-	unsigned long flags;
-	struct blkio_group *blkg;
-	struct request_queue *q;
 
 	rcu_read_lock();
+	spin_lock_irq(&blkcg->lock);
 
-	do {
-		spin_lock_irqsave(&blkcg->lock, flags);
+	while (!hlist_empty(&blkcg->blkg_list)) {
+		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
+						struct blkio_group, blkcg_node);
+		struct request_queue *q = rcu_dereference(blkg->q);
 
-		if (hlist_empty(&blkcg->blkg_list)) {
-			spin_unlock_irqrestore(&blkcg->lock, flags);
-			break;
+		if (spin_trylock(q->queue_lock)) {
+			blkg_destroy(blkg);
+			spin_unlock(q->queue_lock);
+		} else {
+			spin_unlock_irq(&blkcg->lock);
+			rcu_read_unlock();
+			cpu_relax();
+			rcu_read_lock();
+			spin_lock(&blkcg->lock);
 		}
+	}
 
-		blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
-				   blkcg_node);
-		q = rcu_dereference(blkg->q);
-		__blkiocg_del_blkio_group(blkg);
-
-		spin_unlock_irqrestore(&blkcg->lock, flags);
-
-		/*
-		 * This blkio_group is being unlinked as associated cgroup is
-		 * going away. Let all the IO controlling policies know about
-		 * this event.
-		 */
-		spin_lock(&blkio_list_lock);
-		spin_lock_irqsave(q->queue_lock, flags);
-		blkg_destroy(blkg);
-		spin_unlock_irqrestore(q->queue_lock, flags);
-		spin_unlock(&blkio_list_lock);
-	} while (1);
-
+	spin_unlock_irq(&blkcg->lock);
 	rcu_read_unlock();
-
 	return 0;
 }
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 6e8ee86a2870..df73040a6a5f 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -380,7 +380,6 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
 extern struct blkio_cgroup blkio_root_cgroup;
 extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
 extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
-extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
 				       struct request_queue *q);
 struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
@@ -416,9 +415,6 @@ cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
 static inline struct blkio_cgroup *
 task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
 
-static inline int
-blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
-
 static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
 					      void *key) { return NULL; }
 static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
diff --git a/block/cfq.h b/block/cfq.h
index 5584e1b63ca8..c8b15ef57e5d 100644
--- a/block/cfq.h
+++ b/block/cfq.h
@@ -79,11 +79,6 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
 			direction, sync);
 }
 
-static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
-{
-	return blkiocg_del_blkio_group(blkg);
-}
-
 #else /* CFQ_GROUP_IOSCHED */
 static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg,
 			struct blkio_policy_type *pol,
@@ -119,10 +114,5 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
 			struct blkio_policy_type *pol, uint64_t start_time,
 			uint64_t io_start_time, bool direction, bool sync) { }
 
-static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
-{
-	return 0;
-}
-
 #endif /* CFQ_GROUP_IOSCHED */
 #endif