author     Tejun Heo <tj@kernel.org>      2012-03-05 16:15:21 -0500
committer  Jens Axboe <axboe@kernel.dk>   2012-03-06 15:27:24 -0500
commit     9f13ef678efd977487fc0c2e489f17c9a8c67a3e (patch)
tree       e58a2dd153ad24b2ea173d5dfb575c507e1f7589 /block
parent     e8989fae38d9831c72b20375a206a919ca468c52 (diff)
blkcg: use double locking instead of RCU for blkg synchronization
blkgs are chained from both blkcgs and request_queues and thus
subject to two locks - blkcg->lock and q->queue_lock. As both blkcg
and q can go away at any time, locking during removal is tricky. It
is currently solved by wrapping removal inside RCU, which makes the
synchronization complex: there are three locks to worry about - the
outer RCU, the q lock and the blkcg lock - and this leads to nasty,
subtle complications like conditional synchronize_rcu() on queue
exit paths.

For all other paths, the blkcg lock is naturally nested inside the q
lock; the only exception is the blkcg removal path, which is a very
cold path and can be implemented as clumsy but conceptually-simple
reverse double lock dancing.
This patch updates the blkg removal path such that blkgs are removed
while holding both the q and blkcg locks, which is trivial for the
request queue exit path - blkg_destroy_all(). The blkcg removal path,
blkiocg_pre_destroy(), implements reverse double lock dancing
essentially identical to ioc_release_fn(), as sketched below.
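For illustration only - this is a sketch of the dancing pattern,
mirroring the blkiocg_pre_destroy() hunk below rather than extra code
in this patch, with the (soon to be removed) RCU protection of
blkg->q elided:

	spin_lock_irq(&blkcg->lock);
	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
						struct blkio_group, blkcg_node);

		/*
		 * blkcg->lock nests inside q->queue_lock, so blocking on
		 * the q lock while holding blkcg->lock would invert the
		 * established order - trylock and back off on failure.
		 */
		if (spin_trylock(blkg->q->queue_lock)) {
			blkg_destroy(blkg);	/* both locks held */
			spin_unlock(blkg->q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock_irq(&blkcg->lock);
		}
	}
	spin_unlock_irq(&blkcg->lock);

The trylock keeps this cold path from deadlocking: on failure we drop
our lock and retry, and the holder of q->queue_lock is expected to
make forward progress and release it.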
This simplifies blkg locking - there are no half-dead blkgs to worry
about. The now-unnecessary RCU annotations will be removed by the
next patch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')

-rw-r--r--  block/blk-cgroup.c  136
-rw-r--r--  block/blk-cgroup.h    4
-rw-r--r--  block/cfq.h          10

3 files changed, 51 insertions(+), 99 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index cad5f15cf49b..e9e3b038c702 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -620,32 +620,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
-static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
-{
-	hlist_del_init_rcu(&blkg->blkcg_node);
-}
-
-/*
- * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
- * indicating that blk_group was unhashed by the time we got to it.
- */
-int blkiocg_del_blkio_group(struct blkio_group *blkg)
-{
-	struct blkio_cgroup *blkcg = blkg->blkcg;
-	unsigned long flags;
-	int ret = 1;
-
-	spin_lock_irqsave(&blkcg->lock, flags);
-	if (!hlist_unhashed(&blkg->blkcg_node)) {
-		__blkiocg_del_blkio_group(blkg);
-		ret = 0;
-	}
-	spin_unlock_irqrestore(&blkcg->lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
-
 /* called under rcu_read_lock(). */
 struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
 				struct request_queue *q)
@@ -663,12 +637,16 @@ EXPORT_SYMBOL_GPL(blkg_lookup);
 static void blkg_destroy(struct blkio_group *blkg)
 {
 	struct request_queue *q = blkg->q;
+	struct blkio_cgroup *blkcg = blkg->blkcg;
 
 	lockdep_assert_held(q->queue_lock);
+	lockdep_assert_held(&blkcg->lock);
 
 	/* Something wrong if we are trying to remove same group twice */
 	WARN_ON_ONCE(list_empty(&blkg->q_node));
+	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
 	list_del_init(&blkg->q_node);
+	hlist_del_init_rcu(&blkg->blkcg_node);
 
 	WARN_ON_ONCE(q->nr_blkgs <= 0);
 	q->nr_blkgs--;
@@ -713,45 +691,33 @@ void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
 }
 EXPORT_SYMBOL_GPL(update_root_blkg_pd);
 
+/**
+ * blkg_destroy_all - destroy all blkgs associated with a request_queue
+ * @q: request_queue of interest
+ * @destroy_root: whether to destroy root blkg or not
+ *
+ * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
+ * destroyed; otherwise, root blkg is left alone.
+ */
 void blkg_destroy_all(struct request_queue *q, bool destroy_root)
 {
 	struct blkio_group *blkg, *n;
 
-	while (true) {
-		bool done = true;
-
-		spin_lock_irq(q->queue_lock);
-
-		list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
-			/* skip root? */
-			if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
-				continue;
-
-			/*
-			 * If cgroup removal path got to blk_group first
-			 * and removed it from cgroup list, then it will
-			 * take care of destroying cfqg also.
-			 */
-			if (!blkiocg_del_blkio_group(blkg))
-				blkg_destroy(blkg);
-			else
-				done = false;
-		}
+	spin_lock_irq(q->queue_lock);
 
-		spin_unlock_irq(q->queue_lock);
+	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
+		struct blkio_cgroup *blkcg = blkg->blkcg;
 
-		/*
-		 * Group list may not be empty if we raced cgroup removal
-		 * and lost.  cgroup removal is guaranteed to make forward
-		 * progress and retrying after a while is enough.  This
-		 * ugliness is scheduled to be removed after locking
-		 * update.
-		 */
-		if (done)
-			break;
+		/* skip root? */
+		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
+			continue;
 
-		msleep(10);	/* just some random duration I like */
+		spin_lock(&blkcg->lock);
+		blkg_destroy(blkg);
+		spin_unlock(&blkcg->lock);
 	}
+
+	spin_unlock_irq(q->queue_lock);
 }
 EXPORT_SYMBOL_GPL(blkg_destroy_all);
 
@@ -1600,45 +1566,45 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 			ARRAY_SIZE(blkio_files));
 }
 
+/**
+ * blkiocg_pre_destroy - cgroup pre_destroy callback
+ * @subsys: cgroup subsys
+ * @cgroup: cgroup of interest
+ *
+ * This function is called when @cgroup is about to go away and responsible
+ * for shooting down all blkgs associated with @cgroup.  blkgs should be
+ * removed while holding both q and blkcg locks.  As blkcg lock is nested
+ * inside q lock, this function performs reverse double lock dancing.
+ *
+ * This is the blkcg counterpart of ioc_release_fn().
+ */
 static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
 			       struct cgroup *cgroup)
 {
 	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
-	unsigned long flags;
-	struct blkio_group *blkg;
-	struct request_queue *q;
 
 	rcu_read_lock();
+	spin_lock_irq(&blkcg->lock);
 
-	do {
-		spin_lock_irqsave(&blkcg->lock, flags);
+	while (!hlist_empty(&blkcg->blkg_list)) {
+		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
+						struct blkio_group, blkcg_node);
+		struct request_queue *q = rcu_dereference(blkg->q);
 
-		if (hlist_empty(&blkcg->blkg_list)) {
-			spin_unlock_irqrestore(&blkcg->lock, flags);
-			break;
+		if (spin_trylock(q->queue_lock)) {
+			blkg_destroy(blkg);
+			spin_unlock(q->queue_lock);
+		} else {
+			spin_unlock_irq(&blkcg->lock);
+			rcu_read_unlock();
+			cpu_relax();
+			rcu_read_lock();
+			spin_lock(&blkcg->lock);
 		}
+	}
 
-		blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
-				   blkcg_node);
-		q = rcu_dereference(blkg->q);
-		__blkiocg_del_blkio_group(blkg);
-
-		spin_unlock_irqrestore(&blkcg->lock, flags);
-
-		/*
-		 * This blkio_group is being unlinked as associated cgroup is
-		 * going away. Let all the IO controlling policies know about
-		 * this event.
-		 */
-		spin_lock(&blkio_list_lock);
-		spin_lock_irqsave(q->queue_lock, flags);
-		blkg_destroy(blkg);
-		spin_unlock_irqrestore(q->queue_lock, flags);
-		spin_unlock(&blkio_list_lock);
-	} while (1);
-
+	spin_unlock_irq(&blkcg->lock);
 	rcu_read_unlock();
-
 	return 0;
 }
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 6e8ee86a2870..df73040a6a5f 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -380,7 +380,6 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
 extern struct blkio_cgroup blkio_root_cgroup;
 extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
 extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
-extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
 				       struct request_queue *q);
 struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
@@ -416,9 +415,6 @@ cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
 static inline struct blkio_cgroup *
 task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
 
-static inline int
-blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
-
 static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
 					      void *key) { return NULL; }
 static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
diff --git a/block/cfq.h b/block/cfq.h
index 5584e1b63ca8..c8b15ef57e5d 100644
--- a/block/cfq.h
+++ b/block/cfq.h
@@ -79,11 +79,6 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
 							direction, sync);
 }
 
-static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
-{
-	return blkiocg_del_blkio_group(blkg);
-}
-
 #else /* CFQ_GROUP_IOSCHED */
 static inline void cfq_blkiocg_update_io_add_stats(struct blkio_group *blkg,
 						   struct blkio_policy_type *pol,
@@ -119,10 +114,5 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
 			struct blkio_policy_type *pol, uint64_t start_time,
 			uint64_t io_start_time, bool direction, bool sync) { }
 
-static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
-{
-	return 0;
-}
-
 #endif /* CFQ_GROUP_IOSCHED */
 #endif