author		Vivek Goyal <vgoyal@redhat.com>		2011-05-19 15:38:22 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2011-05-20 14:34:52 -0400
commit		56edf7d75db5b14d628b46623c414ffbeed68d7f (patch)
tree		7d7ff46f03676154d0edf3da85c8e738d506ad92 /block
parent		3e59cf9d66a87763fef6c232a4a8dc664461ca50 (diff)
cfq-iosched: Fix a possible race with cfq cgroup removal code
blkg->key = cfqd is an rcu protected pointer and hence we used to do
call_rcu(&cfqd->rcu, cfq_cfqd_free) to free up cfqd after one rcu grace
period. The problem here is that even though cfqd is around, there are
no guarantees that the associated request queue (cfqd->queue) or
q->queue_lock is still around. A driver might have called
blk_cleanup_queue() and released the lock.
It might happen that, after the lock has been freed, we take
blkg->key->queue->queue_lock and crash. This is possible in the
following path:
blkiocg_destroy()
 blkio_unlink_group_fn()
  cfq_unlink_blkio_group()
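To make the failure concrete, here is roughly the shape of
cfq_unlink_blkio_group() at this point in the tree (a condensed sketch,
not the verbatim source); the blkg->key->queue dereference is the one
that can hit a freed queue:

        /* Condensed sketch of the unlink path; details trimmed. */
        static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
        {
                unsigned long flags;
                struct cfq_data *cfqd = key;    /* blkg->key, rcu protected */

                /*
                 * cfqd itself is guaranteed to be around (rcu), but nothing
                 * here guarantees cfqd->queue or its queue_lock still exist:
                 * blk_cleanup_queue() may already have run.
                 */
                spin_lock_irqsave(cfqd->queue->queue_lock, flags);
                cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
                spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
        }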
Hence, wait for an rcu period if there are groups which have not
been unlinked from blkcg->blkg_list. That way, any groups still on the
cfq_unlink_blkio_group() path can safely take the queue lock.
This is also how the race is handled in the throttling logic.
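Distilled from the hunks below, the fix counts the groups that go onto
blkcg->blkg_list and replaces the unconditional call_rcu() with a
conditional synchronize_rcu() followed by a plain kfree() at queue exit.
The function name below is invented for this sketch; the real logic
lives in cfq_exit_queue():

        /* Illustrative only; condensed from the patch below. */
        static void cfq_exit_queue_sketch(struct cfq_data *cfqd,
                                          struct request_queue *q)
        {
                bool wait = false;

                spin_lock_irq(q->queue_lock);
                cfq_release_cfq_groups(cfqd);
                /*
                 * nr_blkcg_linked_grps was bumped each time a group was
                 * added to blkcg->blkg_list. If any were linked, the cgroup
                 * removal path may still dereference blkg->key, so wait a
                 * grace period before freeing cfqd.
                 */
                if (cfqd->nr_blkcg_linked_grps)
                        wait = true;
                spin_unlock_irq(q->queue_lock);

                /*
                 * Wait only when needed: an unconditional synchronize_rcu()
                 * would slow down drivers that create/delete request queues
                 * hundreds of times during scan/boot.
                 */
                if (wait)
                        synchronize_rcu();
                kfree(cfqd);    /* safe now; no call_rcu() required */
        }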
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'block')
-rw-r--r--	block/cfq-iosched.c	48
1 file changed, 36 insertions(+), 12 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a905b5519ab6..e2e6719832e1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -300,7 +300,9 @@ struct cfq_data {
 
         /* List of cfq groups being managed on this device*/
         struct hlist_head cfqg_list;
-        struct rcu_head rcu;
+
+        /* Number of groups which are on blkcg->blkg_list */
+        unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -1063,6 +1065,7 @@ static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
         cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
                                         0);
 
+        cfqd->nr_blkcg_linked_grps++;
         cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
         /* Add group on cfqd list */
@@ -3815,15 +3818,11 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
                 cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_cfqd_free(struct rcu_head *head)
-{
-        kfree(container_of(head, struct cfq_data, rcu));
-}
-
 static void cfq_exit_queue(struct elevator_queue *e)
 {
         struct cfq_data *cfqd = e->elevator_data;
         struct request_queue *q = cfqd->queue;
+        bool wait = false;
 
         cfq_shutdown_timer_wq(cfqd);
 
@@ -3842,7 +3841,13 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
         cfq_put_async_queues(cfqd);
         cfq_release_cfq_groups(cfqd);
-        cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+
+        /*
+         * If there are groups which we could not unlink from blkcg list,
+         * wait for a rcu period for them to be freed.
+         */
+        if (cfqd->nr_blkcg_linked_grps)
+                wait = true;
 
         spin_unlock_irq(q->queue_lock);
 
@@ -3852,8 +3857,20 @@ static void cfq_exit_queue(struct elevator_queue *e)
         ida_remove(&cic_index_ida, cfqd->cic_index);
         spin_unlock(&cic_index_lock);
 
-        /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
-        call_rcu(&cfqd->rcu, cfq_cfqd_free);
+        /*
+         * Wait for cfqg->blkg->key accessors to exit their grace periods.
+         * Do this wait only if there are other unlinked groups out
+         * there. This can happen if cgroup deletion path claimed the
+         * responsibility of cleaning up a group before queue cleanup code
+         * get to the group.
+         *
+         * Do not call synchronize_rcu() unconditionally as there are drivers
+         * which create/delete request queue hundreds of times during scan/boot
+         * and synchronize_rcu() can take significant time and slow down boot.
+         */
+        if (wait)
+                synchronize_rcu();
+        kfree(cfqd);
 }
 
 static int cfq_alloc_cic_index(void)
@@ -3909,14 +3926,21 @@ static void *cfq_init_queue(struct request_queue *q)
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
         /*
-         * Take a reference to root group which we never drop. This is just
-         * to make sure that cfq_put_cfqg() does not try to kfree root group
+         * Set root group reference to 2. One reference will be dropped when
+         * all groups on cfqd->cfqg_list are being deleted during queue exit.
+         * Other reference will remain there as we don't want to delete this
+         * group as it is statically allocated and gets destroyed when
+         * throtl_data goes away.
          */
-        cfqg->ref = 1;
+        cfqg->ref = 2;
         rcu_read_lock();
         cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
                                         (void *)cfqd, 0);
         rcu_read_unlock();
+        cfqd->nr_blkcg_linked_grps++;
+
+        /* Add group on cfqd->cfqg_list */
+        hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 #endif
         /*
          * Not strictly needed (since RB_ROOT just clears the node and we