author    Vivek Goyal <vgoyal@redhat.com>    2011-05-19 15:38:22 -0400
committer Jens Axboe <jaxboe@fusionio.com>   2011-05-20 14:34:52 -0400
commit    56edf7d75db5b14d628b46623c414ffbeed68d7f (patch)
tree      7d7ff46f03676154d0edf3da85c8e738d506ad92 /block
parent    3e59cf9d66a87763fef6c232a4a8dc664461ca50 (diff)
cfq-iosched: Fix a possible race with cfq cgroup removal code
blkg->key = cfqd is an rcu protected pointer and hence we used to do
call_rcu(cfqd->rcu_head) to free up cfqd after one rcu grace period.

The problem here is that even though cfqd is around, there are no
guarantees that the associated request queue (td->queue) or q->queue_lock
is still around. A driver might have called blk_cleanup_queue() and
released the lock. It might happen that after freeing up the lock we call
blkg->key->queue->queue_lock and crash. This is possible in the following
path.

	blkiocg_destroy()
	 blkio_unlink_group_fn()
	  cfq_unlink_blkio_group()

Hence, wait for an rcu period if there are groups which have not been
unlinked from blkcg->blkg_list. That way, any groups which are taking the
cfq_unlink_blkio_group() path can safely take the queue lock.

This is how we have taken care of the race in throttling logic also.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
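For reference, a simplified sketch of the unlink path the message describes
(abbreviated for illustration, not the exact kernel source): the cgroup
removal path reaches q->queue_lock through the RCU-protected blkg->key
pointer, which is only safe while the queue is guaranteed to still exist.

	/*
	 * Illustrative sketch of the cgroup removal path (abbreviated).
	 * blkg->key is the RCU-protected cfqd pointer.
	 */
	static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
	{
		struct cfq_data *cfqd = key;	/* blkg->key */
		unsigned long flags;

		/*
		 * If blk_cleanup_queue() has already released q->queue_lock,
		 * taking it here is the crash described above. With this
		 * patch, cfq_exit_queue() does synchronize_rcu() while any
		 * group is still on blkcg->blkg_list, so the lock is still
		 * valid when this path runs under rcu_read_lock().
		 */
		spin_lock_irqsave(cfqd->queue->queue_lock, flags);
		cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
		spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
	}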
Diffstat (limited to 'block')
-rw-r--r--	block/cfq-iosched.c	48
1 file changed, 36 insertions(+), 12 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a905b5519ab6..e2e6719832e1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -300,7 +300,9 @@ struct cfq_data {
 
 	/* List of cfq groups being managed on this device*/
 	struct hlist_head cfqg_list;
-	struct rcu_head rcu;
+
+	/* Number of groups which are on blkcg->blkg_list */
+	unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -1063,6 +1065,7 @@ static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
 	cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
 					0);
 
+	cfqd->nr_blkcg_linked_grps++;
 	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
 	/* Add group on cfqd list */
@@ -3815,15 +3818,11 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_cfqd_free(struct rcu_head *head)
-{
-	kfree(container_of(head, struct cfq_data, rcu));
-}
-
 static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
+	bool wait = false;
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -3842,7 +3841,13 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	cfq_put_async_queues(cfqd);
 	cfq_release_cfq_groups(cfqd);
-	cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+
+	/*
+	 * If there are groups which we could not unlink from blkcg list,
+	 * wait for a rcu period for them to be freed.
+	 */
+	if (cfqd->nr_blkcg_linked_grps)
+		wait = true;
 
 	spin_unlock_irq(q->queue_lock);
 
@@ -3852,8 +3857,20 @@ static void cfq_exit_queue(struct elevator_queue *e)
3852 ida_remove(&cic_index_ida, cfqd->cic_index); 3857 ida_remove(&cic_index_ida, cfqd->cic_index);
3853 spin_unlock(&cic_index_lock); 3858 spin_unlock(&cic_index_lock);
3854 3859
3855 /* Wait for cfqg->blkg->key accessors to exit their grace periods. */ 3860 /*
3856 call_rcu(&cfqd->rcu, cfq_cfqd_free); 3861 * Wait for cfqg->blkg->key accessors to exit their grace periods.
3862 * Do this wait only if there are other unlinked groups out
3863 * there. This can happen if cgroup deletion path claimed the
3864 * responsibility of cleaning up a group before queue cleanup code
3865 * get to the group.
3866 *
3867 * Do not call synchronize_rcu() unconditionally as there are drivers
3868 * which create/delete request queue hundreds of times during scan/boot
3869 * and synchronize_rcu() can take significant time and slow down boot.
3870 */
3871 if (wait)
3872 synchronize_rcu();
3873 kfree(cfqd);
3857} 3874}
3858 3875
3859static int cfq_alloc_cic_index(void) 3876static int cfq_alloc_cic_index(void)
@@ -3909,14 +3926,21 @@ static void *cfq_init_queue(struct request_queue *q)
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	/*
-	 * Take a reference to root group which we never drop. This is just
-	 * to make sure that cfq_put_cfqg() does not try to kfree root group
+	 * Set root group reference to 2. One reference will be dropped when
+	 * all groups on cfqd->cfqg_list are being deleted during queue exit.
+	 * Other reference will remain there as we don't want to delete this
+	 * group as it is statically allocated and gets destroyed when
+	 * throtl_data goes away.
 	 */
-	cfqg->ref = 1;
+	cfqg->ref = 2;
 	rcu_read_lock();
 	cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
 					(void *)cfqd, 0);
 	rcu_read_unlock();
+	cfqd->nr_blkcg_linked_grps++;
+
+	/* Add group on cfqd->cfqg_list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 #endif
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we