author    Tejun Heo <tj@kernel.org>       2012-03-05 16:15:22 -0500
committer Jens Axboe <axboe@kernel.dk>    2012-03-06 15:27:24 -0500
commit    c875f4d0250a1f070fa26087a73bdd8f54c48100 (patch)
tree      4ed2bae2fc48e54ac712d28eaaae8217c8064c1d
parent    9f13ef678efd977487fc0c2e489f17c9a8c67a3e (diff)
blkcg: drop unnecessary RCU locking
Now that blkg additions / removals are always done under both q and
blkcg locks, the only places RCU locking is necessary are
blkg_lookup[_create]() for lookup w/o blkcg lock. This patch drops
unnecessary RCU locking, replacing it with plain blkcg locking as
necessary.

* blkiocg_pre_destroy() already performs proper locking and doesn't
  need RCU. Dropped.

* blkio_read_blkg_stats() now uses blkcg->lock instead of the RCU read
  lock. This isn't a hot path.

* The now-unnecessary synchronize_rcu() calls in the queue exit paths
  are removed. This makes q->nr_blkgs unnecessary. Dropped.

* RCU annotation on blkg->q removed.

-v2: Vivek pointed out that blkg_lookup_create() still needs to be
     called under rcu_read_lock(). Updated.

-v3: After the update, stats_lock locking in blkio_read_blkg_stats()
     shouldn't be using the _irq variant as it otherwise ends up
     enabling irqs while blkcg->lock is locked. Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
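The -v3 note is about how the _irq spinlock variants compose when locks nest. Below is a minimal, hypothetical sketch of the pattern blkio_read_blkg_stats() ends up with after this patch: the outer lock is taken with spin_lock_irq(), so the inner lock must use the plain spin_lock()/spin_unlock() pair; an inner spin_unlock_irq() would re-enable interrupts while the outer lock is still held. The names outer_lock and inner_lock are stand-ins for blkcg->lock and blkg->stats_lock, not the real blk-cgroup code.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(outer_lock);	/* stand-in for blkcg->lock */
static DEFINE_SPINLOCK(inner_lock);	/* stand-in for blkg->stats_lock */

static void read_stats_locked(void)
{
	spin_lock_irq(&outer_lock);	/* disables local irqs */

	/* ... walk the per-cgroup group list here ... */

	spin_lock(&inner_lock);		/* irqs already off: plain lock */
	/* ... accumulate per-group stats ... */
	spin_unlock(&inner_lock);	/* NOT spin_unlock_irq() */

	spin_unlock_irq(&outer_lock);	/* irqs re-enabled only here */
}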
-rw-r--r--  block/blk-cgroup.c      | 24
-rw-r--r--  block/blk-cgroup.h      |  4
-rw-r--r--  block/blk-throttle.c    | 33
-rw-r--r--  block/cfq-iosched.c     | 24
-rw-r--r--  include/linux/blkdev.h  |  1
5 files changed, 12 insertions, 74 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e9e3b038c702..27d39a810cb6 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -500,7 +500,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 		return NULL;
 
 	spin_lock_init(&blkg->stats_lock);
-	rcu_assign_pointer(blkg->q, q);
+	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
 	blkg->blkcg = blkcg;
 	blkg->refcnt = 1;
@@ -611,7 +611,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 
 	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
 	list_add(&blkg->q_node, &q->blkg_list);
-	q->nr_blkgs++;
 
 	spin_unlock(&blkcg->lock);
 out:
@@ -648,9 +647,6 @@ static void blkg_destroy(struct blkio_group *blkg)
 	list_del_init(&blkg->q_node);
 	hlist_del_init_rcu(&blkg->blkcg_node);
 
-	WARN_ON_ONCE(q->nr_blkgs <= 0);
-	q->nr_blkgs--;
-
 	/*
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
@@ -1232,8 +1228,9 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
 	struct hlist_node *n;
 	uint64_t cgroup_total = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
+	spin_lock_irq(&blkcg->lock);
+
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
 		const char *dname = blkg_dev_name(blkg);
 		int plid = BLKIOFILE_POLICY(cft->private);
 
@@ -1243,15 +1240,16 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
 			cgroup_total += blkio_get_stat_cpu(blkg, plid,
 						cb, dname, type);
 		} else {
-			spin_lock_irq(&blkg->stats_lock);
+			spin_lock(&blkg->stats_lock);
 			cgroup_total += blkio_get_stat(blkg, plid,
 						cb, dname, type);
-			spin_unlock_irq(&blkg->stats_lock);
+			spin_unlock(&blkg->stats_lock);
 		}
 	}
 	if (show_total)
 		cb->fill(cb, "Total", cgroup_total);
-	rcu_read_unlock();
+
+	spin_unlock_irq(&blkcg->lock);
 	return 0;
 }
 
@@ -1583,28 +1581,24 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
 {
 	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
 
-	rcu_read_lock();
 	spin_lock_irq(&blkcg->lock);
 
 	while (!hlist_empty(&blkcg->blkg_list)) {
 		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
 						struct blkio_group, blkcg_node);
-		struct request_queue *q = rcu_dereference(blkg->q);
+		struct request_queue *q = blkg->q;
 
 		if (spin_trylock(q->queue_lock)) {
 			blkg_destroy(blkg);
 			spin_unlock(q->queue_lock);
 		} else {
 			spin_unlock_irq(&blkcg->lock);
-			rcu_read_unlock();
 			cpu_relax();
-			rcu_read_lock();
 			spin_lock(&blkcg->lock);
 		}
 	}
 
 	spin_unlock_irq(&blkcg->lock);
-	rcu_read_unlock();
 	return 0;
 }
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index df73040a6a5f..66eaefefcbd2 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -176,8 +176,8 @@ struct blkg_policy_data {
 };
 
 struct blkio_group {
-	/* Pointer to the associated request_queue, RCU protected */
-	struct request_queue __rcu *q;
+	/* Pointer to the associated request_queue */
+	struct request_queue *q;
 	struct list_head q_node;
 	struct hlist_node blkcg_node;
 	struct blkio_cgroup *blkcg;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e35ee7aeea69..bfa5168249eb 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1046,39 +1046,8 @@ int blk_throtl_init(struct request_queue *q)
 
 void blk_throtl_exit(struct request_queue *q)
 {
-	struct throtl_data *td = q->td;
-	bool wait;
-
-	BUG_ON(!td);
-
+	BUG_ON(!q->td);
 	throtl_shutdown_wq(q);
-
-	/* If there are other groups */
-	spin_lock_irq(q->queue_lock);
-	wait = q->nr_blkgs;
-	spin_unlock_irq(q->queue_lock);
-
-	/*
-	 * Wait for tg_to_blkg(tg)->q accessors to exit their grace periods.
-	 * Do this wait only if there are other undestroyed groups out
-	 * there (other than root group). This can happen if cgroup deletion
-	 * path claimed the responsibility of cleaning up a group before
-	 * queue cleanup code get to the group.
-	 *
-	 * Do not call synchronize_rcu() unconditionally as there are drivers
-	 * which create/delete request queue hundreds of times during scan/boot
-	 * and synchronize_rcu() can take significant time and slow down boot.
-	 */
-	if (wait)
-		synchronize_rcu();
-
-	/*
-	 * Just being safe to make sure after previous flush if some body did
-	 * update limits through cgroup and another work got queued, cancel
-	 * it.
-	 */
-	throtl_shutdown_wq(q);
-
 	kfree(q->td);
 }
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 393eaa59913b..9e386d9bcb79 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3449,7 +3449,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
-	bool wait = false;
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -3462,31 +3461,8 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	spin_unlock_irq(q->queue_lock);
 
-#ifdef CONFIG_BLK_CGROUP
-	/*
-	 * If there are groups which we could not unlink from blkcg list,
-	 * wait for a rcu period for them to be freed.
-	 */
-	spin_lock_irq(q->queue_lock);
-	wait = q->nr_blkgs;
-	spin_unlock_irq(q->queue_lock);
-#endif
 	cfq_shutdown_timer_wq(cfqd);
 
-	/*
-	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
-	 * Do this wait only if there are other unlinked groups out
-	 * there. This can happen if cgroup deletion path claimed the
-	 * responsibility of cleaning up a group before queue cleanup code
-	 * get to the group.
-	 *
-	 * Do not call synchronize_rcu() unconditionally as there are drivers
-	 * which create/delete request queue hundreds of times during scan/boot
-	 * and synchronize_rcu() can take significant time and slow down boot.
-	 */
-	if (wait)
-		synchronize_rcu();
-
 #ifndef CONFIG_CFQ_GROUP_IOSCHED
 	kfree(cfqd->root_group);
 #endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b4d1d4bfc168..33f1b29e53f4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -365,7 +365,6 @@ struct request_queue {
 #ifdef CONFIG_BLK_CGROUP
 	/* XXX: array size hardcoded to avoid include dependency (temporary) */
 	struct list_head blkg_list;
-	int nr_blkgs;
 #endif
 
 	struct queue_limits limits;