diff options
author | Tejun Heo <tj@kernel.org> | 2012-06-26 18:05:44 -0400 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2012-06-26 18:42:49 -0400 |
commit | a051661ca6d134c18599498b185b667859d4339b (patch) | |
tree | 9d840030874aed9b97a58051bf9568455126e8e8 /block/blk-cgroup.h | |
parent | 5b788ce3e2acac9bf109743b1281d77347cf2101 (diff) |
blkcg: implement per-blkg request allocation
Currently, request_queue has one request_list to allocate requests
from regardless of blkcg of the IO being issued. When the unified
request pool is used up, cfq proportional IO limits become meaningless
- whoever grabs the next request being freed wins the race regardless
of the configured weights.
This can be easily demonstrated by creating a blkio cgroup w/ very low
weight, put a program which can issue a lot of random direct IOs there
and running a sequential IO from a different cgroup. As soon as the
request pool is used up, the sequential IO bandwidth crashes.
This patch implements per-blkg request_list. Each blkg has its own
request_list and any IO allocates its request from the matching blkg
making blkcgs completely isolated in terms of request allocation.
* Root blkcg uses the request_list embedded in each request_queue,
which was renamed to @q->root_rl from @q->rq. While making blkcg rl
handling a bit hairier, this enables avoiding most overhead for root
blkcg.
* Queue fullness is properly per request_list but bdi isn't blkcg
aware yet, so congestion state currently just follows the root
blkcg. As writeback isn't aware of blkcg yet, this works okay for
async congestion but readahead may get the wrong signals. It's
better than blkcg completely collapsing with shared request_list but
needs to be improved with future changes.
* After this change, each block cgroup gets a full request pool making
resource consumption of each cgroup higher. This makes allowing
non-root users to create cgroups less desirable; however, note that
allowing non-root users to directly manage cgroups is already
severely broken regardless of this patch - each block cgroup
consumes kernel memory and skews IO weight (IO weights are not
hierarchical).
v2: queue-sysfs.txt updated and patch description updated as suggested
by Vivek.
v3: blk_get_rl() wasn't checking error return from
blkg_lookup_create() and may cause oops on lookup failure. Fix it
by falling back to root_rl on blkg lookup failures. This problem
was spotted by Rakesh Iyer <rni@google.com>.
v4: Updated to accommodate 458f27a982 "block: Avoid missed wakeup in
request waitqueue". blk_drain_queue() now wakes up waiters on all
blkg->rl on the target queue.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-cgroup.h')
-rw-r--r-- | block/blk-cgroup.h | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index e74cce1fbac9..24597309e23d 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/u64_stats_sync.h> | 17 | #include <linux/u64_stats_sync.h> |
18 | #include <linux/seq_file.h> | 18 | #include <linux/seq_file.h> |
19 | #include <linux/radix-tree.h> | 19 | #include <linux/radix-tree.h> |
20 | #include <linux/blkdev.h> | ||
20 | 21 | ||
21 | /* Max limits for throttle policy */ | 22 | /* Max limits for throttle policy */ |
22 | #define THROTL_IOPS_MAX UINT_MAX | 23 | #define THROTL_IOPS_MAX UINT_MAX |
@@ -93,6 +94,8 @@ struct blkcg_gq { | |||
93 | struct list_head q_node; | 94 | struct list_head q_node; |
94 | struct hlist_node blkcg_node; | 95 | struct hlist_node blkcg_node; |
95 | struct blkcg *blkcg; | 96 | struct blkcg *blkcg; |
97 | /* request allocation list for this blkcg-q pair */ | ||
98 | struct request_list rl; | ||
96 | /* reference count */ | 99 | /* reference count */ |
97 | int refcnt; | 100 | int refcnt; |
98 | 101 | ||
@@ -251,6 +254,95 @@ static inline void blkg_put(struct blkcg_gq *blkg) | |||
251 | } | 254 | } |
252 | 255 | ||
253 | /** | 256 | /** |
257 | * blk_get_rl - get request_list to use | ||
258 | * @q: request_queue of interest | ||
259 | * @bio: bio which will be attached to the allocated request (may be %NULL) | ||
260 | * | ||
261 | * The caller wants to allocate a request from @q to use for @bio. Find | ||
262 | * the request_list to use and obtain a reference on it. Should be called | ||
263 | * under queue_lock. This function is guaranteed to return non-%NULL | ||
264 | * request_list. | ||
265 | */ | ||
266 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
267 | struct bio *bio) | ||
268 | { | ||
269 | struct blkcg *blkcg; | ||
270 | struct blkcg_gq *blkg; | ||
271 | |||
272 | rcu_read_lock(); | ||
273 | |||
274 | blkcg = bio_blkcg(bio); | ||
275 | |||
276 | /* bypass blkg lookup and use @q->root_rl directly for root */ | ||
277 | if (blkcg == &blkcg_root) | ||
278 | goto root_rl; | ||
279 | |||
280 | /* | ||
281 | * Try to use blkg->rl. blkg lookup may fail under memory pressure | ||
282 | * or if either the blkcg or queue is going away. Fall back to | ||
283 | * root_rl in such cases. | ||
284 | */ | ||
285 | blkg = blkg_lookup_create(blkcg, q); | ||
286 | if (unlikely(IS_ERR(blkg))) | ||
287 | goto root_rl; | ||
288 | |||
289 | blkg_get(blkg); | ||
290 | rcu_read_unlock(); | ||
291 | return &blkg->rl; | ||
292 | root_rl: | ||
293 | rcu_read_unlock(); | ||
294 | return &q->root_rl; | ||
295 | } | ||
296 | |||
297 | /** | ||
298 | * blk_put_rl - put request_list | ||
299 | * @rl: request_list to put | ||
300 | * | ||
301 | * Put the reference acquired by blk_get_rl(). Should be called under | ||
302 | * queue_lock. | ||
303 | */ | ||
304 | static inline void blk_put_rl(struct request_list *rl) | ||
305 | { | ||
306 | /* root_rl may not have blkg set */ | ||
307 | if (rl->blkg && rl->blkg->blkcg != &blkcg_root) | ||
308 | blkg_put(rl->blkg); | ||
309 | } | ||
310 | |||
311 | /** | ||
312 | * blk_rq_set_rl - associate a request with a request_list | ||
313 | * @rq: request of interest | ||
314 | * @rl: target request_list | ||
315 | * | ||
316 | * Associate @rq with @rl so that accounting and freeing can know the | ||
317 | * request_list @rq came from. | ||
318 | */ | ||
319 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) | ||
320 | { | ||
321 | rq->rl = rl; | ||
322 | } | ||
323 | |||
324 | /** | ||
325 | * blk_rq_rl - return the request_list a request came from | ||
326 | * @rq: request of interest | ||
327 | * | ||
328 | * Return the request_list @rq is allocated from. | ||
329 | */ | ||
330 | static inline struct request_list *blk_rq_rl(struct request *rq) | ||
331 | { | ||
332 | return rq->rl; | ||
333 | } | ||
334 | |||
335 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | ||
336 | struct request_queue *q); | ||
337 | /** | ||
338 | * blk_queue_for_each_rl - iterate through all request_lists of a request_queue | ||
339 | * | ||
340 | * Should be used under queue_lock. | ||
341 | */ | ||
342 | #define blk_queue_for_each_rl(rl, q) \ | ||
343 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) | ||
344 | |||
345 | /** | ||
254 | * blkg_stat_add - add a value to a blkg_stat | 346 | * blkg_stat_add - add a value to a blkg_stat |
255 | * @stat: target blkg_stat | 347 | * @stat: target blkg_stat |
256 | * @val: value to add | 348 | * @val: value to add |
@@ -392,6 +484,7 @@ static inline void blkcg_deactivate_policy(struct request_queue *q, | |||
392 | 484 | ||
393 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } | 485 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } |
394 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | 486 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } |
487 | |||
395 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | 488 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |
396 | struct blkcg_policy *pol) { return NULL; } | 489 | struct blkcg_policy *pol) { return NULL; } |
397 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } | 490 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } |
@@ -399,5 +492,14 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } | |||
399 | static inline void blkg_get(struct blkcg_gq *blkg) { } | 492 | static inline void blkg_get(struct blkcg_gq *blkg) { } |
400 | static inline void blkg_put(struct blkcg_gq *blkg) { } | 493 | static inline void blkg_put(struct blkcg_gq *blkg) { } |
401 | 494 | ||
495 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
496 | struct bio *bio) { return &q->root_rl; } | ||
497 | static inline void blk_put_rl(struct request_list *rl) { } | ||
498 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } | ||
499 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } | ||
500 | |||
501 | #define blk_queue_for_each_rl(rl, q) \ | ||
502 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) | ||
503 | |||
402 | #endif /* CONFIG_BLK_CGROUP */ | 504 | #endif /* CONFIG_BLK_CGROUP */ |
403 | #endif /* _BLK_CGROUP_H */ | 505 | #endif /* _BLK_CGROUP_H */ |