diff options
author | Jens Axboe <axboe@kernel.dk> | 2013-01-11 13:53:53 -0500 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2013-01-11 13:53:53 -0500 |
commit | ac9a19745196388ae5d828c0be7a1d6e472101f3 (patch) | |
tree | 49c47e1a07241653deb4a4b4e7a91626f586ad05 | |
parent | 422765c2638924da10ff363b5eed77924911bdc7 (diff) | |
parent | 43114018cb0b253fd03c4ff4d42bcdc43389ac1c (diff) |
Merge branch 'blkcg-cfq-hierarchy' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup into for-3.9/core
Tejun writes:
Hello, Jens.
Please consider pulling from the following branch to receive cfq blkcg
hierarchy support. The branch is based on top of v3.8-rc2.
git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git blkcg-cfq-hierarchy
The patchset was reviewed in the following thread.
http://thread.gmane.org/gmane.linux.kernel.cgroups/5571
-rw-r--r-- | Documentation/block/cfq-iosched.txt | 58 | ||||
-rw-r--r-- | Documentation/cgroups/blkio-controller.txt | 35 | ||||
-rw-r--r-- | block/blk-cgroup.c | 277 | ||||
-rw-r--r-- | block/blk-cgroup.h | 68 | ||||
-rw-r--r-- | block/blk-sysfs.c | 9 | ||||
-rw-r--r-- | block/cfq-iosched.c | 627 | ||||
-rw-r--r-- | include/linux/blkdev.h | 2 |
7 files changed, 902 insertions, 174 deletions
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt index d89b4fe724d7..a5eb7d19a65d 100644 --- a/Documentation/block/cfq-iosched.txt +++ b/Documentation/block/cfq-iosched.txt | |||
@@ -102,6 +102,64 @@ processing of request. Therefore, increasing the value can improve the | |||
102 | performance although this can cause the latency of some I/O to increase due | 102 | performance although this can cause the latency of some I/O to increase due |
103 | to more number of requests. | 103 | to more number of requests. |
104 | 104 | ||
105 | CFQ Group scheduling | ||
106 | ==================== | ||
107 | |||
108 | CFQ supports blkio cgroup and has "blkio." prefixed files in each | ||
109 | blkio cgroup directory. It is weight-based and there are four knobs | ||
110 | for configuration - weight[_device] and leaf_weight[_device]. | ||
111 | Internal cgroup nodes (the ones with children) can also have tasks in | ||
112 | them, so the former two configure how much proportion the cgroup as a | ||
113 | whole is entitled to at its parent's level while the latter two | ||
114 | configure how much proportion the tasks in the cgroup have compared to | ||
115 | its direct children. | ||
116 | |||
117 | Another way to think about it is assuming that each internal node has | ||
118 | an implicit leaf child node which hosts all the tasks whose weight is | ||
119 | configured by leaf_weight[_device]. Let's assume a blkio hierarchy | ||
120 | composed of five cgroups - root, A, B, AA and AB - with the following | ||
121 | weights where the names represent the hierarchy. | ||
122 | |||
123 | weight leaf_weight | ||
124 | root : 125 125 | ||
125 | A : 500 750 | ||
126 | B : 250 500 | ||
127 | AA : 500 500 | ||
128 | AB : 1000 500 | ||
129 | |||
130 | root never has a parent, making its weight meaningless. For backward | ||
131 | compatibility, weight is always kept in sync with leaf_weight. B, AA | ||
132 | and AB have no children and thus their tasks have no child cgroups to | ||
133 | compete with. They always get 100% of what the cgroup won at the | ||
134 | parent level. Considering only the weights which matter, the hierarchy | ||
135 | looks like the following. | ||
136 | |||
137 | root | ||
138 | / | \ | ||
139 | A B leaf | ||
140 | 500 250 125 | ||
141 | / | \ | ||
142 | AA AB leaf | ||
143 | 500 1000 750 | ||
144 | |||
145 | If all cgroups have active IOs and are competing with each other, disk | ||
146 | time will be distributed like the following. | ||
147 | |||
148 | Distribution below root. The total active weight at this level is | ||
149 | A:500 + B:250 + root-leaf:125 = 875. | ||
150 | |||
151 | root-leaf : 125 / 875 =~ 14% | ||
152 | A : 500 / 875 =~ 57% | ||
153 | B(-leaf) : 250 / 875 =~ 28% | ||
154 | |||
155 | A has children and further distributes its 57% among the children and | ||
156 | the implicit leaf node. The total active weight at this level is | ||
157 | AA:500 + AB:1000 + A-leaf:750 = 2250. | ||
158 | |||
159 | A-leaf : ( 750 / 2250) * A =~ 19% | ||
160 | AA(-leaf) : ( 500 / 2250) * A =~ 12% | ||
161 | AB(-leaf) : (1000 / 2250) * A =~ 25% | ||
162 | |||
105 | CFQ IOPS Mode for group scheduling | 163 | CFQ IOPS Mode for group scheduling |
106 | =================================== | 164 | =================================== |
107 | Basic CFQ design is to provide priority based time slices. Higher priority | 165 | Basic CFQ design is to provide priority based time slices. Higher priority |
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index b4b1fb3a83f0..1b70843c574e 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt | |||
@@ -94,13 +94,11 @@ Throttling/Upper Limit policy | |||
94 | 94 | ||
95 | Hierarchical Cgroups | 95 | Hierarchical Cgroups |
96 | ==================== | 96 | ==================== |
97 | - Currently none of the IO control policy supports hierarchical groups. But | 97 | - Currently only CFQ supports hierarchical groups. For throttling, |
98 | cgroup interface does allow creation of hierarchical cgroups and internally | 98 | cgroup interface does allow creation of hierarchical cgroups and |
99 | IO policies treat them as flat hierarchy. | 99 | internally it treats them as flat hierarchy. |
100 | 100 | ||
101 | So this patch will allow creation of cgroup hierarchcy but at the backend | 101 | If somebody created a hierarchy like as follows. |
102 | everything will be treated as flat. So if somebody created a hierarchy like | ||
103 | as follows. | ||
104 | 102 | ||
105 | root | 103 | root |
106 | / \ | 104 | / \ |
@@ -108,16 +106,20 @@ Hierarchical Cgroups | |||
108 | | | 106 | | |
109 | test3 | 107 | test3 |
110 | 108 | ||
111 | CFQ and throttling will practically treat all groups at same level. | 109 | CFQ will handle the hierarchy correctly but throttling will |
110 | practically treat all groups at same level. For details on CFQ | ||
111 | hierarchy support, refer to Documentation/block/cfq-iosched.txt. | ||
112 | Throttling will treat the hierarchy as if it looks like the | ||
113 | following. | ||
112 | 114 | ||
113 | pivot | 115 | pivot |
114 | / / \ \ | 116 | / / \ \ |
115 | root test1 test2 test3 | 117 | root test1 test2 test3 |
116 | 118 | ||
117 | Down the line we can implement hierarchical accounting/control support | 119 | Nesting cgroups, while allowed, isn't officially supported and blkio |
118 | and also introduce a new cgroup file "use_hierarchy" which will control | 120 | generates a warning when cgroups nest. Once throttling implements |
119 | whether cgroup hierarchy is viewed as flat or hierarchical by the policy.. | 121 | hierarchy support, hierarchy will be supported and the warning will |
120 | This is how memory controller also has implemented the things. | 122 | be removed. |
121 | 123 | ||
122 | Various user visible config options | 124 | Various user visible config options |
123 | =================================== | 125 | =================================== |
@@ -172,6 +174,12 @@ Proportional weight policy files | |||
172 | dev weight | 174 | dev weight |
173 | 8:16 300 | 175 | 8:16 300 |
174 | 176 | ||
177 | - blkio.leaf_weight[_device] | ||
178 | - Equivalents of blkio.weight[_device] for the purpose of | ||
179 | deciding how much weight tasks in the given cgroup have while | ||
180 | competing with the cgroup's child cgroups. For details, | ||
181 | please refer to Documentation/block/cfq-iosched.txt. | ||
182 | |||
175 | - blkio.time | 183 | - blkio.time |
176 | - disk time allocated to cgroup per device in milliseconds. First | 184 | - disk time allocated to cgroup per device in milliseconds. First |
177 | two fields specify the major and minor number of the device and | 185 | two fields specify the major and minor number of the device and |
@@ -279,6 +287,11 @@ Proportional weight policy files | |||
279 | and minor number of the device and third field specifies the number | 287 | and minor number of the device and third field specifies the number |
280 | of times a group was dequeued from a particular device. | 288 | of times a group was dequeued from a particular device. |
281 | 289 | ||
290 | - blkio.*_recursive | ||
291 | - Recursive version of various stats. These files show the | ||
292 | same information as their non-recursive counterparts but | ||
293 | include stats from all the descendant cgroups. | ||
294 | |||
282 | Throttling/Upper limit policy files | 295 | Throttling/Upper limit policy files |
283 | ----------------------------------- | 296 | ----------------------------------- |
284 | - blkio.throttle.read_bps_device | 297 | - blkio.throttle.read_bps_device |
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b8858fb0cafa..87ea95d1f533 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -26,11 +26,32 @@ | |||
26 | 26 | ||
27 | static DEFINE_MUTEX(blkcg_pol_mutex); | 27 | static DEFINE_MUTEX(blkcg_pol_mutex); |
28 | 28 | ||
29 | struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; | 29 | struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, |
30 | .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; | ||
30 | EXPORT_SYMBOL_GPL(blkcg_root); | 31 | EXPORT_SYMBOL_GPL(blkcg_root); |
31 | 32 | ||
32 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; | 33 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; |
33 | 34 | ||
35 | static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, | ||
36 | struct request_queue *q, bool update_hint); | ||
37 | |||
38 | /** | ||
39 | * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants | ||
40 | * @d_blkg: loop cursor pointing to the current descendant | ||
41 | * @pos_cgrp: used for iteration | ||
42 | * @p_blkg: target blkg to walk descendants of | ||
43 | * | ||
44 | * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU | ||
45 | * read locked. If called under either blkcg or queue lock, the iteration | ||
46 | * is guaranteed to include all and only online blkgs. The caller may | ||
47 | * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip | ||
48 | * subtree. | ||
49 | */ | ||
50 | #define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \ | ||
51 | cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ | ||
52 | if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ | ||
53 | (p_blkg)->q, false))) | ||
54 | |||
34 | static bool blkcg_policy_enabled(struct request_queue *q, | 55 | static bool blkcg_policy_enabled(struct request_queue *q, |
35 | const struct blkcg_policy *pol) | 56 | const struct blkcg_policy *pol) |
36 | { | 57 | { |
@@ -112,9 +133,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, | |||
112 | 133 | ||
113 | blkg->pd[i] = pd; | 134 | blkg->pd[i] = pd; |
114 | pd->blkg = blkg; | 135 | pd->blkg = blkg; |
136 | pd->plid = i; | ||
115 | 137 | ||
116 | /* invoke per-policy init */ | 138 | /* invoke per-policy init */ |
117 | if (blkcg_policy_enabled(blkg->q, pol)) | 139 | if (pol->pd_init_fn) |
118 | pol->pd_init_fn(blkg); | 140 | pol->pd_init_fn(blkg); |
119 | } | 141 | } |
120 | 142 | ||
@@ -125,8 +147,19 @@ err_free: | |||
125 | return NULL; | 147 | return NULL; |
126 | } | 148 | } |
127 | 149 | ||
150 | /** | ||
151 | * __blkg_lookup - internal version of blkg_lookup() | ||
152 | * @blkcg: blkcg of interest | ||
153 | * @q: request_queue of interest | ||
154 | * @update_hint: whether to update lookup hint with the result or not | ||
155 | * | ||
156 | * This is internal version and shouldn't be used by policy | ||
157 | * implementations. Looks up blkgs for the @blkcg - @q pair regardless of | ||
158 | * @q's bypass state. If @update_hint is %true, the caller should be | ||
159 | * holding @q->queue_lock and lookup hint is updated on success. | ||
160 | */ | ||
128 | static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, | 161 | static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, |
129 | struct request_queue *q) | 162 | struct request_queue *q, bool update_hint) |
130 | { | 163 | { |
131 | struct blkcg_gq *blkg; | 164 | struct blkcg_gq *blkg; |
132 | 165 | ||
@@ -135,14 +168,19 @@ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, | |||
135 | return blkg; | 168 | return blkg; |
136 | 169 | ||
137 | /* | 170 | /* |
138 | * Hint didn't match. Look up from the radix tree. Note that we | 171 | * Hint didn't match. Look up from the radix tree. Note that the |
139 | * may not be holding queue_lock and thus are not sure whether | 172 | * hint can only be updated under queue_lock as otherwise @blkg |
140 | * @blkg from blkg_tree has already been removed or not, so we | 173 | * could have already been removed from blkg_tree. The caller is |
141 | * can't update hint to the lookup result. Leave it to the caller. | 174 | * responsible for grabbing queue_lock if @update_hint. |
142 | */ | 175 | */ |
143 | blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); | 176 | blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); |
144 | if (blkg && blkg->q == q) | 177 | if (blkg && blkg->q == q) { |
178 | if (update_hint) { | ||
179 | lockdep_assert_held(q->queue_lock); | ||
180 | rcu_assign_pointer(blkcg->blkg_hint, blkg); | ||
181 | } | ||
145 | return blkg; | 182 | return blkg; |
183 | } | ||
146 | 184 | ||
147 | return NULL; | 185 | return NULL; |
148 | } | 186 | } |
@@ -162,7 +200,7 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) | |||
162 | 200 | ||
163 | if (unlikely(blk_queue_bypass(q))) | 201 | if (unlikely(blk_queue_bypass(q))) |
164 | return NULL; | 202 | return NULL; |
165 | return __blkg_lookup(blkcg, q); | 203 | return __blkg_lookup(blkcg, q, false); |
166 | } | 204 | } |
167 | EXPORT_SYMBOL_GPL(blkg_lookup); | 205 | EXPORT_SYMBOL_GPL(blkg_lookup); |
168 | 206 | ||
@@ -170,75 +208,129 @@ EXPORT_SYMBOL_GPL(blkg_lookup); | |||
170 | * If @new_blkg is %NULL, this function tries to allocate a new one as | 208 | * If @new_blkg is %NULL, this function tries to allocate a new one as |
171 | * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. | 209 | * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. |
172 | */ | 210 | */ |
173 | static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | 211 | static struct blkcg_gq *blkg_create(struct blkcg *blkcg, |
174 | struct request_queue *q, | 212 | struct request_queue *q, |
175 | struct blkcg_gq *new_blkg) | 213 | struct blkcg_gq *new_blkg) |
176 | { | 214 | { |
177 | struct blkcg_gq *blkg; | 215 | struct blkcg_gq *blkg; |
178 | int ret; | 216 | int i, ret; |
179 | 217 | ||
180 | WARN_ON_ONCE(!rcu_read_lock_held()); | 218 | WARN_ON_ONCE(!rcu_read_lock_held()); |
181 | lockdep_assert_held(q->queue_lock); | 219 | lockdep_assert_held(q->queue_lock); |
182 | 220 | ||
183 | /* lookup and update hint on success, see __blkg_lookup() for details */ | ||
184 | blkg = __blkg_lookup(blkcg, q); | ||
185 | if (blkg) { | ||
186 | rcu_assign_pointer(blkcg->blkg_hint, blkg); | ||
187 | goto out_free; | ||
188 | } | ||
189 | |||
190 | /* blkg holds a reference to blkcg */ | 221 | /* blkg holds a reference to blkcg */ |
191 | if (!css_tryget(&blkcg->css)) { | 222 | if (!css_tryget(&blkcg->css)) { |
192 | blkg = ERR_PTR(-EINVAL); | 223 | ret = -EINVAL; |
193 | goto out_free; | 224 | goto err_free_blkg; |
194 | } | 225 | } |
195 | 226 | ||
196 | /* allocate */ | 227 | /* allocate */ |
197 | if (!new_blkg) { | 228 | if (!new_blkg) { |
198 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); | 229 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); |
199 | if (unlikely(!new_blkg)) { | 230 | if (unlikely(!new_blkg)) { |
200 | blkg = ERR_PTR(-ENOMEM); | 231 | ret = -ENOMEM; |
201 | goto out_put; | 232 | goto err_put_css; |
202 | } | 233 | } |
203 | } | 234 | } |
204 | blkg = new_blkg; | 235 | blkg = new_blkg; |
205 | 236 | ||
206 | /* insert */ | 237 | /* link parent and insert */ |
238 | if (blkcg_parent(blkcg)) { | ||
239 | blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); | ||
240 | if (WARN_ON_ONCE(!blkg->parent)) { | ||
241 | blkg = ERR_PTR(-EINVAL); | ||
242 | goto err_put_css; | ||
243 | } | ||
244 | blkg_get(blkg->parent); | ||
245 | } | ||
246 | |||
207 | spin_lock(&blkcg->lock); | 247 | spin_lock(&blkcg->lock); |
208 | ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); | 248 | ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); |
209 | if (likely(!ret)) { | 249 | if (likely(!ret)) { |
210 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); | 250 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); |
211 | list_add(&blkg->q_node, &q->blkg_list); | 251 | list_add(&blkg->q_node, &q->blkg_list); |
252 | |||
253 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | ||
254 | struct blkcg_policy *pol = blkcg_policy[i]; | ||
255 | |||
256 | if (blkg->pd[i] && pol->pd_online_fn) | ||
257 | pol->pd_online_fn(blkg); | ||
258 | } | ||
212 | } | 259 | } |
260 | blkg->online = true; | ||
213 | spin_unlock(&blkcg->lock); | 261 | spin_unlock(&blkcg->lock); |
214 | 262 | ||
215 | if (!ret) | 263 | if (!ret) |
216 | return blkg; | 264 | return blkg; |
217 | 265 | ||
218 | blkg = ERR_PTR(ret); | 266 | /* insertion failed but @blkg is fully initialized, use the usual release path */ |
219 | out_put: | 267 | blkg_put(blkg); |
268 | return ERR_PTR(ret); | ||
269 | |||
270 | err_put_css: | ||
220 | css_put(&blkcg->css); | 271 | css_put(&blkcg->css); |
221 | out_free: | 272 | err_free_blkg: |
222 | blkg_free(new_blkg); | 273 | blkg_free(new_blkg); |
223 | return blkg; | 274 | return ERR_PTR(ret); |
224 | } | 275 | } |
225 | 276 | ||
277 | /** | ||
278 | * blkg_lookup_create - lookup blkg, try to create one if not there | ||
279 | * @blkcg: blkcg of interest | ||
280 | * @q: request_queue of interest | ||
281 | * | ||
282 | * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to | ||
283 | * create one. blkg creation is performed recursively from blkcg_root such | ||
284 | * that all non-root blkg's have access to the parent blkg. This function | ||
285 | * should be called under RCU read lock and @q->queue_lock. | ||
286 | * | ||
287 | * Returns pointer to the looked up or created blkg on success, ERR_PTR() | ||
288 | * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not | ||
289 | * dead and bypassing, returns ERR_PTR(-EBUSY). | ||
290 | */ | ||
226 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | 291 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
227 | struct request_queue *q) | 292 | struct request_queue *q) |
228 | { | 293 | { |
294 | struct blkcg_gq *blkg; | ||
295 | |||
296 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
297 | lockdep_assert_held(q->queue_lock); | ||
298 | |||
229 | /* | 299 | /* |
230 | * This could be the first entry point of blkcg implementation and | 300 | * This could be the first entry point of blkcg implementation and |
231 | * we shouldn't allow anything to go through for a bypassing queue. | 301 | * we shouldn't allow anything to go through for a bypassing queue. |
232 | */ | 302 | */ |
233 | if (unlikely(blk_queue_bypass(q))) | 303 | if (unlikely(blk_queue_bypass(q))) |
234 | return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); | 304 | return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); |
235 | return __blkg_lookup_create(blkcg, q, NULL); | 305 | |
306 | blkg = __blkg_lookup(blkcg, q, true); | ||
307 | if (blkg) | ||
308 | return blkg; | ||
309 | |||
310 | /* | ||
311 | * Create blkgs walking down from blkcg_root to @blkcg, so that all | ||
312 | * non-root blkgs have access to their parents. | ||
313 | */ | ||
314 | while (true) { | ||
315 | struct blkcg *pos = blkcg; | ||
316 | struct blkcg *parent = blkcg_parent(blkcg); | ||
317 | |||
318 | while (parent && !__blkg_lookup(parent, q, false)) { | ||
319 | pos = parent; | ||
320 | parent = blkcg_parent(parent); | ||
321 | } | ||
322 | |||
323 | blkg = blkg_create(pos, q, NULL); | ||
324 | if (pos == blkcg || IS_ERR(blkg)) | ||
325 | return blkg; | ||
326 | } | ||
236 | } | 327 | } |
237 | EXPORT_SYMBOL_GPL(blkg_lookup_create); | 328 | EXPORT_SYMBOL_GPL(blkg_lookup_create); |
238 | 329 | ||
239 | static void blkg_destroy(struct blkcg_gq *blkg) | 330 | static void blkg_destroy(struct blkcg_gq *blkg) |
240 | { | 331 | { |
241 | struct blkcg *blkcg = blkg->blkcg; | 332 | struct blkcg *blkcg = blkg->blkcg; |
333 | int i; | ||
242 | 334 | ||
243 | lockdep_assert_held(blkg->q->queue_lock); | 335 | lockdep_assert_held(blkg->q->queue_lock); |
244 | lockdep_assert_held(&blkcg->lock); | 336 | lockdep_assert_held(&blkcg->lock); |
@@ -247,6 +339,14 @@ static void blkg_destroy(struct blkcg_gq *blkg) | |||
247 | WARN_ON_ONCE(list_empty(&blkg->q_node)); | 339 | WARN_ON_ONCE(list_empty(&blkg->q_node)); |
248 | WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); | 340 | WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); |
249 | 341 | ||
342 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | ||
343 | struct blkcg_policy *pol = blkcg_policy[i]; | ||
344 | |||
345 | if (blkg->pd[i] && pol->pd_offline_fn) | ||
346 | pol->pd_offline_fn(blkg); | ||
347 | } | ||
348 | blkg->online = false; | ||
349 | |||
250 | radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); | 350 | radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); |
251 | list_del_init(&blkg->q_node); | 351 | list_del_init(&blkg->q_node); |
252 | hlist_del_init_rcu(&blkg->blkcg_node); | 352 | hlist_del_init_rcu(&blkg->blkcg_node); |
@@ -301,8 +401,10 @@ static void blkg_rcu_free(struct rcu_head *rcu_head) | |||
301 | 401 | ||
302 | void __blkg_release(struct blkcg_gq *blkg) | 402 | void __blkg_release(struct blkcg_gq *blkg) |
303 | { | 403 | { |
304 | /* release the extra blkcg reference this blkg has been holding */ | 404 | /* release the blkcg and parent blkg refs this blkg has been holding */ |
305 | css_put(&blkg->blkcg->css); | 405 | css_put(&blkg->blkcg->css); |
406 | if (blkg->parent) | ||
407 | blkg_put(blkg->parent); | ||
306 | 408 | ||
307 | /* | 409 | /* |
308 | * A group is freed in rcu manner. But having an rcu lock does not | 410 | * A group is freed in rcu manner. But having an rcu lock does not |
@@ -402,8 +504,9 @@ static const char *blkg_dev_name(struct blkcg_gq *blkg) | |||
402 | * | 504 | * |
403 | * This function invokes @prfill on each blkg of @blkcg if pd for the | 505 | * This function invokes @prfill on each blkg of @blkcg if pd for the |
404 | * policy specified by @pol exists. @prfill is invoked with @sf, the | 506 | * policy specified by @pol exists. @prfill is invoked with @sf, the |
405 | * policy data and @data. If @show_total is %true, the sum of the return | 507 | * policy data and @data and the matching queue lock held. If @show_total |
406 | * values from @prfill is printed with "Total" label at the end. | 508 | * is %true, the sum of the return values from @prfill is printed with |
509 | * "Total" label at the end. | ||
407 | * | 510 | * |
408 | * This is to be used to construct print functions for | 511 | * This is to be used to construct print functions for |
409 | * cftype->read_seq_string method. | 512 | * cftype->read_seq_string method. |
@@ -418,11 +521,14 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, | |||
418 | struct hlist_node *n; | 521 | struct hlist_node *n; |
419 | u64 total = 0; | 522 | u64 total = 0; |
420 | 523 | ||
421 | spin_lock_irq(&blkcg->lock); | 524 | rcu_read_lock(); |
422 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) | 525 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { |
526 | spin_lock_irq(blkg->q->queue_lock); | ||
423 | if (blkcg_policy_enabled(blkg->q, pol)) | 527 | if (blkcg_policy_enabled(blkg->q, pol)) |
424 | total += prfill(sf, blkg->pd[pol->plid], data); | 528 | total += prfill(sf, blkg->pd[pol->plid], data); |
425 | spin_unlock_irq(&blkcg->lock); | 529 | spin_unlock_irq(blkg->q->queue_lock); |
530 | } | ||
531 | rcu_read_unlock(); | ||
426 | 532 | ||
427 | if (show_total) | 533 | if (show_total) |
428 | seq_printf(sf, "Total %llu\n", (unsigned long long)total); | 534 | seq_printf(sf, "Total %llu\n", (unsigned long long)total); |
@@ -481,6 +587,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | |||
481 | seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); | 587 | seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); |
482 | return v; | 588 | return v; |
483 | } | 589 | } |
590 | EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); | ||
484 | 591 | ||
485 | /** | 592 | /** |
486 | * blkg_prfill_stat - prfill callback for blkg_stat | 593 | * blkg_prfill_stat - prfill callback for blkg_stat |
@@ -514,6 +621,82 @@ u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | |||
514 | EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); | 621 | EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); |
515 | 622 | ||
516 | /** | 623 | /** |
624 | * blkg_stat_recursive_sum - collect hierarchical blkg_stat | ||
625 | * @pd: policy private data of interest | ||
626 | * @off: offset to the blkg_stat in @pd | ||
627 | * | ||
628 | * Collect the blkg_stat specified by @off from @pd and all its online | ||
629 | * descendants and return the sum. The caller must be holding the queue | ||
630 | * lock for online tests. | ||
631 | */ | ||
632 | u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) | ||
633 | { | ||
634 | struct blkcg_policy *pol = blkcg_policy[pd->plid]; | ||
635 | struct blkcg_gq *pos_blkg; | ||
636 | struct cgroup *pos_cgrp; | ||
637 | u64 sum; | ||
638 | |||
639 | lockdep_assert_held(pd->blkg->q->queue_lock); | ||
640 | |||
641 | sum = blkg_stat_read((void *)pd + off); | ||
642 | |||
643 | rcu_read_lock(); | ||
644 | blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { | ||
645 | struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); | ||
646 | struct blkg_stat *stat = (void *)pos_pd + off; | ||
647 | |||
648 | if (pos_blkg->online) | ||
649 | sum += blkg_stat_read(stat); | ||
650 | } | ||
651 | rcu_read_unlock(); | ||
652 | |||
653 | return sum; | ||
654 | } | ||
655 | EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); | ||
656 | |||
657 | /** | ||
658 | * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat | ||
659 | * @pd: policy private data of interest | ||
660 | * @off: offset to the blkg_rwstat in @pd | ||
661 | * | ||
662 | * Collect the blkg_rwstat specified by @off from @pd and all its online | ||
663 | * descendants and return the sum. The caller must be holding the queue | ||
664 | * lock for online tests. | ||
665 | */ | ||
666 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, | ||
667 | int off) | ||
668 | { | ||
669 | struct blkcg_policy *pol = blkcg_policy[pd->plid]; | ||
670 | struct blkcg_gq *pos_blkg; | ||
671 | struct cgroup *pos_cgrp; | ||
672 | struct blkg_rwstat sum; | ||
673 | int i; | ||
674 | |||
675 | lockdep_assert_held(pd->blkg->q->queue_lock); | ||
676 | |||
677 | sum = blkg_rwstat_read((void *)pd + off); | ||
678 | |||
679 | rcu_read_lock(); | ||
680 | blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { | ||
681 | struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); | ||
682 | struct blkg_rwstat *rwstat = (void *)pos_pd + off; | ||
683 | struct blkg_rwstat tmp; | ||
684 | |||
685 | if (!pos_blkg->online) | ||
686 | continue; | ||
687 | |||
688 | tmp = blkg_rwstat_read(rwstat); | ||
689 | |||
690 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | ||
691 | sum.cnt[i] += tmp.cnt[i]; | ||
692 | } | ||
693 | rcu_read_unlock(); | ||
694 | |||
695 | return sum; | ||
696 | } | ||
697 | EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); | ||
698 | |||
699 | /** | ||
517 | * blkg_conf_prep - parse and prepare for per-blkg config update | 700 | * blkg_conf_prep - parse and prepare for per-blkg config update |
518 | * @blkcg: target block cgroup | 701 | * @blkcg: target block cgroup |
519 | * @pol: target policy | 702 | * @pol: target policy |
@@ -658,6 +841,7 @@ static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup) | |||
658 | return ERR_PTR(-ENOMEM); | 841 | return ERR_PTR(-ENOMEM); |
659 | 842 | ||
660 | blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; | 843 | blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; |
844 | blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; | ||
661 | blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ | 845 | blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ |
662 | done: | 846 | done: |
663 | spin_lock_init(&blkcg->lock); | 847 | spin_lock_init(&blkcg->lock); |
@@ -777,7 +961,7 @@ int blkcg_activate_policy(struct request_queue *q, | |||
777 | const struct blkcg_policy *pol) | 961 | const struct blkcg_policy *pol) |
778 | { | 962 | { |
779 | LIST_HEAD(pds); | 963 | LIST_HEAD(pds); |
780 | struct blkcg_gq *blkg; | 964 | struct blkcg_gq *blkg, *new_blkg; |
781 | struct blkg_policy_data *pd, *n; | 965 | struct blkg_policy_data *pd, *n; |
782 | int cnt = 0, ret; | 966 | int cnt = 0, ret; |
783 | bool preloaded; | 967 | bool preloaded; |
@@ -786,19 +970,27 @@ int blkcg_activate_policy(struct request_queue *q, | |||
786 | return 0; | 970 | return 0; |
787 | 971 | ||
788 | /* preallocations for root blkg */ | 972 | /* preallocations for root blkg */ |
789 | blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); | 973 | new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); |
790 | if (!blkg) | 974 | if (!new_blkg) |
791 | return -ENOMEM; | 975 | return -ENOMEM; |
792 | 976 | ||
793 | preloaded = !radix_tree_preload(GFP_KERNEL); | 977 | preloaded = !radix_tree_preload(GFP_KERNEL); |
794 | 978 | ||
795 | blk_queue_bypass_start(q); | 979 | blk_queue_bypass_start(q); |
796 | 980 | ||
797 | /* make sure the root blkg exists and count the existing blkgs */ | 981 | /* |
982 | * Make sure the root blkg exists and count the existing blkgs. As | ||
983 | * @q is bypassing at this point, blkg_lookup_create() can't be | ||
984 | * used. Open code it. | ||
985 | */ | ||
798 | spin_lock_irq(q->queue_lock); | 986 | spin_lock_irq(q->queue_lock); |
799 | 987 | ||
800 | rcu_read_lock(); | 988 | rcu_read_lock(); |
801 | blkg = __blkg_lookup_create(&blkcg_root, q, blkg); | 989 | blkg = __blkg_lookup(&blkcg_root, q, false); |
990 | if (blkg) | ||
991 | blkg_free(new_blkg); | ||
992 | else | ||
993 | blkg = blkg_create(&blkcg_root, q, new_blkg); | ||
802 | rcu_read_unlock(); | 994 | rcu_read_unlock(); |
803 | 995 | ||
804 | if (preloaded) | 996 | if (preloaded) |
@@ -846,6 +1038,7 @@ int blkcg_activate_policy(struct request_queue *q, | |||
846 | 1038 | ||
847 | blkg->pd[pol->plid] = pd; | 1039 | blkg->pd[pol->plid] = pd; |
848 | pd->blkg = blkg; | 1040 | pd->blkg = blkg; |
1041 | pd->plid = pol->plid; | ||
849 | pol->pd_init_fn(blkg); | 1042 | pol->pd_init_fn(blkg); |
850 | 1043 | ||
851 | spin_unlock(&blkg->blkcg->lock); | 1044 | spin_unlock(&blkg->blkcg->lock); |
@@ -892,6 +1085,8 @@ void blkcg_deactivate_policy(struct request_queue *q, | |||
892 | /* grab blkcg lock too while removing @pd from @blkg */ | 1085 | /* grab blkcg lock too while removing @pd from @blkg */ |
893 | spin_lock(&blkg->blkcg->lock); | 1086 | spin_lock(&blkg->blkcg->lock); |
894 | 1087 | ||
1088 | if (pol->pd_offline_fn) | ||
1089 | pol->pd_offline_fn(blkg); | ||
895 | if (pol->pd_exit_fn) | 1090 | if (pol->pd_exit_fn) |
896 | pol->pd_exit_fn(blkg); | 1091 | pol->pd_exit_fn(blkg); |
897 | 1092 | ||
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 24597309e23d..f2b292925ccd 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h | |||
@@ -54,6 +54,7 @@ struct blkcg { | |||
54 | 54 | ||
55 | /* TODO: per-policy storage in blkcg */ | 55 | /* TODO: per-policy storage in blkcg */ |
56 | unsigned int cfq_weight; /* belongs to cfq */ | 56 | unsigned int cfq_weight; /* belongs to cfq */ |
57 | unsigned int cfq_leaf_weight; | ||
57 | }; | 58 | }; |
58 | 59 | ||
59 | struct blkg_stat { | 60 | struct blkg_stat { |
@@ -80,8 +81,9 @@ struct blkg_rwstat { | |||
80 | * beginning and pd_size can't be smaller than pd. | 81 | * beginning and pd_size can't be smaller than pd. |
81 | */ | 82 | */ |
82 | struct blkg_policy_data { | 83 | struct blkg_policy_data { |
83 | /* the blkg this per-policy data belongs to */ | 84 | /* the blkg and policy id this per-policy data belongs to */ |
84 | struct blkcg_gq *blkg; | 85 | struct blkcg_gq *blkg; |
86 | int plid; | ||
85 | 87 | ||
86 | /* used during policy activation */ | 88 | /* used during policy activation */ |
87 | struct list_head alloc_node; | 89 | struct list_head alloc_node; |
@@ -94,17 +96,27 @@ struct blkcg_gq { | |||
94 | struct list_head q_node; | 96 | struct list_head q_node; |
95 | struct hlist_node blkcg_node; | 97 | struct hlist_node blkcg_node; |
96 | struct blkcg *blkcg; | 98 | struct blkcg *blkcg; |
99 | |||
100 | /* all non-root blkcg_gq's are guaranteed to have access to parent */ | ||
101 | struct blkcg_gq *parent; | ||
102 | |||
97 | /* request allocation list for this blkcg-q pair */ | 103 | /* request allocation list for this blkcg-q pair */ |
98 | struct request_list rl; | 104 | struct request_list rl; |
105 | |||
99 | /* reference count */ | 106 | /* reference count */ |
100 | int refcnt; | 107 | int refcnt; |
101 | 108 | ||
109 | /* is this blkg online? protected by both blkcg and q locks */ | ||
110 | bool online; | ||
111 | |||
102 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; | 112 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; |
103 | 113 | ||
104 | struct rcu_head rcu_head; | 114 | struct rcu_head rcu_head; |
105 | }; | 115 | }; |
106 | 116 | ||
107 | typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); | 117 | typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); |
118 | typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); | ||
119 | typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); | ||
108 | typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); | 120 | typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); |
109 | typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); | 121 | typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); |
110 | 122 | ||
@@ -117,6 +129,8 @@ struct blkcg_policy { | |||
117 | 129 | ||
118 | /* operations */ | 130 | /* operations */ |
119 | blkcg_pol_init_pd_fn *pd_init_fn; | 131 | blkcg_pol_init_pd_fn *pd_init_fn; |
132 | blkcg_pol_online_pd_fn *pd_online_fn; | ||
133 | blkcg_pol_offline_pd_fn *pd_offline_fn; | ||
120 | blkcg_pol_exit_pd_fn *pd_exit_fn; | 134 | blkcg_pol_exit_pd_fn *pd_exit_fn; |
121 | blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; | 135 | blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; |
122 | }; | 136 | }; |
@@ -150,6 +164,10 @@ u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); | |||
150 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | 164 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, |
151 | int off); | 165 | int off); |
152 | 166 | ||
167 | u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); | ||
168 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, | ||
169 | int off); | ||
170 | |||
153 | struct blkg_conf_ctx { | 171 | struct blkg_conf_ctx { |
154 | struct gendisk *disk; | 172 | struct gendisk *disk; |
155 | struct blkcg_gq *blkg; | 173 | struct blkcg_gq *blkg; |
@@ -181,6 +199,19 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) | |||
181 | } | 199 | } |
182 | 200 | ||
183 | /** | 201 | /** |
202 | * blkcg_parent - get the parent of a blkcg | ||
203 | * @blkcg: blkcg of interest | ||
204 | * | ||
205 | * Return the parent blkcg of @blkcg. Can be called anytime. | ||
206 | */ | ||
207 | static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) | ||
208 | { | ||
209 | struct cgroup *pcg = blkcg->css.cgroup->parent; | ||
210 | |||
211 | return pcg ? cgroup_to_blkcg(pcg) : NULL; | ||
212 | } | ||
213 | |||
214 | /** | ||
184 | * blkg_to_pdata - get policy private data | 215 | * blkg_to_pdata - get policy private data |
185 | * @blkg: blkg of interest | 216 | * @blkg: blkg of interest |
186 | * @pol: policy of interest | 217 | * @pol: policy of interest |
@@ -387,6 +418,18 @@ static inline void blkg_stat_reset(struct blkg_stat *stat) | |||
387 | } | 418 | } |
388 | 419 | ||
389 | /** | 420 | /** |
421 | * blkg_stat_merge - merge a blkg_stat into another | ||
422 | * @to: the destination blkg_stat | ||
423 | * @from: the source | ||
424 | * | ||
425 | * Add @from's count to @to. | ||
426 | */ | ||
427 | static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) | ||
428 | { | ||
429 | blkg_stat_add(to, blkg_stat_read(from)); | ||
430 | } | ||
431 | |||
432 | /** | ||
390 | * blkg_rwstat_add - add a value to a blkg_rwstat | 433 | * blkg_rwstat_add - add a value to a blkg_rwstat |
391 | * @rwstat: target blkg_rwstat | 434 | * @rwstat: target blkg_rwstat |
392 | * @rw: mask of REQ_{WRITE|SYNC} | 435 | * @rw: mask of REQ_{WRITE|SYNC} |
@@ -434,14 +477,14 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) | |||
434 | } | 477 | } |
435 | 478 | ||
436 | /** | 479 | /** |
437 | * blkg_rwstat_sum - read the total count of a blkg_rwstat | 480 | * blkg_rwstat_total - read the total count of a blkg_rwstat |
438 | * @rwstat: blkg_rwstat to read | 481 | * @rwstat: blkg_rwstat to read |
439 | * | 482 | * |
440 | * Return the total count of @rwstat regardless of the IO direction. This | 483 | * Return the total count of @rwstat regardless of the IO direction. This |
441 | * function can be called without synchronization and takes care of u64 | 484 | * function can be called without synchronization and takes care of u64 |
442 | * atomicity. | 485 | * atomicity. |
443 | */ | 486 | */ |
444 | static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat) | 487 | static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) |
445 | { | 488 | { |
446 | struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); | 489 | struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); |
447 | 490 | ||
@@ -457,6 +500,25 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) | |||
457 | memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); | 500 | memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); |
458 | } | 501 | } |
459 | 502 | ||
503 | /** | ||
504 | * blkg_rwstat_merge - merge a blkg_rwstat into another | ||
505 | * @to: the destination blkg_rwstat | ||
506 | * @from: the source | ||
507 | * | ||
508 | * Add @from's counts to @to. | ||
509 | */ | ||
510 | static inline void blkg_rwstat_merge(struct blkg_rwstat *to, | ||
511 | struct blkg_rwstat *from) | ||
512 | { | ||
513 | struct blkg_rwstat v = blkg_rwstat_read(from); | ||
514 | int i; | ||
515 | |||
516 | u64_stats_update_begin(&to->syncp); | ||
517 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | ||
518 | to->cnt[i] += v.cnt[i]; | ||
519 | u64_stats_update_end(&to->syncp); | ||
520 | } | ||
521 | |||
460 | #else /* CONFIG_BLK_CGROUP */ | 522 | #else /* CONFIG_BLK_CGROUP */ |
461 | 523 | ||
462 | struct cgroup; | 524 | struct cgroup; |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 788147797a79..6206a934eb8c 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -497,6 +497,13 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, | |||
497 | return res; | 497 | return res; |
498 | } | 498 | } |
499 | 499 | ||
500 | static void blk_free_queue_rcu(struct rcu_head *rcu_head) | ||
501 | { | ||
502 | struct request_queue *q = container_of(rcu_head, struct request_queue, | ||
503 | rcu_head); | ||
504 | kmem_cache_free(blk_requestq_cachep, q); | ||
505 | } | ||
506 | |||
500 | /** | 507 | /** |
501 | * blk_release_queue: - release a &struct request_queue when it is no longer needed | 508 | * blk_release_queue: - release a &struct request_queue when it is no longer needed |
502 | * @kobj: the kobj belonging to the request queue to be released | 509 | * @kobj: the kobj belonging to the request queue to be released |
@@ -538,7 +545,7 @@ static void blk_release_queue(struct kobject *kobj) | |||
538 | bdi_destroy(&q->backing_dev_info); | 545 | bdi_destroy(&q->backing_dev_info); |
539 | 546 | ||
540 | ida_simple_remove(&blk_queue_ida, q->id); | 547 | ida_simple_remove(&blk_queue_ida, q->id); |
541 | kmem_cache_free(blk_requestq_cachep, q); | 548 | call_rcu(&q->rcu_head, blk_free_queue_rcu); |
542 | } | 549 | } |
543 | 550 | ||
544 | static const struct sysfs_ops queue_sysfs_ops = { | 551 | static const struct sysfs_ops queue_sysfs_ops = { |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e62e9205b80a..b66365b6ba77 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -85,7 +85,6 @@ struct cfq_rb_root { | |||
85 | struct rb_root rb; | 85 | struct rb_root rb; |
86 | struct rb_node *left; | 86 | struct rb_node *left; |
87 | unsigned count; | 87 | unsigned count; |
88 | unsigned total_weight; | ||
89 | u64 min_vdisktime; | 88 | u64 min_vdisktime; |
90 | struct cfq_ttime ttime; | 89 | struct cfq_ttime ttime; |
91 | }; | 90 | }; |
@@ -155,7 +154,7 @@ struct cfq_queue { | |||
155 | * First index in the service_trees. | 154 | * First index in the service_trees. |
156 | * IDLE is handled separately, so it has negative index | 155 | * IDLE is handled separately, so it has negative index |
157 | */ | 156 | */ |
158 | enum wl_prio_t { | 157 | enum wl_class_t { |
159 | BE_WORKLOAD = 0, | 158 | BE_WORKLOAD = 0, |
160 | RT_WORKLOAD = 1, | 159 | RT_WORKLOAD = 1, |
161 | IDLE_WORKLOAD = 2, | 160 | IDLE_WORKLOAD = 2, |
@@ -223,10 +222,45 @@ struct cfq_group { | |||
223 | 222 | ||
224 | /* group service_tree key */ | 223 | /* group service_tree key */ |
225 | u64 vdisktime; | 224 | u64 vdisktime; |
225 | |||
226 | /* | ||
227 | * The number of active cfqgs and sum of their weights under this | ||
228 | * cfqg. This covers this cfqg's leaf_weight and all children's | ||
229 | * weights, but does not cover weights of further descendants. | ||
230 | * | ||
231 | * If a cfqg is on the service tree, it's active. An active cfqg | ||
232 | * also activates its parent and contributes to the children_weight | ||
233 | * of the parent. | ||
234 | */ | ||
235 | int nr_active; | ||
236 | unsigned int children_weight; | ||
237 | |||
238 | /* | ||
239 | * vfraction is the fraction of vdisktime that the tasks in this | ||
240 | * cfqg are entitled to. This is determined by compounding the | ||
241 | * ratios walking up from this cfqg to the root. | ||
242 | * | ||
243 | * It is in fixed point w/ CFQ_SERVICE_SHIFT and the sum of all | ||
244 | * vfractions on a service tree is approximately 1. The sum may | ||
245 | * deviate a bit due to rounding errors and fluctuations caused by | ||
246 | * cfqgs entering and leaving the service tree. | ||
247 | */ | ||
248 | unsigned int vfraction; | ||
249 | |||
250 | /* | ||
251 | * There are two weights - (internal) weight is the weight of this | ||
252 | * cfqg against the sibling cfqgs. leaf_weight is the weight of | ||
253 | * this cfqg against the child cfqgs. For the root cfqg, both | ||
254 | * weights are kept in sync for backward compatibility. | ||
255 | */ | ||
226 | unsigned int weight; | 256 | unsigned int weight; |
227 | unsigned int new_weight; | 257 | unsigned int new_weight; |
228 | unsigned int dev_weight; | 258 | unsigned int dev_weight; |
229 | 259 | ||
260 | unsigned int leaf_weight; | ||
261 | unsigned int new_leaf_weight; | ||
262 | unsigned int dev_leaf_weight; | ||
263 | |||
230 | /* number of cfqq currently on this group */ | 264 | /* number of cfqq currently on this group */ |
231 | int nr_cfqq; | 265 | int nr_cfqq; |
232 | 266 | ||
@@ -248,14 +282,15 @@ struct cfq_group { | |||
248 | struct cfq_rb_root service_trees[2][3]; | 282 | struct cfq_rb_root service_trees[2][3]; |
249 | struct cfq_rb_root service_tree_idle; | 283 | struct cfq_rb_root service_tree_idle; |
250 | 284 | ||
251 | unsigned long saved_workload_slice; | 285 | unsigned long saved_wl_slice; |
252 | enum wl_type_t saved_workload; | 286 | enum wl_type_t saved_wl_type; |
253 | enum wl_prio_t saved_serving_prio; | 287 | enum wl_class_t saved_wl_class; |
254 | 288 | ||
255 | /* number of requests that are on the dispatch list or inside driver */ | 289 | /* number of requests that are on the dispatch list or inside driver */ |
256 | int dispatched; | 290 | int dispatched; |
257 | struct cfq_ttime ttime; | 291 | struct cfq_ttime ttime; |
258 | struct cfqg_stats stats; | 292 | struct cfqg_stats stats; /* stats for this cfqg */ |
293 | struct cfqg_stats dead_stats; /* stats pushed from dead children */ | ||
259 | }; | 294 | }; |
260 | 295 | ||
261 | struct cfq_io_cq { | 296 | struct cfq_io_cq { |
@@ -280,8 +315,8 @@ struct cfq_data { | |||
280 | /* | 315 | /* |
281 | * The priority currently being served | 316 | * The priority currently being served |
282 | */ | 317 | */ |
283 | enum wl_prio_t serving_prio; | 318 | enum wl_class_t serving_wl_class; |
284 | enum wl_type_t serving_type; | 319 | enum wl_type_t serving_wl_type; |
285 | unsigned long workload_expires; | 320 | unsigned long workload_expires; |
286 | struct cfq_group *serving_group; | 321 | struct cfq_group *serving_group; |
287 | 322 | ||
@@ -353,17 +388,17 @@ struct cfq_data { | |||
353 | 388 | ||
354 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); | 389 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); |
355 | 390 | ||
356 | static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, | 391 | static struct cfq_rb_root *st_for(struct cfq_group *cfqg, |
357 | enum wl_prio_t prio, | 392 | enum wl_class_t class, |
358 | enum wl_type_t type) | 393 | enum wl_type_t type) |
359 | { | 394 | { |
360 | if (!cfqg) | 395 | if (!cfqg) |
361 | return NULL; | 396 | return NULL; |
362 | 397 | ||
363 | if (prio == IDLE_WORKLOAD) | 398 | if (class == IDLE_WORKLOAD) |
364 | return &cfqg->service_tree_idle; | 399 | return &cfqg->service_tree_idle; |
365 | 400 | ||
366 | return &cfqg->service_trees[prio][type]; | 401 | return &cfqg->service_trees[class][type]; |
367 | } | 402 | } |
368 | 403 | ||
369 | enum cfqq_state_flags { | 404 | enum cfqq_state_flags { |
@@ -502,7 +537,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) | |||
502 | { | 537 | { |
503 | struct cfqg_stats *stats = &cfqg->stats; | 538 | struct cfqg_stats *stats = &cfqg->stats; |
504 | 539 | ||
505 | if (blkg_rwstat_sum(&stats->queued)) | 540 | if (blkg_rwstat_total(&stats->queued)) |
506 | return; | 541 | return; |
507 | 542 | ||
508 | /* | 543 | /* |
@@ -546,7 +581,7 @@ static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) | |||
546 | struct cfqg_stats *stats = &cfqg->stats; | 581 | struct cfqg_stats *stats = &cfqg->stats; |
547 | 582 | ||
548 | blkg_stat_add(&stats->avg_queue_size_sum, | 583 | blkg_stat_add(&stats->avg_queue_size_sum, |
549 | blkg_rwstat_sum(&stats->queued)); | 584 | blkg_rwstat_total(&stats->queued)); |
550 | blkg_stat_add(&stats->avg_queue_size_samples, 1); | 585 | blkg_stat_add(&stats->avg_queue_size_samples, 1); |
551 | cfqg_stats_update_group_wait_time(stats); | 586 | cfqg_stats_update_group_wait_time(stats); |
552 | } | 587 | } |
@@ -572,6 +607,13 @@ static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) | |||
572 | return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); | 607 | return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); |
573 | } | 608 | } |
574 | 609 | ||
610 | static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) | ||
611 | { | ||
612 | struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent; | ||
613 | |||
614 | return pblkg ? blkg_to_cfqg(pblkg) : NULL; | ||
615 | } | ||
616 | |||
575 | static inline void cfqg_get(struct cfq_group *cfqg) | 617 | static inline void cfqg_get(struct cfq_group *cfqg) |
576 | { | 618 | { |
577 | return blkg_get(cfqg_to_blkg(cfqg)); | 619 | return blkg_get(cfqg_to_blkg(cfqg)); |
@@ -586,8 +628,9 @@ static inline void cfqg_put(struct cfq_group *cfqg) | |||
586 | char __pbuf[128]; \ | 628 | char __pbuf[128]; \ |
587 | \ | 629 | \ |
588 | blkg_path(cfqg_to_blkg((cfqq)->cfqg), __pbuf, sizeof(__pbuf)); \ | 630 | blkg_path(cfqg_to_blkg((cfqq)->cfqg), __pbuf, sizeof(__pbuf)); \ |
589 | blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ | 631 | blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c %s " fmt, (cfqq)->pid, \ |
590 | cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ | 632 | cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ |
633 | cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\ | ||
591 | __pbuf, ##args); \ | 634 | __pbuf, ##args); \ |
592 | } while (0) | 635 | } while (0) |
593 | 636 | ||
@@ -646,11 +689,9 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, | |||
646 | io_start_time - start_time); | 689 | io_start_time - start_time); |
647 | } | 690 | } |
648 | 691 | ||
649 | static void cfq_pd_reset_stats(struct blkcg_gq *blkg) | 692 | /* @stats = 0 */ |
693 | static void cfqg_stats_reset(struct cfqg_stats *stats) | ||
650 | { | 694 | { |
651 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); | ||
652 | struct cfqg_stats *stats = &cfqg->stats; | ||
653 | |||
654 | /* queued stats shouldn't be cleared */ | 695 | /* queued stats shouldn't be cleared */ |
655 | blkg_rwstat_reset(&stats->service_bytes); | 696 | blkg_rwstat_reset(&stats->service_bytes); |
656 | blkg_rwstat_reset(&stats->serviced); | 697 | blkg_rwstat_reset(&stats->serviced); |
@@ -669,13 +710,58 @@ static void cfq_pd_reset_stats(struct blkcg_gq *blkg) | |||
669 | #endif | 710 | #endif |
670 | } | 711 | } |
671 | 712 | ||
713 | /* @to += @from */ | ||
714 | static void cfqg_stats_merge(struct cfqg_stats *to, struct cfqg_stats *from) | ||
715 | { | ||
716 | /* queued stats shouldn't be merged */ | ||
717 | blkg_rwstat_merge(&to->service_bytes, &from->service_bytes); | ||
718 | blkg_rwstat_merge(&to->serviced, &from->serviced); | ||
719 | blkg_rwstat_merge(&to->merged, &from->merged); | ||
720 | blkg_rwstat_merge(&to->service_time, &from->service_time); | ||
721 | blkg_rwstat_merge(&to->wait_time, &from->wait_time); | ||
722 | blkg_stat_merge(&from->time, &from->time); | ||
723 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
724 | blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time); | ||
725 | blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum); | ||
726 | blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples); | ||
727 | blkg_stat_merge(&to->dequeue, &from->dequeue); | ||
728 | blkg_stat_merge(&to->group_wait_time, &from->group_wait_time); | ||
729 | blkg_stat_merge(&to->idle_time, &from->idle_time); | ||
730 | blkg_stat_merge(&to->empty_time, &from->empty_time); | ||
731 | #endif | ||
732 | } | ||
733 | |||
734 | /* | ||
735 | * Transfer @cfqg's stats to its parent's dead_stats so that the ancestors' | ||
736 | * recursive stats can still account for the amount used by this cfqg after | ||
737 | * it's gone. | ||
738 | */ | ||
739 | static void cfqg_stats_xfer_dead(struct cfq_group *cfqg) | ||
740 | { | ||
741 | struct cfq_group *parent = cfqg_parent(cfqg); | ||
742 | |||
743 | lockdep_assert_held(cfqg_to_blkg(cfqg)->q->queue_lock); | ||
744 | |||
745 | if (unlikely(!parent)) | ||
746 | return; | ||
747 | |||
748 | cfqg_stats_merge(&parent->dead_stats, &cfqg->stats); | ||
749 | cfqg_stats_merge(&parent->dead_stats, &cfqg->dead_stats); | ||
750 | cfqg_stats_reset(&cfqg->stats); | ||
751 | cfqg_stats_reset(&cfqg->dead_stats); | ||
752 | } | ||
753 | |||
672 | #else /* CONFIG_CFQ_GROUP_IOSCHED */ | 754 | #else /* CONFIG_CFQ_GROUP_IOSCHED */ |
673 | 755 | ||
756 | static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; } | ||
674 | static inline void cfqg_get(struct cfq_group *cfqg) { } | 757 | static inline void cfqg_get(struct cfq_group *cfqg) { } |
675 | static inline void cfqg_put(struct cfq_group *cfqg) { } | 758 | static inline void cfqg_put(struct cfq_group *cfqg) { } |
676 | 759 | ||
677 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ | 760 | #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ |
678 | blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) | 761 | blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c " fmt, (cfqq)->pid, \ |
762 | cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ | ||
763 | cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\ | ||
764 | ##args) | ||
679 | #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) | 765 | #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) |
680 | 766 | ||
681 | static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, | 767 | static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, |
@@ -732,7 +818,7 @@ static inline bool iops_mode(struct cfq_data *cfqd) | |||
732 | return false; | 818 | return false; |
733 | } | 819 | } |
734 | 820 | ||
735 | static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq) | 821 | static inline enum wl_class_t cfqq_class(struct cfq_queue *cfqq) |
736 | { | 822 | { |
737 | if (cfq_class_idle(cfqq)) | 823 | if (cfq_class_idle(cfqq)) |
738 | return IDLE_WORKLOAD; | 824 | return IDLE_WORKLOAD; |
@@ -751,23 +837,23 @@ static enum wl_type_t cfqq_type(struct cfq_queue *cfqq) | |||
751 | return SYNC_WORKLOAD; | 837 | return SYNC_WORKLOAD; |
752 | } | 838 | } |
753 | 839 | ||
754 | static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl, | 840 | static inline int cfq_group_busy_queues_wl(enum wl_class_t wl_class, |
755 | struct cfq_data *cfqd, | 841 | struct cfq_data *cfqd, |
756 | struct cfq_group *cfqg) | 842 | struct cfq_group *cfqg) |
757 | { | 843 | { |
758 | if (wl == IDLE_WORKLOAD) | 844 | if (wl_class == IDLE_WORKLOAD) |
759 | return cfqg->service_tree_idle.count; | 845 | return cfqg->service_tree_idle.count; |
760 | 846 | ||
761 | return cfqg->service_trees[wl][ASYNC_WORKLOAD].count | 847 | return cfqg->service_trees[wl_class][ASYNC_WORKLOAD].count + |
762 | + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count | 848 | cfqg->service_trees[wl_class][SYNC_NOIDLE_WORKLOAD].count + |
763 | + cfqg->service_trees[wl][SYNC_WORKLOAD].count; | 849 | cfqg->service_trees[wl_class][SYNC_WORKLOAD].count; |
764 | } | 850 | } |
765 | 851 | ||
766 | static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, | 852 | static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, |
767 | struct cfq_group *cfqg) | 853 | struct cfq_group *cfqg) |
768 | { | 854 | { |
769 | return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count | 855 | return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count + |
770 | + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; | 856 | cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; |
771 | } | 857 | } |
772 | 858 | ||
773 | static void cfq_dispatch_insert(struct request_queue *, struct request *); | 859 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
@@ -847,13 +933,27 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
847 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); | 933 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); |
848 | } | 934 | } |
849 | 935 | ||
850 | static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg) | 936 | /** |
937 | * cfqg_scale_charge - scale disk time charge according to cfqg weight | ||
938 | * @charge: disk time being charged | ||
939 | * @vfraction: vfraction of the cfqg, fixed point w/ CFQ_SERVICE_SHIFT | ||
940 | * | ||
941 | * Scale @charge according to @vfraction, which is in range (0, 1]. The | ||
942 | * scaling is inversely proportional. | ||
943 | * | ||
944 | * scaled = charge / vfraction | ||
945 | * | ||
946 | * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. | ||
947 | */ | ||
948 | static inline u64 cfqg_scale_charge(unsigned long charge, | ||
949 | unsigned int vfraction) | ||
851 | { | 950 | { |
852 | u64 d = delta << CFQ_SERVICE_SHIFT; | 951 | u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ |
853 | 952 | ||
854 | d = d * CFQ_WEIGHT_DEFAULT; | 953 | /* charge / vfraction */ |
855 | do_div(d, cfqg->weight); | 954 | c <<= CFQ_SERVICE_SHIFT; |
856 | return d; | 955 | do_div(c, vfraction); |
956 | return c; | ||
857 | } | 957 | } |
858 | 958 | ||
859 | static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) | 959 | static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) |
@@ -909,9 +1009,7 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, | |||
909 | static inline unsigned | 1009 | static inline unsigned |
910 | cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) | 1010 | cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) |
911 | { | 1011 | { |
912 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 1012 | return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; |
913 | |||
914 | return cfqd->cfq_target_latency * cfqg->weight / st->total_weight; | ||
915 | } | 1013 | } |
916 | 1014 | ||
917 | static inline unsigned | 1015 | static inline unsigned |
@@ -1178,20 +1276,61 @@ static void | |||
1178 | cfq_update_group_weight(struct cfq_group *cfqg) | 1276 | cfq_update_group_weight(struct cfq_group *cfqg) |
1179 | { | 1277 | { |
1180 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); | 1278 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); |
1279 | |||
1181 | if (cfqg->new_weight) { | 1280 | if (cfqg->new_weight) { |
1182 | cfqg->weight = cfqg->new_weight; | 1281 | cfqg->weight = cfqg->new_weight; |
1183 | cfqg->new_weight = 0; | 1282 | cfqg->new_weight = 0; |
1184 | } | 1283 | } |
1284 | |||
1285 | if (cfqg->new_leaf_weight) { | ||
1286 | cfqg->leaf_weight = cfqg->new_leaf_weight; | ||
1287 | cfqg->new_leaf_weight = 0; | ||
1288 | } | ||
1185 | } | 1289 | } |
1186 | 1290 | ||
1187 | static void | 1291 | static void |
1188 | cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) | 1292 | cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) |
1189 | { | 1293 | { |
1294 | unsigned int vfr = 1 << CFQ_SERVICE_SHIFT; /* start with 1 */ | ||
1295 | struct cfq_group *pos = cfqg; | ||
1296 | struct cfq_group *parent; | ||
1297 | bool propagate; | ||
1298 | |||
1299 | /* add to the service tree */ | ||
1190 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); | 1300 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); |
1191 | 1301 | ||
1192 | cfq_update_group_weight(cfqg); | 1302 | cfq_update_group_weight(cfqg); |
1193 | __cfq_group_service_tree_add(st, cfqg); | 1303 | __cfq_group_service_tree_add(st, cfqg); |
1194 | st->total_weight += cfqg->weight; | 1304 | |
1305 | /* | ||
1306 | * Activate @cfqg and calculate the portion of vfraction @cfqg is | ||
1307 | * entitled to. vfraction is calculated by walking the tree | ||
1308 | * towards the root calculating the fraction it has at each level. | ||
1309 | * The compounded ratio is how much vfraction @cfqg owns. | ||
1310 | * | ||
1311 | * Start with the proportion tasks in this cfqg has against active | ||
1312 | * children cfqgs - its leaf_weight against children_weight. | ||
1313 | */ | ||
1314 | propagate = !pos->nr_active++; | ||
1315 | pos->children_weight += pos->leaf_weight; | ||
1316 | vfr = vfr * pos->leaf_weight / pos->children_weight; | ||
1317 | |||
1318 | /* | ||
1319 | * Compound ->weight walking up the tree. Both activation and | ||
1320 | * vfraction calculation are done in the same loop. Propagation | ||
1321 | * stops once an already activated node is met. vfraction | ||
1322 | * calculation should always continue to the root. | ||
1323 | */ | ||
1324 | while ((parent = cfqg_parent(pos))) { | ||
1325 | if (propagate) { | ||
1326 | propagate = !parent->nr_active++; | ||
1327 | parent->children_weight += pos->weight; | ||
1328 | } | ||
1329 | vfr = vfr * pos->weight / parent->children_weight; | ||
1330 | pos = parent; | ||
1331 | } | ||
1332 | |||
1333 | cfqg->vfraction = max_t(unsigned, vfr, 1); | ||
1195 | } | 1334 | } |
1196 | 1335 | ||
1197 | static void | 1336 | static void |
@@ -1222,7 +1361,32 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
1222 | static void | 1361 | static void |
1223 | cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg) | 1362 | cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg) |
1224 | { | 1363 | { |
1225 | st->total_weight -= cfqg->weight; | 1364 | struct cfq_group *pos = cfqg; |
1365 | bool propagate; | ||
1366 | |||
1367 | /* | ||
1368 | * Undo activation from cfq_group_service_tree_add(). Deactivate | ||
1369 | * @cfqg and propagate deactivation upwards. | ||
1370 | */ | ||
1371 | propagate = !--pos->nr_active; | ||
1372 | pos->children_weight -= pos->leaf_weight; | ||
1373 | |||
1374 | while (propagate) { | ||
1375 | struct cfq_group *parent = cfqg_parent(pos); | ||
1376 | |||
1377 | /* @pos has 0 nr_active at this point */ | ||
1378 | WARN_ON_ONCE(pos->children_weight); | ||
1379 | pos->vfraction = 0; | ||
1380 | |||
1381 | if (!parent) | ||
1382 | break; | ||
1383 | |||
1384 | propagate = !--parent->nr_active; | ||
1385 | parent->children_weight -= pos->weight; | ||
1386 | pos = parent; | ||
1387 | } | ||
1388 | |||
1389 | /* remove from the service tree */ | ||
1226 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | 1390 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) |
1227 | cfq_rb_erase(&cfqg->rb_node, st); | 1391 | cfq_rb_erase(&cfqg->rb_node, st); |
1228 | } | 1392 | } |
@@ -1241,7 +1405,7 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
1241 | 1405 | ||
1242 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); | 1406 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); |
1243 | cfq_group_service_tree_del(st, cfqg); | 1407 | cfq_group_service_tree_del(st, cfqg); |
1244 | cfqg->saved_workload_slice = 0; | 1408 | cfqg->saved_wl_slice = 0; |
1245 | cfqg_stats_update_dequeue(cfqg); | 1409 | cfqg_stats_update_dequeue(cfqg); |
1246 | } | 1410 | } |
1247 | 1411 | ||
@@ -1284,6 +1448,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
1284 | unsigned int used_sl, charge, unaccounted_sl = 0; | 1448 | unsigned int used_sl, charge, unaccounted_sl = 0; |
1285 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) | 1449 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) |
1286 | - cfqg->service_tree_idle.count; | 1450 | - cfqg->service_tree_idle.count; |
1451 | unsigned int vfr; | ||
1287 | 1452 | ||
1288 | BUG_ON(nr_sync < 0); | 1453 | BUG_ON(nr_sync < 0); |
1289 | used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); | 1454 | used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); |
@@ -1293,20 +1458,25 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
1293 | else if (!cfq_cfqq_sync(cfqq) && !nr_sync) | 1458 | else if (!cfq_cfqq_sync(cfqq) && !nr_sync) |
1294 | charge = cfqq->allocated_slice; | 1459 | charge = cfqq->allocated_slice; |
1295 | 1460 | ||
1296 | /* Can't update vdisktime while group is on service tree */ | 1461 | /* |
1462 | * Can't update vdisktime while on service tree and cfqg->vfraction | ||
1463 | * is valid only while on it. Cache vfr, leave the service tree, | ||
1464 | * update vdisktime and go back on. The re-addition to the tree | ||
1465 | * will also update the weights as necessary. | ||
1466 | */ | ||
1467 | vfr = cfqg->vfraction; | ||
1297 | cfq_group_service_tree_del(st, cfqg); | 1468 | cfq_group_service_tree_del(st, cfqg); |
1298 | cfqg->vdisktime += cfq_scale_slice(charge, cfqg); | 1469 | cfqg->vdisktime += cfqg_scale_charge(charge, vfr); |
1299 | /* If a new weight was requested, update now, off tree */ | ||
1300 | cfq_group_service_tree_add(st, cfqg); | 1470 | cfq_group_service_tree_add(st, cfqg); |
1301 | 1471 | ||
1302 | /* This group is being expired. Save the context */ | 1472 | /* This group is being expired. Save the context */ |
1303 | if (time_after(cfqd->workload_expires, jiffies)) { | 1473 | if (time_after(cfqd->workload_expires, jiffies)) { |
1304 | cfqg->saved_workload_slice = cfqd->workload_expires | 1474 | cfqg->saved_wl_slice = cfqd->workload_expires |
1305 | - jiffies; | 1475 | - jiffies; |
1306 | cfqg->saved_workload = cfqd->serving_type; | 1476 | cfqg->saved_wl_type = cfqd->serving_wl_type; |
1307 | cfqg->saved_serving_prio = cfqd->serving_prio; | 1477 | cfqg->saved_wl_class = cfqd->serving_wl_class; |
1308 | } else | 1478 | } else |
1309 | cfqg->saved_workload_slice = 0; | 1479 | cfqg->saved_wl_slice = 0; |
1310 | 1480 | ||
1311 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, | 1481 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, |
1312 | st->min_vdisktime); | 1482 | st->min_vdisktime); |
@@ -1344,6 +1514,52 @@ static void cfq_pd_init(struct blkcg_gq *blkg) | |||
1344 | 1514 | ||
1345 | cfq_init_cfqg_base(cfqg); | 1515 | cfq_init_cfqg_base(cfqg); |
1346 | cfqg->weight = blkg->blkcg->cfq_weight; | 1516 | cfqg->weight = blkg->blkcg->cfq_weight; |
1517 | cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight; | ||
1518 | } | ||
1519 | |||
1520 | static void cfq_pd_offline(struct blkcg_gq *blkg) | ||
1521 | { | ||
1522 | /* | ||
1523 | * @blkg is going offline and will be ignored by | ||
1524 | * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so | ||
1525 | * that they don't get lost. If IOs complete after this point, the | ||
1526 | * stats for them will be lost. Oh well... | ||
1527 | */ | ||
1528 | cfqg_stats_xfer_dead(blkg_to_cfqg(blkg)); | ||
1529 | } | ||
1530 | |||
1531 | /* offset delta from cfqg->stats to cfqg->dead_stats */ | ||
1532 | static const int dead_stats_off_delta = offsetof(struct cfq_group, dead_stats) - | ||
1533 | offsetof(struct cfq_group, stats); | ||
1534 | |||
1535 | /* to be used by recursive prfill, sums live and dead stats recursively */ | ||
1536 | static u64 cfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off) | ||
1537 | { | ||
1538 | u64 sum = 0; | ||
1539 | |||
1540 | sum += blkg_stat_recursive_sum(pd, off); | ||
1541 | sum += blkg_stat_recursive_sum(pd, off + dead_stats_off_delta); | ||
1542 | return sum; | ||
1543 | } | ||
1544 | |||
1545 | /* to be used by recursive prfill, sums live and dead rwstats recursively */ | ||
1546 | static struct blkg_rwstat cfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, | ||
1547 | int off) | ||
1548 | { | ||
1549 | struct blkg_rwstat a, b; | ||
1550 | |||
1551 | a = blkg_rwstat_recursive_sum(pd, off); | ||
1552 | b = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta); | ||
1553 | blkg_rwstat_merge(&a, &b); | ||
1554 | return a; | ||
1555 | } | ||
1556 | |||
1557 | static void cfq_pd_reset_stats(struct blkcg_gq *blkg) | ||
1558 | { | ||
1559 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); | ||
1560 | |||
1561 | cfqg_stats_reset(&cfqg->stats); | ||
1562 | cfqg_stats_reset(&cfqg->dead_stats); | ||
1347 | } | 1563 | } |
1348 | 1564 | ||
1349 | /* | 1565 | /* |
@@ -1400,6 +1616,26 @@ static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, | |||
1400 | return 0; | 1616 | return 0; |
1401 | } | 1617 | } |
1402 | 1618 | ||
1619 | static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf, | ||
1620 | struct blkg_policy_data *pd, int off) | ||
1621 | { | ||
1622 | struct cfq_group *cfqg = pd_to_cfqg(pd); | ||
1623 | |||
1624 | if (!cfqg->dev_leaf_weight) | ||
1625 | return 0; | ||
1626 | return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); | ||
1627 | } | ||
1628 | |||
1629 | static int cfqg_print_leaf_weight_device(struct cgroup *cgrp, | ||
1630 | struct cftype *cft, | ||
1631 | struct seq_file *sf) | ||
1632 | { | ||
1633 | blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), | ||
1634 | cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, 0, | ||
1635 | false); | ||
1636 | return 0; | ||
1637 | } | ||
1638 | |||
1403 | static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, | 1639 | static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, |
1404 | struct seq_file *sf) | 1640 | struct seq_file *sf) |
1405 | { | 1641 | { |
@@ -1407,8 +1643,16 @@ static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, | |||
1407 | return 0; | 1643 | return 0; |
1408 | } | 1644 | } |
1409 | 1645 | ||
1410 | static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, | 1646 | static int cfq_print_leaf_weight(struct cgroup *cgrp, struct cftype *cft, |
1411 | const char *buf) | 1647 | struct seq_file *sf) |
1648 | { | ||
1649 | seq_printf(sf, "%u\n", | ||
1650 | cgroup_to_blkcg(cgrp)->cfq_leaf_weight); | ||
1651 | return 0; | ||
1652 | } | ||
1653 | |||
1654 | static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, | ||
1655 | const char *buf, bool is_leaf_weight) | ||
1412 | { | 1656 | { |
1413 | struct blkcg *blkcg = cgroup_to_blkcg(cgrp); | 1657 | struct blkcg *blkcg = cgroup_to_blkcg(cgrp); |
1414 | struct blkg_conf_ctx ctx; | 1658 | struct blkg_conf_ctx ctx; |
@@ -1422,8 +1666,13 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, | |||
1422 | ret = -EINVAL; | 1666 | ret = -EINVAL; |
1423 | cfqg = blkg_to_cfqg(ctx.blkg); | 1667 | cfqg = blkg_to_cfqg(ctx.blkg); |
1424 | if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { | 1668 | if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { |
1425 | cfqg->dev_weight = ctx.v; | 1669 | if (!is_leaf_weight) { |
1426 | cfqg->new_weight = cfqg->dev_weight ?: blkcg->cfq_weight; | 1670 | cfqg->dev_weight = ctx.v; |
1671 | cfqg->new_weight = ctx.v ?: blkcg->cfq_weight; | ||
1672 | } else { | ||
1673 | cfqg->dev_leaf_weight = ctx.v; | ||
1674 | cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight; | ||
1675 | } | ||
1427 | ret = 0; | 1676 | ret = 0; |
1428 | } | 1677 | } |
1429 | 1678 | ||
@@ -1431,7 +1680,20 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, | |||
1431 | return ret; | 1680 | return ret; |
1432 | } | 1681 | } |
1433 | 1682 | ||
1434 | static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) | 1683 | static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, |
1684 | const char *buf) | ||
1685 | { | ||
1686 | return __cfqg_set_weight_device(cgrp, cft, buf, false); | ||
1687 | } | ||
1688 | |||
1689 | static int cfqg_set_leaf_weight_device(struct cgroup *cgrp, struct cftype *cft, | ||
1690 | const char *buf) | ||
1691 | { | ||
1692 | return __cfqg_set_weight_device(cgrp, cft, buf, true); | ||
1693 | } | ||
1694 | |||
1695 | static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val, | ||
1696 | bool is_leaf_weight) | ||
1435 | { | 1697 | { |
1436 | struct blkcg *blkcg = cgroup_to_blkcg(cgrp); | 1698 | struct blkcg *blkcg = cgroup_to_blkcg(cgrp); |
1437 | struct blkcg_gq *blkg; | 1699 | struct blkcg_gq *blkg; |
@@ -1441,19 +1703,41 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) | |||
1441 | return -EINVAL; | 1703 | return -EINVAL; |
1442 | 1704 | ||
1443 | spin_lock_irq(&blkcg->lock); | 1705 | spin_lock_irq(&blkcg->lock); |
1444 | blkcg->cfq_weight = (unsigned int)val; | 1706 | |
1707 | if (!is_leaf_weight) | ||
1708 | blkcg->cfq_weight = val; | ||
1709 | else | ||
1710 | blkcg->cfq_leaf_weight = val; | ||
1445 | 1711 | ||
1446 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | 1712 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { |
1447 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); | 1713 | struct cfq_group *cfqg = blkg_to_cfqg(blkg); |
1448 | 1714 | ||
1449 | if (cfqg && !cfqg->dev_weight) | 1715 | if (!cfqg) |
1450 | cfqg->new_weight = blkcg->cfq_weight; | 1716 | continue; |
1717 | |||
1718 | if (!is_leaf_weight) { | ||
1719 | if (!cfqg->dev_weight) | ||
1720 | cfqg->new_weight = blkcg->cfq_weight; | ||
1721 | } else { | ||
1722 | if (!cfqg->dev_leaf_weight) | ||
1723 | cfqg->new_leaf_weight = blkcg->cfq_leaf_weight; | ||
1724 | } | ||
1451 | } | 1725 | } |
1452 | 1726 | ||
1453 | spin_unlock_irq(&blkcg->lock); | 1727 | spin_unlock_irq(&blkcg->lock); |
1454 | return 0; | 1728 | return 0; |
1455 | } | 1729 | } |
1456 | 1730 | ||
1731 | static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) | ||
1732 | { | ||
1733 | return __cfq_set_weight(cgrp, cft, val, false); | ||
1734 | } | ||
1735 | |||
1736 | static int cfq_set_leaf_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) | ||
1737 | { | ||
1738 | return __cfq_set_weight(cgrp, cft, val, true); | ||
1739 | } | ||
1740 | |||
1457 | static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, | 1741 | static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, |
1458 | struct seq_file *sf) | 1742 | struct seq_file *sf) |
1459 | { | 1743 | { |
@@ -1474,6 +1758,42 @@ static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, | |||
1474 | return 0; | 1758 | return 0; |
1475 | } | 1759 | } |
1476 | 1760 | ||
1761 | static u64 cfqg_prfill_stat_recursive(struct seq_file *sf, | ||
1762 | struct blkg_policy_data *pd, int off) | ||
1763 | { | ||
1764 | u64 sum = cfqg_stat_pd_recursive_sum(pd, off); | ||
1765 | |||
1766 | return __blkg_prfill_u64(sf, pd, sum); | ||
1767 | } | ||
1768 | |||
1769 | static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf, | ||
1770 | struct blkg_policy_data *pd, int off) | ||
1771 | { | ||
1772 | struct blkg_rwstat sum = cfqg_rwstat_pd_recursive_sum(pd, off); | ||
1773 | |||
1774 | return __blkg_prfill_rwstat(sf, pd, &sum); | ||
1775 | } | ||
1776 | |||
1777 | static int cfqg_print_stat_recursive(struct cgroup *cgrp, struct cftype *cft, | ||
1778 | struct seq_file *sf) | ||
1779 | { | ||
1780 | struct blkcg *blkcg = cgroup_to_blkcg(cgrp); | ||
1781 | |||
1782 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive, | ||
1783 | &blkcg_policy_cfq, cft->private, false); | ||
1784 | return 0; | ||
1785 | } | ||
1786 | |||
1787 | static int cfqg_print_rwstat_recursive(struct cgroup *cgrp, struct cftype *cft, | ||
1788 | struct seq_file *sf) | ||
1789 | { | ||
1790 | struct blkcg *blkcg = cgroup_to_blkcg(cgrp); | ||
1791 | |||
1792 | blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive, | ||
1793 | &blkcg_policy_cfq, cft->private, true); | ||
1794 | return 0; | ||
1795 | } | ||
1796 | |||
1477 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 1797 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
1478 | static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, | 1798 | static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, |
1479 | struct blkg_policy_data *pd, int off) | 1799 | struct blkg_policy_data *pd, int off) |
@@ -1503,17 +1823,49 @@ static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, | |||
1503 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ | 1823 | #endif /* CONFIG_DEBUG_BLK_CGROUP */ |
1504 | 1824 | ||
1505 | static struct cftype cfq_blkcg_files[] = { | 1825 | static struct cftype cfq_blkcg_files[] = { |
1826 | /* on root, weight is mapped to leaf_weight */ | ||
1506 | { | 1827 | { |
1507 | .name = "weight_device", | 1828 | .name = "weight_device", |
1829 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
1830 | .read_seq_string = cfqg_print_leaf_weight_device, | ||
1831 | .write_string = cfqg_set_leaf_weight_device, | ||
1832 | .max_write_len = 256, | ||
1833 | }, | ||
1834 | { | ||
1835 | .name = "weight", | ||
1836 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
1837 | .read_seq_string = cfq_print_leaf_weight, | ||
1838 | .write_u64 = cfq_set_leaf_weight, | ||
1839 | }, | ||
1840 | |||
1841 | /* no such mapping necessary for !roots */ | ||
1842 | { | ||
1843 | .name = "weight_device", | ||
1844 | .flags = CFTYPE_NOT_ON_ROOT, | ||
1508 | .read_seq_string = cfqg_print_weight_device, | 1845 | .read_seq_string = cfqg_print_weight_device, |
1509 | .write_string = cfqg_set_weight_device, | 1846 | .write_string = cfqg_set_weight_device, |
1510 | .max_write_len = 256, | 1847 | .max_write_len = 256, |
1511 | }, | 1848 | }, |
1512 | { | 1849 | { |
1513 | .name = "weight", | 1850 | .name = "weight", |
1851 | .flags = CFTYPE_NOT_ON_ROOT, | ||
1514 | .read_seq_string = cfq_print_weight, | 1852 | .read_seq_string = cfq_print_weight, |
1515 | .write_u64 = cfq_set_weight, | 1853 | .write_u64 = cfq_set_weight, |
1516 | }, | 1854 | }, |
1855 | |||
1856 | { | ||
1857 | .name = "leaf_weight_device", | ||
1858 | .read_seq_string = cfqg_print_leaf_weight_device, | ||
1859 | .write_string = cfqg_set_leaf_weight_device, | ||
1860 | .max_write_len = 256, | ||
1861 | }, | ||
1862 | { | ||
1863 | .name = "leaf_weight", | ||
1864 | .read_seq_string = cfq_print_leaf_weight, | ||
1865 | .write_u64 = cfq_set_leaf_weight, | ||
1866 | }, | ||
1867 | |||
1868 | /* statistics, covers only the tasks in the cfqg */ | ||
1517 | { | 1869 | { |
1518 | .name = "time", | 1870 | .name = "time", |
1519 | .private = offsetof(struct cfq_group, stats.time), | 1871 | .private = offsetof(struct cfq_group, stats.time), |
@@ -1554,6 +1906,48 @@ static struct cftype cfq_blkcg_files[] = { | |||
1554 | .private = offsetof(struct cfq_group, stats.queued), | 1906 | .private = offsetof(struct cfq_group, stats.queued), |
1555 | .read_seq_string = cfqg_print_rwstat, | 1907 | .read_seq_string = cfqg_print_rwstat, |
1556 | }, | 1908 | }, |
1909 | |||
1910 | /* the same statistics which cover the cfqg and its descendants */ | ||
1911 | { | ||
1912 | .name = "time_recursive", | ||
1913 | .private = offsetof(struct cfq_group, stats.time), | ||
1914 | .read_seq_string = cfqg_print_stat_recursive, | ||
1915 | }, | ||
1916 | { | ||
1917 | .name = "sectors_recursive", | ||
1918 | .private = offsetof(struct cfq_group, stats.sectors), | ||
1919 | .read_seq_string = cfqg_print_stat_recursive, | ||
1920 | }, | ||
1921 | { | ||
1922 | .name = "io_service_bytes_recursive", | ||
1923 | .private = offsetof(struct cfq_group, stats.service_bytes), | ||
1924 | .read_seq_string = cfqg_print_rwstat_recursive, | ||
1925 | }, | ||
1926 | { | ||
1927 | .name = "io_serviced_recursive", | ||
1928 | .private = offsetof(struct cfq_group, stats.serviced), | ||
1929 | .read_seq_string = cfqg_print_rwstat_recursive, | ||
1930 | }, | ||
1931 | { | ||
1932 | .name = "io_service_time_recursive", | ||
1933 | .private = offsetof(struct cfq_group, stats.service_time), | ||
1934 | .read_seq_string = cfqg_print_rwstat_recursive, | ||
1935 | }, | ||
1936 | { | ||
1937 | .name = "io_wait_time_recursive", | ||
1938 | .private = offsetof(struct cfq_group, stats.wait_time), | ||
1939 | .read_seq_string = cfqg_print_rwstat_recursive, | ||
1940 | }, | ||
1941 | { | ||
1942 | .name = "io_merged_recursive", | ||
1943 | .private = offsetof(struct cfq_group, stats.merged), | ||
1944 | .read_seq_string = cfqg_print_rwstat_recursive, | ||
1945 | }, | ||
1946 | { | ||
1947 | .name = "io_queued_recursive", | ||
1948 | .private = offsetof(struct cfq_group, stats.queued), | ||
1949 | .read_seq_string = cfqg_print_rwstat_recursive, | ||
1950 | }, | ||
1557 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 1951 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
1558 | { | 1952 | { |
1559 | .name = "avg_queue_size", | 1953 | .name = "avg_queue_size", |
@@ -1612,15 +2006,14 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1612 | struct rb_node **p, *parent; | 2006 | struct rb_node **p, *parent; |
1613 | struct cfq_queue *__cfqq; | 2007 | struct cfq_queue *__cfqq; |
1614 | unsigned long rb_key; | 2008 | unsigned long rb_key; |
1615 | struct cfq_rb_root *service_tree; | 2009 | struct cfq_rb_root *st; |
1616 | int left; | 2010 | int left; |
1617 | int new_cfqq = 1; | 2011 | int new_cfqq = 1; |
1618 | 2012 | ||
1619 | service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), | 2013 | st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); |
1620 | cfqq_type(cfqq)); | ||
1621 | if (cfq_class_idle(cfqq)) { | 2014 | if (cfq_class_idle(cfqq)) { |
1622 | rb_key = CFQ_IDLE_DELAY; | 2015 | rb_key = CFQ_IDLE_DELAY; |
1623 | parent = rb_last(&service_tree->rb); | 2016 | parent = rb_last(&st->rb); |
1624 | if (parent && parent != &cfqq->rb_node) { | 2017 | if (parent && parent != &cfqq->rb_node) { |
1625 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | 2018 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); |
1626 | rb_key += __cfqq->rb_key; | 2019 | rb_key += __cfqq->rb_key; |
@@ -1638,7 +2031,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1638 | cfqq->slice_resid = 0; | 2031 | cfqq->slice_resid = 0; |
1639 | } else { | 2032 | } else { |
1640 | rb_key = -HZ; | 2033 | rb_key = -HZ; |
1641 | __cfqq = cfq_rb_first(service_tree); | 2034 | __cfqq = cfq_rb_first(st); |
1642 | rb_key += __cfqq ? __cfqq->rb_key : jiffies; | 2035 | rb_key += __cfqq ? __cfqq->rb_key : jiffies; |
1643 | } | 2036 | } |
1644 | 2037 | ||
@@ -1647,8 +2040,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1647 | /* | 2040 | /* |
1648 | * same position, nothing more to do | 2041 | * same position, nothing more to do |
1649 | */ | 2042 | */ |
1650 | if (rb_key == cfqq->rb_key && | 2043 | if (rb_key == cfqq->rb_key && cfqq->service_tree == st) |
1651 | cfqq->service_tree == service_tree) | ||
1652 | return; | 2044 | return; |
1653 | 2045 | ||
1654 | cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); | 2046 | cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); |
@@ -1657,11 +2049,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1657 | 2049 | ||
1658 | left = 1; | 2050 | left = 1; |
1659 | parent = NULL; | 2051 | parent = NULL; |
1660 | cfqq->service_tree = service_tree; | 2052 | cfqq->service_tree = st; |
1661 | p = &service_tree->rb.rb_node; | 2053 | p = &st->rb.rb_node; |
1662 | while (*p) { | 2054 | while (*p) { |
1663 | struct rb_node **n; | ||
1664 | |||
1665 | parent = *p; | 2055 | parent = *p; |
1666 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | 2056 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); |
1667 | 2057 | ||
@@ -1669,22 +2059,20 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1669 | * sort by key, that represents service time. | 2059 | * sort by key, that represents service time. |
1670 | */ | 2060 | */ |
1671 | if (time_before(rb_key, __cfqq->rb_key)) | 2061 | if (time_before(rb_key, __cfqq->rb_key)) |
1672 | n = &(*p)->rb_left; | 2062 | p = &parent->rb_left; |
1673 | else { | 2063 | else { |
1674 | n = &(*p)->rb_right; | 2064 | p = &parent->rb_right; |
1675 | left = 0; | 2065 | left = 0; |
1676 | } | 2066 | } |
1677 | |||
1678 | p = n; | ||
1679 | } | 2067 | } |
1680 | 2068 | ||
1681 | if (left) | 2069 | if (left) |
1682 | service_tree->left = &cfqq->rb_node; | 2070 | st->left = &cfqq->rb_node; |
1683 | 2071 | ||
1684 | cfqq->rb_key = rb_key; | 2072 | cfqq->rb_key = rb_key; |
1685 | rb_link_node(&cfqq->rb_node, parent, p); | 2073 | rb_link_node(&cfqq->rb_node, parent, p); |
1686 | rb_insert_color(&cfqq->rb_node, &service_tree->rb); | 2074 | rb_insert_color(&cfqq->rb_node, &st->rb); |
1687 | service_tree->count++; | 2075 | st->count++; |
1688 | if (add_front || !new_cfqq) | 2076 | if (add_front || !new_cfqq) |
1689 | return; | 2077 | return; |
1690 | cfq_group_notify_queue_add(cfqd, cfqq->cfqg); | 2078 | cfq_group_notify_queue_add(cfqd, cfqq->cfqg); |
@@ -2030,8 +2418,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, | |||
2030 | struct cfq_queue *cfqq) | 2418 | struct cfq_queue *cfqq) |
2031 | { | 2419 | { |
2032 | if (cfqq) { | 2420 | if (cfqq) { |
2033 | cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", | 2421 | cfq_log_cfqq(cfqd, cfqq, "set_active wl_class:%d wl_type:%d", |
2034 | cfqd->serving_prio, cfqd->serving_type); | 2422 | cfqd->serving_wl_class, cfqd->serving_wl_type); |
2035 | cfqg_stats_update_avg_queue_size(cfqq->cfqg); | 2423 | cfqg_stats_update_avg_queue_size(cfqq->cfqg); |
2036 | cfqq->slice_start = 0; | 2424 | cfqq->slice_start = 0; |
2037 | cfqq->dispatch_start = jiffies; | 2425 | cfqq->dispatch_start = jiffies; |
@@ -2117,19 +2505,18 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) | |||
2117 | */ | 2505 | */ |
2118 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) | 2506 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) |
2119 | { | 2507 | { |
2120 | struct cfq_rb_root *service_tree = | 2508 | struct cfq_rb_root *st = st_for(cfqd->serving_group, |
2121 | service_tree_for(cfqd->serving_group, cfqd->serving_prio, | 2509 | cfqd->serving_wl_class, cfqd->serving_wl_type); |
2122 | cfqd->serving_type); | ||
2123 | 2510 | ||
2124 | if (!cfqd->rq_queued) | 2511 | if (!cfqd->rq_queued) |
2125 | return NULL; | 2512 | return NULL; |
2126 | 2513 | ||
2127 | /* There is nothing to dispatch */ | 2514 | /* There is nothing to dispatch */ |
2128 | if (!service_tree) | 2515 | if (!st) |
2129 | return NULL; | 2516 | return NULL; |
2130 | if (RB_EMPTY_ROOT(&service_tree->rb)) | 2517 | if (RB_EMPTY_ROOT(&st->rb)) |
2131 | return NULL; | 2518 | return NULL; |
2132 | return cfq_rb_first(service_tree); | 2519 | return cfq_rb_first(st); |
2133 | } | 2520 | } |
2134 | 2521 | ||
2135 | static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) | 2522 | static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) |
@@ -2285,17 +2672,17 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, | |||
2285 | 2672 | ||
2286 | static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 2673 | static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
2287 | { | 2674 | { |
2288 | enum wl_prio_t prio = cfqq_prio(cfqq); | 2675 | enum wl_class_t wl_class = cfqq_class(cfqq); |
2289 | struct cfq_rb_root *service_tree = cfqq->service_tree; | 2676 | struct cfq_rb_root *st = cfqq->service_tree; |
2290 | 2677 | ||
2291 | BUG_ON(!service_tree); | 2678 | BUG_ON(!st); |
2292 | BUG_ON(!service_tree->count); | 2679 | BUG_ON(!st->count); |
2293 | 2680 | ||
2294 | if (!cfqd->cfq_slice_idle) | 2681 | if (!cfqd->cfq_slice_idle) |
2295 | return false; | 2682 | return false; |
2296 | 2683 | ||
2297 | /* We never do for idle class queues. */ | 2684 | /* We never do for idle class queues. */ |
2298 | if (prio == IDLE_WORKLOAD) | 2685 | if (wl_class == IDLE_WORKLOAD) |
2299 | return false; | 2686 | return false; |
2300 | 2687 | ||
2301 | /* We do for queues that were marked with idle window flag. */ | 2688 | /* We do for queues that were marked with idle window flag. */ |
@@ -2307,11 +2694,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2307 | * Otherwise, we do only if they are the last ones | 2694 | * Otherwise, we do only if they are the last ones |
2308 | * in their service tree. | 2695 | * in their service tree. |
2309 | */ | 2696 | */ |
2310 | if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) && | 2697 | if (st->count == 1 && cfq_cfqq_sync(cfqq) && |
2311 | !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false)) | 2698 | !cfq_io_thinktime_big(cfqd, &st->ttime, false)) |
2312 | return true; | 2699 | return true; |
2313 | cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", | 2700 | cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", st->count); |
2314 | service_tree->count); | ||
2315 | return false; | 2701 | return false; |
2316 | } | 2702 | } |
2317 | 2703 | ||
@@ -2494,8 +2880,8 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) | |||
2494 | } | 2880 | } |
2495 | } | 2881 | } |
2496 | 2882 | ||
2497 | static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, | 2883 | static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd, |
2498 | struct cfq_group *cfqg, enum wl_prio_t prio) | 2884 | struct cfq_group *cfqg, enum wl_class_t wl_class) |
2499 | { | 2885 | { |
2500 | struct cfq_queue *queue; | 2886 | struct cfq_queue *queue; |
2501 | int i; | 2887 | int i; |
@@ -2505,7 +2891,7 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, | |||
2505 | 2891 | ||
2506 | for (i = 0; i <= SYNC_WORKLOAD; ++i) { | 2892 | for (i = 0; i <= SYNC_WORKLOAD; ++i) { |
2507 | /* select the one with lowest rb_key */ | 2893 | /* select the one with lowest rb_key */ |
2508 | queue = cfq_rb_first(service_tree_for(cfqg, prio, i)); | 2894 | queue = cfq_rb_first(st_for(cfqg, wl_class, i)); |
2509 | if (queue && | 2895 | if (queue && |
2510 | (!key_valid || time_before(queue->rb_key, lowest_key))) { | 2896 | (!key_valid || time_before(queue->rb_key, lowest_key))) { |
2511 | lowest_key = queue->rb_key; | 2897 | lowest_key = queue->rb_key; |
@@ -2517,26 +2903,27 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, | |||
2517 | return cur_best; | 2903 | return cur_best; |
2518 | } | 2904 | } |
2519 | 2905 | ||
2520 | static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | 2906 | static void |
2907 | choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) | ||
2521 | { | 2908 | { |
2522 | unsigned slice; | 2909 | unsigned slice; |
2523 | unsigned count; | 2910 | unsigned count; |
2524 | struct cfq_rb_root *st; | 2911 | struct cfq_rb_root *st; |
2525 | unsigned group_slice; | 2912 | unsigned group_slice; |
2526 | enum wl_prio_t original_prio = cfqd->serving_prio; | 2913 | enum wl_class_t original_class = cfqd->serving_wl_class; |
2527 | 2914 | ||
2528 | /* Choose next priority. RT > BE > IDLE */ | 2915 | /* Choose next priority. RT > BE > IDLE */ |
2529 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) | 2916 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) |
2530 | cfqd->serving_prio = RT_WORKLOAD; | 2917 | cfqd->serving_wl_class = RT_WORKLOAD; |
2531 | else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg)) | 2918 | else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg)) |
2532 | cfqd->serving_prio = BE_WORKLOAD; | 2919 | cfqd->serving_wl_class = BE_WORKLOAD; |
2533 | else { | 2920 | else { |
2534 | cfqd->serving_prio = IDLE_WORKLOAD; | 2921 | cfqd->serving_wl_class = IDLE_WORKLOAD; |
2535 | cfqd->workload_expires = jiffies + 1; | 2922 | cfqd->workload_expires = jiffies + 1; |
2536 | return; | 2923 | return; |
2537 | } | 2924 | } |
2538 | 2925 | ||
2539 | if (original_prio != cfqd->serving_prio) | 2926 | if (original_class != cfqd->serving_wl_class) |
2540 | goto new_workload; | 2927 | goto new_workload; |
2541 | 2928 | ||
2542 | /* | 2929 | /* |
@@ -2544,7 +2931,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2544 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload | 2931 | * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload |
2545 | * expiration time | 2932 | * expiration time |
2546 | */ | 2933 | */ |
2547 | st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); | 2934 | st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type); |
2548 | count = st->count; | 2935 | count = st->count; |
2549 | 2936 | ||
2550 | /* | 2937 | /* |
@@ -2555,9 +2942,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
2555 | 2942 | ||
2556 | new_workload: | 2943 | new_workload: |
2557 | /* otherwise select new workload type */ | 2944 | /* otherwise select new workload type */ |
2558 | cfqd->serving_type = | 2945 | cfqd->serving_wl_type = cfq_choose_wl_type(cfqd, cfqg, |
2559 | cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); | 2946 | cfqd->serving_wl_class); |
2560 | st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); | 2947 | st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type); |
2561 | count = st->count; | 2948 | count = st->count; |
2562 | 2949 | ||
2563 | /* | 2950 | /* |
@@ -2568,10 +2955,11 @@ new_workload: | |||
2568 | group_slice = cfq_group_slice(cfqd, cfqg); | 2955 | group_slice = cfq_group_slice(cfqd, cfqg); |
2569 | 2956 | ||
2570 | slice = group_slice * count / | 2957 | slice = group_slice * count / |
2571 | max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio], | 2958 | max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], |
2572 | cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg)); | 2959 | cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, |
2960 | cfqg)); | ||
2573 | 2961 | ||
2574 | if (cfqd->serving_type == ASYNC_WORKLOAD) { | 2962 | if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { |
2575 | unsigned int tmp; | 2963 | unsigned int tmp; |
2576 | 2964 | ||
2577 | /* | 2965 | /* |
@@ -2617,14 +3005,14 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd) | |||
2617 | cfqd->serving_group = cfqg; | 3005 | cfqd->serving_group = cfqg; |
2618 | 3006 | ||
2619 | /* Restore the workload type data */ | 3007 | /* Restore the workload type data */ |
2620 | if (cfqg->saved_workload_slice) { | 3008 | if (cfqg->saved_wl_slice) { |
2621 | cfqd->workload_expires = jiffies + cfqg->saved_workload_slice; | 3009 | cfqd->workload_expires = jiffies + cfqg->saved_wl_slice; |
2622 | cfqd->serving_type = cfqg->saved_workload; | 3010 | cfqd->serving_wl_type = cfqg->saved_wl_type; |
2623 | cfqd->serving_prio = cfqg->saved_serving_prio; | 3011 | cfqd->serving_wl_class = cfqg->saved_wl_class; |
2624 | } else | 3012 | } else |
2625 | cfqd->workload_expires = jiffies - 1; | 3013 | cfqd->workload_expires = jiffies - 1; |
2626 | 3014 | ||
2627 | choose_service_tree(cfqd, cfqg); | 3015 | choose_wl_class_and_type(cfqd, cfqg); |
2628 | } | 3016 | } |
2629 | 3017 | ||
2630 | /* | 3018 | /* |
@@ -3403,7 +3791,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | |||
3403 | return true; | 3791 | return true; |
3404 | 3792 | ||
3405 | /* Allow preemption only if we are idling on sync-noidle tree */ | 3793 | /* Allow preemption only if we are idling on sync-noidle tree */ |
3406 | if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD && | 3794 | if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD && |
3407 | cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD && | 3795 | cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD && |
3408 | new_cfqq->service_tree->count == 2 && | 3796 | new_cfqq->service_tree->count == 2 && |
3409 | RB_EMPTY_ROOT(&cfqq->sort_list)) | 3797 | RB_EMPTY_ROOT(&cfqq->sort_list)) |
@@ -3455,7 +3843,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
3455 | * doesn't happen | 3843 | * doesn't happen |
3456 | */ | 3844 | */ |
3457 | if (old_type != cfqq_type(cfqq)) | 3845 | if (old_type != cfqq_type(cfqq)) |
3458 | cfqq->cfqg->saved_workload_slice = 0; | 3846 | cfqq->cfqg->saved_wl_slice = 0; |
3459 | 3847 | ||
3460 | /* | 3848 | /* |
3461 | * Put the new queue at the front of the current list, | 3849 | * Put the new queue at the front of the current list, |
@@ -3637,16 +4025,17 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
3637 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; | 4025 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; |
3638 | 4026 | ||
3639 | if (sync) { | 4027 | if (sync) { |
3640 | struct cfq_rb_root *service_tree; | 4028 | struct cfq_rb_root *st; |
3641 | 4029 | ||
3642 | RQ_CIC(rq)->ttime.last_end_request = now; | 4030 | RQ_CIC(rq)->ttime.last_end_request = now; |
3643 | 4031 | ||
3644 | if (cfq_cfqq_on_rr(cfqq)) | 4032 | if (cfq_cfqq_on_rr(cfqq)) |
3645 | service_tree = cfqq->service_tree; | 4033 | st = cfqq->service_tree; |
3646 | else | 4034 | else |
3647 | service_tree = service_tree_for(cfqq->cfqg, | 4035 | st = st_for(cfqq->cfqg, cfqq_class(cfqq), |
3648 | cfqq_prio(cfqq), cfqq_type(cfqq)); | 4036 | cfqq_type(cfqq)); |
3649 | service_tree->ttime.last_end_request = now; | 4037 | |
4038 | st->ttime.last_end_request = now; | ||
3650 | if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) | 4039 | if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) |
3651 | cfqd->last_delayed_sync = now; | 4040 | cfqd->last_delayed_sync = now; |
3652 | } | 4041 | } |
@@ -3993,6 +4382,7 @@ static int cfq_init_queue(struct request_queue *q) | |||
3993 | cfq_init_cfqg_base(cfqd->root_group); | 4382 | cfq_init_cfqg_base(cfqd->root_group); |
3994 | #endif | 4383 | #endif |
3995 | cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT; | 4384 | cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT; |
4385 | cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT; | ||
3996 | 4386 | ||
3997 | /* | 4387 | /* |
3998 | * Not strictly needed (since RB_ROOT just clears the node and we | 4388 | * Not strictly needed (since RB_ROOT just clears the node and we |
@@ -4177,6 +4567,7 @@ static struct blkcg_policy blkcg_policy_cfq = { | |||
4177 | .cftypes = cfq_blkcg_files, | 4567 | .cftypes = cfq_blkcg_files, |
4178 | 4568 | ||
4179 | .pd_init_fn = cfq_pd_init, | 4569 | .pd_init_fn = cfq_pd_init, |
4570 | .pd_offline_fn = cfq_pd_offline, | ||
4180 | .pd_reset_stats_fn = cfq_pd_reset_stats, | 4571 | .pd_reset_stats_fn = cfq_pd_reset_stats, |
4181 | }; | 4572 | }; |
4182 | #endif | 4573 | #endif |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dbe74279f3d6..78feda9bbae2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
20 | #include <linux/bsg.h> | 20 | #include <linux/bsg.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | #include <linux/rcupdate.h> | ||
22 | 23 | ||
23 | #include <asm/scatterlist.h> | 24 | #include <asm/scatterlist.h> |
24 | 25 | ||
@@ -437,6 +438,7 @@ struct request_queue { | |||
437 | /* Throttle data */ | 438 | /* Throttle data */ |
438 | struct throtl_data *td; | 439 | struct throtl_data *td; |
439 | #endif | 440 | #endif |
441 | struct rcu_head rcu_head; | ||
440 | }; | 442 | }; |
441 | 443 | ||
442 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 444 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |