diff options
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r-- | block/blk-cgroup.c | 727 |
1 files changed, 703 insertions, 24 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5fe03def34b2..aa97cd455cef 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -15,8 +15,12 @@ | |||
15 | #include <linux/kdev_t.h> | 15 | #include <linux/kdev_t.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/err.h> | 17 | #include <linux/err.h> |
18 | #include <linux/blkdev.h> | ||
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
19 | #include "blk-cgroup.h" | 20 | #include "blk-cgroup.h" |
21 | #include <linux/genhd.h> | ||
22 | |||
23 | #define MAX_KEY_LEN 100 | ||
20 | 24 | ||
21 | static DEFINE_SPINLOCK(blkio_list_lock); | 25 | static DEFINE_SPINLOCK(blkio_list_lock); |
22 | static LIST_HEAD(blkio_list); | 26 | static LIST_HEAD(blkio_list); |
@@ -49,6 +53,32 @@ struct cgroup_subsys blkio_subsys = { | |||
49 | }; | 53 | }; |
50 | EXPORT_SYMBOL_GPL(blkio_subsys); | 54 | EXPORT_SYMBOL_GPL(blkio_subsys); |
51 | 55 | ||
56 | static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg, | ||
57 | struct blkio_policy_node *pn) | ||
58 | { | ||
59 | list_add(&pn->node, &blkcg->policy_list); | ||
60 | } | ||
61 | |||
62 | /* Must be called with blkcg->lock held */ | ||
63 | static inline void blkio_policy_delete_node(struct blkio_policy_node *pn) | ||
64 | { | ||
65 | list_del(&pn->node); | ||
66 | } | ||
67 | |||
68 | /* Must be called with blkcg->lock held */ | ||
69 | static struct blkio_policy_node * | ||
70 | blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev) | ||
71 | { | ||
72 | struct blkio_policy_node *pn; | ||
73 | |||
74 | list_for_each_entry(pn, &blkcg->policy_list, node) { | ||
75 | if (pn->dev == dev) | ||
76 | return pn; | ||
77 | } | ||
78 | |||
79 | return NULL; | ||
80 | } | ||
81 | |||
52 | struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) | 82 | struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) |
53 | { | 83 | { |
54 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | 84 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), |
@@ -56,13 +86,262 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) | |||
56 | } | 86 | } |
57 | EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); | 87 | EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); |
58 | 88 | ||
59 | void blkiocg_update_blkio_group_stats(struct blkio_group *blkg, | 89 | void blkio_group_init(struct blkio_group *blkg) |
60 | unsigned long time, unsigned long sectors) | 90 | { |
91 | spin_lock_init(&blkg->stats_lock); | ||
92 | } | ||
93 | EXPORT_SYMBOL_GPL(blkio_group_init); | ||
94 | |||
95 | /* | ||
96 | * Add to the appropriate stat variable depending on the request type. | ||
97 | * This should be called with the blkg->stats_lock held. | ||
98 | */ | ||
99 | static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction, | ||
100 | bool sync) | ||
101 | { | ||
102 | if (direction) | ||
103 | stat[BLKIO_STAT_WRITE] += add; | ||
104 | else | ||
105 | stat[BLKIO_STAT_READ] += add; | ||
106 | if (sync) | ||
107 | stat[BLKIO_STAT_SYNC] += add; | ||
108 | else | ||
109 | stat[BLKIO_STAT_ASYNC] += add; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Decrements the appropriate stat variable if non-zero depending on the | ||
114 | * request type. Panics on value being zero. | ||
115 | * This should be called with the blkg->stats_lock held. | ||
116 | */ | ||
117 | static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) | ||
118 | { | ||
119 | if (direction) { | ||
120 | BUG_ON(stat[BLKIO_STAT_WRITE] == 0); | ||
121 | stat[BLKIO_STAT_WRITE]--; | ||
122 | } else { | ||
123 | BUG_ON(stat[BLKIO_STAT_READ] == 0); | ||
124 | stat[BLKIO_STAT_READ]--; | ||
125 | } | ||
126 | if (sync) { | ||
127 | BUG_ON(stat[BLKIO_STAT_SYNC] == 0); | ||
128 | stat[BLKIO_STAT_SYNC]--; | ||
129 | } else { | ||
130 | BUG_ON(stat[BLKIO_STAT_ASYNC] == 0); | ||
131 | stat[BLKIO_STAT_ASYNC]--; | ||
132 | } | ||
133 | } | ||
134 | |||
135 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
136 | /* This should be called with the blkg->stats_lock held. */ | ||
137 | static void blkio_set_start_group_wait_time(struct blkio_group *blkg, | ||
138 | struct blkio_group *curr_blkg) | ||
139 | { | ||
140 | if (blkio_blkg_waiting(&blkg->stats)) | ||
141 | return; | ||
142 | if (blkg == curr_blkg) | ||
143 | return; | ||
144 | blkg->stats.start_group_wait_time = sched_clock(); | ||
145 | blkio_mark_blkg_waiting(&blkg->stats); | ||
146 | } | ||
147 | |||
148 | /* This should be called with the blkg->stats_lock held. */ | ||
149 | static void blkio_update_group_wait_time(struct blkio_group_stats *stats) | ||
150 | { | ||
151 | unsigned long long now; | ||
152 | |||
153 | if (!blkio_blkg_waiting(stats)) | ||
154 | return; | ||
155 | |||
156 | now = sched_clock(); | ||
157 | if (time_after64(now, stats->start_group_wait_time)) | ||
158 | stats->group_wait_time += now - stats->start_group_wait_time; | ||
159 | blkio_clear_blkg_waiting(stats); | ||
160 | } | ||
161 | |||
162 | /* This should be called with the blkg->stats_lock held. */ | ||
163 | static void blkio_end_empty_time(struct blkio_group_stats *stats) | ||
164 | { | ||
165 | unsigned long long now; | ||
166 | |||
167 | if (!blkio_blkg_empty(stats)) | ||
168 | return; | ||
169 | |||
170 | now = sched_clock(); | ||
171 | if (time_after64(now, stats->start_empty_time)) | ||
172 | stats->empty_time += now - stats->start_empty_time; | ||
173 | blkio_clear_blkg_empty(stats); | ||
174 | } | ||
175 | |||
176 | void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) | ||
177 | { | ||
178 | unsigned long flags; | ||
179 | |||
180 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
181 | BUG_ON(blkio_blkg_idling(&blkg->stats)); | ||
182 | blkg->stats.start_idle_time = sched_clock(); | ||
183 | blkio_mark_blkg_idling(&blkg->stats); | ||
184 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
185 | } | ||
186 | EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats); | ||
187 | |||
188 | void blkiocg_update_idle_time_stats(struct blkio_group *blkg) | ||
61 | { | 189 | { |
62 | blkg->time += time; | 190 | unsigned long flags; |
63 | blkg->sectors += sectors; | 191 | unsigned long long now; |
192 | struct blkio_group_stats *stats; | ||
193 | |||
194 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
195 | stats = &blkg->stats; | ||
196 | if (blkio_blkg_idling(stats)) { | ||
197 | now = sched_clock(); | ||
198 | if (time_after64(now, stats->start_idle_time)) | ||
199 | stats->idle_time += now - stats->start_idle_time; | ||
200 | blkio_clear_blkg_idling(stats); | ||
201 | } | ||
202 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
203 | } | ||
204 | EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats); | ||
205 | |||
206 | void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg) | ||
207 | { | ||
208 | unsigned long flags; | ||
209 | struct blkio_group_stats *stats; | ||
210 | |||
211 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
212 | stats = &blkg->stats; | ||
213 | stats->avg_queue_size_sum += | ||
214 | stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] + | ||
215 | stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]; | ||
216 | stats->avg_queue_size_samples++; | ||
217 | blkio_update_group_wait_time(stats); | ||
218 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
64 | } | 219 | } |
65 | EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats); | 220 | EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); |
221 | |||
222 | void blkiocg_update_dequeue_stats(struct blkio_group *blkg, | ||
223 | unsigned long dequeue) | ||
224 | { | ||
225 | blkg->stats.dequeue += dequeue; | ||
226 | } | ||
227 | EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); | ||
228 | #else | ||
/* No-op stand-ins used when CONFIG_DEBUG_BLK_CGROUP is not set. */
static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
						   struct blkio_group *curr_blkg)
{
}

static inline void blkio_end_empty_time(struct blkio_group_stats *stats)
{
}
232 | #endif | ||
233 | |||
234 | void blkiocg_update_io_add_stats(struct blkio_group *blkg, | ||
235 | struct blkio_group *curr_blkg, bool direction, | ||
236 | bool sync) | ||
237 | { | ||
238 | unsigned long flags; | ||
239 | |||
240 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
241 | blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction, | ||
242 | sync); | ||
243 | blkio_end_empty_time(&blkg->stats); | ||
244 | blkio_set_start_group_wait_time(blkg, curr_blkg); | ||
245 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
246 | } | ||
247 | EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); | ||
248 | |||
249 | void blkiocg_update_io_remove_stats(struct blkio_group *blkg, | ||
250 | bool direction, bool sync) | ||
251 | { | ||
252 | unsigned long flags; | ||
253 | |||
254 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
255 | blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], | ||
256 | direction, sync); | ||
257 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
258 | } | ||
259 | EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); | ||
260 | |||
261 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time) | ||
262 | { | ||
263 | unsigned long flags; | ||
264 | |||
265 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
266 | blkg->stats.time += time; | ||
267 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
268 | } | ||
269 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); | ||
270 | |||
271 | void blkiocg_set_start_empty_time(struct blkio_group *blkg, bool ignore) | ||
272 | { | ||
273 | unsigned long flags; | ||
274 | struct blkio_group_stats *stats; | ||
275 | |||
276 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
277 | stats = &blkg->stats; | ||
278 | |||
279 | if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] || | ||
280 | stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) { | ||
281 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
282 | return; | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * If ignore is set, we do not panic on the empty flag being set | ||
287 | * already. This is to avoid cases where there are superfluous timeslice | ||
288 | * complete events (for eg., forced_dispatch in CFQ) when no IOs are | ||
289 | * served which could result in triggering the empty check incorrectly. | ||
290 | */ | ||
291 | BUG_ON(!ignore && blkio_blkg_empty(stats)); | ||
292 | stats->start_empty_time = sched_clock(); | ||
293 | blkio_mark_blkg_empty(stats); | ||
294 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
295 | } | ||
296 | EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time); | ||
297 | |||
298 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, | ||
299 | uint64_t bytes, bool direction, bool sync) | ||
300 | { | ||
301 | struct blkio_group_stats *stats; | ||
302 | unsigned long flags; | ||
303 | |||
304 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
305 | stats = &blkg->stats; | ||
306 | stats->sectors += bytes >> 9; | ||
307 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction, | ||
308 | sync); | ||
309 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes, | ||
310 | direction, sync); | ||
311 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
312 | } | ||
313 | EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); | ||
314 | |||
315 | void blkiocg_update_completion_stats(struct blkio_group *blkg, | ||
316 | uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) | ||
317 | { | ||
318 | struct blkio_group_stats *stats; | ||
319 | unsigned long flags; | ||
320 | unsigned long long now = sched_clock(); | ||
321 | |||
322 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
323 | stats = &blkg->stats; | ||
324 | if (time_after64(now, io_start_time)) | ||
325 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME], | ||
326 | now - io_start_time, direction, sync); | ||
327 | if (time_after64(io_start_time, start_time)) | ||
328 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME], | ||
329 | io_start_time - start_time, direction, sync); | ||
330 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
331 | } | ||
332 | EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); | ||
333 | |||
334 | void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, | ||
335 | bool sync) | ||
336 | { | ||
337 | unsigned long flags; | ||
338 | |||
339 | spin_lock_irqsave(&blkg->stats_lock, flags); | ||
340 | blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction, | ||
341 | sync); | ||
342 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | ||
343 | } | ||
344 | EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); | ||
66 | 345 | ||
67 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | 346 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, |
68 | struct blkio_group *blkg, void *key, dev_t dev) | 347 | struct blkio_group *blkg, void *key, dev_t dev) |
@@ -154,6 +433,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
154 | struct blkio_group *blkg; | 433 | struct blkio_group *blkg; |
155 | struct hlist_node *n; | 434 | struct hlist_node *n; |
156 | struct blkio_policy_type *blkiop; | 435 | struct blkio_policy_type *blkiop; |
436 | struct blkio_policy_node *pn; | ||
157 | 437 | ||
158 | if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) | 438 | if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) |
159 | return -EINVAL; | 439 | return -EINVAL; |
@@ -162,7 +442,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
162 | spin_lock(&blkio_list_lock); | 442 | spin_lock(&blkio_list_lock); |
163 | spin_lock_irq(&blkcg->lock); | 443 | spin_lock_irq(&blkcg->lock); |
164 | blkcg->weight = (unsigned int)val; | 444 | blkcg->weight = (unsigned int)val; |
445 | |||
165 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | 446 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { |
447 | pn = blkio_policy_search_node(blkcg, blkg->dev); | ||
448 | |||
449 | if (pn) | ||
450 | continue; | ||
451 | |||
166 | list_for_each_entry(blkiop, &blkio_list, list) | 452 | list_for_each_entry(blkiop, &blkio_list, list) |
167 | blkiop->ops.blkio_update_group_weight_fn(blkg, | 453 | blkiop->ops.blkio_update_group_weight_fn(blkg, |
168 | blkcg->weight); | 454 | blkcg->weight); |
@@ -172,13 +458,154 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
172 | return 0; | 458 | return 0; |
173 | } | 459 | } |
174 | 460 | ||
175 | #define SHOW_FUNCTION_PER_GROUP(__VAR) \ | 461 | static int |
462 | blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) | ||
463 | { | ||
464 | struct blkio_cgroup *blkcg; | ||
465 | struct blkio_group *blkg; | ||
466 | struct blkio_group_stats *stats; | ||
467 | struct hlist_node *n; | ||
468 | uint64_t queued[BLKIO_STAT_TOTAL]; | ||
469 | int i; | ||
470 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
471 | bool idling, waiting, empty; | ||
472 | unsigned long long now = sched_clock(); | ||
473 | #endif | ||
474 | |||
475 | blkcg = cgroup_to_blkio_cgroup(cgroup); | ||
476 | spin_lock_irq(&blkcg->lock); | ||
477 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | ||
478 | spin_lock(&blkg->stats_lock); | ||
479 | stats = &blkg->stats; | ||
480 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
481 | idling = blkio_blkg_idling(stats); | ||
482 | waiting = blkio_blkg_waiting(stats); | ||
483 | empty = blkio_blkg_empty(stats); | ||
484 | #endif | ||
485 | for (i = 0; i < BLKIO_STAT_TOTAL; i++) | ||
486 | queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i]; | ||
487 | memset(stats, 0, sizeof(struct blkio_group_stats)); | ||
488 | for (i = 0; i < BLKIO_STAT_TOTAL; i++) | ||
489 | stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i]; | ||
490 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
491 | if (idling) { | ||
492 | blkio_mark_blkg_idling(stats); | ||
493 | stats->start_idle_time = now; | ||
494 | } | ||
495 | if (waiting) { | ||
496 | blkio_mark_blkg_waiting(stats); | ||
497 | stats->start_group_wait_time = now; | ||
498 | } | ||
499 | if (empty) { | ||
500 | blkio_mark_blkg_empty(stats); | ||
501 | stats->start_empty_time = now; | ||
502 | } | ||
503 | #endif | ||
504 | spin_unlock(&blkg->stats_lock); | ||
505 | } | ||
506 | spin_unlock_irq(&blkcg->lock); | ||
507 | return 0; | ||
508 | } | ||
509 | |||
510 | static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str, | ||
511 | int chars_left, bool diskname_only) | ||
512 | { | ||
513 | snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev)); | ||
514 | chars_left -= strlen(str); | ||
515 | if (chars_left <= 0) { | ||
516 | printk(KERN_WARNING | ||
517 | "Possibly incorrect cgroup stat display format"); | ||
518 | return; | ||
519 | } | ||
520 | if (diskname_only) | ||
521 | return; | ||
522 | switch (type) { | ||
523 | case BLKIO_STAT_READ: | ||
524 | strlcat(str, " Read", chars_left); | ||
525 | break; | ||
526 | case BLKIO_STAT_WRITE: | ||
527 | strlcat(str, " Write", chars_left); | ||
528 | break; | ||
529 | case BLKIO_STAT_SYNC: | ||
530 | strlcat(str, " Sync", chars_left); | ||
531 | break; | ||
532 | case BLKIO_STAT_ASYNC: | ||
533 | strlcat(str, " Async", chars_left); | ||
534 | break; | ||
535 | case BLKIO_STAT_TOTAL: | ||
536 | strlcat(str, " Total", chars_left); | ||
537 | break; | ||
538 | default: | ||
539 | strlcat(str, " Invalid", chars_left); | ||
540 | } | ||
541 | } | ||
542 | |||
543 | static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val, | ||
544 | struct cgroup_map_cb *cb, dev_t dev) | ||
545 | { | ||
546 | blkio_get_key_name(0, dev, str, chars_left, true); | ||
547 | cb->fill(cb, str, val); | ||
548 | return val; | ||
549 | } | ||
550 | |||
551 | /* This should be called with blkg->stats_lock held */ | ||
552 | static uint64_t blkio_get_stat(struct blkio_group *blkg, | ||
553 | struct cgroup_map_cb *cb, dev_t dev, enum stat_type type) | ||
554 | { | ||
555 | uint64_t disk_total; | ||
556 | char key_str[MAX_KEY_LEN]; | ||
557 | enum stat_sub_type sub_type; | ||
558 | |||
559 | if (type == BLKIO_STAT_TIME) | ||
560 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
561 | blkg->stats.time, cb, dev); | ||
562 | if (type == BLKIO_STAT_SECTORS) | ||
563 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
564 | blkg->stats.sectors, cb, dev); | ||
565 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
566 | if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { | ||
567 | uint64_t sum = blkg->stats.avg_queue_size_sum; | ||
568 | uint64_t samples = blkg->stats.avg_queue_size_samples; | ||
569 | if (samples) | ||
570 | do_div(sum, samples); | ||
571 | else | ||
572 | sum = 0; | ||
573 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev); | ||
574 | } | ||
575 | if (type == BLKIO_STAT_GROUP_WAIT_TIME) | ||
576 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
577 | blkg->stats.group_wait_time, cb, dev); | ||
578 | if (type == BLKIO_STAT_IDLE_TIME) | ||
579 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
580 | blkg->stats.idle_time, cb, dev); | ||
581 | if (type == BLKIO_STAT_EMPTY_TIME) | ||
582 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
583 | blkg->stats.empty_time, cb, dev); | ||
584 | if (type == BLKIO_STAT_DEQUEUE) | ||
585 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
586 | blkg->stats.dequeue, cb, dev); | ||
587 | #endif | ||
588 | |||
589 | for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; | ||
590 | sub_type++) { | ||
591 | blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false); | ||
592 | cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]); | ||
593 | } | ||
594 | disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] + | ||
595 | blkg->stats.stat_arr[type][BLKIO_STAT_WRITE]; | ||
596 | blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false); | ||
597 | cb->fill(cb, key_str, disk_total); | ||
598 | return disk_total; | ||
599 | } | ||
600 | |||
601 | #define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total) \ | ||
176 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | 602 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ |
177 | struct cftype *cftype, struct seq_file *m) \ | 603 | struct cftype *cftype, struct cgroup_map_cb *cb) \ |
178 | { \ | 604 | { \ |
179 | struct blkio_cgroup *blkcg; \ | 605 | struct blkio_cgroup *blkcg; \ |
180 | struct blkio_group *blkg; \ | 606 | struct blkio_group *blkg; \ |
181 | struct hlist_node *n; \ | 607 | struct hlist_node *n; \ |
608 | uint64_t cgroup_total = 0; \ | ||
182 | \ | 609 | \ |
183 | if (!cgroup_lock_live_group(cgroup)) \ | 610 | if (!cgroup_lock_live_group(cgroup)) \ |
184 | return -ENODEV; \ | 611 | return -ENODEV; \ |
@@ -186,50 +613,295 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | |||
186 | blkcg = cgroup_to_blkio_cgroup(cgroup); \ | 613 | blkcg = cgroup_to_blkio_cgroup(cgroup); \ |
187 | rcu_read_lock(); \ | 614 | rcu_read_lock(); \ |
188 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ | 615 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ |
189 | if (blkg->dev) \ | 616 | if (blkg->dev) { \ |
190 | seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev), \ | 617 | spin_lock_irq(&blkg->stats_lock); \ |
191 | MINOR(blkg->dev), blkg->__VAR); \ | 618 | cgroup_total += blkio_get_stat(blkg, cb, \ |
619 | blkg->dev, type); \ | ||
620 | spin_unlock_irq(&blkg->stats_lock); \ | ||
621 | } \ | ||
192 | } \ | 622 | } \ |
623 | if (show_total) \ | ||
624 | cb->fill(cb, "Total", cgroup_total); \ | ||
193 | rcu_read_unlock(); \ | 625 | rcu_read_unlock(); \ |
194 | cgroup_unlock(); \ | 626 | cgroup_unlock(); \ |
195 | return 0; \ | 627 | return 0; \ |
196 | } | 628 | } |
197 | 629 | ||
198 | SHOW_FUNCTION_PER_GROUP(time); | 630 | SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0); |
199 | SHOW_FUNCTION_PER_GROUP(sectors); | 631 | SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0); |
632 | SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1); | ||
633 | SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1); | ||
634 | SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1); | ||
635 | SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1); | ||
636 | SHOW_FUNCTION_PER_GROUP(io_merged, BLKIO_STAT_MERGED, 1); | ||
637 | SHOW_FUNCTION_PER_GROUP(io_queued, BLKIO_STAT_QUEUED, 1); | ||
200 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 638 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
201 | SHOW_FUNCTION_PER_GROUP(dequeue); | 639 | SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0); |
640 | SHOW_FUNCTION_PER_GROUP(avg_queue_size, BLKIO_STAT_AVG_QUEUE_SIZE, 0); | ||
641 | SHOW_FUNCTION_PER_GROUP(group_wait_time, BLKIO_STAT_GROUP_WAIT_TIME, 0); | ||
642 | SHOW_FUNCTION_PER_GROUP(idle_time, BLKIO_STAT_IDLE_TIME, 0); | ||
643 | SHOW_FUNCTION_PER_GROUP(empty_time, BLKIO_STAT_EMPTY_TIME, 0); | ||
202 | #endif | 644 | #endif |
203 | #undef SHOW_FUNCTION_PER_GROUP | 645 | #undef SHOW_FUNCTION_PER_GROUP |
204 | 646 | ||
205 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 647 | static int blkio_check_dev_num(dev_t dev) |
206 | void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg, | ||
207 | unsigned long dequeue) | ||
208 | { | 648 | { |
209 | blkg->dequeue += dequeue; | 649 | int part = 0; |
650 | struct gendisk *disk; | ||
651 | |||
652 | disk = get_gendisk(dev, &part); | ||
653 | if (!disk || part) | ||
654 | return -ENODEV; | ||
655 | |||
656 | return 0; | ||
657 | } | ||
658 | |||
659 | static int blkio_policy_parse_and_set(char *buf, | ||
660 | struct blkio_policy_node *newpn) | ||
661 | { | ||
662 | char *s[4], *p, *major_s = NULL, *minor_s = NULL; | ||
663 | int ret; | ||
664 | unsigned long major, minor, temp; | ||
665 | int i = 0; | ||
666 | dev_t dev; | ||
667 | |||
668 | memset(s, 0, sizeof(s)); | ||
669 | |||
670 | while ((p = strsep(&buf, " ")) != NULL) { | ||
671 | if (!*p) | ||
672 | continue; | ||
673 | |||
674 | s[i++] = p; | ||
675 | |||
676 | /* Prevent from inputing too many things */ | ||
677 | if (i == 3) | ||
678 | break; | ||
679 | } | ||
680 | |||
681 | if (i != 2) | ||
682 | return -EINVAL; | ||
683 | |||
684 | p = strsep(&s[0], ":"); | ||
685 | if (p != NULL) | ||
686 | major_s = p; | ||
687 | else | ||
688 | return -EINVAL; | ||
689 | |||
690 | minor_s = s[0]; | ||
691 | if (!minor_s) | ||
692 | return -EINVAL; | ||
693 | |||
694 | ret = strict_strtoul(major_s, 10, &major); | ||
695 | if (ret) | ||
696 | return -EINVAL; | ||
697 | |||
698 | ret = strict_strtoul(minor_s, 10, &minor); | ||
699 | if (ret) | ||
700 | return -EINVAL; | ||
701 | |||
702 | dev = MKDEV(major, minor); | ||
703 | |||
704 | ret = blkio_check_dev_num(dev); | ||
705 | if (ret) | ||
706 | return ret; | ||
707 | |||
708 | newpn->dev = dev; | ||
709 | |||
710 | if (s[1] == NULL) | ||
711 | return -EINVAL; | ||
712 | |||
713 | ret = strict_strtoul(s[1], 10, &temp); | ||
714 | if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) || | ||
715 | temp > BLKIO_WEIGHT_MAX) | ||
716 | return -EINVAL; | ||
717 | |||
718 | newpn->weight = temp; | ||
719 | |||
720 | return 0; | ||
721 | } | ||
722 | |||
723 | unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, | ||
724 | dev_t dev) | ||
725 | { | ||
726 | struct blkio_policy_node *pn; | ||
727 | |||
728 | pn = blkio_policy_search_node(blkcg, dev); | ||
729 | if (pn) | ||
730 | return pn->weight; | ||
731 | else | ||
732 | return blkcg->weight; | ||
733 | } | ||
734 | EXPORT_SYMBOL_GPL(blkcg_get_weight); | ||
735 | |||
736 | |||
737 | static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft, | ||
738 | const char *buffer) | ||
739 | { | ||
740 | int ret = 0; | ||
741 | char *buf; | ||
742 | struct blkio_policy_node *newpn, *pn; | ||
743 | struct blkio_cgroup *blkcg; | ||
744 | struct blkio_group *blkg; | ||
745 | int keep_newpn = 0; | ||
746 | struct hlist_node *n; | ||
747 | struct blkio_policy_type *blkiop; | ||
748 | |||
749 | buf = kstrdup(buffer, GFP_KERNEL); | ||
750 | if (!buf) | ||
751 | return -ENOMEM; | ||
752 | |||
753 | newpn = kzalloc(sizeof(*newpn), GFP_KERNEL); | ||
754 | if (!newpn) { | ||
755 | ret = -ENOMEM; | ||
756 | goto free_buf; | ||
757 | } | ||
758 | |||
759 | ret = blkio_policy_parse_and_set(buf, newpn); | ||
760 | if (ret) | ||
761 | goto free_newpn; | ||
762 | |||
763 | blkcg = cgroup_to_blkio_cgroup(cgrp); | ||
764 | |||
765 | spin_lock_irq(&blkcg->lock); | ||
766 | |||
767 | pn = blkio_policy_search_node(blkcg, newpn->dev); | ||
768 | if (!pn) { | ||
769 | if (newpn->weight != 0) { | ||
770 | blkio_policy_insert_node(blkcg, newpn); | ||
771 | keep_newpn = 1; | ||
772 | } | ||
773 | spin_unlock_irq(&blkcg->lock); | ||
774 | goto update_io_group; | ||
775 | } | ||
776 | |||
777 | if (newpn->weight == 0) { | ||
778 | /* weight == 0 means deleteing a specific weight */ | ||
779 | blkio_policy_delete_node(pn); | ||
780 | spin_unlock_irq(&blkcg->lock); | ||
781 | goto update_io_group; | ||
782 | } | ||
783 | spin_unlock_irq(&blkcg->lock); | ||
784 | |||
785 | pn->weight = newpn->weight; | ||
786 | |||
787 | update_io_group: | ||
788 | /* update weight for each cfqg */ | ||
789 | spin_lock(&blkio_list_lock); | ||
790 | spin_lock_irq(&blkcg->lock); | ||
791 | |||
792 | hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | ||
793 | if (newpn->dev == blkg->dev) { | ||
794 | list_for_each_entry(blkiop, &blkio_list, list) | ||
795 | blkiop->ops.blkio_update_group_weight_fn(blkg, | ||
796 | newpn->weight ? | ||
797 | newpn->weight : | ||
798 | blkcg->weight); | ||
799 | } | ||
800 | } | ||
801 | |||
802 | spin_unlock_irq(&blkcg->lock); | ||
803 | spin_unlock(&blkio_list_lock); | ||
804 | |||
805 | free_newpn: | ||
806 | if (!keep_newpn) | ||
807 | kfree(newpn); | ||
808 | free_buf: | ||
809 | kfree(buf); | ||
810 | return ret; | ||
811 | } | ||
812 | |||
813 | static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft, | ||
814 | struct seq_file *m) | ||
815 | { | ||
816 | struct blkio_cgroup *blkcg; | ||
817 | struct blkio_policy_node *pn; | ||
818 | |||
819 | seq_printf(m, "dev\tweight\n"); | ||
820 | |||
821 | blkcg = cgroup_to_blkio_cgroup(cgrp); | ||
822 | if (list_empty(&blkcg->policy_list)) | ||
823 | goto out; | ||
824 | |||
825 | spin_lock_irq(&blkcg->lock); | ||
826 | list_for_each_entry(pn, &blkcg->policy_list, node) { | ||
827 | seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), | ||
828 | MINOR(pn->dev), pn->weight); | ||
829 | } | ||
830 | spin_unlock_irq(&blkcg->lock); | ||
831 | |||
832 | out: | ||
833 | return 0; | ||
210 | } | 834 | } |
211 | EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats); | ||
212 | #endif | ||
213 | 835 | ||
214 | struct cftype blkio_files[] = { | 836 | struct cftype blkio_files[] = { |
215 | { | 837 | { |
838 | .name = "weight_device", | ||
839 | .read_seq_string = blkiocg_weight_device_read, | ||
840 | .write_string = blkiocg_weight_device_write, | ||
841 | .max_write_len = 256, | ||
842 | }, | ||
843 | { | ||
216 | .name = "weight", | 844 | .name = "weight", |
217 | .read_u64 = blkiocg_weight_read, | 845 | .read_u64 = blkiocg_weight_read, |
218 | .write_u64 = blkiocg_weight_write, | 846 | .write_u64 = blkiocg_weight_write, |
219 | }, | 847 | }, |
220 | { | 848 | { |
221 | .name = "time", | 849 | .name = "time", |
222 | .read_seq_string = blkiocg_time_read, | 850 | .read_map = blkiocg_time_read, |
223 | }, | 851 | }, |
224 | { | 852 | { |
225 | .name = "sectors", | 853 | .name = "sectors", |
226 | .read_seq_string = blkiocg_sectors_read, | 854 | .read_map = blkiocg_sectors_read, |
855 | }, | ||
856 | { | ||
857 | .name = "io_service_bytes", | ||
858 | .read_map = blkiocg_io_service_bytes_read, | ||
859 | }, | ||
860 | { | ||
861 | .name = "io_serviced", | ||
862 | .read_map = blkiocg_io_serviced_read, | ||
863 | }, | ||
864 | { | ||
865 | .name = "io_service_time", | ||
866 | .read_map = blkiocg_io_service_time_read, | ||
867 | }, | ||
868 | { | ||
869 | .name = "io_wait_time", | ||
870 | .read_map = blkiocg_io_wait_time_read, | ||
871 | }, | ||
872 | { | ||
873 | .name = "io_merged", | ||
874 | .read_map = blkiocg_io_merged_read, | ||
875 | }, | ||
876 | { | ||
877 | .name = "io_queued", | ||
878 | .read_map = blkiocg_io_queued_read, | ||
879 | }, | ||
880 | { | ||
881 | .name = "reset_stats", | ||
882 | .write_u64 = blkiocg_reset_stats, | ||
227 | }, | 883 | }, |
228 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 884 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
229 | { | 885 | { |
886 | .name = "avg_queue_size", | ||
887 | .read_map = blkiocg_avg_queue_size_read, | ||
888 | }, | ||
889 | { | ||
890 | .name = "group_wait_time", | ||
891 | .read_map = blkiocg_group_wait_time_read, | ||
892 | }, | ||
893 | { | ||
894 | .name = "idle_time", | ||
895 | .read_map = blkiocg_idle_time_read, | ||
896 | }, | ||
897 | { | ||
898 | .name = "empty_time", | ||
899 | .read_map = blkiocg_empty_time_read, | ||
900 | }, | ||
901 | { | ||
230 | .name = "dequeue", | 902 | .name = "dequeue", |
231 | .read_seq_string = blkiocg_dequeue_read, | 903 | .read_map = blkiocg_dequeue_read, |
232 | }, | 904 | }, |
233 | #endif | 905 | #endif |
234 | }; | 906 | }; |
235 | 907 | ||
@@ -246,6 +918,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) | |||
246 | struct blkio_group *blkg; | 918 | struct blkio_group *blkg; |
247 | void *key; | 919 | void *key; |
248 | struct blkio_policy_type *blkiop; | 920 | struct blkio_policy_type *blkiop; |
921 | struct blkio_policy_node *pn, *pntmp; | ||
249 | 922 | ||
250 | rcu_read_lock(); | 923 | rcu_read_lock(); |
251 | remove_entry: | 924 | remove_entry: |
@@ -276,7 +949,12 @@ remove_entry: | |||
276 | blkiop->ops.blkio_unlink_group_fn(key, blkg); | 949 | blkiop->ops.blkio_unlink_group_fn(key, blkg); |
277 | spin_unlock(&blkio_list_lock); | 950 | spin_unlock(&blkio_list_lock); |
278 | goto remove_entry; | 951 | goto remove_entry; |
952 | |||
279 | done: | 953 | done: |
954 | list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) { | ||
955 | blkio_policy_delete_node(pn); | ||
956 | kfree(pn); | ||
957 | } | ||
280 | free_css_id(&blkio_subsys, &blkcg->css); | 958 | free_css_id(&blkio_subsys, &blkcg->css); |
281 | rcu_read_unlock(); | 959 | rcu_read_unlock(); |
282 | if (blkcg != &blkio_root_cgroup) | 960 | if (blkcg != &blkio_root_cgroup) |
@@ -307,6 +985,7 @@ done: | |||
307 | spin_lock_init(&blkcg->lock); | 985 | spin_lock_init(&blkcg->lock); |
308 | INIT_HLIST_HEAD(&blkcg->blkg_list); | 986 | INIT_HLIST_HEAD(&blkcg->blkg_list); |
309 | 987 | ||
988 | INIT_LIST_HEAD(&blkcg->policy_list); | ||
310 | return &blkcg->css; | 989 | return &blkcg->css; |
311 | } | 990 | } |
312 | 991 | ||