 Documentation/cgroups/blkio-controller.txt |  48
 block/blk-cgroup.c                         | 190
 block/blk-cgroup.h                         |  64
 block/cfq-iosched.c                        |   8
 include/linux/blkdev.h                     |  18
 5 files changed, 198 insertions(+), 130 deletions(-)
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 630879cd9a42..ed04fe9cce1a 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -77,7 +77,6 @@ Details of cgroup files
 =======================
 - blkio.weight
 	- Specifies per cgroup weight.
-
 	  Currently allowed range of weights is from 100 to 1000.
 
 - blkio.time
@@ -92,6 +91,49 @@ Details of cgroup files
 	  third field specifies the number of sectors transferred by the
 	  group to/from the device.
 
+- blkio.io_service_bytes
+	- Number of bytes transferred to/from the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of bytes.
+
+- blkio.io_serviced
+	- Number of IOs completed to/from the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of IOs.
+
+- blkio.io_service_time
+	- Total amount of time between request dispatch and request completion
+	  for the IOs done by this cgroup. This is in nanoseconds to make it
+	  meaningful for flash devices too. For devices with queue depth of 1,
+	  this time represents the actual service time. When queue_depth > 1,
+	  that is no longer true as requests may be served out of order. This
+	  may cause the service time for a given IO to include the service time
+	  of multiple IOs when served out of order which may result in total
+	  io_service_time > actual time elapsed. This time is further divided by
+	  the type of operation - read or write, sync or async. First two fields
+	  specify the major and minor number of the device, third field
+	  specifies the operation type and the fourth field specifies the
+	  io_service_time in ns.
+
+- blkio.io_wait_time
+	- Total amount of time the IOs for this cgroup spent waiting in the
+	  scheduler queues for service. This can be greater than the total time
+	  elapsed since it is cumulative io_wait_time for all IOs. It is not a
+	  measure of total time the cgroup spent waiting but rather a measure of
+	  the wait_time for its individual IOs. For devices with queue_depth > 1
+	  this metric does not include the time spent waiting for service once
+	  the IO is dispatched to the device but till it actually gets serviced
+	  (there might be a time lag here due to re-ordering of requests by the
+	  device). This is in nanoseconds to make it meaningful for flash
+	  devices too. This time is further divided by the type of operation -
+	  read or write, sync or async. First two fields specify the major and
+	  minor number of the device, third field specifies the operation type
+	  and the fourth field specifies the io_wait_time in ns.
+
 - blkio.dequeue
 	- Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This
 	  gives the statistics about how many times a group was dequeued
@@ -99,6 +141,10 @@ Details of cgroup files
 	  and minor number of the device and third field specifies the number
 	  of times a group was dequeued from a particular device.
 
+- blkio.reset_stats
+	- Writing an int to this file will result in resetting all the stats
+	  for that cgroup.
+
 CFQ sysfs tunable
 =================
 /sys/block/<disk>/queue/iosched/group_isolation
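
Note: the four new stat files above share one output format: each line is
"<major>:<minor> <operation> <value>", with Read/Write, Sync/Async and Total
rows per device. Illustrative output for blkio.io_serviced on one disk (the
device number and counts are made-up values):

	8:16 Read 120
	8:16 Write 40
	8:16 Sync 135
	8:16 Async 25
	8:16 Total 160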
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 9af7257f429c..6797df508821 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -18,6 +18,8 @@
 #include <linux/blkdev.h>
 #include "blk-cgroup.h"
 
+#define MAX_KEY_LEN 100
+
 static DEFINE_SPINLOCK(blkio_list_lock);
 static LIST_HEAD(blkio_list);
 
@@ -56,24 +58,27 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 }
 EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
 
+void blkio_group_init(struct blkio_group *blkg)
+{
+	spin_lock_init(&blkg->stats_lock);
+}
+EXPORT_SYMBOL_GPL(blkio_group_init);
+
 /*
  * Add to the appropriate stat variable depending on the request type.
  * This should be called with the blkg->stats_lock held.
  */
-void io_add_stat(uint64_t *stat, uint64_t add, unsigned int flags)
+static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
+				bool sync)
 {
-	if (flags & REQ_RW)
-		stat[IO_WRITE] += add;
+	if (direction)
+		stat[BLKIO_STAT_WRITE] += add;
 	else
-		stat[IO_READ] += add;
-	/*
-	 * Everywhere in the block layer, an IO is treated as sync if it is a
-	 * read or a SYNC write. We follow the same norm.
-	 */
-	if (!(flags & REQ_RW) || flags & REQ_RW_SYNC)
-		stat[IO_SYNC] += add;
+		stat[BLKIO_STAT_READ] += add;
+	if (sync)
+		stat[BLKIO_STAT_SYNC] += add;
 	else
-		stat[IO_ASYNC] += add;
+		stat[BLKIO_STAT_ASYNC] += add;
 }
 
 void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
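
The deleted comment's convention (an IO counts as sync if it is a read or a
sync write) does not disappear; it is now the callers' job, and rq_is_sync()
already encodes it. A caller holding a struct request derives the two flags
the way the cfq-iosched.c hunks below do:

	bool direction = rq_data_dir(rq);	/* 0 for READ, 1 for WRITE */
	bool sync = rq_is_sync(rq);		/* reads and sync writes */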
@@ -86,23 +91,25 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
 
-void blkiocg_update_request_dispatch_stats(struct blkio_group *blkg,
-						struct request *rq)
+void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
+			uint64_t bytes, bool direction, bool sync)
 {
 	struct blkio_group_stats *stats;
 	unsigned long flags;
 
 	spin_lock_irqsave(&blkg->stats_lock, flags);
 	stats = &blkg->stats;
-	stats->sectors += blk_rq_sectors(rq);
-	io_add_stat(stats->io_serviced, 1, rq->cmd_flags);
-	io_add_stat(stats->io_service_bytes, blk_rq_sectors(rq) << 9,
-			rq->cmd_flags);
+	stats->sectors += bytes >> 9;
+	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
+			sync);
+	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
+			direction, sync);
 	spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
+EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
 
-void blkiocg_update_request_completion_stats(struct blkio_group *blkg,
-						struct request *rq)
+void blkiocg_update_completion_stats(struct blkio_group *blkg,
+	uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
 {
 	struct blkio_group_stats *stats;
 	unsigned long flags;
@@ -110,16 +117,15 @@ void blkiocg_update_request_completion_stats(struct blkio_group *blkg,
 
 	spin_lock_irqsave(&blkg->stats_lock, flags);
 	stats = &blkg->stats;
-	if (time_after64(now, rq->io_start_time_ns))
-		io_add_stat(stats->io_service_time, now - rq->io_start_time_ns,
-				rq->cmd_flags);
-	if (time_after64(rq->io_start_time_ns, rq->start_time_ns))
-		io_add_stat(stats->io_wait_time,
-				rq->io_start_time_ns - rq->start_time_ns,
-				rq->cmd_flags);
+	if (time_after64(now, io_start_time))
+		blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
+				now - io_start_time, direction, sync);
+	if (time_after64(io_start_time, start_time))
+		blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
+				io_start_time - start_time, direction, sync);
 	spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
-EXPORT_SYMBOL_GPL(blkiocg_update_request_completion_stats);
+EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
 
 void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 			struct blkio_group *blkg, void *key, dev_t dev)
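
The two time_after64() checks split a request's life at the dispatch point;
a sketch of the timeline the deltas above assume:

	/*
	 *  start_time            io_start_time                   now
	 *      |----- wait_time ------|------ service_time --------|
	 *      (queued in the scheduler) (dispatch to completion)
	 */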
@@ -230,7 +236,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 }
 
 static int
-blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
+blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 {
 	struct blkio_cgroup *blkcg;
 	struct blkio_group *blkg;
@@ -249,29 +255,32 @@ blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	return 0;
 }
 
-void get_key_name(int type, char *disk_id, char *str, int chars_left)
+static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str,
+				int chars_left, bool diskname_only)
 {
-	strlcpy(str, disk_id, chars_left);
+	snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev));
 	chars_left -= strlen(str);
 	if (chars_left <= 0) {
 		printk(KERN_WARNING
 			"Possibly incorrect cgroup stat display format");
 		return;
 	}
+	if (diskname_only)
+		return;
 	switch (type) {
-	case IO_READ:
+	case BLKIO_STAT_READ:
 		strlcat(str, " Read", chars_left);
 		break;
-	case IO_WRITE:
+	case BLKIO_STAT_WRITE:
 		strlcat(str, " Write", chars_left);
 		break;
-	case IO_SYNC:
+	case BLKIO_STAT_SYNC:
 		strlcat(str, " Sync", chars_left);
 		break;
-	case IO_ASYNC:
+	case BLKIO_STAT_ASYNC:
 		strlcat(str, " Async", chars_left);
 		break;
-	case IO_TYPE_MAX:
+	case BLKIO_STAT_TOTAL:
 		strlcat(str, " Total", chars_left);
 		break;
 	default:
@@ -279,63 +288,47 @@ void get_key_name(int type, char *disk_id, char *str, int chars_left)
 	}
 }
 
-typedef uint64_t (get_var) (struct blkio_group *, int);
+static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
+				struct cgroup_map_cb *cb, dev_t dev)
+{
+	blkio_get_key_name(0, dev, str, chars_left, true);
+	cb->fill(cb, str, val);
+	return val;
+}
 
-#define MAX_KEY_LEN 100
-uint64_t get_typed_stat(struct blkio_group *blkg, struct cgroup_map_cb *cb,
-		get_var *getvar, char *disk_id)
+/* This should be called with blkg->stats_lock held */
+static uint64_t blkio_get_stat(struct blkio_group *blkg,
+		struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
 {
 	uint64_t disk_total;
 	char key_str[MAX_KEY_LEN];
-	int type;
+	enum stat_sub_type sub_type;
+
+	if (type == BLKIO_STAT_TIME)
+		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+					blkg->stats.time, cb, dev);
+	if (type == BLKIO_STAT_SECTORS)
+		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+					blkg->stats.sectors, cb, dev);
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	if (type == BLKIO_STAT_DEQUEUE)
+		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+					blkg->stats.dequeue, cb, dev);
+#endif
 
-	for (type = 0; type < IO_TYPE_MAX; type++) {
-		get_key_name(type, disk_id, key_str, MAX_KEY_LEN);
-		cb->fill(cb, key_str, getvar(blkg, type));
+	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
+			sub_type++) {
+		blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
+		cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]);
 	}
-	disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE);
-	get_key_name(IO_TYPE_MAX, disk_id, key_str, MAX_KEY_LEN);
+	disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] +
+			blkg->stats.stat_arr[type][BLKIO_STAT_WRITE];
+	blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
 	cb->fill(cb, key_str, disk_total);
 	return disk_total;
 }
 
-uint64_t get_stat(struct blkio_group *blkg, struct cgroup_map_cb *cb,
-		get_var *getvar, char *disk_id)
-{
-	uint64_t var = getvar(blkg, 0);
-	cb->fill(cb, disk_id, var);
-	return var;
-}
-
-#define GET_STAT_INDEXED(__VAR)					\
-uint64_t get_##__VAR##_stat(struct blkio_group *blkg, int type)	\
-{								\
-	return blkg->stats.__VAR[type];				\
-}								\
-
-GET_STAT_INDEXED(io_service_bytes);
-GET_STAT_INDEXED(io_serviced);
-GET_STAT_INDEXED(io_service_time);
-GET_STAT_INDEXED(io_wait_time);
-#undef GET_STAT_INDEXED
-
-#define GET_STAT(__VAR, __CONV)					\
-uint64_t get_##__VAR##_stat(struct blkio_group *blkg, int dummy)	\
-{								\
-	uint64_t data = blkg->stats.__VAR;			\
-	if (__CONV)						\
-		data = (uint64_t)jiffies_to_msecs(data) * NSEC_PER_MSEC;\
-	return data;						\
-}
-
-GET_STAT(time, 1);
-GET_STAT(sectors, 0);
-#ifdef CONFIG_DEBUG_BLK_CGROUP
-GET_STAT(dequeue, 0);
-#endif
-#undef GET_STAT
-
-#define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total)	\
+#define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total)		\
 static int blkiocg_##__VAR##_read(struct cgroup *cgroup,		\
 		struct cftype *cftype, struct cgroup_map_cb *cb)	\
 {									\
@@ -343,7 +336,6 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
 	struct blkio_group *blkg;					\
 	struct hlist_node *n;						\
 	uint64_t cgroup_total = 0;					\
-	char disk_id[10];						\
 									\
 	if (!cgroup_lock_live_group(cgroup))				\
 		return -ENODEV;						\
@@ -353,10 +345,8 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
 	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
 		if (blkg->dev) {					\
 			spin_lock_irq(&blkg->stats_lock);		\
-			snprintf(disk_id, 10, "%u:%u", MAJOR(blkg->dev),\
-					MINOR(blkg->dev));		\
-			cgroup_total += get_stats(blkg, cb, getvar,	\
-						disk_id);		\
+			cgroup_total += blkio_get_stat(blkg, cb,	\
+						blkg->dev, type);	\
 			spin_unlock_irq(&blkg->stats_lock);		\
 		}							\
 	}								\
@@ -367,16 +357,14 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \
 	return 0;							\
 }
 
-SHOW_FUNCTION_PER_GROUP(time, get_stat, get_time_stat, 0);
-SHOW_FUNCTION_PER_GROUP(sectors, get_stat, get_sectors_stat, 0);
-SHOW_FUNCTION_PER_GROUP(io_service_bytes, get_typed_stat,
-			get_io_service_bytes_stat, 1);
-SHOW_FUNCTION_PER_GROUP(io_serviced, get_typed_stat, get_io_serviced_stat, 1);
-SHOW_FUNCTION_PER_GROUP(io_service_time, get_typed_stat,
-			get_io_service_time_stat, 1);
-SHOW_FUNCTION_PER_GROUP(io_wait_time, get_typed_stat, get_io_wait_time_stat, 1);
+SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0);
+SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0);
+SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1);
+SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1);
+SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1);
+SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-SHOW_FUNCTION_PER_GROUP(dequeue, get_stat, get_dequeue_stat, 0);
+SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0);
 #endif
 #undef SHOW_FUNCTION_PER_GROUP
 
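
The key strings built by blkio_get_key_name() are exactly what userspace sees
when reading these map files. A minimal userspace re-creation of the naming
scheme, illustrative only (strlcat is swapped for its portable libc cousin
strncat):

	#include <stdio.h>
	#include <string.h>

	#define MAX_KEY_LEN 100

	static const char *sub_name[] = {
		" Read", " Write", " Sync", " Async", " Total"
	};

	/* Builds "major:minor" plus an optional operation suffix. */
	static void get_key_name(int sub_type, int major, int minor,
				 char *str, size_t chars_left, int diskname_only)
	{
		snprintf(str, chars_left, "%d:%d", major, minor);
		if (diskname_only)
			return;
		strncat(str, sub_name[sub_type], chars_left - strlen(str) - 1);
	}

	int main(void)
	{
		char key[MAX_KEY_LEN];
		int t;

		for (t = 0; t < 5; t++) {
			get_key_name(t, 8, 16, key, sizeof(key), 0);
			printf("%s\n", key);	/* "8:16 Read" ... "8:16 Total" */
		}
		return 0;
	}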
@@ -398,32 +386,30 @@ struct cftype blkio_files[] = {
 	{
 		.name = "time",
 		.read_map = blkiocg_time_read,
-		.write_u64 = blkiocg_reset_write,
 	},
 	{
 		.name = "sectors",
 		.read_map = blkiocg_sectors_read,
-		.write_u64 = blkiocg_reset_write,
 	},
 	{
 		.name = "io_service_bytes",
 		.read_map = blkiocg_io_service_bytes_read,
-		.write_u64 = blkiocg_reset_write,
 	},
 	{
 		.name = "io_serviced",
 		.read_map = blkiocg_io_serviced_read,
-		.write_u64 = blkiocg_reset_write,
 	},
 	{
 		.name = "io_service_time",
 		.read_map = blkiocg_io_service_time_read,
-		.write_u64 = blkiocg_reset_write,
 	},
 	{
 		.name = "io_wait_time",
 		.read_map = blkiocg_io_wait_time_read,
-		.write_u64 = blkiocg_reset_write,
+	},
+	{
+		.name = "reset_stats",
+		.write_u64 = blkiocg_reset_stats,
 	},
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	{
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 80010ef64ab0..b22e55390a4f 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -23,12 +23,31 @@ extern struct cgroup_subsys blkio_subsys;
 #define blkio_subsys_id blkio_subsys.subsys_id
 #endif
 
-enum io_type {
-	IO_READ = 0,
-	IO_WRITE,
-	IO_SYNC,
-	IO_ASYNC,
-	IO_TYPE_MAX
+enum stat_type {
+	/* Total time spent (in ns) between request dispatch to the driver and
+	 * request completion for IOs done by this cgroup. This may not be
+	 * accurate when NCQ is turned on. */
+	BLKIO_STAT_SERVICE_TIME = 0,
+	/* Total bytes transferred */
+	BLKIO_STAT_SERVICE_BYTES,
+	/* Total IOs serviced, post merge */
+	BLKIO_STAT_SERVICED,
+	/* Total time spent waiting in scheduler queue in ns */
+	BLKIO_STAT_WAIT_TIME,
+	/* All the single valued stats go below this */
+	BLKIO_STAT_TIME,
+	BLKIO_STAT_SECTORS,
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+	BLKIO_STAT_DEQUEUE
+#endif
+};
+
+enum stat_sub_type {
+	BLKIO_STAT_READ = 0,
+	BLKIO_STAT_WRITE,
+	BLKIO_STAT_SYNC,
+	BLKIO_STAT_ASYNC,
+	BLKIO_STAT_TOTAL
 };
 
 struct blkio_cgroup {
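
The "All the single valued stats go below this" comment is load-bearing:
blk-cgroup.c sizes the two-dimensional array as
stat_arr[BLKIO_STAT_WAIT_TIME + 1][BLKIO_STAT_TOTAL], so every arrayed stat
type must be declared before BLKIO_STAT_TIME. A hypothetical compile-time
guard (not part of the patch) would pin the invariant down:

	BUILD_BUG_ON(BLKIO_STAT_WAIT_TIME >= BLKIO_STAT_TIME);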
@@ -42,13 +61,7 @@ struct blkio_group_stats {
 	/* total disk time and nr sectors dispatched by this group */
 	uint64_t time;
 	uint64_t sectors;
-	/* Total disk time used by IOs in ns */
-	uint64_t io_service_time[IO_TYPE_MAX];
-	uint64_t io_service_bytes[IO_TYPE_MAX]; /* Total bytes transferred */
-	/* Total IOs serviced, post merge */
-	uint64_t io_serviced[IO_TYPE_MAX];
-	/* Total time spent waiting in scheduler queue in ns */
-	uint64_t io_wait_time[IO_TYPE_MAX];
+	uint64_t stat_arr[BLKIO_STAT_WAIT_TIME + 1][BLKIO_STAT_TOTAL];
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	/* How many times this group has been removed from service tree */
 	unsigned long dequeue;
@@ -65,7 +78,7 @@ struct blkio_group {
 	char path[128];
 #endif
 	/* The device MKDEV(major, minor), this group has been created for */
 	dev_t dev;
 
 	/* Need to serialize the stats in the case of reset/update */
 	spinlock_t stats_lock;
@@ -128,21 +141,21 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
 						void *key);
+void blkio_group_init(struct blkio_group *blkg);
 void blkiocg_update_timeslice_used(struct blkio_group *blkg,
 					unsigned long time);
-void blkiocg_update_request_dispatch_stats(struct blkio_group *blkg,
-					struct request *rq);
-void blkiocg_update_request_completion_stats(struct blkio_group *blkg,
-					struct request *rq);
+void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
+					bool direction, bool sync);
+void blkiocg_update_completion_stats(struct blkio_group *blkg,
+	uint64_t start_time, uint64_t io_start_time, bool direction, bool sync);
 #else
 struct cgroup;
 static inline struct blkio_cgroup *
 cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
 
+static inline void blkio_group_init(struct blkio_group *blkg) {}
 static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
-		struct blkio_group *blkg, void *key, dev_t dev)
-{
-}
+		struct blkio_group *blkg, void *key, dev_t dev) {}
 
 static inline int
 blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
@@ -151,9 +164,10 @@ static inline struct blkio_group *
 blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
 static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
 						unsigned long time) {}
-static inline void blkiocg_update_request_dispatch_stats(
-		struct blkio_group *blkg, struct request *rq) {}
-static inline void blkiocg_update_request_completion_stats(
-		struct blkio_group *blkg, struct request *rq) {}
+static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
+		uint64_t bytes, bool direction, bool sync) {}
+static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
+		uint64_t start_time, uint64_t io_start_time, bool direction,
+		bool sync) {}
 #endif
 #endif /* _BLK_CGROUP_H */
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 42028e7128a7..5617ae030b15 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -955,6 +955,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
 	RB_CLEAR_NODE(&cfqg->rb_node);
+	blkio_group_init(&cfqg->blkg);
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -1865,7 +1866,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
 	elv_dispatch_sort(q, rq);
 
 	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
-	blkiocg_update_request_dispatch_stats(&cfqq->cfqg->blkg, rq);
+	blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
+					rq_data_dir(rq), rq_is_sync(rq));
 }
 
 /*
@@ -3286,7 +3288,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 	WARN_ON(!cfqq->dispatched);
 	cfqd->rq_in_driver--;
 	cfqq->dispatched--;
-	blkiocg_update_request_completion_stats(&cfqq->cfqg->blkg, rq);
+	blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq),
+			rq_io_start_time_ns(rq), rq_data_dir(rq),
+			rq_is_sync(rq));
 
 	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3fff8bf85ee..d483c494672a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1209,9 +1209,27 @@ static inline void set_io_start_time_ns(struct request *req)
 {
 	req->io_start_time_ns = sched_clock();
 }
+
+static inline uint64_t rq_start_time_ns(struct request *req)
+{
+	return req->start_time_ns;
+}
+
+static inline uint64_t rq_io_start_time_ns(struct request *req)
+{
+	return req->io_start_time_ns;
+}
 #else
 static inline void set_start_time_ns(struct request *req) {}
 static inline void set_io_start_time_ns(struct request *req) {}
+static inline uint64_t rq_start_time_ns(struct request *req)
+{
+	return 0;
+}
+static inline uint64_t rq_io_start_time_ns(struct request *req)
+{
+	return 0;
+}
 #endif
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
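
These getters pair with the set_start_time_ns()/set_io_start_time_ns()
setters visible in the context above. The intended sequence, with the
placement of the setters inferred from their names rather than shown in this
patch:

	set_start_time_ns(rq);		/* request enters the block layer */
	...
	set_io_start_time_ns(rq);	/* request dispatched to the driver */
	...
	blkiocg_update_completion_stats(&cfqq->cfqg->blkg,
			rq_start_time_ns(rq), rq_io_start_time_ns(rq),
			rq_data_dir(rq), rq_is_sync(rq));

When the accessors are compiled out (the #else branch), they return 0, and
the blk-cgroup.h stubs turn the update calls into empty inlines, so the
instrumentation costs nothing in that configuration.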