diff options
-rw-r--r-- | Documentation/cgroups/blkio-controller.txt | 48 | ||||
-rw-r--r-- | block/blk-cgroup.c | 190 | ||||
-rw-r--r-- | block/blk-cgroup.h | 64 | ||||
-rw-r--r-- | block/cfq-iosched.c | 8 | ||||
-rw-r--r-- | include/linux/blkdev.h | 18 |
5 files changed, 198 insertions, 130 deletions
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index 630879cd9a42..ed04fe9cce1a 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt | |||
@@ -77,7 +77,6 @@ Details of cgroup files | |||
77 | ======================= | 77 | ======================= |
78 | - blkio.weight | 78 | - blkio.weight |
79 | - Specifies per cgroup weight. | 79 | - Specifies per cgroup weight. |
80 | |||
81 | Currently allowed range of weights is from 100 to 1000. | 80 | Currently allowed range of weights is from 100 to 1000. |
82 | 81 | ||
83 | - blkio.time | 82 | - blkio.time |
@@ -92,6 +91,49 @@ Details of cgroup files | |||
92 | third field specifies the number of sectors transferred by the | 91 | third field specifies the number of sectors transferred by the |
93 | group to/from the device. | 92 | group to/from the device. |
94 | 93 | ||
94 | - blkio.io_service_bytes | ||
95 | - Number of bytes transferred to/from the disk by the group. These | ||
96 | are further divided by the type of operation - read or write, sync | ||
97 | or async. First two fields specify the major and minor number of the | ||
98 | device, third field specifies the operation type and the fourth field | ||
99 | specifies the number of bytes. | ||
100 | |||
101 | - blkio.io_serviced | ||
102 | - Number of IOs completed to/from the disk by the group. These | ||
103 | are further divided by the type of operation - read or write, sync | ||
104 | or async. First two fields specify the major and minor number of the | ||
105 | device, third field specifies the operation type and the fourth field | ||
106 | specifies the number of IOs. | ||
107 | |||
108 | - blkio.io_service_time | ||
109 | - Total amount of time between request dispatch and request completion | ||
110 | for the IOs done by this cgroup. This is in nanoseconds to make it | ||
111 | meaningful for flash devices too. For devices with queue depth of 1, | ||
112 | this time represents the actual service time. When queue_depth > 1, | ||
113 | that is no longer true as requests may be served out of order. This | ||
114 | may cause the service time for a given IO to include the service time | ||
115 | of multiple IOs when served out of order which may result in total | ||
116 | io_service_time > actual time elapsed. This time is further divided by | ||
117 | the type of operation - read or write, sync or async. First two fields | ||
118 | specify the major and minor number of the device, third field | ||
119 | specifies the operation type and the fourth field specifies the | ||
120 | io_service_time in ns. | ||
121 | |||
122 | - blkio.io_wait_time | ||
123 | - Total amount of time the IOs for this cgroup spent waiting in the | ||
124 | scheduler queues for service. This can be greater than the total time | ||
125 | elapsed since it is cumulative io_wait_time for all IOs. It is not a | ||
126 | measure of total time the cgroup spent waiting but rather a measure of | ||
127 | the wait_time for its individual IOs. For devices with queue_depth > 1 | ||
128 | this metric does not include the time spent waiting for service once | ||
129 | the IO is dispatched to the device but before it actually gets serviced | ||
130 | (there might be a time lag here due to re-ordering of requests by the | ||
131 | device). This is in nanoseconds to make it meaningful for flash | ||
132 | devices too. This time is further divided by the type of operation - | ||
133 | read or write, sync or async. First two fields specify the major and | ||
134 | minor number of the device, third field specifies the operation type | ||
135 | and the fourth field specifies the io_wait_time in ns. | ||
136 | |||
95 | - blkio.dequeue | 137 | - blkio.dequeue |
96 | - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This | 138 | - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This |
97 | gives the statistics about how many times a group was dequeued | 139 | gives the statistics about how many times a group was dequeued |
@@ -99,6 +141,10 @@ Details of cgroup files | |||
99 | and minor number of the device and third field specifies the number | 141 | and minor number of the device and third field specifies the number |
100 | of times a group was dequeued from a particular device. | 142 | of times a group was dequeued from a particular device. |
101 | 143 | ||
144 | - blkio.reset_stats | ||
145 | - Writing an int to this file will result in resetting all the stats | ||
146 | for that cgroup. | ||
147 | |||
102 | CFQ sysfs tunable | 148 | CFQ sysfs tunable |
103 | ================= | 149 | ================= |
104 | /sys/block/<disk>/queue/iosched/group_isolation | 150 | /sys/block/<disk>/queue/iosched/group_isolation |
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 9af7257f429c..6797df508821 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
19 | #include "blk-cgroup.h" | 19 | #include "blk-cgroup.h" |
20 | 20 | ||
21 | #define MAX_KEY_LEN 100 | ||
22 | |||
21 | static DEFINE_SPINLOCK(blkio_list_lock); | 23 | static DEFINE_SPINLOCK(blkio_list_lock); |
22 | static LIST_HEAD(blkio_list); | 24 | static LIST_HEAD(blkio_list); |
23 | 25 | ||
@@ -56,24 +58,27 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) | |||
56 | } | 58 | } |
57 | EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); | 59 | EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); |
58 | 60 | ||
61 | void blkio_group_init(struct blkio_group *blkg) | ||
62 | { | ||
63 | spin_lock_init(&blkg->stats_lock); | ||
64 | } | ||
65 | EXPORT_SYMBOL_GPL(blkio_group_init); | ||
66 | |||
59 | /* | 67 | /* |
60 | * Add to the appropriate stat variable depending on the request type. | 68 | * Add to the appropriate stat variable depending on the request type. |
61 | * This should be called with the blkg->stats_lock held. | 69 | * This should be called with the blkg->stats_lock held. |
62 | */ | 70 | */ |
63 | void io_add_stat(uint64_t *stat, uint64_t add, unsigned int flags) | 71 | static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction, |
72 | bool sync) | ||
64 | { | 73 | { |
65 | if (flags & REQ_RW) | 74 | if (direction) |
66 | stat[IO_WRITE] += add; | 75 | stat[BLKIO_STAT_WRITE] += add; |
67 | else | 76 | else |
68 | stat[IO_READ] += add; | 77 | stat[BLKIO_STAT_READ] += add; |
69 | /* | 78 | if (sync) |
70 | * Everywhere in the block layer, an IO is treated as sync if it is a | 79 | stat[BLKIO_STAT_SYNC] += add; |
71 | * read or a SYNC write. We follow the same norm. | ||
72 | */ | ||
73 | if (!(flags & REQ_RW) || flags & REQ_RW_SYNC) | ||
74 | stat[IO_SYNC] += add; | ||
75 | else | 80 | else |
76 | stat[IO_ASYNC] += add; | 81 | stat[BLKIO_STAT_ASYNC] += add; |
77 | } | 82 | } |
78 | 83 | ||
79 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time) | 84 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time) |
@@ -86,23 +91,25 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time) | |||
86 | } | 91 | } |
87 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); | 92 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); |
88 | 93 | ||
89 | void blkiocg_update_request_dispatch_stats(struct blkio_group *blkg, | 94 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, |
90 | struct request *rq) | 95 | uint64_t bytes, bool direction, bool sync) |
91 | { | 96 | { |
92 | struct blkio_group_stats *stats; | 97 | struct blkio_group_stats *stats; |
93 | unsigned long flags; | 98 | unsigned long flags; |
94 | 99 | ||
95 | spin_lock_irqsave(&blkg->stats_lock, flags); | 100 | spin_lock_irqsave(&blkg->stats_lock, flags); |
96 | stats = &blkg->stats; | 101 | stats = &blkg->stats; |
97 | stats->sectors += blk_rq_sectors(rq); | 102 | stats->sectors += bytes >> 9; |
98 | io_add_stat(stats->io_serviced, 1, rq->cmd_flags); | 103 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction, |
99 | io_add_stat(stats->io_service_bytes, blk_rq_sectors(rq) << 9, | 104 | sync); |
100 | rq->cmd_flags); | 105 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes, |
106 | direction, sync); | ||
101 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | 107 | spin_unlock_irqrestore(&blkg->stats_lock, flags); |
102 | } | 108 | } |
109 | EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); | ||
103 | 110 | ||
104 | void blkiocg_update_request_completion_stats(struct blkio_group *blkg, | 111 | void blkiocg_update_completion_stats(struct blkio_group *blkg, |
105 | struct request *rq) | 112 | uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) |
106 | { | 113 | { |
107 | struct blkio_group_stats *stats; | 114 | struct blkio_group_stats *stats; |
108 | unsigned long flags; | 115 | unsigned long flags; |
@@ -110,16 +117,15 @@ void blkiocg_update_request_completion_stats(struct blkio_group *blkg, | |||
110 | 117 | ||
111 | spin_lock_irqsave(&blkg->stats_lock, flags); | 118 | spin_lock_irqsave(&blkg->stats_lock, flags); |
112 | stats = &blkg->stats; | 119 | stats = &blkg->stats; |
113 | if (time_after64(now, rq->io_start_time_ns)) | 120 | if (time_after64(now, io_start_time)) |
114 | io_add_stat(stats->io_service_time, now - rq->io_start_time_ns, | 121 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME], |
115 | rq->cmd_flags); | 122 | now - io_start_time, direction, sync); |
116 | if (time_after64(rq->io_start_time_ns, rq->start_time_ns)) | 123 | if (time_after64(io_start_time, start_time)) |
117 | io_add_stat(stats->io_wait_time, | 124 | blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME], |
118 | rq->io_start_time_ns - rq->start_time_ns, | 125 | io_start_time - start_time, direction, sync); |
119 | rq->cmd_flags); | ||
120 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | 126 | spin_unlock_irqrestore(&blkg->stats_lock, flags); |
121 | } | 127 | } |
122 | EXPORT_SYMBOL_GPL(blkiocg_update_request_completion_stats); | 128 | EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); |
123 | 129 | ||
124 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | 130 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, |
125 | struct blkio_group *blkg, void *key, dev_t dev) | 131 | struct blkio_group *blkg, void *key, dev_t dev) |
@@ -230,7 +236,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
230 | } | 236 | } |
231 | 237 | ||
232 | static int | 238 | static int |
233 | blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | 239 | blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) |
234 | { | 240 | { |
235 | struct blkio_cgroup *blkcg; | 241 | struct blkio_cgroup *blkcg; |
236 | struct blkio_group *blkg; | 242 | struct blkio_group *blkg; |
@@ -249,29 +255,32 @@ blkiocg_reset_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | |||
249 | return 0; | 255 | return 0; |
250 | } | 256 | } |
251 | 257 | ||
252 | void get_key_name(int type, char *disk_id, char *str, int chars_left) | 258 | static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str, |
259 | int chars_left, bool diskname_only) | ||
253 | { | 260 | { |
254 | strlcpy(str, disk_id, chars_left); | 261 | snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev)); |
255 | chars_left -= strlen(str); | 262 | chars_left -= strlen(str); |
256 | if (chars_left <= 0) { | 263 | if (chars_left <= 0) { |
257 | printk(KERN_WARNING | 264 | printk(KERN_WARNING |
258 | "Possibly incorrect cgroup stat display format"); | 265 | "Possibly incorrect cgroup stat display format"); |
259 | return; | 266 | return; |
260 | } | 267 | } |
268 | if (diskname_only) | ||
269 | return; | ||
261 | switch (type) { | 270 | switch (type) { |
262 | case IO_READ: | 271 | case BLKIO_STAT_READ: |
263 | strlcat(str, " Read", chars_left); | 272 | strlcat(str, " Read", chars_left); |
264 | break; | 273 | break; |
265 | case IO_WRITE: | 274 | case BLKIO_STAT_WRITE: |
266 | strlcat(str, " Write", chars_left); | 275 | strlcat(str, " Write", chars_left); |
267 | break; | 276 | break; |
268 | case IO_SYNC: | 277 | case BLKIO_STAT_SYNC: |
269 | strlcat(str, " Sync", chars_left); | 278 | strlcat(str, " Sync", chars_left); |
270 | break; | 279 | break; |
271 | case IO_ASYNC: | 280 | case BLKIO_STAT_ASYNC: |
272 | strlcat(str, " Async", chars_left); | 281 | strlcat(str, " Async", chars_left); |
273 | break; | 282 | break; |
274 | case IO_TYPE_MAX: | 283 | case BLKIO_STAT_TOTAL: |
275 | strlcat(str, " Total", chars_left); | 284 | strlcat(str, " Total", chars_left); |
276 | break; | 285 | break; |
277 | default: | 286 | default: |
@@ -279,63 +288,47 @@ void get_key_name(int type, char *disk_id, char *str, int chars_left) | |||
279 | } | 288 | } |
280 | } | 289 | } |
281 | 290 | ||
282 | typedef uint64_t (get_var) (struct blkio_group *, int); | 291 | static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val, |
292 | struct cgroup_map_cb *cb, dev_t dev) | ||
293 | { | ||
294 | blkio_get_key_name(0, dev, str, chars_left, true); | ||
295 | cb->fill(cb, str, val); | ||
296 | return val; | ||
297 | } | ||
283 | 298 | ||
284 | #define MAX_KEY_LEN 100 | 299 | /* This should be called with blkg->stats_lock held */ |
285 | uint64_t get_typed_stat(struct blkio_group *blkg, struct cgroup_map_cb *cb, | 300 | static uint64_t blkio_get_stat(struct blkio_group *blkg, |
286 | get_var *getvar, char *disk_id) | 301 | struct cgroup_map_cb *cb, dev_t dev, enum stat_type type) |
287 | { | 302 | { |
288 | uint64_t disk_total; | 303 | uint64_t disk_total; |
289 | char key_str[MAX_KEY_LEN]; | 304 | char key_str[MAX_KEY_LEN]; |
290 | int type; | 305 | enum stat_sub_type sub_type; |
306 | |||
307 | if (type == BLKIO_STAT_TIME) | ||
308 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
309 | blkg->stats.time, cb, dev); | ||
310 | if (type == BLKIO_STAT_SECTORS) | ||
311 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
312 | blkg->stats.sectors, cb, dev); | ||
313 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
314 | if (type == BLKIO_STAT_DEQUEUE) | ||
315 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
316 | blkg->stats.dequeue, cb, dev); | ||
317 | #endif | ||
291 | 318 | ||
292 | for (type = 0; type < IO_TYPE_MAX; type++) { | 319 | for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; |
293 | get_key_name(type, disk_id, key_str, MAX_KEY_LEN); | 320 | sub_type++) { |
294 | cb->fill(cb, key_str, getvar(blkg, type)); | 321 | blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false); |
322 | cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]); | ||
295 | } | 323 | } |
296 | disk_total = getvar(blkg, IO_READ) + getvar(blkg, IO_WRITE); | 324 | disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] + |
297 | get_key_name(IO_TYPE_MAX, disk_id, key_str, MAX_KEY_LEN); | 325 | blkg->stats.stat_arr[type][BLKIO_STAT_WRITE]; |
326 | blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false); | ||
298 | cb->fill(cb, key_str, disk_total); | 327 | cb->fill(cb, key_str, disk_total); |
299 | return disk_total; | 328 | return disk_total; |
300 | } | 329 | } |
301 | 330 | ||
302 | uint64_t get_stat(struct blkio_group *blkg, struct cgroup_map_cb *cb, | 331 | #define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total) \ |
303 | get_var *getvar, char *disk_id) | ||
304 | { | ||
305 | uint64_t var = getvar(blkg, 0); | ||
306 | cb->fill(cb, disk_id, var); | ||
307 | return var; | ||
308 | } | ||
309 | |||
310 | #define GET_STAT_INDEXED(__VAR) \ | ||
311 | uint64_t get_##__VAR##_stat(struct blkio_group *blkg, int type) \ | ||
312 | { \ | ||
313 | return blkg->stats.__VAR[type]; \ | ||
314 | } \ | ||
315 | |||
316 | GET_STAT_INDEXED(io_service_bytes); | ||
317 | GET_STAT_INDEXED(io_serviced); | ||
318 | GET_STAT_INDEXED(io_service_time); | ||
319 | GET_STAT_INDEXED(io_wait_time); | ||
320 | #undef GET_STAT_INDEXED | ||
321 | |||
322 | #define GET_STAT(__VAR, __CONV) \ | ||
323 | uint64_t get_##__VAR##_stat(struct blkio_group *blkg, int dummy) \ | ||
324 | { \ | ||
325 | uint64_t data = blkg->stats.__VAR; \ | ||
326 | if (__CONV) \ | ||
327 | data = (uint64_t)jiffies_to_msecs(data) * NSEC_PER_MSEC;\ | ||
328 | return data; \ | ||
329 | } | ||
330 | |||
331 | GET_STAT(time, 1); | ||
332 | GET_STAT(sectors, 0); | ||
333 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
334 | GET_STAT(dequeue, 0); | ||
335 | #endif | ||
336 | #undef GET_STAT | ||
337 | |||
338 | #define SHOW_FUNCTION_PER_GROUP(__VAR, get_stats, getvar, show_total) \ | ||
339 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | 332 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ |
340 | struct cftype *cftype, struct cgroup_map_cb *cb) \ | 333 | struct cftype *cftype, struct cgroup_map_cb *cb) \ |
341 | { \ | 334 | { \ |
@@ -343,7 +336,6 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | |||
343 | struct blkio_group *blkg; \ | 336 | struct blkio_group *blkg; \ |
344 | struct hlist_node *n; \ | 337 | struct hlist_node *n; \ |
345 | uint64_t cgroup_total = 0; \ | 338 | uint64_t cgroup_total = 0; \ |
346 | char disk_id[10]; \ | ||
347 | \ | 339 | \ |
348 | if (!cgroup_lock_live_group(cgroup)) \ | 340 | if (!cgroup_lock_live_group(cgroup)) \ |
349 | return -ENODEV; \ | 341 | return -ENODEV; \ |
@@ -353,10 +345,8 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | |||
353 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ | 345 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ |
354 | if (blkg->dev) { \ | 346 | if (blkg->dev) { \ |
355 | spin_lock_irq(&blkg->stats_lock); \ | 347 | spin_lock_irq(&blkg->stats_lock); \ |
356 | snprintf(disk_id, 10, "%u:%u", MAJOR(blkg->dev),\ | 348 | cgroup_total += blkio_get_stat(blkg, cb, \ |
357 | MINOR(blkg->dev)); \ | 349 | blkg->dev, type); \ |
358 | cgroup_total += get_stats(blkg, cb, getvar, \ | ||
359 | disk_id); \ | ||
360 | spin_unlock_irq(&blkg->stats_lock); \ | 350 | spin_unlock_irq(&blkg->stats_lock); \ |
361 | } \ | 351 | } \ |
362 | } \ | 352 | } \ |
@@ -367,16 +357,14 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | |||
367 | return 0; \ | 357 | return 0; \ |
368 | } | 358 | } |
369 | 359 | ||
370 | SHOW_FUNCTION_PER_GROUP(time, get_stat, get_time_stat, 0); | 360 | SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0); |
371 | SHOW_FUNCTION_PER_GROUP(sectors, get_stat, get_sectors_stat, 0); | 361 | SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0); |
372 | SHOW_FUNCTION_PER_GROUP(io_service_bytes, get_typed_stat, | 362 | SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1); |
373 | get_io_service_bytes_stat, 1); | 363 | SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1); |
374 | SHOW_FUNCTION_PER_GROUP(io_serviced, get_typed_stat, get_io_serviced_stat, 1); | 364 | SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1); |
375 | SHOW_FUNCTION_PER_GROUP(io_service_time, get_typed_stat, | 365 | SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1); |
376 | get_io_service_time_stat, 1); | ||
377 | SHOW_FUNCTION_PER_GROUP(io_wait_time, get_typed_stat, get_io_wait_time_stat, 1); | ||
378 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 366 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
379 | SHOW_FUNCTION_PER_GROUP(dequeue, get_stat, get_dequeue_stat, 0); | 367 | SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0); |
380 | #endif | 368 | #endif |
381 | #undef SHOW_FUNCTION_PER_GROUP | 369 | #undef SHOW_FUNCTION_PER_GROUP |
382 | 370 | ||
@@ -398,32 +386,30 @@ struct cftype blkio_files[] = { | |||
398 | { | 386 | { |
399 | .name = "time", | 387 | .name = "time", |
400 | .read_map = blkiocg_time_read, | 388 | .read_map = blkiocg_time_read, |
401 | .write_u64 = blkiocg_reset_write, | ||
402 | }, | 389 | }, |
403 | { | 390 | { |
404 | .name = "sectors", | 391 | .name = "sectors", |
405 | .read_map = blkiocg_sectors_read, | 392 | .read_map = blkiocg_sectors_read, |
406 | .write_u64 = blkiocg_reset_write, | ||
407 | }, | 393 | }, |
408 | { | 394 | { |
409 | .name = "io_service_bytes", | 395 | .name = "io_service_bytes", |
410 | .read_map = blkiocg_io_service_bytes_read, | 396 | .read_map = blkiocg_io_service_bytes_read, |
411 | .write_u64 = blkiocg_reset_write, | ||
412 | }, | 397 | }, |
413 | { | 398 | { |
414 | .name = "io_serviced", | 399 | .name = "io_serviced", |
415 | .read_map = blkiocg_io_serviced_read, | 400 | .read_map = blkiocg_io_serviced_read, |
416 | .write_u64 = blkiocg_reset_write, | ||
417 | }, | 401 | }, |
418 | { | 402 | { |
419 | .name = "io_service_time", | 403 | .name = "io_service_time", |
420 | .read_map = blkiocg_io_service_time_read, | 404 | .read_map = blkiocg_io_service_time_read, |
421 | .write_u64 = blkiocg_reset_write, | ||
422 | }, | 405 | }, |
423 | { | 406 | { |
424 | .name = "io_wait_time", | 407 | .name = "io_wait_time", |
425 | .read_map = blkiocg_io_wait_time_read, | 408 | .read_map = blkiocg_io_wait_time_read, |
426 | .write_u64 = blkiocg_reset_write, | 409 | }, |
410 | { | ||
411 | .name = "reset_stats", | ||
412 | .write_u64 = blkiocg_reset_stats, | ||
427 | }, | 413 | }, |
428 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 414 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
429 | { | 415 | { |
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 80010ef64ab0..b22e55390a4f 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h | |||
@@ -23,12 +23,31 @@ extern struct cgroup_subsys blkio_subsys; | |||
23 | #define blkio_subsys_id blkio_subsys.subsys_id | 23 | #define blkio_subsys_id blkio_subsys.subsys_id |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | enum io_type { | 26 | enum stat_type { |
27 | IO_READ = 0, | 27 | /* Total time spent (in ns) between request dispatch to the driver and |
28 | IO_WRITE, | 28 | * request completion for IOs done by this cgroup. This may not be |
29 | IO_SYNC, | 29 | * accurate when NCQ is turned on. */ |
30 | IO_ASYNC, | 30 | BLKIO_STAT_SERVICE_TIME = 0, |
31 | IO_TYPE_MAX | 31 | /* Total bytes transferred */ |
32 | BLKIO_STAT_SERVICE_BYTES, | ||
33 | /* Total IOs serviced, post merge */ | ||
34 | BLKIO_STAT_SERVICED, | ||
35 | /* Total time spent waiting in scheduler queue in ns */ | ||
36 | BLKIO_STAT_WAIT_TIME, | ||
37 | /* All the single valued stats go below this */ | ||
38 | BLKIO_STAT_TIME, | ||
39 | BLKIO_STAT_SECTORS, | ||
40 | #ifdef CONFIG_DEBUG_BLK_CGROUP | ||
41 | BLKIO_STAT_DEQUEUE | ||
42 | #endif | ||
43 | }; | ||
44 | |||
45 | enum stat_sub_type { | ||
46 | BLKIO_STAT_READ = 0, | ||
47 | BLKIO_STAT_WRITE, | ||
48 | BLKIO_STAT_SYNC, | ||
49 | BLKIO_STAT_ASYNC, | ||
50 | BLKIO_STAT_TOTAL | ||
32 | }; | 51 | }; |
33 | 52 | ||
34 | struct blkio_cgroup { | 53 | struct blkio_cgroup { |
@@ -42,13 +61,7 @@ struct blkio_group_stats { | |||
42 | /* total disk time and nr sectors dispatched by this group */ | 61 | /* total disk time and nr sectors dispatched by this group */ |
43 | uint64_t time; | 62 | uint64_t time; |
44 | uint64_t sectors; | 63 | uint64_t sectors; |
45 | /* Total disk time used by IOs in ns */ | 64 | uint64_t stat_arr[BLKIO_STAT_WAIT_TIME + 1][BLKIO_STAT_TOTAL]; |
46 | uint64_t io_service_time[IO_TYPE_MAX]; | ||
47 | uint64_t io_service_bytes[IO_TYPE_MAX]; /* Total bytes transferred */ | ||
48 | /* Total IOs serviced, post merge */ | ||
49 | uint64_t io_serviced[IO_TYPE_MAX]; | ||
50 | /* Total time spent waiting in scheduler queue in ns */ | ||
51 | uint64_t io_wait_time[IO_TYPE_MAX]; | ||
52 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 65 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
53 | /* How many times this group has been removed from service tree */ | 66 | /* How many times this group has been removed from service tree */ |
54 | unsigned long dequeue; | 67 | unsigned long dequeue; |
@@ -65,7 +78,7 @@ struct blkio_group { | |||
65 | char path[128]; | 78 | char path[128]; |
66 | #endif | 79 | #endif |
67 | /* The device MKDEV(major, minor), this group has been created for */ | 80 | /* The device MKDEV(major, minor), this group has been created for */ |
68 | dev_t dev; | 81 | dev_t dev; |
69 | 82 | ||
70 | /* Need to serialize the stats in the case of reset/update */ | 83 | /* Need to serialize the stats in the case of reset/update */ |
71 | spinlock_t stats_lock; | 84 | spinlock_t stats_lock; |
@@ -128,21 +141,21 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | |||
128 | extern int blkiocg_del_blkio_group(struct blkio_group *blkg); | 141 | extern int blkiocg_del_blkio_group(struct blkio_group *blkg); |
129 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, | 142 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, |
130 | void *key); | 143 | void *key); |
144 | void blkio_group_init(struct blkio_group *blkg); | ||
131 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, | 145 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
132 | unsigned long time); | 146 | unsigned long time); |
133 | void blkiocg_update_request_dispatch_stats(struct blkio_group *blkg, | 147 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, |
134 | struct request *rq); | 148 | bool direction, bool sync); |
135 | void blkiocg_update_request_completion_stats(struct blkio_group *blkg, | 149 | void blkiocg_update_completion_stats(struct blkio_group *blkg, |
136 | struct request *rq); | 150 | uint64_t start_time, uint64_t io_start_time, bool direction, bool sync); |
137 | #else | 151 | #else |
138 | struct cgroup; | 152 | struct cgroup; |
139 | static inline struct blkio_cgroup * | 153 | static inline struct blkio_cgroup * |
140 | cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } | 154 | cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } |
141 | 155 | ||
156 | static inline void blkio_group_init(struct blkio_group *blkg) {} | ||
142 | static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | 157 | static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, |
143 | struct blkio_group *blkg, void *key, dev_t dev) | 158 | struct blkio_group *blkg, void *key, dev_t dev) {} |
144 | { | ||
145 | } | ||
146 | 159 | ||
147 | static inline int | 160 | static inline int |
148 | blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } | 161 | blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } |
@@ -151,9 +164,10 @@ static inline struct blkio_group * | |||
151 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } | 164 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } |
152 | static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, | 165 | static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
153 | unsigned long time) {} | 166 | unsigned long time) {} |
154 | static inline void blkiocg_update_request_dispatch_stats( | 167 | static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg, |
155 | struct blkio_group *blkg, struct request *rq) {} | 168 | uint64_t bytes, bool direction, bool sync) {} |
156 | static inline void blkiocg_update_request_completion_stats( | 169 | static inline void blkiocg_update_completion_stats(struct blkio_group *blkg, |
157 | struct blkio_group *blkg, struct request *rq) {} | 170 | uint64_t start_time, uint64_t io_start_time, bool direction, |
171 | bool sync) {} | ||
158 | #endif | 172 | #endif |
159 | #endif /* _BLK_CGROUP_H */ | 173 | #endif /* _BLK_CGROUP_H */ |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 42028e7128a7..5617ae030b15 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -955,6 +955,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) | |||
955 | for_each_cfqg_st(cfqg, i, j, st) | 955 | for_each_cfqg_st(cfqg, i, j, st) |
956 | *st = CFQ_RB_ROOT; | 956 | *st = CFQ_RB_ROOT; |
957 | RB_CLEAR_NODE(&cfqg->rb_node); | 957 | RB_CLEAR_NODE(&cfqg->rb_node); |
958 | blkio_group_init(&cfqg->blkg); | ||
958 | 959 | ||
959 | /* | 960 | /* |
960 | * Take the initial reference that will be released on destroy | 961 | * Take the initial reference that will be released on destroy |
@@ -1865,7 +1866,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) | |||
1865 | elv_dispatch_sort(q, rq); | 1866 | elv_dispatch_sort(q, rq); |
1866 | 1867 | ||
1867 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; | 1868 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; |
1868 | blkiocg_update_request_dispatch_stats(&cfqq->cfqg->blkg, rq); | 1869 | blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq), |
1870 | rq_data_dir(rq), rq_is_sync(rq)); | ||
1869 | } | 1871 | } |
1870 | 1872 | ||
1871 | /* | 1873 | /* |
@@ -3286,7 +3288,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
3286 | WARN_ON(!cfqq->dispatched); | 3288 | WARN_ON(!cfqq->dispatched); |
3287 | cfqd->rq_in_driver--; | 3289 | cfqd->rq_in_driver--; |
3288 | cfqq->dispatched--; | 3290 | cfqq->dispatched--; |
3289 | blkiocg_update_request_completion_stats(&cfqq->cfqg->blkg, rq); | 3291 | blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq), |
3292 | rq_io_start_time_ns(rq), rq_data_dir(rq), | ||
3293 | rq_is_sync(rq)); | ||
3290 | 3294 | ||
3291 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; | 3295 | cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; |
3292 | 3296 | ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3fff8bf85ee..d483c494672a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -1209,9 +1209,27 @@ static inline void set_io_start_time_ns(struct request *req) | |||
1209 | { | 1209 | { |
1210 | req->io_start_time_ns = sched_clock(); | 1210 | req->io_start_time_ns = sched_clock(); |
1211 | } | 1211 | } |
1212 | |||
1213 | static inline uint64_t rq_start_time_ns(struct request *req) | ||
1214 | { | ||
1215 | return req->start_time_ns; | ||
1216 | } | ||
1217 | |||
1218 | static inline uint64_t rq_io_start_time_ns(struct request *req) | ||
1219 | { | ||
1220 | return req->io_start_time_ns; | ||
1221 | } | ||
1212 | #else | 1222 | #else |
1213 | static inline void set_start_time_ns(struct request *req) {} | 1223 | static inline void set_start_time_ns(struct request *req) {} |
1214 | static inline void set_io_start_time_ns(struct request *req) {} | 1224 | static inline void set_io_start_time_ns(struct request *req) {} |
1225 | static inline uint64_t rq_start_time_ns(struct request *req) | ||
1226 | { | ||
1227 | return 0; | ||
1228 | } | ||
1229 | static inline uint64_t rq_io_start_time_ns(struct request *req) | ||
1230 | { | ||
1231 | return 0; | ||
1232 | } | ||
1215 | #endif | 1233 | #endif |
1216 | 1234 | ||
1217 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 1235 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |