-rw-r--r--   Documentation/cgroups/blkio-controller.txt |  29
-rw-r--r--   block/blk-cgroup.c                         | 159
-rw-r--r--   block/blk-cgroup.h                         |  54
-rw-r--r--   block/cfq-iosched.c                        |  50
4 files changed, 271 insertions(+), 21 deletions(-)
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 6e52e7c512a4..db054ea3e7fb 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -150,6 +150,35 @@ Details of cgroup files
   cgroup's existence. Queue size samples are taken each time one of the
   queues of this cgroup gets a timeslice.

+- blkio.group_wait_time
+        - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y.
+          This is the amount of time the cgroup had to wait since it became busy
+          (i.e., went from 0 to 1 request queued) to get a timeslice for one of
+          its queues. This is different from the io_wait_time which is the
+          cumulative total of the amount of time spent by each IO in that cgroup
+          waiting in the scheduler queue. This is in nanoseconds. If this is
+          read when the cgroup is in a waiting (for timeslice) state, the stat
+          will only report the group_wait_time accumulated till the last time it
+          got a timeslice and will not include the current delta.
+
+- blkio.empty_time
+        - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y.
+          This is the amount of time a cgroup spends without any pending
+          requests when not being served, i.e., it does not include any time
+          spent idling for one of the queues of the cgroup. This is in
+          nanoseconds. If this is read when the cgroup is in an empty state,
+          the stat will only report the empty_time accumulated till the last
+          time it had a pending request and will not include the current delta.
+
+- blkio.idle_time
+        - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y.
+          This is the amount of time spent by the IO scheduler idling for a
+          given cgroup in anticipation of a better request than the existing ones
+          from other queues/cgroups. This is in nanoseconds. If this is read
+          when the cgroup is in an idling state, the stat will only report the
+          idle_time accumulated till the last idle period and will not include
+          the current delta.
+
 - blkio.dequeue
         - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This
           gives the statistics about how many a times a group was dequeued
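With CONFIG_DEBUG_CFQ_IOSCHED=y (which enables the CONFIG_DEBUG_BLK_CGROUP code below), the three new files appear alongside the existing blkio stats and are read like any other cgroup file. As a rough illustration, a userspace reader might look like the sketch below; the /cgroup/blkio mount point and the "test1" group name are assumptions made for the example, not something this patch provides.

#include <stdio.h>

int main(void)
{
        /* Hypothetical paths: adjust to wherever the blkio controller is
         * mounted and to an existing cgroup on your system. */
        static const char *files[] = {
                "/cgroup/blkio/test1/blkio.group_wait_time",
                "/cgroup/blkio/test1/blkio.idle_time",
                "/cgroup/blkio/test1/blkio.empty_time",
        };
        char line[256];
        size_t i;

        for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
                FILE *f = fopen(files[i], "r");

                if (!f) {
                        perror(files[i]);
                        continue;
                }
                printf("%s:\n", files[i]);
                /* Values are in nanoseconds, typically one line per device */
                while (fgets(line, sizeof(line), f))
                        printf("  %s", line);
                fclose(f);
        }
        return 0;
}

The values are nanoseconds, reported per device in the same major:minor layout used by the other per-group blkio statistics.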
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1e0c4970b35d..1ecff7a39f2c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -105,6 +105,76 @@ static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
 }

 #ifdef CONFIG_DEBUG_BLK_CGROUP
+/* This should be called with the blkg->stats_lock held. */
+static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
+                                        struct blkio_group *curr_blkg)
+{
+        if (blkio_blkg_waiting(&blkg->stats))
+                return;
+        if (blkg == curr_blkg)
+                return;
+        blkg->stats.start_group_wait_time = sched_clock();
+        blkio_mark_blkg_waiting(&blkg->stats);
+}
+
+/* This should be called with the blkg->stats_lock held. */
+static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
+{
+        unsigned long long now;
+
+        if (!blkio_blkg_waiting(stats))
+                return;
+
+        now = sched_clock();
+        if (time_after64(now, stats->start_group_wait_time))
+                stats->group_wait_time += now - stats->start_group_wait_time;
+        blkio_clear_blkg_waiting(stats);
+}
+
+/* This should be called with the blkg->stats_lock held. */
+static void blkio_end_empty_time(struct blkio_group_stats *stats)
+{
+        unsigned long long now;
+
+        if (!blkio_blkg_empty(stats))
+                return;
+
+        now = sched_clock();
+        if (time_after64(now, stats->start_empty_time))
+                stats->empty_time += now - stats->start_empty_time;
+        blkio_clear_blkg_empty(stats);
+}
+
+void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&blkg->stats_lock, flags);
+        BUG_ON(blkio_blkg_idling(&blkg->stats));
+        blkg->stats.start_idle_time = sched_clock();
+        blkio_mark_blkg_idling(&blkg->stats);
+        spin_unlock_irqrestore(&blkg->stats_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
+
+void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
+{
+        unsigned long flags;
+        unsigned long long now;
+        struct blkio_group_stats *stats;
+
+        spin_lock_irqsave(&blkg->stats_lock, flags);
+        stats = &blkg->stats;
+        if (blkio_blkg_idling(stats)) {
+                now = sched_clock();
+                if (time_after64(now, stats->start_idle_time))
+                        stats->idle_time += now - stats->start_idle_time;
+                blkio_clear_blkg_idling(stats);
+        }
+        spin_unlock_irqrestore(&blkg->stats_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
+
 void blkiocg_update_set_active_queue_stats(struct blkio_group *blkg)
 {
         unsigned long flags;
@@ -116,9 +186,14 @@ void blkiocg_update_set_active_queue_stats(struct blkio_group *blkg)
                 stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
                 stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
         stats->avg_queue_size_samples++;
+        blkio_update_group_wait_time(stats);
         spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_set_active_queue_stats);
+#else
+static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
+                                        struct blkio_group *curr_blkg) {}
+static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
 #endif

 void blkiocg_update_request_add_stats(struct blkio_group *blkg,
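All of the helpers added above follow one pattern: a state flag in blkio_group_stats plus a start timestamp, with the elapsed delta folded into the cumulative counter when the state ends. The snippet below is a minimal, self-contained userspace model of that pattern for group_wait_time only; it is an illustration, not the kernel code (clock_gettime() stands in for sched_clock(), and there is no locking).

#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* stand-in for sched_clock(): monotonic time in nanoseconds */
static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

struct group_stats {
        uint64_t group_wait_time;       /* cumulative value reported to userspace */
        uint64_t start_group_wait_time; /* start of the currently open window */
        int waiting;                    /* models the BLKG_waiting flag */
};

/* group became busy but is not the one being served: open a wait window */
static void set_start_group_wait_time(struct group_stats *st)
{
        if (st->waiting)
                return;                 /* window already open */
        st->start_group_wait_time = now_ns();
        st->waiting = 1;
}

/* group was handed a timeslice: close the window and fold in the delta */
static void update_group_wait_time(struct group_stats *st)
{
        uint64_t now;

        if (!st->waiting)
                return;
        now = now_ns();
        if (now > st->start_group_wait_time)
                st->group_wait_time += now - st->start_group_wait_time;
        st->waiting = 0;
}

int main(void)
{
        struct group_stats st = { 0 };

        set_start_group_wait_time(&st);
        usleep(1000);                   /* group waits for service */
        update_group_wait_time(&st);
        printf("group_wait_time = %llu ns\n",
               (unsigned long long)st.group_wait_time);
        return 0;
}

Since the delta is only folded in when the window closes, a read taken while a window is still open reports only what was accumulated up to the previous window, which is the caveat called out in the documentation hunk.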
@@ -130,6 +205,8 @@ void blkiocg_update_request_add_stats(struct blkio_group *blkg,
         spin_lock_irqsave(&blkg->stats_lock, flags);
         blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
                         sync);
+        blkio_end_empty_time(&blkg->stats);
+        blkio_set_start_group_wait_time(blkg, curr_blkg);
         spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_request_add_stats);
@@ -156,6 +233,33 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);

+void blkiocg_set_start_empty_time(struct blkio_group *blkg, bool ignore)
+{
+        unsigned long flags;
+        struct blkio_group_stats *stats;
+
+        spin_lock_irqsave(&blkg->stats_lock, flags);
+        stats = &blkg->stats;
+
+        if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
+                        stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
+                spin_unlock_irqrestore(&blkg->stats_lock, flags);
+                return;
+        }
+
+        /*
+         * If ignore is set, we do not panic on the empty flag being set
+         * already. This is to avoid cases where there are superfluous timeslice
+         * complete events (for eg., forced_dispatch in CFQ) when no IOs are
+         * served which could result in triggering the empty check incorrectly.
+         */
+        BUG_ON(!ignore && blkio_blkg_empty(stats));
+        stats->start_empty_time = sched_clock();
+        blkio_mark_blkg_empty(stats);
+        spin_unlock_irqrestore(&blkg->stats_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);
+
 void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
                         uint64_t bytes, bool direction, bool sync)
 {
@@ -317,19 +421,44 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 {
         struct blkio_cgroup *blkcg;
         struct blkio_group *blkg;
+        struct blkio_group_stats *stats;
         struct hlist_node *n;
         uint64_t queued[BLKIO_STAT_TOTAL];
         int i;
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+        bool idling, waiting, empty;
+        unsigned long long now = sched_clock();
+#endif

         blkcg = cgroup_to_blkio_cgroup(cgroup);
         spin_lock_irq(&blkcg->lock);
         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
                 spin_lock(&blkg->stats_lock);
+                stats = &blkg->stats;
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+                idling = blkio_blkg_idling(stats);
+                waiting = blkio_blkg_waiting(stats);
+                empty = blkio_blkg_empty(stats);
+#endif
                 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
-                        queued[i] = blkg->stats.stat_arr[BLKIO_STAT_QUEUED][i];
-                memset(&blkg->stats, 0, sizeof(struct blkio_group_stats));
+                        queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
+                memset(stats, 0, sizeof(struct blkio_group_stats));
                 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
-                        blkg->stats.stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
+                        stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+                if (idling) {
+                        blkio_mark_blkg_idling(stats);
+                        stats->start_idle_time = now;
+                }
+                if (waiting) {
+                        blkio_mark_blkg_waiting(stats);
+                        stats->start_group_wait_time = now;
+                }
+                if (empty) {
+                        blkio_mark_blkg_empty(stats);
+                        stats->start_empty_time = now;
+                }
+#endif
                 spin_unlock(&blkg->stats_lock);
         }
         spin_unlock_irq(&blkcg->lock);
@@ -401,6 +530,15 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
                         sum = 0;
                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev);
         }
+        if (type == BLKIO_STAT_GROUP_WAIT_TIME)
+                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                        blkg->stats.group_wait_time, cb, dev);
+        if (type == BLKIO_STAT_IDLE_TIME)
+                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                        blkg->stats.idle_time, cb, dev);
+        if (type == BLKIO_STAT_EMPTY_TIME)
+                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                        blkg->stats.empty_time, cb, dev);
         if (type == BLKIO_STAT_DEQUEUE)
                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
                                         blkg->stats.dequeue, cb, dev);
@@ -458,6 +596,9 @@ SHOW_FUNCTION_PER_GROUP(io_queued, BLKIO_STAT_QUEUED, 1);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0);
 SHOW_FUNCTION_PER_GROUP(avg_queue_size, BLKIO_STAT_AVG_QUEUE_SIZE, 0);
+SHOW_FUNCTION_PER_GROUP(group_wait_time, BLKIO_STAT_GROUP_WAIT_TIME, 0);
+SHOW_FUNCTION_PER_GROUP(idle_time, BLKIO_STAT_IDLE_TIME, 0);
+SHOW_FUNCTION_PER_GROUP(empty_time, BLKIO_STAT_EMPTY_TIME, 0);
 #endif
 #undef SHOW_FUNCTION_PER_GROUP

@@ -518,6 +659,18 @@ struct cftype blkio_files[] = {
                 .read_map = blkiocg_avg_queue_size_read,
         },
         {
+                .name = "group_wait_time",
+                .read_map = blkiocg_group_wait_time_read,
+        },
+        {
+                .name = "idle_time",
+                .read_map = blkiocg_idle_time_read,
+        },
+        {
+                .name = "empty_time",
+                .read_map = blkiocg_empty_time_read,
+        },
+        {
                 .name = "dequeue",
                 .read_map = blkiocg_dequeue_read,
         },
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index bea7f3b9a88e..bfce085b1962 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -43,6 +43,9 @@ enum stat_type {
         BLKIO_STAT_SECTORS,
 #ifdef CONFIG_DEBUG_BLK_CGROUP
         BLKIO_STAT_AVG_QUEUE_SIZE,
+        BLKIO_STAT_IDLE_TIME,
+        BLKIO_STAT_EMPTY_TIME,
+        BLKIO_STAT_GROUP_WAIT_TIME,
         BLKIO_STAT_DEQUEUE
 #endif
 };
@@ -55,6 +58,13 @@ enum stat_sub_type {
         BLKIO_STAT_TOTAL
 };

+/* blkg state flags */
+enum blkg_state_flags {
+        BLKG_waiting = 0,
+        BLKG_idling,
+        BLKG_empty,
+};
+
 struct blkio_cgroup {
         struct cgroup_subsys_state css;
         unsigned int weight;
@@ -74,6 +84,21 @@ struct blkio_group_stats {
         uint64_t avg_queue_size_samples;
         /* How many times this group has been removed from service tree */
         unsigned long dequeue;
+
+        /* Total time spent waiting for it to be assigned a timeslice. */
+        uint64_t group_wait_time;
+        uint64_t start_group_wait_time;
+
+        /* Time spent idling for this blkio_group */
+        uint64_t idle_time;
+        uint64_t start_idle_time;
+        /*
+         * Total time when we have requests queued and do not contain the
+         * current active queue.
+         */
+        uint64_t empty_time;
+        uint64_t start_empty_time;
+        uint16_t flags;
 #endif
 };

@@ -137,12 +162,41 @@ static inline char *blkg_path(struct blkio_group *blkg)
 void blkiocg_update_set_active_queue_stats(struct blkio_group *blkg);
 void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
                                 unsigned long dequeue);
+void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg);
+void blkiocg_update_idle_time_stats(struct blkio_group *blkg);
+void blkiocg_set_start_empty_time(struct blkio_group *blkg, bool ignore);
+
+#define BLKG_FLAG_FNS(name)                                             \
+static inline void blkio_mark_blkg_##name(                              \
+                struct blkio_group_stats *stats)                        \
+{                                                                       \
+        stats->flags |= (1 << BLKG_##name);                             \
+}                                                                       \
+static inline void blkio_clear_blkg_##name(                             \
+                struct blkio_group_stats *stats)                        \
+{                                                                       \
+        stats->flags &= ~(1 << BLKG_##name);                            \
+}                                                                       \
+static inline int blkio_blkg_##name(struct blkio_group_stats *stats)    \
+{                                                                       \
+        return (stats->flags & (1 << BLKG_##name)) != 0;                \
+}                                                                       \
+
+BLKG_FLAG_FNS(waiting)
+BLKG_FLAG_FNS(idling)
+BLKG_FLAG_FNS(empty)
+#undef BLKG_FLAG_FNS
 #else
 static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
 static inline void blkiocg_update_set_active_queue_stats(
                 struct blkio_group *blkg) {}
 static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
                                 unsigned long dequeue) {}
+static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
+{}
+static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg) {}
+static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
+                                                bool ignore) {}
 #endif

 #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
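For reference, each BLKG_FLAG_FNS() invocation above generates three small accessors over the flags word. Expanding BLKG_FLAG_FNS(waiting) by hand, using the blkio_group_stats and blkg_state_flags definitions from this header, gives approximately:

/* Hand expansion of BLKG_FLAG_FNS(waiting) shown for illustration only. */
static inline void blkio_mark_blkg_waiting(struct blkio_group_stats *stats)
{
        stats->flags |= (1 << BLKG_waiting);
}
static inline void blkio_clear_blkg_waiting(struct blkio_group_stats *stats)
{
        stats->flags &= ~(1 << BLKG_waiting);
}
static inline int blkio_blkg_waiting(struct blkio_group_stats *stats)
{
        return (stats->flags & (1 << BLKG_waiting)) != 0;
}

The idling and empty variants used in blk-cgroup.c come from the other two invocations.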
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 8e0b86a9111a..b6e095c7ef5e 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -886,7 +886,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
 }

 static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
-                                struct cfq_queue *cfqq)
+                                struct cfq_queue *cfqq, bool forced)
 {
         struct cfq_rb_root *st = &cfqd->grp_service_tree;
         unsigned int used_sl, charge_sl;
@@ -916,6 +916,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
         cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
                                         st->min_vdisktime);
         blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
+        blkiocg_set_start_empty_time(&cfqg->blkg, forced);
 }

 #ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -1528,6 +1529,12 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
         return cfqq == RQ_CFQQ(rq);
 }

+static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+        del_timer(&cfqd->idle_slice_timer);
+        blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
+}
+
 static void __cfq_set_active_queue(struct cfq_data *cfqd,
                                    struct cfq_queue *cfqq)
 {
@@ -1547,7 +1554,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
                 cfq_clear_cfqq_fifo_expire(cfqq);
                 cfq_mark_cfqq_slice_new(cfqq);

-                del_timer(&cfqd->idle_slice_timer);
+                cfq_del_timer(cfqd, cfqq);
         }

         cfqd->active_queue = cfqq;
@@ -1558,12 +1565,12 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
  */
 static void
 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-                    bool timed_out)
+                    bool timed_out, bool forced)
 {
         cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);

         if (cfq_cfqq_wait_request(cfqq))
-                del_timer(&cfqd->idle_slice_timer);
+                cfq_del_timer(cfqd, cfqq);

         cfq_clear_cfqq_wait_request(cfqq);
         cfq_clear_cfqq_wait_busy(cfqq);
@@ -1585,7 +1592,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
         }

-        cfq_group_served(cfqd, cfqq->cfqg, cfqq);
+        cfq_group_served(cfqd, cfqq->cfqg, cfqq, forced);

         if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
                 cfq_del_cfqq_rr(cfqd, cfqq);
@@ -1604,12 +1611,13 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
         }
 }

-static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
+static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out,
+                                        bool forced)
 {
         struct cfq_queue *cfqq = cfqd->active_queue;

         if (cfqq)
-                __cfq_slice_expired(cfqd, cfqq, timed_out);
+                __cfq_slice_expired(cfqd, cfqq, timed_out, forced);
 }

 /*
@@ -1865,6 +1873,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
                 sl = cfqd->cfq_slice_idle;

         mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
+        blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg);
         cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
 }

@@ -2176,7 +2185,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
         }

 expire:
-        cfq_slice_expired(cfqd, 0);
+        cfq_slice_expired(cfqd, 0, false);
 new_queue:
         /*
          * Current queue expired. Check if we have to switch to a new
@@ -2202,7 +2211,7 @@ static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
         BUG_ON(!list_empty(&cfqq->fifo));

         /* By default cfqq is not expired if it is empty. Do it explicitly */
-        __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
+        __cfq_slice_expired(cfqq->cfqd, cfqq, 0, true);
         return dispatched;
 }

@@ -2218,7 +2227,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
         while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL)
                 dispatched += __cfq_forced_dispatch_cfqq(cfqq);

-        cfq_slice_expired(cfqd, 0);
+        cfq_slice_expired(cfqd, 0, true);
         BUG_ON(cfqd->busy_queues);

         cfq_log(cfqd, "forced_dispatch=%d", dispatched);
@@ -2382,10 +2391,15 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
             cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
             cfq_class_idle(cfqq))) {
                 cfqq->slice_end = jiffies + 1;
-                cfq_slice_expired(cfqd, 0);
+                cfq_slice_expired(cfqd, 0, false);
         }

         cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
+        /*
+         * This is needed since we don't exactly match the mod_timer() and
+         * del_timer() calls in CFQ.
+         */
+        blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
         return 1;
 }

@@ -2413,7 +2427,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
         orig_cfqg = cfqq->orig_cfqg;

         if (unlikely(cfqd->active_queue == cfqq)) {
-                __cfq_slice_expired(cfqd, cfqq, 0);
+                __cfq_slice_expired(cfqd, cfqq, 0, false);
                 cfq_schedule_dispatch(cfqd);
         }

@@ -2514,7 +2528,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         struct cfq_queue *__cfqq, *next;

         if (unlikely(cfqq == cfqd->active_queue)) {
-                __cfq_slice_expired(cfqd, cfqq, 0);
+                __cfq_slice_expired(cfqd, cfqq, 0, false);
                 cfq_schedule_dispatch(cfqd);
         }

@@ -3143,7 +3157,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
         cfq_log_cfqq(cfqd, cfqq, "preempt");
-        cfq_slice_expired(cfqd, 1);
+        cfq_slice_expired(cfqd, 1, false);

         /*
          * Put the new queue at the front of the of the current list,
@@ -3191,7 +3205,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
         if (cfq_cfqq_wait_request(cfqq)) {
                 if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
                     cfqd->busy_queues > 1) {
-                        del_timer(&cfqd->idle_slice_timer);
+                        cfq_del_timer(cfqd, cfqq);
                         cfq_clear_cfqq_wait_request(cfqq);
                         __blk_run_queue(cfqd->queue);
                 } else
@@ -3352,7 +3366,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
          * - when there is a close cooperator
          */
         if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
-                cfq_slice_expired(cfqd, 1);
+                cfq_slice_expired(cfqd, 1, false);
         else if (sync && cfqq_empty &&
                  !cfq_close_cooperator(cfqd, cfqq)) {
                 cfqd->noidle_tree_requires_idle |= !rq_noidle(rq);
@@ -3612,7 +3626,7 @@ static void cfq_idle_slice_timer(unsigned long data)
                 cfq_clear_cfqq_deep(cfqq);
         }
 expire:
-        cfq_slice_expired(cfqd, timed_out);
+        cfq_slice_expired(cfqd, timed_out, false);
 out_kick:
         cfq_schedule_dispatch(cfqd);
 out_cont:
@@ -3655,7 +3669,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
         spin_lock_irq(q->queue_lock);

         if (cfqd->active_queue)
-                __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
+                __cfq_slice_expired(cfqd, cfqd->active_queue, 0, false);

         while (!list_empty(&cfqd->cic_list)) {
                 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,