author     Tejun Heo <tj@kernel.org>	2012-04-01 17:38:42 -0400
committer  Tejun Heo <tj@kernel.org>	2012-04-01 17:38:42 -0400
commit     edcb0722c654947908388df660791abd41e6617c
tree       8c44dd4517bd562b58216dd01fb5abc42cb74a95 /block
parent     2aa4a1523b40a065bc3a31e20097ea7a618ec3de
blkcg: introduce blkg_stat and blkg_rwstat
blkcg uses u64_stats_sync to avoid reading wrong u64 statistic values
on 32-bit archs, and some stat counters have subtypes to distinguish
reads/writes and sync/async IOs.  The stat code paths are confusing,
involve a lot of going back and forth between blkcg core and the
specific policy implementations, and synchronization and subtype
handling are open coded in blkcg core.
This patch introduces struct blkg_stat and blkg_rwstat which, with
accompanying operations, encapsulate stat updating and access with
proper synchronization.
blkg_stat is a simple u64 counter with 64-bit read-access protection.
blkg_rwstat is the variant with read/write and sync/async subcounters;
it takes @rw flags (%REQ_WRITE and %REQ_SYNC) to distinguish IO
subtypes and replaces the stat_sub_type indexed arrays.
All counters in blkio_group_stats and blkio_group_stats_cpu are
replaced with either blkg_stat or blkg_rwstat, along with all their
users.  This does add one u64_stats_sync per counter and increases the
number of stats_sync operations, but these are empty/no-ops on 64-bit
archs and blkcg doesn't have too many counters, especially with
DEBUG_BLK_CGROUP off.
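For reference, u64_stats_sync only carries a seqcount on 32-bit SMP;
elsewhere the begin/end calls compile to nothing, so blkg_stat_add()
reduces to a plain 64-bit add.  Roughly (paraphrased from
include/linux/u64_stats_sync.h of this era, not part of the patch):

	struct u64_stats_sync {
	#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
		seqcount_t	seq;	/* only 32-bit SMP pays for the seqlock */
	#endif
	};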
While the resulting code isn't necessarily simpler at the moment, this
will enable further cleanup of the blkcg stats code.
- BLKIO_STAT_{READ|WRITE|SYNC|ASYNC|TOTAL} renamed to
  BLKG_RWSTAT_{READ|WRITE|SYNC|ASYNC|TOTAL}.
- blkg_rwstat_add() replaces blkio_add_stat() and
  blkio_check_and_dec_stat().  Note that the BUG_ON() on underflow in
  the latter function no longer exists.  It's *way* better to have
  underflowed stat counters than to oops.
- blkio_group_stats->dequeue is now a proper u64 stat counter instead
  of a ulong.
- reset_stats() is updated to clear each stat counter individually, and
  BLKG_STATS_DEBUG_CLEAR_{START|SIZE} are removed.
- Some functions reconstruct rw flags from the direction and sync
  booleans.  This will be removed by future patches.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'block')
-rw-r--r--  block/blk-cgroup.c  289
-rw-r--r--  block/blk-cgroup.h  211
2 files changed, 293 insertions(+), 207 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 55ccbae6c434..09ac462ba89e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -132,46 +132,6 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg,
 	}
 }
 
-/*
- * Add to the appropriate stat variable depending on the request type.
- * This should be called with queue_lock held.
- */
-static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
-			   bool sync)
-{
-	if (direction)
-		stat[BLKIO_STAT_WRITE] += add;
-	else
-		stat[BLKIO_STAT_READ] += add;
-	if (sync)
-		stat[BLKIO_STAT_SYNC] += add;
-	else
-		stat[BLKIO_STAT_ASYNC] += add;
-}
-
-/*
- * Decrements the appropriate stat variable if non-zero depending on the
- * request type. Panics on value being zero.
- * This should be called with the queue_lock held.
- */
-static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
-{
-	if (direction) {
-		BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
-		stat[BLKIO_STAT_WRITE]--;
-	} else {
-		BUG_ON(stat[BLKIO_STAT_READ] == 0);
-		stat[BLKIO_STAT_READ]--;
-	}
-	if (sync) {
-		BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
-		stat[BLKIO_STAT_SYNC]--;
-	} else {
-		BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
-		stat[BLKIO_STAT_ASYNC]--;
-	}
-}
-
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 /* This should be called with the queue_lock held. */
 static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
@@ -198,7 +158,8 @@ static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
 
 	now = sched_clock();
 	if (time_after64(now, stats->start_group_wait_time))
-		stats->group_wait_time += now - stats->start_group_wait_time;
+		blkg_stat_add(&stats->group_wait_time,
+			      now - stats->start_group_wait_time);
 	blkio_clear_blkg_waiting(stats);
 }
 
@@ -212,7 +173,8 @@ static void blkio_end_empty_time(struct blkio_group_stats *stats)
 
 	now = sched_clock();
 	if (time_after64(now, stats->start_empty_time))
-		stats->empty_time += now - stats->start_empty_time;
+		blkg_stat_add(&stats->empty_time,
+			      now - stats->start_empty_time);
 	blkio_clear_blkg_empty(stats);
 }
 
@@ -239,11 +201,9 @@ void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
 	if (blkio_blkg_idling(stats)) {
 		unsigned long long now = sched_clock();
 
-		if (time_after64(now, stats->start_idle_time)) {
-			u64_stats_update_begin(&stats->syncp);
-			stats->idle_time += now - stats->start_idle_time;
-			u64_stats_update_end(&stats->syncp);
-		}
+		if (time_after64(now, stats->start_idle_time))
+			blkg_stat_add(&stats->idle_time,
+				      now - stats->start_idle_time);
 		blkio_clear_blkg_idling(stats);
 	}
 }
@@ -256,13 +216,10 @@ void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	stats->avg_queue_size_sum +=
-			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
-			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
-	stats->avg_queue_size_samples++;
+	blkg_stat_add(&stats->avg_queue_size_sum,
+		      blkg_rwstat_sum(&stats->queued));
+	blkg_stat_add(&stats->avg_queue_size_samples, 1);
 	blkio_update_group_wait_time(stats);
-	u64_stats_update_end(&stats->syncp);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
 
@@ -273,8 +230,7 @@ void blkiocg_set_start_empty_time(struct blkio_group *blkg,
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
-	    stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE])
+	if (blkg_rwstat_sum(&stats->queued))
 		return;
 
 	/*
@@ -298,7 +254,7 @@ void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	pd->stats.dequeue += dequeue;
+	blkg_stat_add(&pd->stats.dequeue, dequeue);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
 #else
@@ -314,14 +270,12 @@ void blkiocg_update_io_add_stats(struct blkio_group *blkg,
 				 bool sync)
 {
 	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	blkio_add_stat(stats->stat_arr[BLKIO_STAT_QUEUED], 1, direction, sync);
+	blkg_rwstat_add(&stats->queued, rw, 1);
 	blkio_end_empty_time(stats);
-	u64_stats_update_end(&stats->syncp);
-
 	blkio_set_start_group_wait_time(blkg, pol, curr_blkg);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
@@ -331,13 +285,11 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 				    bool direction, bool sync)
 {
 	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	blkio_check_and_dec_stat(stats->stat_arr[BLKIO_STAT_QUEUED], direction,
-				 sync);
-	u64_stats_update_end(&stats->syncp);
+	blkg_rwstat_add(&stats->queued, rw, -1);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
 
@@ -350,12 +302,10 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg,
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	stats->time += time;
+	blkg_stat_add(&stats->time, time);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-	stats->unaccounted_time += unaccounted_time;
+	blkg_stat_add(&stats->unaccounted_time, unaccounted_time);
 #endif
-	u64_stats_update_end(&stats->syncp);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
 
@@ -367,6 +317,7 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 				   struct blkio_policy_type *pol,
 				   uint64_t bytes, bool direction, bool sync)
 {
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 	struct blkg_policy_data *pd = blkg->pd[pol->plid];
 	struct blkio_group_stats_cpu *stats_cpu;
 	unsigned long flags;
@@ -384,13 +335,10 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 
 	stats_cpu = this_cpu_ptr(pd->stats_cpu);
 
-	u64_stats_update_begin(&stats_cpu->syncp);
-	stats_cpu->sectors += bytes >> 9;
-	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
-			1, direction, sync);
-	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
-			bytes, direction, sync);
-	u64_stats_update_end(&stats_cpu->syncp);
+	blkg_stat_add(&stats_cpu->sectors, bytes >> 9);
+	blkg_rwstat_add(&stats_cpu->serviced, rw, 1);
+	blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes);
+
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
@@ -403,17 +351,15 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg,
 {
 	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
 	unsigned long long now = sched_clock();
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
 	if (time_after64(now, io_start_time))
-		blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
-				now - io_start_time, direction, sync);
+		blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
 	if (time_after64(io_start_time, start_time))
-		blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
-				io_start_time - start_time, direction, sync);
-	u64_stats_update_end(&stats->syncp);
+		blkg_rwstat_add(&stats->wait_time, rw,
+				io_start_time - start_time);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
 
@@ -423,12 +369,11 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
 				    bool direction, bool sync)
 {
 	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	blkio_add_stat(stats->stat_arr[BLKIO_STAT_MERGED], 1, direction, sync);
-	u64_stats_update_end(&stats->syncp);
+	blkg_rwstat_add(&stats->merged, rw, 1);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 
@@ -757,8 +702,9 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
 		struct blkio_group_stats_cpu *sc =
 			per_cpu_ptr(pd->stats_cpu, cpu);
 
-		sc->sectors = 0;
-		memset(sc->stat_arr_cpu, 0, sizeof(sc->stat_arr_cpu));
+		blkg_rwstat_reset(&sc->service_bytes);
+		blkg_rwstat_reset(&sc->serviced);
+		blkg_stat_reset(&sc->sectors);
 	}
 }
 
@@ -768,7 +714,6 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
 	struct blkio_group *blkg;
 	struct hlist_node *n;
-	int i;
 
 	spin_lock(&blkio_list_lock);
 	spin_lock_irq(&blkcg->lock);
@@ -786,14 +731,18 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 			struct blkio_group_stats *stats = &pd->stats;
 
 			/* queued stats shouldn't be cleared */
-			for (i = 0; i < ARRAY_SIZE(stats->stat_arr); i++)
-				if (i != BLKIO_STAT_QUEUED)
-					memset(stats->stat_arr[i], 0,
-					       sizeof(stats->stat_arr[i]));
-			stats->time = 0;
+			blkg_rwstat_reset(&stats->merged);
+			blkg_rwstat_reset(&stats->service_time);
+			blkg_rwstat_reset(&stats->wait_time);
+			blkg_stat_reset(&stats->time);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-			memset((void *)stats + BLKG_STATS_DEBUG_CLEAR_START, 0,
-				BLKG_STATS_DEBUG_CLEAR_SIZE);
+			blkg_stat_reset(&stats->unaccounted_time);
+			blkg_stat_reset(&stats->avg_queue_size_sum);
+			blkg_stat_reset(&stats->avg_queue_size_samples);
+			blkg_stat_reset(&stats->dequeue);
+			blkg_stat_reset(&stats->group_wait_time);
+			blkg_stat_reset(&stats->idle_time);
+			blkg_stat_reset(&stats->empty_time);
 #endif
 			blkio_reset_stats_cpu(blkg, pol->plid);
 		}
@@ -804,7 +753,7 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	return 0;
 }
 
-static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
+static void blkio_get_key_name(enum blkg_rwstat_type type, const char *dname,
 			       char *str, int chars_left, bool diskname_only)
 {
 	snprintf(str, chars_left, "%s", dname);
@@ -817,19 +766,19 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
 	if (diskname_only)
 		return;
 	switch (type) {
-	case BLKIO_STAT_READ:
+	case BLKG_RWSTAT_READ:
 		strlcat(str, " Read", chars_left);
 		break;
-	case BLKIO_STAT_WRITE:
+	case BLKG_RWSTAT_WRITE:
 		strlcat(str, " Write", chars_left);
 		break;
-	case BLKIO_STAT_SYNC:
+	case BLKG_RWSTAT_SYNC:
 		strlcat(str, " Sync", chars_left);
 		break;
-	case BLKIO_STAT_ASYNC:
+	case BLKG_RWSTAT_ASYNC:
 		strlcat(str, " Async", chars_left);
 		break;
-	case BLKIO_STAT_TOTAL:
+	case BLKG_RWSTAT_TOTAL:
 		strlcat(str, " Total", chars_left);
 		break;
 	default:
@@ -838,29 +787,34 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
 	}
 }
 
 static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid,
-		enum stat_type_cpu type, enum stat_sub_type sub_type)
+				    enum stat_type_cpu type,
+				    enum blkg_rwstat_type sub_type)
 {
 	struct blkg_policy_data *pd = blkg->pd[plid];
+	u64 val = 0;
 	int cpu;
-	struct blkio_group_stats_cpu *stats_cpu;
-	u64 val = 0, tval;
 
 	if (pd->stats_cpu == NULL)
 		return val;
 
 	for_each_possible_cpu(cpu) {
-		unsigned int start;
-		stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu);
-
-		do {
-			start = u64_stats_fetch_begin(&stats_cpu->syncp);
-			if (type == BLKIO_STAT_CPU_SECTORS)
-				tval = stats_cpu->sectors;
-			else
-				tval = stats_cpu->stat_arr_cpu[type][sub_type];
-		} while(u64_stats_fetch_retry(&stats_cpu->syncp, start));
-
-		val += tval;
+		struct blkio_group_stats_cpu *stats_cpu =
+			per_cpu_ptr(pd->stats_cpu, cpu);
+		struct blkg_rwstat rws;
+
+		switch (type) {
+		case BLKIO_STAT_CPU_SECTORS:
+			val += blkg_stat_read(&stats_cpu->sectors);
+			break;
+		case BLKIO_STAT_CPU_SERVICE_BYTES:
+			rws = blkg_rwstat_read(&stats_cpu->service_bytes);
+			val += rws.cnt[sub_type];
+			break;
+		case BLKIO_STAT_CPU_SERVICED:
+			rws = blkg_rwstat_read(&stats_cpu->serviced);
+			val += rws.cnt[sub_type];
+			break;
+		}
 	}
 
 	return val;
@@ -872,7 +826,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
 {
 	uint64_t disk_total, val;
 	char key_str[MAX_KEY_LEN];
-	enum stat_sub_type sub_type;
+	enum blkg_rwstat_type sub_type;
 
 	if (type == BLKIO_STAT_CPU_SECTORS) {
 		val = blkio_read_stat_cpu(blkg, plid, type, 0);
@@ -881,7 +835,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
 		return val;
 	}
 
-	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
+	for (sub_type = BLKG_RWSTAT_READ; sub_type < BLKG_RWSTAT_NR;
 	     sub_type++) {
 		blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN,
 				   false);
@@ -889,10 +843,10 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
 		cb->fill(cb, key_str, val);
 	}
 
-	disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_READ) +
-		blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_WRITE);
+	disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_READ) +
+		blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_WRITE);
 
-	blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
+	blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN,
 			   false);
 	cb->fill(cb, key_str, disk_total);
 	return disk_total;
@@ -905,65 +859,76 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid,
 	struct blkio_group_stats *stats = &blkg->pd[plid]->stats;
 	uint64_t v = 0, disk_total = 0;
 	char key_str[MAX_KEY_LEN];
-	unsigned int sync_start;
+	struct blkg_rwstat rws = { };
 	int st;
 
 	if (type >= BLKIO_STAT_ARR_NR) {
-		do {
-			sync_start = u64_stats_fetch_begin(&stats->syncp);
-			switch (type) {
-			case BLKIO_STAT_TIME:
-				v = stats->time;
-				break;
+		switch (type) {
+		case BLKIO_STAT_TIME:
+			v = blkg_stat_read(&stats->time);
+			break;
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-			case BLKIO_STAT_UNACCOUNTED_TIME:
-				v = stats->unaccounted_time;
-				break;
-			case BLKIO_STAT_AVG_QUEUE_SIZE: {
-				uint64_t samples = stats->avg_queue_size_samples;
-
-				if (samples) {
-					v = stats->avg_queue_size_sum;
-					do_div(v, samples);
-				}
-				break;
-			}
-			case BLKIO_STAT_IDLE_TIME:
-				v = stats->idle_time;
-				break;
-			case BLKIO_STAT_EMPTY_TIME:
-				v = stats->empty_time;
-				break;
-			case BLKIO_STAT_DEQUEUE:
-				v = stats->dequeue;
-				break;
-			case BLKIO_STAT_GROUP_WAIT_TIME:
-				v = stats->group_wait_time;
-				break;
+		case BLKIO_STAT_UNACCOUNTED_TIME:
+			v = blkg_stat_read(&stats->unaccounted_time);
+			break;
+		case BLKIO_STAT_AVG_QUEUE_SIZE: {
+			uint64_t samples;
+
+			samples = blkg_stat_read(&stats->avg_queue_size_samples);
+			if (samples) {
+				v = blkg_stat_read(&stats->avg_queue_size_sum);
+				do_div(v, samples);
+			}
+			break;
+		}
+		case BLKIO_STAT_IDLE_TIME:
+			v = blkg_stat_read(&stats->idle_time);
+			break;
+		case BLKIO_STAT_EMPTY_TIME:
+			v = blkg_stat_read(&stats->empty_time);
+			break;
+		case BLKIO_STAT_DEQUEUE:
+			v = blkg_stat_read(&stats->dequeue);
+			break;
+		case BLKIO_STAT_GROUP_WAIT_TIME:
+			v = blkg_stat_read(&stats->group_wait_time);
+			break;
 #endif
-			default:
-				WARN_ON_ONCE(1);
-			}
-		} while (u64_stats_fetch_retry(&stats->syncp, sync_start));
+		default:
+			WARN_ON_ONCE(1);
+		}
 
 		blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true);
 		cb->fill(cb, key_str, v);
 		return v;
 	}
 
-	for (st = BLKIO_STAT_READ; st < BLKIO_STAT_TOTAL; st++) {
-		do {
-			sync_start = u64_stats_fetch_begin(&stats->syncp);
-			v = stats->stat_arr[type][st];
-		} while (u64_stats_fetch_retry(&stats->syncp, sync_start));
+	switch (type) {
+	case BLKIO_STAT_MERGED:
+		rws = blkg_rwstat_read(&stats->merged);
+		break;
+	case BLKIO_STAT_SERVICE_TIME:
+		rws = blkg_rwstat_read(&stats->service_time);
+		break;
+	case BLKIO_STAT_WAIT_TIME:
+		rws = blkg_rwstat_read(&stats->wait_time);
+		break;
+	case BLKIO_STAT_QUEUED:
+		rws = blkg_rwstat_read(&stats->queued);
+		break;
+	default:
+		WARN_ON_ONCE(true);
+		break;
+	}
 
+	for (st = BLKG_RWSTAT_READ; st < BLKG_RWSTAT_NR; st++) {
 		blkio_get_key_name(st, dname, key_str, MAX_KEY_LEN, false);
-		cb->fill(cb, key_str, v);
-		if (st == BLKIO_STAT_READ || st == BLKIO_STAT_WRITE)
-			disk_total += v;
+		cb->fill(cb, key_str, rws.cnt[st]);
+		if (st == BLKG_RWSTAT_READ || st == BLKG_RWSTAT_WRITE)
+			disk_total += rws.cnt[st];
 	}
 
-	blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
+	blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN,
 			   false);
 	cb->fill(cb, key_str, disk_total);
 	return disk_total;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 2060d812ae8e..7578df3938b4 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -69,12 +69,14 @@ enum stat_type_cpu {
 
 #define BLKIO_STAT_CPU_ARR_NR (BLKIO_STAT_CPU_SERVICED + 1)
 
-enum stat_sub_type {
-	BLKIO_STAT_READ = 0,
-	BLKIO_STAT_WRITE,
-	BLKIO_STAT_SYNC,
-	BLKIO_STAT_ASYNC,
-	BLKIO_STAT_TOTAL
+enum blkg_rwstat_type {
+	BLKG_RWSTAT_READ,
+	BLKG_RWSTAT_WRITE,
+	BLKG_RWSTAT_SYNC,
+	BLKG_RWSTAT_ASYNC,
+
+	BLKG_RWSTAT_NR,
+	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
 };
 
 /* blkg state flags */
@@ -124,54 +126,58 @@ struct blkio_cgroup {
 	uint64_t id;
 };
 
+struct blkg_stat {
+	struct u64_stats_sync syncp;
+	uint64_t cnt;
+};
+
+struct blkg_rwstat {
+	struct u64_stats_sync syncp;
+	uint64_t cnt[BLKG_RWSTAT_NR];
+};
+
 struct blkio_group_stats {
-	struct u64_stats_sync syncp;
+	/* number of ios merged */
+	struct blkg_rwstat merged;
+	/* total time spent on device in ns, may not be accurate w/ queueing */
+	struct blkg_rwstat service_time;
+	/* total time spent waiting in scheduler queue in ns */
+	struct blkg_rwstat wait_time;
+	/* number of IOs queued up */
+	struct blkg_rwstat queued;
 	/* total disk time and nr sectors dispatched by this group */
-	uint64_t time;
-	uint64_t stat_arr[BLKIO_STAT_ARR_NR][BLKIO_STAT_TOTAL];
+	struct blkg_stat time;
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-	/* Time not charged to this cgroup */
-	uint64_t unaccounted_time;
-
-	/* Sum of number of IOs queued across all samples */
-	uint64_t avg_queue_size_sum;
-	/* Count of samples taken for average */
-	uint64_t avg_queue_size_samples;
-	/* How many times this group has been removed from service tree */
-	unsigned long dequeue;
-
-	/* Total time spent waiting for it to be assigned a timeslice. */
-	uint64_t group_wait_time;
-
-	/* Time spent idling for this blkio_group */
-	uint64_t idle_time;
-	/*
-	 * Total time when we have requests queued and do not contain the
-	 * current active queue.
-	 */
-	uint64_t empty_time;
-
+	/* time not charged to this cgroup */
+	struct blkg_stat unaccounted_time;
+	/* sum of number of ios queued across all samples */
+	struct blkg_stat avg_queue_size_sum;
+	/* count of samples taken for average */
+	struct blkg_stat avg_queue_size_samples;
+	/* how many times this group has been removed from service tree */
+	struct blkg_stat dequeue;
+	/* total time spent waiting for it to be assigned a timeslice. */
+	struct blkg_stat group_wait_time;
+	/* time spent idling for this blkio_group */
+	struct blkg_stat idle_time;
+	/* total time with empty current active q with other requests queued */
+	struct blkg_stat empty_time;
 	/* fields after this shouldn't be cleared on stat reset */
 	uint64_t start_group_wait_time;
 	uint64_t start_idle_time;
 	uint64_t start_empty_time;
 	uint16_t flags;
 #endif
 };
 
-#ifdef CONFIG_DEBUG_BLK_CGROUP
-#define BLKG_STATS_DEBUG_CLEAR_START \
-	offsetof(struct blkio_group_stats, unaccounted_time)
-#define BLKG_STATS_DEBUG_CLEAR_SIZE \
-	(offsetof(struct blkio_group_stats, start_group_wait_time) - \
-	BLKG_STATS_DEBUG_CLEAR_START)
-#endif
-
 /* Per cpu blkio group stats */
 struct blkio_group_stats_cpu {
-	uint64_t sectors;
-	uint64_t stat_arr_cpu[BLKIO_STAT_CPU_ARR_NR][BLKIO_STAT_TOTAL];
-	struct u64_stats_sync syncp;
+	/* total bytes transferred */
+	struct blkg_rwstat service_bytes;
+	/* total IOs serviced, post merge */
+	struct blkg_rwstat serviced;
+	/* total sectors transferred */
+	struct blkg_stat sectors;
 };
 
 struct blkio_group_conf {
@@ -316,6 +322,121 @@ static inline void blkg_put(struct blkio_group *blkg)
 		__blkg_release(blkg);
 }
 
+/**
+ * blkg_stat_add - add a value to a blkg_stat
+ * @stat: target blkg_stat
+ * @val: value to add
+ *
+ * Add @val to @stat.  The caller is responsible for synchronizing calls to
+ * this function.
+ */
+static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
+{
+	u64_stats_update_begin(&stat->syncp);
+	stat->cnt += val;
+	u64_stats_update_end(&stat->syncp);
+}
+
+/**
+ * blkg_stat_read - read the current value of a blkg_stat
+ * @stat: blkg_stat to read
+ *
+ * Read the current value of @stat.  This function can be called without
+ * synchronization and takes care of u64 atomicity.
+ */
+static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
+{
+	unsigned int start;
+	uint64_t v;
+
+	do {
+		start = u64_stats_fetch_begin(&stat->syncp);
+		v = stat->cnt;
+	} while (u64_stats_fetch_retry(&stat->syncp, start));
+
+	return v;
+}
+
+/**
+ * blkg_stat_reset - reset a blkg_stat
+ * @stat: blkg_stat to reset
+ */
+static inline void blkg_stat_reset(struct blkg_stat *stat)
+{
+	stat->cnt = 0;
+}
+
+/**
+ * blkg_rwstat_add - add a value to a blkg_rwstat
+ * @rwstat: target blkg_rwstat
+ * @rw: mask of REQ_{WRITE|SYNC}
+ * @val: value to add
+ *
+ * Add @val to @rwstat.  The counters are chosen according to @rw.  The
+ * caller is responsible for synchronizing calls to this function.
+ */
+static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
+				   int rw, uint64_t val)
+{
+	u64_stats_update_begin(&rwstat->syncp);
+
+	if (rw & REQ_WRITE)
+		rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
+	else
+		rwstat->cnt[BLKG_RWSTAT_READ] += val;
+	if (rw & REQ_SYNC)
+		rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
+	else
+		rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
+
+	u64_stats_update_end(&rwstat->syncp);
+}
+
+/**
+ * blkg_rwstat_read - read the current values of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Read the current snapshot of @rwstat and return it as the return value.
+ * This function can be called without synchronization and takes care of
+ * u64 atomicity.
+ */
+static struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
+{
+	unsigned int start;
+	struct blkg_rwstat tmp;
+
+	do {
+		start = u64_stats_fetch_begin(&rwstat->syncp);
+		tmp = *rwstat;
+	} while (u64_stats_fetch_retry(&rwstat->syncp, start));
+
+	return tmp;
+}
+
+/**
+ * blkg_rwstat_sum - read the total count of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Return the total count of @rwstat regardless of the IO direction.  This
+ * function can be called without synchronization and takes care of u64
+ * atomicity.
+ */
+static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat)
+{
+	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
+
+	return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
+}
+
+/**
+ * blkg_rwstat_reset - reset a blkg_rwstat
+ * @rwstat: blkg_rwstat to reset
+ */
+static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
+{
+	memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
+}
+
 #else
 
 struct blkio_group {