author		Tejun Heo <tj@kernel.org>	2012-04-01 17:38:42 -0400
committer	Tejun Heo <tj@kernel.org>	2012-04-01 17:38:42 -0400
commit		edcb0722c654947908388df660791abd41e6617c (patch)
tree		8c44dd4517bd562b58216dd01fb5abc42cb74a95
parent		2aa4a1523b40a065bc3a31e20097ea7a618ec3de (diff)
blkcg: introduce blkg_stat and blkg_rwstat
blkcg uses u64_stats_sync to avoid reading wrong u64 statistic values
on 32bit archs and some stat counters have subtypes to distinguish
read/writes and sync/async IOs.  The stat code paths are confusing and
involve a lot of going back and forth between blkcg core and specific
policy implementations, and synchronization and subtype handling are
open coded in blkcg core.

This patch introduces struct blkg_stat and blkg_rwstat which, with
accompanying operations, encapsulate stat updating and accessing with
proper synchronization.

blkg_stat is simple u64 counter with 64bit read-access protection.
blkg_rwstat is the one with rw and [a]sync subcounters and takes @rw
flags to distinguish IO subtypes (%REQ_WRITE and %REQ_SYNC) and
replaces stat_sub_type indexed arrays.

All counters in blkio_group_stats and blkio_group_stats_cpu are
replaced with either blkg_stat or blkg_rwstat along with all users.

This does add one u64_stats_sync per counter and increase stats_sync
operations but they're empty/noops on 64bit archs and blkcg doesn't
have too many counters, especially with DEBUG_BLK_CGROUP off.  While
the currently resulting code isn't necessarily simpler at the moment,
this will enable further clean up of blkcg stats code.

- BLKIO_STAT_{READ|WRITE|SYNC|ASYNC|TOTAL} renamed to
  BLKG_RWSTAT_{READ|WRITE|SYNC|ASYNC|TOTAL}.

- blkg_stat_add() replaces blkio_add_stat() and
  blkio_check_and_dec_stat().  Note that BUG_ON() on underflow in the
  latter function no longer exists.  It's *way* better to have
  underflowed stat counters than oopsing.

- blkio_group_stats->dequeue is now a proper u64 stat counter instead
  of ulong.

- reset_stats() updated to clear each stat counters individually and
  BLKG_STATS_DEBUG_CLEAR_{START|SIZE} are removed.

- Some functions reconstruct rw flags from direction and sync booleans.
  This will be removed by future patches.

Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r--	block/blk-cgroup.c	289
-rw-r--r--	block/blk-cgroup.h	211
2 files changed, 293 insertions, 207 deletions
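For orientation before reading the diff, the accounting model behind the new
helpers can be sketched in plain user-space C: a blkg_rwstat keeps one counter
per READ/WRITE/SYNC/ASYNC subtype, each update bumps one counter of each pair
based on the @rw flags, and blkg_rwstat_sum() is simply READ + WRITE.  The
sketch below is an approximation only, not kernel code: the REQ_WRITE/REQ_SYNC
values are stand-ins, and the u64_stats_sync protection that the real helpers
wrap around updates and reads (a no-op on 64bit archs) is left out.

/* Stand-alone model of blkg_rwstat accounting (illustration only; flag
 * values are stand-ins and the kernel's u64_stats_sync protection, which
 * only matters on 32-bit archs, is omitted). */
#include <stdint.h>
#include <stdio.h>

enum { RWSTAT_READ, RWSTAT_WRITE, RWSTAT_SYNC, RWSTAT_ASYNC, RWSTAT_NR };

#define MY_REQ_WRITE	(1 << 0)	/* stand-in for REQ_WRITE */
#define MY_REQ_SYNC	(1 << 1)	/* stand-in for REQ_SYNC */

struct rwstat {
	uint64_t cnt[RWSTAT_NR];
};

/* mirrors blkg_rwstat_add(): pick READ/WRITE and SYNC/ASYNC from @rw */
static void rwstat_add(struct rwstat *s, int rw, uint64_t val)
{
	s->cnt[(rw & MY_REQ_WRITE) ? RWSTAT_WRITE : RWSTAT_READ] += val;
	s->cnt[(rw & MY_REQ_SYNC) ? RWSTAT_SYNC : RWSTAT_ASYNC] += val;
}

/* mirrors blkg_rwstat_sum(): total regardless of direction */
static uint64_t rwstat_sum(const struct rwstat *s)
{
	return s->cnt[RWSTAT_READ] + s->cnt[RWSTAT_WRITE];
}

int main(void)
{
	struct rwstat serviced = { { 0 } };

	rwstat_add(&serviced, MY_REQ_WRITE | MY_REQ_SYNC, 1);	/* one sync write */
	rwstat_add(&serviced, 0, 1);				/* one async read */

	printf("read=%llu write=%llu sync=%llu async=%llu total=%llu\n",
	       (unsigned long long)serviced.cnt[RWSTAT_READ],
	       (unsigned long long)serviced.cnt[RWSTAT_WRITE],
	       (unsigned long long)serviced.cnt[RWSTAT_SYNC],
	       (unsigned long long)serviced.cnt[RWSTAT_ASYNC],
	       (unsigned long long)rwstat_sum(&serviced));
	return 0;
}

With that picture in mind, the per-counter helpers added to blk-cgroup.h in the
patch below (blkg_stat_add/read/reset and their rwstat counterparts) read as
thin, properly synchronized versions of the same idea.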
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 55ccbae6c434..09ac462ba89e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -132,46 +132,6 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg,
 	}
 }
 
-/*
- * Add to the appropriate stat variable depending on the request type.
- * This should be called with queue_lock held.
- */
-static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
-			   bool sync)
-{
-	if (direction)
-		stat[BLKIO_STAT_WRITE] += add;
-	else
-		stat[BLKIO_STAT_READ] += add;
-	if (sync)
-		stat[BLKIO_STAT_SYNC] += add;
-	else
-		stat[BLKIO_STAT_ASYNC] += add;
-}
-
-/*
- * Decrements the appropriate stat variable if non-zero depending on the
- * request type. Panics on value being zero.
- * This should be called with the queue_lock held.
- */
-static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
-{
-	if (direction) {
-		BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
-		stat[BLKIO_STAT_WRITE]--;
-	} else {
-		BUG_ON(stat[BLKIO_STAT_READ] == 0);
-		stat[BLKIO_STAT_READ]--;
-	}
-	if (sync) {
-		BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
-		stat[BLKIO_STAT_SYNC]--;
-	} else {
-		BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
-		stat[BLKIO_STAT_ASYNC]--;
-	}
-}
-
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 /* This should be called with the queue_lock held. */
 static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
@@ -198,7 +158,8 @@ static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
 
 	now = sched_clock();
 	if (time_after64(now, stats->start_group_wait_time))
-		stats->group_wait_time += now - stats->start_group_wait_time;
+		blkg_stat_add(&stats->group_wait_time,
+			      now - stats->start_group_wait_time);
 	blkio_clear_blkg_waiting(stats);
 }
 
@@ -212,7 +173,8 @@ static void blkio_end_empty_time(struct blkio_group_stats *stats)
 
 	now = sched_clock();
 	if (time_after64(now, stats->start_empty_time))
-		stats->empty_time += now - stats->start_empty_time;
+		blkg_stat_add(&stats->empty_time,
+			      now - stats->start_empty_time);
 	blkio_clear_blkg_empty(stats);
 }
 
@@ -239,11 +201,9 @@ void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
 	if (blkio_blkg_idling(stats)) {
 		unsigned long long now = sched_clock();
 
-		if (time_after64(now, stats->start_idle_time)) {
-			u64_stats_update_begin(&stats->syncp);
-			stats->idle_time += now - stats->start_idle_time;
-			u64_stats_update_end(&stats->syncp);
-		}
+		if (time_after64(now, stats->start_idle_time))
+			blkg_stat_add(&stats->idle_time,
+				      now - stats->start_idle_time);
 		blkio_clear_blkg_idling(stats);
 	}
 }
@@ -256,13 +216,10 @@ void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	stats->avg_queue_size_sum +=
-		stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
-		stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
-	stats->avg_queue_size_samples++;
+	blkg_stat_add(&stats->avg_queue_size_sum,
+		      blkg_rwstat_sum(&stats->queued));
+	blkg_stat_add(&stats->avg_queue_size_samples, 1);
 	blkio_update_group_wait_time(stats);
-	u64_stats_update_end(&stats->syncp);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
 
@@ -273,8 +230,7 @@ void blkiocg_set_start_empty_time(struct blkio_group *blkg,
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
-	    stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE])
+	if (blkg_rwstat_sum(&stats->queued))
 		return;
 
 	/*
@@ -298,7 +254,7 @@ void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	pd->stats.dequeue += dequeue;
+	blkg_stat_add(&pd->stats.dequeue, dequeue);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
 #else
@@ -314,14 +270,12 @@ void blkiocg_update_io_add_stats(struct blkio_group *blkg,
 				 bool sync)
 {
 	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	blkio_add_stat(stats->stat_arr[BLKIO_STAT_QUEUED], 1, direction, sync);
+	blkg_rwstat_add(&stats->queued, rw, 1);
 	blkio_end_empty_time(stats);
-	u64_stats_update_end(&stats->syncp);
-
 	blkio_set_start_group_wait_time(blkg, pol, curr_blkg);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
@@ -331,13 +285,11 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 				    bool direction, bool sync)
 {
 	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	blkio_check_and_dec_stat(stats->stat_arr[BLKIO_STAT_QUEUED], direction,
-				 sync);
-	u64_stats_update_end(&stats->syncp);
+	blkg_rwstat_add(&stats->queued, rw, -1);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
 
@@ -350,12 +302,10 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg,
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	stats->time += time;
+	blkg_stat_add(&stats->time, time);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-	stats->unaccounted_time += unaccounted_time;
+	blkg_stat_add(&stats->unaccounted_time, unaccounted_time);
 #endif
-	u64_stats_update_end(&stats->syncp);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
 
@@ -367,6 +317,7 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 				   struct blkio_policy_type *pol,
 				   uint64_t bytes, bool direction, bool sync)
 {
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 	struct blkg_policy_data *pd = blkg->pd[pol->plid];
 	struct blkio_group_stats_cpu *stats_cpu;
 	unsigned long flags;
@@ -384,13 +335,10 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 
 	stats_cpu = this_cpu_ptr(pd->stats_cpu);
 
-	u64_stats_update_begin(&stats_cpu->syncp);
-	stats_cpu->sectors += bytes >> 9;
-	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
-			1, direction, sync);
-	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
-			bytes, direction, sync);
-	u64_stats_update_end(&stats_cpu->syncp);
+	blkg_stat_add(&stats_cpu->sectors, bytes >> 9);
+	blkg_rwstat_add(&stats_cpu->serviced, rw, 1);
+	blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes);
+
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
@@ -403,17 +351,15 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg,
 {
 	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
 	unsigned long long now = sched_clock();
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
 	if (time_after64(now, io_start_time))
-		blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
-				now - io_start_time, direction, sync);
+		blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
 	if (time_after64(io_start_time, start_time))
-		blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
-				io_start_time - start_time, direction, sync);
-	u64_stats_update_end(&stats->syncp);
+		blkg_rwstat_add(&stats->wait_time, rw,
+				io_start_time - start_time);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
 
@@ -423,12 +369,11 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
 				    bool direction, bool sync)
 {
 	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
+	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
 
 	lockdep_assert_held(blkg->q->queue_lock);
 
-	u64_stats_update_begin(&stats->syncp);
-	blkio_add_stat(stats->stat_arr[BLKIO_STAT_MERGED], 1, direction, sync);
-	u64_stats_update_end(&stats->syncp);
+	blkg_rwstat_add(&stats->merged, rw, 1);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 
@@ -757,8 +702,9 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
 		struct blkio_group_stats_cpu *sc =
 			per_cpu_ptr(pd->stats_cpu, cpu);
 
-		sc->sectors = 0;
-		memset(sc->stat_arr_cpu, 0, sizeof(sc->stat_arr_cpu));
+		blkg_rwstat_reset(&sc->service_bytes);
+		blkg_rwstat_reset(&sc->serviced);
+		blkg_stat_reset(&sc->sectors);
 	}
 }
 
@@ -768,7 +714,6 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
 	struct blkio_group *blkg;
 	struct hlist_node *n;
-	int i;
 
 	spin_lock(&blkio_list_lock);
 	spin_lock_irq(&blkcg->lock);
@@ -786,14 +731,18 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 			struct blkio_group_stats *stats = &pd->stats;
 
 			/* queued stats shouldn't be cleared */
-			for (i = 0; i < ARRAY_SIZE(stats->stat_arr); i++)
-				if (i != BLKIO_STAT_QUEUED)
-					memset(stats->stat_arr[i], 0,
-					       sizeof(stats->stat_arr[i]));
-			stats->time = 0;
+			blkg_rwstat_reset(&stats->merged);
+			blkg_rwstat_reset(&stats->service_time);
+			blkg_rwstat_reset(&stats->wait_time);
+			blkg_stat_reset(&stats->time);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-			memset((void *)stats + BLKG_STATS_DEBUG_CLEAR_START, 0,
-			       BLKG_STATS_DEBUG_CLEAR_SIZE);
+			blkg_stat_reset(&stats->unaccounted_time);
+			blkg_stat_reset(&stats->avg_queue_size_sum);
+			blkg_stat_reset(&stats->avg_queue_size_samples);
+			blkg_stat_reset(&stats->dequeue);
+			blkg_stat_reset(&stats->group_wait_time);
+			blkg_stat_reset(&stats->idle_time);
+			blkg_stat_reset(&stats->empty_time);
#endif
 			blkio_reset_stats_cpu(blkg, pol->plid);
 		}
@@ -804,7 +753,7 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 	return 0;
 }
 
-static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
+static void blkio_get_key_name(enum blkg_rwstat_type type, const char *dname,
 			       char *str, int chars_left, bool diskname_only)
 {
 	snprintf(str, chars_left, "%s", dname);
@@ -817,19 +766,19 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
 	if (diskname_only)
 		return;
 	switch (type) {
-	case BLKIO_STAT_READ:
+	case BLKG_RWSTAT_READ:
 		strlcat(str, " Read", chars_left);
 		break;
-	case BLKIO_STAT_WRITE:
+	case BLKG_RWSTAT_WRITE:
 		strlcat(str, " Write", chars_left);
 		break;
-	case BLKIO_STAT_SYNC:
+	case BLKG_RWSTAT_SYNC:
 		strlcat(str, " Sync", chars_left);
 		break;
-	case BLKIO_STAT_ASYNC:
+	case BLKG_RWSTAT_ASYNC:
 		strlcat(str, " Async", chars_left);
 		break;
-	case BLKIO_STAT_TOTAL:
+	case BLKG_RWSTAT_TOTAL:
 		strlcat(str, " Total", chars_left);
 		break;
 	default:
@@ -838,29 +787,34 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
 }
 
 static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid,
-			enum stat_type_cpu type, enum stat_sub_type sub_type)
+				    enum stat_type_cpu type,
+				    enum blkg_rwstat_type sub_type)
 {
 	struct blkg_policy_data *pd = blkg->pd[plid];
+	u64 val = 0;
 	int cpu;
-	struct blkio_group_stats_cpu *stats_cpu;
-	u64 val = 0, tval;
 
 	if (pd->stats_cpu == NULL)
 		return val;
 
 	for_each_possible_cpu(cpu) {
-		unsigned int start;
-		stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu);
-
-		do {
-			start = u64_stats_fetch_begin(&stats_cpu->syncp);
-			if (type == BLKIO_STAT_CPU_SECTORS)
-				tval = stats_cpu->sectors;
-			else
-				tval = stats_cpu->stat_arr_cpu[type][sub_type];
-		} while(u64_stats_fetch_retry(&stats_cpu->syncp, start));
-
-		val += tval;
+		struct blkio_group_stats_cpu *stats_cpu =
+			per_cpu_ptr(pd->stats_cpu, cpu);
+		struct blkg_rwstat rws;
+
+		switch (type) {
+		case BLKIO_STAT_CPU_SECTORS:
+			val += blkg_stat_read(&stats_cpu->sectors);
+			break;
+		case BLKIO_STAT_CPU_SERVICE_BYTES:
+			rws = blkg_rwstat_read(&stats_cpu->service_bytes);
+			val += rws.cnt[sub_type];
+			break;
+		case BLKIO_STAT_CPU_SERVICED:
+			rws = blkg_rwstat_read(&stats_cpu->serviced);
+			val += rws.cnt[sub_type];
+			break;
+		}
 	}
 
 	return val;
@@ -872,7 +826,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
 {
 	uint64_t disk_total, val;
 	char key_str[MAX_KEY_LEN];
-	enum stat_sub_type sub_type;
+	enum blkg_rwstat_type sub_type;
 
 	if (type == BLKIO_STAT_CPU_SECTORS) {
 		val = blkio_read_stat_cpu(blkg, plid, type, 0);
@@ -881,7 +835,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
 		return val;
 	}
 
-	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
+	for (sub_type = BLKG_RWSTAT_READ; sub_type < BLKG_RWSTAT_NR;
 	     sub_type++) {
 		blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN,
 				   false);
@@ -889,10 +843,10 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
 		cb->fill(cb, key_str, val);
 	}
 
-	disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_READ) +
-		     blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_WRITE);
+	disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_READ) +
+		     blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_WRITE);
 
-	blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
+	blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN,
 			   false);
 	cb->fill(cb, key_str, disk_total);
 	return disk_total;
@@ -905,65 +859,76 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid,
 	struct blkio_group_stats *stats = &blkg->pd[plid]->stats;
 	uint64_t v = 0, disk_total = 0;
 	char key_str[MAX_KEY_LEN];
-	unsigned int sync_start;
+	struct blkg_rwstat rws = { };
 	int st;
 
 	if (type >= BLKIO_STAT_ARR_NR) {
-		do {
-			sync_start = u64_stats_fetch_begin(&stats->syncp);
-			switch (type) {
-			case BLKIO_STAT_TIME:
-				v = stats->time;
-				break;
+		switch (type) {
+		case BLKIO_STAT_TIME:
+			v = blkg_stat_read(&stats->time);
+			break;
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-			case BLKIO_STAT_UNACCOUNTED_TIME:
-				v = stats->unaccounted_time;
-				break;
-			case BLKIO_STAT_AVG_QUEUE_SIZE: {
-				uint64_t samples = stats->avg_queue_size_samples;
+		case BLKIO_STAT_UNACCOUNTED_TIME:
+			v = blkg_stat_read(&stats->unaccounted_time);
+			break;
+		case BLKIO_STAT_AVG_QUEUE_SIZE: {
+			uint64_t samples;
 
-				if (samples) {
-					v = stats->avg_queue_size_sum;
-					do_div(v, samples);
-				}
-				break;
-			}
-			case BLKIO_STAT_IDLE_TIME:
-				v = stats->idle_time;
-				break;
-			case BLKIO_STAT_EMPTY_TIME:
-				v = stats->empty_time;
-				break;
-			case BLKIO_STAT_DEQUEUE:
-				v = stats->dequeue;
-				break;
-			case BLKIO_STAT_GROUP_WAIT_TIME:
-				v = stats->group_wait_time;
-				break;
+			samples = blkg_stat_read(&stats->avg_queue_size_samples);
+			if (samples) {
+				v = blkg_stat_read(&stats->avg_queue_size_sum);
+				do_div(v, samples);
+			}
+			break;
+		}
+		case BLKIO_STAT_IDLE_TIME:
+			v = blkg_stat_read(&stats->idle_time);
+			break;
+		case BLKIO_STAT_EMPTY_TIME:
+			v = blkg_stat_read(&stats->empty_time);
+			break;
+		case BLKIO_STAT_DEQUEUE:
+			v = blkg_stat_read(&stats->dequeue);
+			break;
+		case BLKIO_STAT_GROUP_WAIT_TIME:
+			v = blkg_stat_read(&stats->group_wait_time);
+			break;
#endif
 		default:
 			WARN_ON_ONCE(1);
 		}
-		} while (u64_stats_fetch_retry(&stats->syncp, sync_start));
 
 		blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true);
 		cb->fill(cb, key_str, v);
 		return v;
 	}
 
-	for (st = BLKIO_STAT_READ; st < BLKIO_STAT_TOTAL; st++) {
-		do {
-			sync_start = u64_stats_fetch_begin(&stats->syncp);
-			v = stats->stat_arr[type][st];
-		} while (u64_stats_fetch_retry(&stats->syncp, sync_start));
+	switch (type) {
+	case BLKIO_STAT_MERGED:
+		rws = blkg_rwstat_read(&stats->merged);
+		break;
+	case BLKIO_STAT_SERVICE_TIME:
+		rws = blkg_rwstat_read(&stats->service_time);
+		break;
+	case BLKIO_STAT_WAIT_TIME:
+		rws = blkg_rwstat_read(&stats->wait_time);
+		break;
+	case BLKIO_STAT_QUEUED:
+		rws = blkg_rwstat_read(&stats->queued);
+		break;
+	default:
+		WARN_ON_ONCE(true);
+		break;
+	}
 
+	for (st = BLKG_RWSTAT_READ; st < BLKG_RWSTAT_NR; st++) {
 		blkio_get_key_name(st, dname, key_str, MAX_KEY_LEN, false);
-		cb->fill(cb, key_str, v);
-		if (st == BLKIO_STAT_READ || st == BLKIO_STAT_WRITE)
-			disk_total += v;
+		cb->fill(cb, key_str, rws.cnt[st]);
+		if (st == BLKG_RWSTAT_READ || st == BLKG_RWSTAT_WRITE)
+			disk_total += rws.cnt[st];
 	}
 
-	blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
+	blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN,
 			   false);
 	cb->fill(cb, key_str, disk_total);
 	return disk_total;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 2060d812ae8e..7578df3938b4 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -69,12 +69,14 @@ enum stat_type_cpu {
 
 #define BLKIO_STAT_CPU_ARR_NR	(BLKIO_STAT_CPU_SERVICED + 1)
 
-enum stat_sub_type {
-	BLKIO_STAT_READ = 0,
-	BLKIO_STAT_WRITE,
-	BLKIO_STAT_SYNC,
-	BLKIO_STAT_ASYNC,
-	BLKIO_STAT_TOTAL
+enum blkg_rwstat_type {
+	BLKG_RWSTAT_READ,
+	BLKG_RWSTAT_WRITE,
+	BLKG_RWSTAT_SYNC,
+	BLKG_RWSTAT_ASYNC,
+
+	BLKG_RWSTAT_NR,
+	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
 };
 
 /* blkg state flags */
@@ -124,54 +126,58 @@ struct blkio_cgroup {
 	uint64_t id;
 };
 
+struct blkg_stat {
+	struct u64_stats_sync		syncp;
+	uint64_t			cnt;
+};
+
+struct blkg_rwstat {
+	struct u64_stats_sync		syncp;
+	uint64_t			cnt[BLKG_RWSTAT_NR];
+};
+
 struct blkio_group_stats {
-	struct u64_stats_sync syncp;
+	/* number of ios merged */
+	struct blkg_rwstat		merged;
+	/* total time spent on device in ns, may not be accurate w/ queueing */
+	struct blkg_rwstat		service_time;
+	/* total time spent waiting in scheduler queue in ns */
+	struct blkg_rwstat		wait_time;
+	/* number of IOs queued up */
+	struct blkg_rwstat		queued;
 	/* total disk time and nr sectors dispatched by this group */
-	uint64_t time;
-	uint64_t stat_arr[BLKIO_STAT_ARR_NR][BLKIO_STAT_TOTAL];
+	struct blkg_stat		time;
 #ifdef CONFIG_DEBUG_BLK_CGROUP
-	/* Time not charged to this cgroup */
-	uint64_t unaccounted_time;
-
-	/* Sum of number of IOs queued across all samples */
-	uint64_t avg_queue_size_sum;
-	/* Count of samples taken for average */
-	uint64_t avg_queue_size_samples;
-	/* How many times this group has been removed from service tree */
-	unsigned long dequeue;
-
-	/* Total time spent waiting for it to be assigned a timeslice. */
-	uint64_t group_wait_time;
-
-	/* Time spent idling for this blkio_group */
-	uint64_t idle_time;
-	/*
-	 * Total time when we have requests queued and do not contain the
-	 * current active queue.
-	 */
-	uint64_t empty_time;
-
+	/* time not charged to this cgroup */
+	struct blkg_stat		unaccounted_time;
+	/* sum of number of ios queued across all samples */
+	struct blkg_stat		avg_queue_size_sum;
+	/* count of samples taken for average */
+	struct blkg_stat		avg_queue_size_samples;
+	/* how many times this group has been removed from service tree */
+	struct blkg_stat		dequeue;
+	/* total time spent waiting for it to be assigned a timeslice. */
+	struct blkg_stat		group_wait_time;
+	/* time spent idling for this blkio_group */
+	struct blkg_stat		idle_time;
+	/* total time with empty current active q with other requests queued */
+	struct blkg_stat		empty_time;
 	/* fields after this shouldn't be cleared on stat reset */
 	uint64_t start_group_wait_time;
 	uint64_t start_idle_time;
 	uint64_t start_empty_time;
 	uint16_t flags;
 #endif
 };
 
-#ifdef CONFIG_DEBUG_BLK_CGROUP
-#define BLKG_STATS_DEBUG_CLEAR_START	\
-	offsetof(struct blkio_group_stats, unaccounted_time)
-#define BLKG_STATS_DEBUG_CLEAR_SIZE	\
-	(offsetof(struct blkio_group_stats, start_group_wait_time) - \
-	 BLKG_STATS_DEBUG_CLEAR_START)
-#endif
-
 /* Per cpu blkio group stats */
 struct blkio_group_stats_cpu {
-	uint64_t sectors;
-	uint64_t stat_arr_cpu[BLKIO_STAT_CPU_ARR_NR][BLKIO_STAT_TOTAL];
-	struct u64_stats_sync syncp;
+	/* total bytes transferred */
+	struct blkg_rwstat		service_bytes;
+	/* total IOs serviced, post merge */
+	struct blkg_rwstat		serviced;
+	/* total sectors transferred */
+	struct blkg_stat		sectors;
 };
 
 struct blkio_group_conf {
@@ -316,6 +322,121 @@ static inline void blkg_put(struct blkio_group *blkg)
 		__blkg_release(blkg);
 }
 
+/**
+ * blkg_stat_add - add a value to a blkg_stat
+ * @stat: target blkg_stat
+ * @val: value to add
+ *
+ * Add @val to @stat.  The caller is responsible for synchronizing calls to
+ * this function.
+ */
+static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
+{
+	u64_stats_update_begin(&stat->syncp);
+	stat->cnt += val;
+	u64_stats_update_end(&stat->syncp);
+}
+
+/**
+ * blkg_stat_read - read the current value of a blkg_stat
+ * @stat: blkg_stat to read
+ *
+ * Read the current value of @stat.  This function can be called without
+ * synchroniztion and takes care of u64 atomicity.
+ */
+static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
+{
+	unsigned int start;
+	uint64_t v;
+
+	do {
+		start = u64_stats_fetch_begin(&stat->syncp);
+		v = stat->cnt;
+	} while (u64_stats_fetch_retry(&stat->syncp, start));
+
+	return v;
+}
+
+/**
+ * blkg_stat_reset - reset a blkg_stat
+ * @stat: blkg_stat to reset
+ */
+static inline void blkg_stat_reset(struct blkg_stat *stat)
+{
+	stat->cnt = 0;
+}
+
+/**
+ * blkg_rwstat_add - add a value to a blkg_rwstat
+ * @rwstat: target blkg_rwstat
+ * @rw: mask of REQ_{WRITE|SYNC}
+ * @val: value to add
+ *
+ * Add @val to @rwstat.  The counters are chosen according to @rw.  The
+ * caller is responsible for synchronizing calls to this function.
+ */
+static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
+				   int rw, uint64_t val)
+{
+	u64_stats_update_begin(&rwstat->syncp);
+
+	if (rw & REQ_WRITE)
+		rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
+	else
+		rwstat->cnt[BLKG_RWSTAT_READ] += val;
+	if (rw & REQ_SYNC)
+		rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
+	else
+		rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
+
+	u64_stats_update_end(&rwstat->syncp);
+}
+
+/**
+ * blkg_rwstat_read - read the current values of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Read the current snapshot of @rwstat and return it as the return value.
+ * This function can be called without synchronization and takes care of
+ * u64 atomicity.
+ */
+static struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
+{
+	unsigned int start;
+	struct blkg_rwstat tmp;
+
+	do {
+		start = u64_stats_fetch_begin(&rwstat->syncp);
+		tmp = *rwstat;
+	} while (u64_stats_fetch_retry(&rwstat->syncp, start));
+
+	return tmp;
+}
+
+/**
+ * blkg_rwstat_sum - read the total count of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Return the total count of @rwstat regardless of the IO direction.  This
+ * function can be called without synchronization and takes care of u64
+ * atomicity.
+ */
+static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat)
+{
+	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
+
+	return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
+}
+
+/**
+ * blkg_rwstat_reset - reset a blkg_rwstat
+ * @rwstat: blkg_rwstat to reset
+ */
+static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
+{
+	memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
+}
+
 #else
 
 struct blkio_group {