aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2012-03-08 13:53:58 -0500
committer: Jens Axboe <axboe@kernel.dk> 2012-03-20 07:45:37 -0400
commit: 997a026c80c3cc05f82e589aced1f0011c17d376 (patch)
tree: 905fe49970f8549663e1e70e77dd04811fd14c9c
parent: 5fe224d2d5fbf8f020b30d0ba69fed7856923752 (diff)
blkcg: simplify stat reset
blkiocg_reset_stats() implements stat reset for the blkio.reset_stats cgroupfs file. This feature is very unconventional and something which shouldn't have been merged. It's only useful when there's only one user or tool looking at the stats. As soon as multiple users and/or tools are involved, it becomes useless as resetting disrupts other usages. There are very good reasons why all other stats expect readers to read values at the start and end of a period and subtract to determine the delta over the period.

The implementation is rather complex - some fields shouldn't be cleared, so it saves those fields, resets the whole structure and then restores them for some reason. Reset of percpu stats is also racy. The comment points to 64bit store atomicity as the reason, but even without that, stores of zero can simply race with other CPUs doing RMW and get clobbered.

Simplify reset by

* Clearing selectively instead of resetting and restoring.
* Grouping debug stat fields to be reset and using memset() over them.
* Not caring about stats_lock.
* Using memset() to reset percpu stats.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- block/blk-cgroup.c 80
-rw-r--r-- block/blk-cgroup.h 14
2 files changed, 37 insertions, 57 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 6eedf3afa27..759bc58154c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -779,83 +779,53 @@ EXPORT_SYMBOL_GPL(__blkg_release);
779static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid) 779static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
780{ 780{
781 struct blkg_policy_data *pd = blkg->pd[plid]; 781 struct blkg_policy_data *pd = blkg->pd[plid];
782 struct blkio_group_stats_cpu *stats_cpu; 782 int cpu;
783 int i, j, k;
784 783
785 if (pd->stats_cpu == NULL) 784 if (pd->stats_cpu == NULL)
786 return; 785 return;
787 /* 786
788 * Note: On 64 bit arch this should not be an issue. This has the 787 for_each_possible_cpu(cpu) {
789 * possibility of returning some inconsistent value on 32bit arch 788 struct blkio_group_stats_cpu *sc =
790 * as 64bit update on 32bit is non atomic. Taking care of this 789 per_cpu_ptr(pd->stats_cpu, cpu);
791 * corner case makes code very complicated, like sending IPIs to 790
792 * cpus, taking care of stats of offline cpus etc. 791 sc->sectors = 0;
793 * 792 memset(sc->stat_arr_cpu, 0, sizeof(sc->stat_arr_cpu));
794 * reset stats is anyway more of a debug feature and this sounds a
795 * corner case. So I am not complicating the code yet until and
796 * unless this becomes a real issue.
797 */
798 for_each_possible_cpu(i) {
799 stats_cpu = per_cpu_ptr(pd->stats_cpu, i);
800 stats_cpu->sectors = 0;
801 for(j = 0; j < BLKIO_STAT_CPU_NR; j++)
802 for (k = 0; k < BLKIO_STAT_TOTAL; k++)
803 stats_cpu->stat_arr_cpu[j][k] = 0;
804 } 793 }
805} 794}
806 795
807static int 796static int
808blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) 797blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
809{ 798{
810 struct blkio_cgroup *blkcg; 799 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
811 struct blkio_group *blkg; 800 struct blkio_group *blkg;
812 struct blkio_group_stats *stats;
813 struct hlist_node *n; 801 struct hlist_node *n;
814 uint64_t queued[BLKIO_STAT_TOTAL];
815 int i; 802 int i;
816#ifdef CONFIG_DEBUG_BLK_CGROUP
817 bool idling, waiting, empty;
818 unsigned long long now = sched_clock();
819#endif
820 803
821 blkcg = cgroup_to_blkio_cgroup(cgroup);
822 spin_lock(&blkio_list_lock); 804 spin_lock(&blkio_list_lock);
823 spin_lock_irq(&blkcg->lock); 805 spin_lock_irq(&blkcg->lock);
806
807 /*
808 * Note that stat reset is racy - it doesn't synchronize against
809 * stat updates. This is a debug feature which shouldn't exist
810 * anyway. If you get hit by a race, retry.
811 */
824 hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { 812 hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
825 struct blkio_policy_type *pol; 813 struct blkio_policy_type *pol;
826 814
827 list_for_each_entry(pol, &blkio_list, list) { 815 list_for_each_entry(pol, &blkio_list, list) {
828 struct blkg_policy_data *pd = blkg->pd[pol->plid]; 816 struct blkg_policy_data *pd = blkg->pd[pol->plid];
829 817 struct blkio_group_stats *stats = &pd->stats;
830 spin_lock(&blkg->stats_lock); 818
831 stats = &pd->stats; 819 /* queued stats shouldn't be cleared */
820 for (i = 0; i < ARRAY_SIZE(stats->stat_arr); i++)
821 if (i != BLKIO_STAT_QUEUED)
822 memset(stats->stat_arr[i], 0,
823 sizeof(stats->stat_arr[i]));
824 stats->time = 0;
832#ifdef CONFIG_DEBUG_BLK_CGROUP 825#ifdef CONFIG_DEBUG_BLK_CGROUP
833 idling = blkio_blkg_idling(stats); 826 memset((void *)stats + BLKG_STATS_DEBUG_CLEAR_START, 0,
834 waiting = blkio_blkg_waiting(stats); 827 BLKG_STATS_DEBUG_CLEAR_SIZE);
835 empty = blkio_blkg_empty(stats);
836#endif 828#endif
837 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
838 queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
839 memset(stats, 0, sizeof(struct blkio_group_stats));
840 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
841 stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
842#ifdef CONFIG_DEBUG_BLK_CGROUP
843 if (idling) {
844 blkio_mark_blkg_idling(stats);
845 stats->start_idle_time = now;
846 }
847 if (waiting) {
848 blkio_mark_blkg_waiting(stats);
849 stats->start_group_wait_time = now;
850 }
851 if (empty) {
852 blkio_mark_blkg_empty(stats);
853 stats->start_empty_time = now;
854 }
855#endif
856 spin_unlock(&blkg->stats_lock);
857
858 /* Reset Per cpu stats which don't take blkg->stats_lock */
859 blkio_reset_stats_cpu(blkg, pol->plid); 829 blkio_reset_stats_cpu(blkg, pol->plid);
860 } 830 }
861 } 831 }
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 6c8e3e34542..1fa3c5e8d87 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -131,21 +131,31 @@ struct blkio_group_stats {
131 131
132 /* Total time spent waiting for it to be assigned a timeslice. */ 132 /* Total time spent waiting for it to be assigned a timeslice. */
133 uint64_t group_wait_time; 133 uint64_t group_wait_time;
134 uint64_t start_group_wait_time;
135 134
136 /* Time spent idling for this blkio_group */ 135 /* Time spent idling for this blkio_group */
137 uint64_t idle_time; 136 uint64_t idle_time;
138 uint64_t start_idle_time;
139 /* 137 /*
140 * Total time when we have requests queued and do not contain the 138 * Total time when we have requests queued and do not contain the
141 * current active queue. 139 * current active queue.
142 */ 140 */
143 uint64_t empty_time; 141 uint64_t empty_time;
142
143 /* fields after this shouldn't be cleared on stat reset */
144 uint64_t start_group_wait_time;
145 uint64_t start_idle_time;
144 uint64_t start_empty_time; 146 uint64_t start_empty_time;
145 uint16_t flags; 147 uint16_t flags;
146#endif 148#endif
147}; 149};
148 150
151#ifdef CONFIG_DEBUG_BLK_CGROUP
152#define BLKG_STATS_DEBUG_CLEAR_START \
153 offsetof(struct blkio_group_stats, unaccounted_time)
154#define BLKG_STATS_DEBUG_CLEAR_SIZE \
155 (offsetof(struct blkio_group_stats, start_group_wait_time) - \
156 BLKG_STATS_DEBUG_CLEAR_START)
157#endif
158
149/* Per cpu blkio group stats */ 159/* Per cpu blkio group stats */
150struct blkio_group_stats_cpu { 160struct blkio_group_stats_cpu {
151 uint64_t sectors; 161 uint64_t sectors;