author     Ankita Garg <ankita@in.ibm.com>    2008-07-01 05:00:06 -0400
committer  Ingo Molnar <mingo@elte.hu>        2008-07-04 06:50:23 -0400
commit     46ac22bab42cc868b9c1d0e915ddbc8e8065a44d
tree       407d6a38b698b0e071f6d32c0244d8af2c618fa1
parent     2087a1ad822cd3a68b73338457047fcc54da726b
sched: fix accounting in task delay accounting & migration
On Thu, Jun 19, 2008 at 12:27:14PM +0200, Peter Zijlstra wrote:
> On Thu, 2008-06-05 at 10:50 +0530, Ankita Garg wrote:
>
> > Thanks Peter for the explanation...
> >
> > I agree with the above, and that is the reason why I did not see weird
> > values with cpu_time. But run_delay would still suffer skews, as the end
> > points for the delta could be taken on different cpus due to migration
> > (more so on the RT kernel due to the push-pull operations). With the
> > below patch, I could not reproduce the issue I had seen earlier. After
> > every dequeue, we take the delta and start wait measurements from zero
> > when the task is moved to a different rq.
>
> OK, so task delay accounting is broken because it doesn't take
> migration into account.
>
> What you've done is make it symmetric wrt enqueue, and account it like
>
>   cpu0      cpu1
>
> enqueue
>  <wait-d1>
> dequeue
>             enqueue
>              <wait-d2>
>             run
>
> Where you add both d1 and d2 to the run_delay... right?
>
Thanks for reviewing the patch. The above is exactly what I have done.
> This seems like a good fix; however, it looks like the patch will break
> compilation under !CONFIG_SCHEDSTATS && !CONFIG_TASK_DELAY_ACCT, by
> failing to provide a stub for sched_info_dequeued() in that case.
Fixed. Please find the new patch below.
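
To make the symmetric accounting above concrete, here is a small userspace
model of the scheme (illustrative only; the rq/task structs and helper names
are simplified stand-ins for the kernel's sched_info hooks in
kernel/sched_stats.h, and the run-time delta taken by sched_info_arrive() is
modeled by a second dequeue). Each runqueue keeps its own, possibly skewed,
clock; both the enqueue stamp and the wait delta are always taken against the
clock of the rq the task is currently queued on, so d1 and d2 accumulate into
run_delay without ever mixing the two clocks:

/* Userspace model of symmetric run_delay accounting (illustrative only). */
#include <stdio.h>

struct rq   { unsigned long long clock; };               /* per-cpu clock */
struct task { unsigned long long last_queued, run_delay; };

static void enqueue(struct task *t, struct rq *rq)
{
        t->last_queued = rq->clock;     /* stamp with this rq's clock */
}

static void dequeue(struct task *t, struct rq *rq)
{
        if (t->last_queued)             /* delta on the same rq's clock */
                t->run_delay += rq->clock - t->last_queued;
        t->last_queued = 0;             /* restart from zero on the next rq */
}

int main(void)
{
        struct rq cpu0 = { 1000 }, cpu1 = { 9000 };      /* skewed clocks */
        struct task t = { 0, 0 };

        enqueue(&t, &cpu0);
        cpu0.clock += 30;               /* <wait-d1> = 30 on cpu0 */
        dequeue(&t, &cpu0);             /* migration: d1 banked on cpu0 */

        enqueue(&t, &cpu1);
        cpu1.clock += 20;               /* <wait-d2> = 20 on cpu1 */
        dequeue(&t, &cpu1);             /* picked to run: d2 banked on cpu1 */

        printf("run_delay = %llu\n", t.run_delay);       /* 50, skew-free */
        return 0;
}

Had the two end points been read from different cpus' clocks (enqueue stamped
on cpu0, arrival read on cpu1), run_delay would have absorbed the 8000-unit
skew between the clocks; taking d1 and d2 separately on each rq avoids that.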
Signed-off-by: Ankita Garg <ankita@in.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Gregory Haskins <ghaskins@novell.com>
Cc: rostedt@goodmis.org
Cc: suresh.b.siddha@intel.com
Cc: aneesh.kumar@linux.vnet.ibm.com
Cc: dhaval@linux.vnet.ibm.com
Cc: vatsa@linux.vnet.ibm.com
Cc: David Bahi <DBahi@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  kernel/sched.c       |  1 +
-rw-r--r--  kernel/sched_stats.h | 42 +++++++++++++++++++++++++++++++++---------
2 files changed, 34 insertions, 9 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index bbc40c3a0657..996bc15196a5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1714,6 +1714,7 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
                 p->se.last_wakeup = 0;
         }
 
+        sched_info_dequeued(p);
         p->sched_class->dequeue_task(rq, p, sleep);
         p->se.on_rq = 0;
 }
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 80179ef7450e..8385d43987e2 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -118,6 +118,13 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
         if (rq)
                 rq->rq_sched_info.cpu_time += delta;
 }
+
+static inline void
+rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
+{
+        if (rq)
+                rq->rq_sched_info.run_delay += delta;
+}
 # define schedstat_inc(rq, field)        do { (rq)->field++; } while (0)
 # define schedstat_add(rq, field, amt)   do { (rq)->field += (amt); } while (0)
 # define schedstat_set(var, val)         do { var = (val); } while (0)
@@ -126,6 +133,9 @@ static inline void
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
 {}
 static inline void
+rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
+{}
+static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
 # define schedstat_inc(rq, field)        do { } while (0)
@@ -134,6 +144,11 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 #endif
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+static inline void sched_info_reset_dequeued(struct task_struct *t)
+{
+        t->sched_info.last_queued = 0;
+}
+
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu.  We should note that with the exception of interactive
@@ -143,15 +158,22 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
  * active queue, thus delaying tasks in the expired queue from running;
  * see scheduler_tick()).
  *
- * This function is only called from sched_info_arrive(), rather than
- * dequeue_task(). Even though a task may be queued and dequeued multiple
- * times as it is shuffled about, we're really interested in knowing how
- * long it was from the *first* time it was queued to the time that it
- * finally hit a cpu.
+ * Though we are interested in knowing how long it was from the *first* time a
+ * task was queued to the time that it finally hit a cpu, we call this routine
+ * from dequeue_task() to account for possible rq->clock skew across cpus. The
+ * delta taken on each cpu would annul the skew.
  */
 static inline void sched_info_dequeued(struct task_struct *t)
 {
-        t->sched_info.last_queued = 0;
+        unsigned long long now = task_rq(t)->clock, delta = 0;
+
+        if (unlikely(sched_info_on()))
+                if (t->sched_info.last_queued)
+                        delta = now - t->sched_info.last_queued;
+        sched_info_reset_dequeued(t);
+        t->sched_info.run_delay += delta;
+
+        rq_sched_info_dequeued(task_rq(t), delta);
 }
 
 /*
@@ -165,7 +187,7 @@ static void sched_info_arrive(struct task_struct *t)
 
         if (t->sched_info.last_queued)
                 delta = now - t->sched_info.last_queued;
-        sched_info_dequeued(t);
+        sched_info_reset_dequeued(t);
         t->sched_info.run_delay += delta;
         t->sched_info.last_arrival = now;
         t->sched_info.pcount++;
@@ -242,7 +264,9 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
                 __sched_info_switch(prev, next);
 }
 #else
 #define sched_info_queued(t)             do { } while (0)
+#define sched_info_reset_dequeued(t)     do { } while (0)
+#define sched_info_dequeued(t)           do { } while (0)
 #define sched_info_switch(t, next)       do { } while (0)
 #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
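
As a usage note: the run_delay this patch keeps honest is visible from
userspace through /proc/<pid>/schedstat, which is present when
CONFIG_SCHEDSTATS or CONFIG_TASK_DELAY_ACCT is enabled and, per
Documentation/scheduler/sched-stats.txt, carries three fields: time spent on
the cpu, time spent runnable but waiting for a cpu (run_delay), and the number
of timeslices run. A minimal reader, assuming that documented layout:

/* Minimal reader for /proc/<pid>/schedstat; assumed layout:
 * "<on-cpu ns> <run_delay ns> <timeslices>".
 */
#include <stdio.h>

int main(void)
{
        unsigned long long on_cpu, run_delay, pcount;
        FILE *f = fopen("/proc/self/schedstat", "r");

        if (!f || fscanf(f, "%llu %llu %llu",
                         &on_cpu, &run_delay, &pcount) != 3) {
                perror("/proc/self/schedstat");
                return 1;
        }
        fclose(f);

        printf("on-cpu %llu ns, run_delay %llu ns, %llu timeslices\n",
               on_cpu, run_delay, pcount);
        return 0;
}

Sampling run_delay around forced migrations (for instance, flipping the cpu
mask with sched_setaffinity()) is one way the skew described in the thread
above would have shown up before this fix.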