diff options
author | Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | 2009-03-31 03:56:03 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-01 10:44:16 -0400 |
commit | c5f8d99585d7b5b7e857fabf8aefd0174903a98c (patch) | |
tree | 504127a7d4b6a0e3aee56d0822e5e8b23f8062d7 | |
parent | 13b8bd0a5713bdf05659019badd7c0407984ece1 (diff) |
posixtimers, sched: Fix posix clock monotonicity
Impact: Regression fix (for the bug where clock_gettime() could go backwards)
This patch re-introduces a couple of functions, task_sched_runtime
and thread_group_sched_runtime, which were removed around the
time of 2.6.28-rc1.
These functions protect the sampling of the thread/process clock with
the rq lock. The rq lock is required so that rq->clock is not updated
during the sampling.
i.e.
Without the lock, clock_gettime() may return
((accounted runtime before update) + (delta after update)),
which is less than what it should be.
v2 -> v3:
- Rename static helper function __task_delta_exec()
to do_task_delta_exec() since -tip tree already has
a __task_delta_exec() of different version.
v1 -> v2:
- Revise function comments and patch description.
- Add note about accuracy of thread group's runtime.
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org [2.6.28.x][2.6.29.x]
LKML-Reference: <49D1CC93.4080401@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | kernel/posix-cpu-timers.c | 7 | ||||
-rw-r--r-- | kernel/sched.c | 65 |
2 files changed, 61 insertions, 11 deletions
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index fa07da94d7be..4318c3085788 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | |||
224 | cpu->cpu = virt_ticks(p); | 224 | cpu->cpu = virt_ticks(p); |
225 | break; | 225 | break; |
226 | case CPUCLOCK_SCHED: | 226 | case CPUCLOCK_SCHED: |
227 | cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); | 227 | cpu->sched = task_sched_runtime(p); |
228 | break; | 228 | break; |
229 | } | 229 | } |
230 | return 0; | 230 | return 0; |
@@ -240,18 +240,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock, | |||
240 | { | 240 | { |
241 | struct task_cputime cputime; | 241 | struct task_cputime cputime; |
242 | 242 | ||
243 | thread_group_cputime(p, &cputime); | ||
244 | switch (CPUCLOCK_WHICH(which_clock)) { | 243 | switch (CPUCLOCK_WHICH(which_clock)) { |
245 | default: | 244 | default: |
246 | return -EINVAL; | 245 | return -EINVAL; |
247 | case CPUCLOCK_PROF: | 246 | case CPUCLOCK_PROF: |
247 | thread_group_cputime(p, &cputime); | ||
248 | cpu->cpu = cputime_add(cputime.utime, cputime.stime); | 248 | cpu->cpu = cputime_add(cputime.utime, cputime.stime); |
249 | break; | 249 | break; |
250 | case CPUCLOCK_VIRT: | 250 | case CPUCLOCK_VIRT: |
251 | thread_group_cputime(p, &cputime); | ||
251 | cpu->cpu = cputime.utime; | 252 | cpu->cpu = cputime.utime; |
252 | break; | 253 | break; |
253 | case CPUCLOCK_SCHED: | 254 | case CPUCLOCK_SCHED: |
254 | cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); | 255 | cpu->sched = thread_group_sched_runtime(p); |
255 | break; | 256 | break; |
256 | } | 257 | } |
257 | return 0; | 258 | return 0; |
diff --git a/kernel/sched.c b/kernel/sched.c index cc397aae5eae..c8d7f17bd036 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -4139,9 +4139,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); | |||
4139 | EXPORT_PER_CPU_SYMBOL(kstat); | 4139 | EXPORT_PER_CPU_SYMBOL(kstat); |
4140 | 4140 | ||
4141 | /* | 4141 | /* |
4142 | * Return any ns on the sched_clock that have not yet been banked in | 4142 | * Return any ns on the sched_clock that have not yet been accounted in |
4143 | * @p in case that task is currently running. | 4143 | * @p in case that task is currently running. |
4144 | * | ||
4145 | * Called with task_rq_lock() held on @rq. | ||
4144 | */ | 4146 | */ |
4147 | static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | ||
4148 | { | ||
4149 | u64 ns = 0; | ||
4150 | |||
4151 | if (task_current(rq, p)) { | ||
4152 | update_rq_clock(rq); | ||
4153 | ns = rq->clock - p->se.exec_start; | ||
4154 | if ((s64)ns < 0) | ||
4155 | ns = 0; | ||
4156 | } | ||
4157 | |||
4158 | return ns; | ||
4159 | } | ||
4160 | |||
4145 | unsigned long long task_delta_exec(struct task_struct *p) | 4161 | unsigned long long task_delta_exec(struct task_struct *p) |
4146 | { | 4162 | { |
4147 | unsigned long flags; | 4163 | unsigned long flags; |
@@ -4149,16 +4165,49 @@ unsigned long long task_delta_exec(struct task_struct *p) | |||
4149 | u64 ns = 0; | 4165 | u64 ns = 0; |
4150 | 4166 | ||
4151 | rq = task_rq_lock(p, &flags); | 4167 | rq = task_rq_lock(p, &flags); |
4168 | ns = do_task_delta_exec(p, rq); | ||
4169 | task_rq_unlock(rq, &flags); | ||
4152 | 4170 | ||
4153 | if (task_current(rq, p)) { | 4171 | return ns; |
4154 | u64 delta_exec; | 4172 | } |
4155 | 4173 | ||
4156 | update_rq_clock(rq); | 4174 | /* |
4157 | delta_exec = rq->clock - p->se.exec_start; | 4175 | * Return accounted runtime for the task. |
4158 | if ((s64)delta_exec > 0) | 4176 | * In case the task is currently running, return the runtime plus current's |
4159 | ns = delta_exec; | 4177 | * pending runtime that have not been accounted yet. |
4160 | } | 4178 | */ |
4179 | unsigned long long task_sched_runtime(struct task_struct *p) | ||
4180 | { | ||
4181 | unsigned long flags; | ||
4182 | struct rq *rq; | ||
4183 | u64 ns = 0; | ||
4184 | |||
4185 | rq = task_rq_lock(p, &flags); | ||
4186 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
4187 | task_rq_unlock(rq, &flags); | ||
4188 | |||
4189 | return ns; | ||
4190 | } | ||
4161 | 4191 | ||
4192 | /* | ||
4193 | * Return sum_exec_runtime for the thread group. | ||
4194 | * In case the task is currently running, return the sum plus current's | ||
4195 | * pending runtime that have not been accounted yet. | ||
4196 | * | ||
4197 | * Note that the thread group might have other running tasks as well, | ||
4198 | * so the return value not includes other pending runtime that other | ||
4199 | * running tasks might have. | ||
4200 | */ | ||
4201 | unsigned long long thread_group_sched_runtime(struct task_struct *p) | ||
4202 | { | ||
4203 | struct task_cputime totals; | ||
4204 | unsigned long flags; | ||
4205 | struct rq *rq; | ||
4206 | u64 ns; | ||
4207 | |||
4208 | rq = task_rq_lock(p, &flags); | ||
4209 | thread_group_cputime(p, &totals); | ||
4210 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
4162 | task_rq_unlock(rq, &flags); | 4211 | task_rq_unlock(rq, &flags); |
4163 | 4212 | ||
4164 | return ns; | 4213 | return ns; |