author    Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>    2009-03-31 03:56:03 -0400
committer Ingo Molnar <mingo@elte.hu>                       2009-04-01 10:44:16 -0400
commit    c5f8d99585d7b5b7e857fabf8aefd0174903a98c (patch)
tree      504127a7d4b6a0e3aee56d0822e5e8b23f8062d7
parent    13b8bd0a5713bdf05659019badd7c0407984ece1 (diff)
posixtimers, sched: Fix posix clock monotonicity
Impact: regression fix (against the clock_gettime() backwards-jump bug)

This patch re-introduces a couple of functions, task_sched_runtime()
and thread_group_sched_runtime(), which were removed at the time of
2.6.28-rc1.

These functions protect the sampling of thread/process clocks with the
rq lock, which is required so that the runqueue does not update
rq->clock and bank the running task's delta while the clock is being
sampled. Without it, clock_gettime() may return ((accounted runtime
before the update) + (delta after the update)), which is less than
what it should be.

v2 -> v3:
 - Rename the static helper __task_delta_exec() to do_task_delta_exec(),
   since the -tip tree already has a __task_delta_exec() of a different
   version.

v1 -> v2:
 - Revise the function comments and the patch description.
 - Add a note about the accuracy of the thread group's runtime.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org [2.6.28.x][2.6.29.x]
LKML-Reference: <49D1CC93.4080401@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
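As a quick sanity check for the regression described above, here is a
minimal userspace sketch (not part of the commit; the file name, loop
count and error handling are arbitrary choices) that repeatedly samples
the process CPU clock and flags any backward step:

/* monotonic.c -- hypothetical test, not part of the commit */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;
	unsigned long long prev = 0, now;
	long i;

	for (i = 0; i < 10000000; i++) {
		if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) != 0) {
			perror("clock_gettime");
			return 1;
		}
		/* Convert the sample to nanoseconds and compare with the last one. */
		now = (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
		if (now < prev) {
			fprintf(stderr, "backwards: %llu < %llu\n", now, prev);
			return 1;
		}
		prev = now;
	}
	printf("monotonic over %ld samples\n", i);
	return 0;
}

Build with "gcc monotonic.c -lrt" (glibc of that era keeps
clock_gettime() in librt). On an affected SMP kernel the backwards
check can fire, more readily with sibling threads running; with the
patch applied it should stay silent.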
-rw-r--r--  kernel/posix-cpu-timers.c   7
-rw-r--r--  kernel/sched.c             65
2 files changed, 61 insertions(+), 11 deletions(-)
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index fa07da94d7be..4318c3085788 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
+		cpu->sched = task_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -240,18 +240,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 {
 	struct task_cputime cputime;
 
-	thread_group_cputime(p, &cputime);
 	switch (CPUCLOCK_WHICH(which_clock)) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
+		thread_group_cputime(p, &cputime);
 		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
 		break;
 	case CPUCLOCK_VIRT:
+		thread_group_cputime(p, &cputime);
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+		cpu->sched = thread_group_sched_runtime(p);
 		break;
 	}
 	return 0;
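For context on the SCHED case just replaced: the old code paired a sum
read by thread_group_cputime() with a delta from a separately locked
task_delta_exec(), so an update_curr() could slip in between, banking
the delta into the sum and resetting exec_start. A toy calculation,
with made-up numbers (not from a real trace), makes the backwards step
concrete:

#include <stdio.h>

int main(void)
{
	/* Illustrative values only (ns). */
	unsigned long long sum_old = 1000000; /* runtime banked before update_curr() */
	unsigned long long D       = 30000;   /* delta banked by update_curr()       */
	unsigned long long d_new   = 50;      /* delta since the reset exec_start    */

	/* Clean sample taken just before update_curr() runs: */
	unsigned long long r1 = sum_old + (D - 1000);	/* 1029000 */

	/*
	 * Racy sample taken later: the sum was read before update_curr()
	 * banked D, but the delta was computed after exec_start was reset:
	 */
	unsigned long long r2 = sum_old + d_new;	/* 1000050 */

	printf("r1=%llu r2=%llu clock went backwards: %s\n",
	       r1, r2, r2 < r1 ? "yes" : "no");
	return 0;
}

Taking the rq lock around both halves of the read, as the new helpers
in kernel/sched.c below do, rules this interleaving out.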
diff --git a/kernel/sched.c b/kernel/sched.c
index cc397aae5eae..c8d7f17bd036 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4139,9 +4139,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
- * Return any ns on the sched_clock that have not yet been banked in
+ * Return any ns on the sched_clock that have not yet been accounted in
  * @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
  */
+static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+	u64 ns = 0;
+
+	if (task_current(rq, p)) {
+		update_rq_clock(rq);
+		ns = rq->clock - p->se.exec_start;
+		if ((s64)ns < 0)
+			ns = 0;
+	}
+
+	return ns;
+}
+
 unsigned long long task_delta_exec(struct task_struct *p)
 {
 	unsigned long flags;
@@ -4149,16 +4165,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
 	u64 ns = 0;
 
 	rq = task_rq_lock(p, &flags);
+	ns = do_task_delta_exec(p, rq);
+	task_rq_unlock(rq, &flags);
 
-	if (task_current(rq, p)) {
-		u64 delta_exec;
-
-		update_rq_clock(rq);
-		delta_exec = rq->clock - p->se.exec_start;
-		if ((s64)delta_exec > 0)
-			ns = delta_exec;
-	}
+	return ns;
+}
+
+/*
+ * Return accounted runtime for the task.
+ * In case the task is currently running, return the runtime plus current's
+ * pending runtime that have not been accounted yet.
+ */
+unsigned long long task_sched_runtime(struct task_struct *p)
+{
+	unsigned long flags;
+	struct rq *rq;
+	u64 ns = 0;
+
+	rq = task_rq_lock(p, &flags);
+	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+	task_rq_unlock(rq, &flags);
+
+	return ns;
+}
 
+/*
+ * Return sum_exec_runtime for the thread group.
+ * In case the task is currently running, return the sum plus current's
+ * pending runtime that have not been accounted yet.
+ *
+ * Note that the thread group might have other running tasks as well,
+ * so the return value not includes other pending runtime that other
+ * running tasks might have.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+	struct task_cputime totals;
+	unsigned long flags;
+	struct rq *rq;
+	u64 ns;
+
+	rq = task_rq_lock(p, &flags);
+	thread_group_cputime(p, &totals);
+	ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
 	task_rq_unlock(rq, &flags);
 
 	return ns;
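One limitation the new comment above already notes:
thread_group_sched_runtime() adds only @p's own unbanked delta, so
pending runtime of other running threads in the group is omitted from
the sample. A toy calculation, again with made-up numbers, shows the
size of that gap:

#include <stdio.h>

int main(void)
{
	/* Hypothetical snapshot (ns), taken under p's rq lock. */
	unsigned long long group_sum  = 5000000; /* thread_group_cputime() total     */
	unsigned long long p_pending  = 10000;   /* p's unbanked delta (included)    */
	unsigned long long other_pend = 40000;   /* a sibling's unbanked delta (not) */

	unsigned long long reported = group_sum + p_pending;

	printf("reported=%llu, true value >= %llu (lag <= %llu)\n",
	       reported, reported + other_pend, other_pend);
	return 0;
}

The sample can therefore lag the true group runtime by up to the
unbanked deltas of the other running threads, but since banking only
ever increases the group totals and the pair is read under the rq
lock, successive samples no longer jump backwards.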