sched: fix process time monotonicity

Spencer reported a problem where utime and stime were going negative despite the fixes in commit b27f03d4bdc145a09fb7b0c0e004b29f1ee555fa. The suspected reason for the problem is that signal_struct maintains its own utime and stime (of exited tasks); these are not updated using the new task_utime() routine, hence sig->utime can go backwards and cause the same problem to occur (sig->utime adds tsk->utime and not task_utime()). This patch fixes the problem. TODO: using max(task->prev_utime, derived utime) works for now, but a more generic solution is to implement cputime_max() and use the cputime_gt() function for comparison. Reported-by: spencer@bluehost.com Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Balbir Singh <balbir@linux.vnet.ibm.com> 2008-09-05 12:12:23 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-09-05 12:14:35 -0400
commit: 49048622eae698e5c4ae61f7e71200f265ccc529 (patch)
tree: e568595fe5329e1293eafc3a3cc833dfe89ffbf2
parent: 56c7426b3951e4f35a71d695f1c982989399d6fd (diff)
4 files changed, 66 insertions, 62 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0d6eb33597c6..71c9be59c9c2 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
        return 0;
 }
-/*
- * Use precise platform statistics if available:
- */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-static cputime_t task_utime(struct task_struct *p)
-{
-        return p->utime;
-}
-static cputime_t task_stime(struct task_struct *p)
-{
-        return p->stime;
-}
-#else
-static cputime_t task_utime(struct task_struct *p)
-{
-        clock_t utime = cputime_to_clock_t(p->utime),
-                total = utime + cputime_to_clock_t(p->stime);
-        u64 temp;
-        /*
-         * Use CFS's precise accounting:
-         */
-        temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
-        if (total) {
-                temp *= utime;
-                do_div(temp, total);
-        }
-        utime = (clock_t)temp;
-        p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
-        return p->prev_utime;
-}
-static cputime_t task_stime(struct task_struct *p)
-{
-        clock_t stime;
-        /*
-         * Use CFS's precise accounting. (we subtract utime from
-         * the total, to make sure the total observed by userspace
-         * grows monotonically - apps rely on that):
-         */
-        stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
-                        cputime_to_clock_t(task_utime(p));
-        if (stime >= 0)
-                p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
-        return p->prev_stime;
-}
-#endif
-static cputime_t task_gtime(struct task_struct *p)
-{
-        return p->gtime;
-}
 static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                        struct pid *pid, struct task_struct *task, int whole)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cfb0d87b99fc..3d9120c5ad15 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1475,6 +1475,10 @@ static inline void put_task_struct(struct task_struct *t)
                __put_task_struct(t);
 }
+extern cputime_t task_utime(struct task_struct *p);
+extern cputime_t task_stime(struct task_struct *p);
+extern cputime_t task_gtime(struct task_struct *p);
 /*
 * Per process flags
 */
diff --git a/kernel/exit.c b/kernel/exit.c
index 25ed2ad986df..16395644a98f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,9 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
                 * We won't ever get here for the group leader, since it
                 * will have been the last reference on the signal_struct.
                 */
-                sig->utime = cputime_add(sig->utime, tsk->utime);
+                sig->utime = cputime_add(sig->utime, task_utime(tsk));
-                sig->stime = cputime_add(sig->stime, tsk->stime);
+                sig->stime = cputime_add(sig->stime, task_stime(tsk));
-                sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+                sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
                sig->min_flt += tsk->min_flt;
                sig->maj_flt += tsk->maj_flt;
                sig->nvcsw += tsk->nvcsw;
diff --git a/kernel/sched.c b/kernel/sched.c
index 9a1ddb84e26d..1a5f73c1fcdc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4179,6 +4179,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 }
 /*
+ * Use precise platform statistics if available:
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+cputime_t task_utime(struct task_struct *p)
+{
+        return p->utime;
+}
+cputime_t task_stime(struct task_struct *p)
+{
+        return p->stime;
+}
+#else
+cputime_t task_utime(struct task_struct *p)
+{
+        clock_t utime = cputime_to_clock_t(p->utime),
+                total = utime + cputime_to_clock_t(p->stime);
+        u64 temp;
+        /*
+         * Use CFS's precise accounting:
+         */
+        temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+        if (total) {
+                temp *= utime;
+                do_div(temp, total);
+        }
+        utime = (clock_t)temp;
+        p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
+        return p->prev_utime;
+}
+cputime_t task_stime(struct task_struct *p)
+{
+        clock_t stime;
+        /*
+         * Use CFS's precise accounting. (we subtract utime from
+         * the total, to make sure the total observed by userspace
+         * grows monotonically - apps rely on that):
+         */
+        stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+                        cputime_to_clock_t(task_utime(p));
+        if (stime >= 0)
+                p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+        return p->prev_stime;
+}
+#endif
+inline cputime_t task_gtime(struct task_struct *p)
+{
+        return p->gtime;
+}
+/*
 * This function gets called by the timer code, with HZ frequency.
 * We call it with interrupts disabled.
 *
author	Balbir Singh <balbir@linux.vnet.ibm.com>	2008-09-05 12:12:23 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-09-05 12:14:35 -0400
commit	49048622eae698e5c4ae61f7e71200f265ccc529 (patch)
tree	e568595fe5329e1293eafc3a3cc833dfe89ffbf2
parent	56c7426b3951e4f35a71d695f1c982989399d6fd (diff)