author    Ingo Molnar <mingo@kernel.org>    2013-03-18 05:09:31 -0400
committer Ingo Molnar <mingo@kernel.org>    2013-03-18 05:09:31 -0400
commit    e75c8b475e4b1da6bf5b412db9a2ecd7c44188a2 (patch)
tree      082cf1fd56a86ae901cdc39c83ccc3e8f6b8c850 /kernel/sched
parent    1bf08230f745e48fea9c18ee34a73581631fe7c9 (diff)
parent    d9a3c9823a2e6a543eb7807fb3d15d8233817ec5 (diff)
Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into sched/core
Pull CPU runtime stats/accounting fixes from Frederic Weisbecker:

  "Some users are complaining that their threadgroup's runtime
   accounting freezes after a week or so of intense CPU-bound workload.
   This set tries to fix the issue by reducing the risk of
   multiplication overflow in the cputime scaling code."

Stanislaw Gruszka further explained the historic context and impact of
the bug:

  "Commit 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239 started to use
   scaling for the whole thread group, which increases the chances of
   hitting the multiplication overflow, depending on how many CPUs are
   in the system. We have had the utime * rtime multiplication for a
   single thread since commit b27f03d4bdc145a09fb7b0c0e004b29f1ee555fa.

   Overflow happens once:

     rtime * utime > 0xffffffffffffffff jiffies

   If a thread utilizes 100% of the CPU time, that gives:

     rtime > sqrt(0xffffffffffffffff) jiffies
     rtime > sqrt(0xffffffffffffffff) / (24 * 60 * 60 * HZ) days

   For HZ=100 that is 497 days; for HZ=1000 it is 49 days.

   The bug affects only users who run a CPU-intensive application for
   that long a period, and only if they care about the utime/stime
   values, as the bug has no visible effect other than making those
   values incorrect."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
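For reference, the thresholds quoted above can be reproduced with a
small userspace sketch. This is illustrative only and not part of the
patch; it simply evaluates sqrt(2^64 - 1) jiffies, the point at which
utime * rtime wraps for a fully CPU-bound thread (utime ~= rtime):

/* overflow_days.c: reproduce the overflow arithmetic quoted above.
 * Build with: cc overflow_days.c -lm
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* utime * rtime wraps u64 past sqrt(2^64 - 1) ~= 2^32 jiffies */
	const double limit_jiffies = sqrt((double)UINT64_MAX);
	const int hz_values[] = { 100, 1000 };

	for (unsigned int i = 0; i < 2; i++) {
		int hz = hz_values[i];
		double days = limit_jiffies / (24.0 * 60 * 60 * hz);
		/* prints ~497.1 days for HZ=100, ~49.7 days for HZ=1000 */
		printf("HZ=%-4d overflow after ~%.1f days\n", hz, days);
	}
	return 0;
}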
Diffstat (limited to 'kernel/sched')
-rw-r--r--    kernel/sched/cputime.c | 46
1 file changed, 34 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 024fe1998ad5..699d59756ece 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -521,18 +521,36 @@ void account_idle_ticks(unsigned long ticks)
 	account_idle_time(jiffies_to_cputime(ticks));
 }
 
-static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total)
+/*
+ * Perform (stime * rtime) / total with reduced chances
+ * of multiplication overflows by using smaller factors
+ * like quotient and remainders of divisions between
+ * rtime and total.
+ */
+static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
 {
-	u64 temp = (__force u64) rtime;
-
-	temp *= (__force u64) stime;
-
-	if (sizeof(cputime_t) == 4)
-		temp = div_u64(temp, (__force u32) total);
-	else
-		temp = div64_u64(temp, (__force u64) total);
+	u64 rem, res, scaled;
+
+	if (rtime >= total) {
+		/*
+		 * Scale up to rtime / total then add
+		 * the remainder scaled to stime / total.
+		 */
+		res = div64_u64_rem(rtime, total, &rem);
+		scaled = stime * res;
+		scaled += div64_u64(stime * rem, total);
+	} else {
+		/*
+		 * Same in reverse: scale down to total / rtime
+		 * then subtract that result scaled to
+		 * the remaining part.
+		 */
+		res = div64_u64_rem(total, rtime, &rem);
+		scaled = div64_u64(stime, res);
+		scaled -= div64_u64(scaled * rem, total);
+	}
 
-	return (__force cputime_t) temp;
+	return (__force cputime_t) scaled;
 }
 
 /*
@@ -566,10 +584,14 @@ static void cputime_adjust(struct task_cputime *curr,
 	 */
 	rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 
-	if (total)
-		stime = scale_stime(stime, rtime, total);
-	else
+	if (!rtime) {
+		stime = 0;
+	} else if (!total) {
 		stime = rtime;
+	} else {
+		stime = scale_stime((__force u64)stime,
+				    (__force u64)rtime, (__force u64)total);
+	}
 
 	/*
 	 * If the tick based count grows faster than the scheduler one,
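
To see why the quotient/remainder split preserves the result, here is a
userspace sketch of the new scale_stime() logic. It is an illustration,
not kernel code: plain 64-bit division stands in for div64_u64() and
div64_u64_rem(), cputime_t is modeled as uint64_t, and the function name
and test values are made up for the example:

/* scale_sketch.c: userspace model of the new scale_stime() above.
 * Identity used when rtime >= total:
 *   stime * rtime / total ==
 *       stime * (rtime / total) + stime * (rtime % total) / total
 * which keeps one factor of each product below total, reducing (but
 * not eliminating) the chance that a 64-bit multiplication overflows.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t scale_stime_sketch(uint64_t stime, uint64_t rtime,
				   uint64_t total)
{
	uint64_t res, rem, scaled;

	if (rtime >= total) {
		res = rtime / total;
		rem = rtime % total;
		scaled = stime * res + stime * rem / total;
	} else {
		/* Since total = res * rtime + rem, the two steps below
		 * compute (stime / res) * (total - rem) / total, which
		 * approximates stime * rtime / total.
		 */
		res = total / rtime;
		rem = total % rtime;
		scaled = stime / res;
		scaled -= scaled * rem / total;
	}
	return scaled;
}

int main(void)
{
	/* Just past sqrt(2^64) jiffies: ~497 days of runtime at HZ=100,
	 * i.e. right where the old single-multiplication code wrapped.
	 */
	uint64_t rtime = 4295000000ULL;
	uint64_t total = rtime;
	uint64_t stime = rtime;

	/* A naive stime * rtime would overflow u64 here; the split
	 * path returns the exact answer (stime, since rtime == total).
	 */
	printf("scaled = %" PRIu64 "\n",
	       scale_stime_sketch(stime, rtime, total));
	return 0;
}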