author     Stanislaw Gruszka <sgruszka@redhat.com>    2013-04-30 11:14:42 -0400
committer  Ingo Molnar <mingo@kernel.org>             2013-04-30 13:13:04 -0400
commit     55eaa7c1f511af5fb6ef808b5328804f4d4e5243 (patch)
tree       aef918fd1c788ed508b32e0ca91905a0a907bec8 /kernel/sched
parent     25f55d9d01ad7a7ad248fd5af1d22675ffd202c5 (diff)
sched: Avoid cputime scaling overflow
This patch adds Linus's cputime scaling algorithm to the kernel. It is a
follow-up (well, a fix) to commit d9a3c9823a2e6a543eb7807fb3d15d8233817ec5
("sched: Lower chances of cputime scaling overflow"), which tried to avoid
multiplication overflow but did not guarantee that the overflow would not
happen.

Linus created a different algorithm, which completely avoids the
multiplication overflow by dropping precision when the numbers are big.

I tested it and it gives a good relative error for the scaled numbers.
The testing method is described here:

  http://marc.info/?l=linux-kernel&m=136733059505406&w=2

Originally-From: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: rostedt@goodmis.org
Cc: Dave Hansen <dave@sr71.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130430151441.GC10465@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
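To make the overflow concrete, here is a minimal userspace sketch of the
pre-patch "rtime >= total" branch. The name scale_stime_old is assumed,
plain '/' and '%' stand in for the kernel's div64_u64_rem()/div64_u64()
helpers, and GCC/Clang __int128 is used only as an exact reference. Once
stime and the division remainder both exceed 32 bits, the intermediate
stime * rem product wraps around 64 bits and the result drifts far from
the exact value.

#include <stdint.h>
#include <stdio.h>

/*
 * Userspace sketch of the pre-patch formula, rtime >= total branch
 * (assumed name; plain '/' and '%' stand in for div64_u64_rem/div64_u64).
 */
static uint64_t scale_stime_old(uint64_t stime, uint64_t rtime, uint64_t total)
{
        uint64_t res = rtime / total;
        uint64_t rem = rtime % total;

        /* stime * rem wraps around 64 bits once both exceed 2^32 */
        return stime * res + (stime * rem) / total;
}

int main(void)
{
        /* Large inputs: both stime and the remainder end up above 2^32 */
        uint64_t stime = 1ULL << 34;
        uint64_t total = 1ULL << 35;
        uint64_t rtime = (1ULL << 37) + (1ULL << 35) - 1;

        /* GCC/Clang 128-bit arithmetic, used only as an exact reference */
        uint64_t exact = (uint64_t)(((unsigned __int128)stime * rtime) / total);

        printf("old formula: %llu\n",
               (unsigned long long)scale_stime_old(stime, rtime, total));
        printf("exact      : %llu\n", (unsigned long long)exact);
        return 0;
}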
Diffstat (limited to 'kernel/sched')
-rw-r--r--   kernel/sched/cputime.c   57
1 file changed, 35 insertions(+), 22 deletions(-)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 33508dc78d0c..e9198abfca53 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -506,34 +506,47 @@ void account_idle_ticks(unsigned long ticks)
 }
 
 /*
- * Perform (stime * rtime) / total with reduced chances
- * of multiplication overflows by using smaller factors
- * like quotient and remainders of divisions between
- * rtime and total.
+ * Perform (stime * rtime) / total, but avoid multiplication overflow by
+ * loosing precision when the numbers are big.
  */
 static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
 {
-        u64 rem, res, scaled;
+        u64 scaled;
 
-        if (rtime >= total) {
-                /*
-                 * Scale up to rtime / total then add
-                 * the remainder scaled to stime / total.
-                 */
-                res = div64_u64_rem(rtime, total, &rem);
-                scaled = stime * res;
-                scaled += div64_u64(stime * rem, total);
-        } else {
-                /*
-                 * Same in reverse: scale down to total / rtime
-                 * then substract that result scaled to
-                 * to the remaining part.
-                 */
-                res = div64_u64_rem(total, rtime, &rem);
-                scaled = div64_u64(stime, res);
-                scaled -= div64_u64(scaled * rem, total);
+        for (;;) {
+                /* Make sure "rtime" is the bigger of stime/rtime */
+                if (stime > rtime) {
+                        u64 tmp = rtime; rtime = stime; stime = tmp;
+                }
+
+                /* Make sure 'total' fits in 32 bits */
+                if (total >> 32)
+                        goto drop_precision;
+
+                /* Does rtime (and thus stime) fit in 32 bits? */
+                if (!(rtime >> 32))
+                        break;
+
+                /* Can we just balance rtime/stime rather than dropping bits? */
+                if (stime >> 31)
+                        goto drop_precision;
+
+                /* We can grow stime and shrink rtime and try to make them both fit */
+                stime <<= 1;
+                rtime >>= 1;
+                continue;
+
+drop_precision:
+                /* We drop from rtime, it has more bits than stime */
+                rtime >>= 1;
+                total >>= 1;
         }
 
+        /*
+         * Make sure gcc understands that this is a 32x32->64 multiply,
+         * followed by a 64/32->64 divide.
+         */
+        scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
         return (__force cputime_t) scaled;
 }
 
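For completeness, below is a userspace transcription of the patched loop
(assumed name scale_stime_new; plain division instead of div_u64, and
__int128 only for the exact reference), run on the same large inputs as
the sketch above the diff. This is not the test from the marc.info link,
just a sketch of the kind of relative-error check it describes: the loop
never overflows and the printed relative error stays small.

#include <stdint.h>
#include <stdio.h>

/*
 * Userspace transcription of the patched scale_stime() loop
 * (assumed name; plain '/' stands in for div_u64).
 */
static uint64_t scale_stime_new(uint64_t stime, uint64_t rtime, uint64_t total)
{
        for (;;) {
                /* Keep "rtime" the bigger of stime/rtime */
                if (stime > rtime) {
                        uint64_t tmp = rtime;
                        rtime = stime;
                        stime = tmp;
                }

                /* Done once 'total' and 'rtime' (hence stime) fit in 32 bits */
                if (total >> 32)
                        goto drop_precision;
                if (!(rtime >> 32))
                        break;

                /* Prefer rebalancing stime against rtime over dropping bits */
                if (stime >> 31)
                        goto drop_precision;
                stime <<= 1;
                rtime >>= 1;
                continue;

drop_precision:
                /* Drop a bit from the bigger values */
                rtime >>= 1;
                total >>= 1;
        }

        /* 32x32->64 multiply followed by a 64/32 divide: cannot overflow */
        return ((uint64_t)(uint32_t)stime * (uint32_t)rtime) / (uint32_t)total;
}

int main(void)
{
        /* Same large inputs as in the pre-patch overflow sketch */
        uint64_t stime = 1ULL << 34;
        uint64_t total = 1ULL << 35;
        uint64_t rtime = (1ULL << 37) + (1ULL << 35) - 1;

        /* GCC/Clang __int128 used only as an exact reference */
        uint64_t exact  = (uint64_t)(((unsigned __int128)stime * rtime) / total);
        uint64_t scaled = scale_stime_new(stime, rtime, total);

        printf("scaled: %llu\nexact : %llu\nrel. error: %e\n",
               (unsigned long long)scaled, (unsigned long long)exact,
               (double)(exact > scaled ? exact - scaled : scaled - exact) /
               (double)exact);
        return 0;
}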