diff options
author | Ingo Molnar <mingo@kernel.org> | 2013-03-18 05:09:31 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-03-18 05:09:31 -0400 |
commit | e75c8b475e4b1da6bf5b412db9a2ecd7c44188a2 (patch) | |
tree | 082cf1fd56a86ae901cdc39c83ccc3e8f6b8c850 | |
parent | 1bf08230f745e48fea9c18ee34a73581631fe7c9 (diff) | |
parent | d9a3c9823a2e6a543eb7807fb3d15d8233817ec5 (diff) |
Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into sched/core
Pull CPU runtime stats/accounting fixes from Frederic Weisbecker:
" Some users are complaining that their threadgroup's runtime accounting
freezes after a week or so of intense cpu-bound workload. This set tries
to fix the issue by reducing the risk of multiplication overflow in the
cputime scaling code. "
Stanislaw Gruszka further explained the historic context and impact of the
bug:
" Commit 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239 start to use scalling
for whole thread group, so increase chances of hitting multiplication
overflow, depending on how many CPUs are on the system.
We have multiplication utime * rtime for one thread since commit
b27f03d4bdc145a09fb7b0c0e004b29f1ee555fa.
Overflow will happen after:
rtime * utime > 0xffffffffffffffff jiffies
if thread utilize 100% of CPU time, that gives:
rtime > sqrt(0xffffffffffffffff) jiffies
ritme > sqrt(0xffffffffffffffff) / (24 * 60 * 60 * HZ) days
For HZ 100 it will be 497 days for HZ 1000 it will be 49 days.
Bug affect only users, who run CPU intensive application for that
long period. Also they have to be interested on utime,stime values,
as bug has no other visible effect as making those values incorrect. "
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | include/linux/math64.h | 19 | ||||
-rw-r--r-- | kernel/sched/cputime.c | 46 | ||||
-rw-r--r-- | lib/div64.c | 19 |
3 files changed, 65 insertions, 19 deletions
diff --git a/include/linux/math64.h b/include/linux/math64.h index b8ba85544721..931a619407bf 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h | |||
@@ -30,6 +30,15 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) | |||
30 | } | 30 | } |
31 | 31 | ||
32 | /** | 32 | /** |
33 | * div64_u64_rem - unsigned 64bit divide with 64bit divisor | ||
34 | */ | ||
35 | static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) | ||
36 | { | ||
37 | *remainder = dividend % divisor; | ||
38 | return dividend / divisor; | ||
39 | } | ||
40 | |||
41 | /** | ||
33 | * div64_u64 - unsigned 64bit divide with 64bit divisor | 42 | * div64_u64 - unsigned 64bit divide with 64bit divisor |
34 | */ | 43 | */ |
35 | static inline u64 div64_u64(u64 dividend, u64 divisor) | 44 | static inline u64 div64_u64(u64 dividend, u64 divisor) |
@@ -61,8 +70,16 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) | |||
61 | extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder); | 70 | extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder); |
62 | #endif | 71 | #endif |
63 | 72 | ||
73 | #ifndef div64_u64_rem | ||
74 | extern u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder); | ||
75 | #endif | ||
76 | |||
64 | #ifndef div64_u64 | 77 | #ifndef div64_u64 |
65 | extern u64 div64_u64(u64 dividend, u64 divisor); | 78 | static inline u64 div64_u64(u64 dividend, u64 divisor) |
79 | { | ||
80 | u64 remainder; | ||
81 | return div64_u64_rem(dividend, divisor, &remainder); | ||
82 | } | ||
66 | #endif | 83 | #endif |
67 | 84 | ||
68 | #ifndef div64_s64 | 85 | #ifndef div64_s64 |
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 024fe1998ad5..699d59756ece 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -521,18 +521,36 @@ void account_idle_ticks(unsigned long ticks) | |||
521 | account_idle_time(jiffies_to_cputime(ticks)); | 521 | account_idle_time(jiffies_to_cputime(ticks)); |
522 | } | 522 | } |
523 | 523 | ||
524 | static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total) | 524 | /* |
525 | * Perform (stime * rtime) / total with reduced chances | ||
526 | * of multiplication overflows by using smaller factors | ||
527 | * like quotient and remainders of divisions between | ||
528 | * rtime and total. | ||
529 | */ | ||
530 | static cputime_t scale_stime(u64 stime, u64 rtime, u64 total) | ||
525 | { | 531 | { |
526 | u64 temp = (__force u64) rtime; | 532 | u64 rem, res, scaled; |
527 | 533 | ||
528 | temp *= (__force u64) stime; | 534 | if (rtime >= total) { |
529 | 535 | /* | |
530 | if (sizeof(cputime_t) == 4) | 536 | * Scale up to rtime / total then add |
531 | temp = div_u64(temp, (__force u32) total); | 537 | * the remainder scaled to stime / total. |
532 | else | 538 | */ |
533 | temp = div64_u64(temp, (__force u64) total); | 539 | res = div64_u64_rem(rtime, total, &rem); |
540 | scaled = stime * res; | ||
541 | scaled += div64_u64(stime * rem, total); | ||
542 | } else { | ||
543 | /* | ||
544 | * Same in reverse: scale down to total / rtime | ||
545 | * then substract that result scaled to | ||
546 | * to the remaining part. | ||
547 | */ | ||
548 | res = div64_u64_rem(total, rtime, &rem); | ||
549 | scaled = div64_u64(stime, res); | ||
550 | scaled -= div64_u64(scaled * rem, total); | ||
551 | } | ||
534 | 552 | ||
535 | return (__force cputime_t) temp; | 553 | return (__force cputime_t) scaled; |
536 | } | 554 | } |
537 | 555 | ||
538 | /* | 556 | /* |
@@ -566,10 +584,14 @@ static void cputime_adjust(struct task_cputime *curr, | |||
566 | */ | 584 | */ |
567 | rtime = nsecs_to_cputime(curr->sum_exec_runtime); | 585 | rtime = nsecs_to_cputime(curr->sum_exec_runtime); |
568 | 586 | ||
569 | if (total) | 587 | if (!rtime) { |
570 | stime = scale_stime(stime, rtime, total); | 588 | stime = 0; |
571 | else | 589 | } else if (!total) { |
572 | stime = rtime; | 590 | stime = rtime; |
591 | } else { | ||
592 | stime = scale_stime((__force u64)stime, | ||
593 | (__force u64)rtime, (__force u64)total); | ||
594 | } | ||
573 | 595 | ||
574 | /* | 596 | /* |
575 | * If the tick based count grows faster than the scheduler one, | 597 | * If the tick based count grows faster than the scheduler one, |
diff --git a/lib/div64.c b/lib/div64.c index a163b6caef73..3af5728d95fd 100644 --- a/lib/div64.c +++ b/lib/div64.c | |||
@@ -79,9 +79,10 @@ EXPORT_SYMBOL(div_s64_rem); | |||
79 | #endif | 79 | #endif |
80 | 80 | ||
81 | /** | 81 | /** |
82 | * div64_u64 - unsigned 64bit divide with 64bit divisor | 82 | * div64_u64_rem - unsigned 64bit divide with 64bit divisor and 64bit remainder |
83 | * @dividend: 64bit dividend | 83 | * @dividend: 64bit dividend |
84 | * @divisor: 64bit divisor | 84 | * @divisor: 64bit divisor |
85 | * @remainder: 64bit remainder | ||
85 | * | 86 | * |
86 | * This implementation is a modified version of the algorithm proposed | 87 | * This implementation is a modified version of the algorithm proposed |
87 | * by the book 'Hacker's Delight'. The original source and full proof | 88 | * by the book 'Hacker's Delight'. The original source and full proof |
@@ -89,27 +90,33 @@ EXPORT_SYMBOL(div_s64_rem); | |||
89 | * | 90 | * |
90 | * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt' | 91 | * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt' |
91 | */ | 92 | */ |
92 | #ifndef div64_u64 | 93 | #ifndef div64_u64_rem |
93 | u64 div64_u64(u64 dividend, u64 divisor) | 94 | u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) |
94 | { | 95 | { |
95 | u32 high = divisor >> 32; | 96 | u32 high = divisor >> 32; |
96 | u64 quot; | 97 | u64 quot; |
97 | 98 | ||
98 | if (high == 0) { | 99 | if (high == 0) { |
99 | quot = div_u64(dividend, divisor); | 100 | u32 rem32; |
101 | quot = div_u64_rem(dividend, divisor, &rem32); | ||
102 | *remainder = rem32; | ||
100 | } else { | 103 | } else { |
101 | int n = 1 + fls(high); | 104 | int n = 1 + fls(high); |
102 | quot = div_u64(dividend >> n, divisor >> n); | 105 | quot = div_u64(dividend >> n, divisor >> n); |
103 | 106 | ||
104 | if (quot != 0) | 107 | if (quot != 0) |
105 | quot--; | 108 | quot--; |
106 | if ((dividend - quot * divisor) >= divisor) | 109 | |
110 | *remainder = dividend - quot * divisor; | ||
111 | if (*remainder >= divisor) { | ||
107 | quot++; | 112 | quot++; |
113 | *remainder -= divisor; | ||
114 | } | ||
108 | } | 115 | } |
109 | 116 | ||
110 | return quot; | 117 | return quot; |
111 | } | 118 | } |
112 | EXPORT_SYMBOL(div64_u64); | 119 | EXPORT_SYMBOL(div64_u64_rem); |
113 | #endif | 120 | #endif |
114 | 121 | ||
115 | /** | 122 | /** |