author      Steven Rostedt <rostedt@goodmis.org>    2008-07-09 00:15:33 -0400
committer   Ingo Molnar <mingo@elte.hu>             2008-07-11 09:53:28 -0400
commit      c300ba252829e9325e08f0af60687add94445b25 (patch)
tree        93c583012a3f26617e95aea516c02716ce254f5d /kernel/sched_clock.c
parent      a83bc47c33ab182f1e48977fd5a04024d713c75e (diff)
sched_clock: add multiplier for TSC to gtod drift
The sched_clock code currently tries to keep the clocks of all CPUs
somewhat in sync. At every clock tick it records the gtod clock and
uses that, jiffies, and the TSC to calculate a CPU clock that tries to
stay in sync with all the other CPUs.
ftrace depends heavily on this timer and detects when it "jumps".
One problem is that the TSC and the gtod also drift relative to each
other. When the TSC is 0.1% faster or slower than the gtod, it is very
noticeable in ftrace. To help compensate for this, I've added a
multiplier that tries to keep the CPU clock updating at the same rate
as the gtod.
I've tried various ways to keep it in sync, and this ended up being
the most reliable. At every scheduler tick we calculate the new multiplier:
multi = delta_gtod / delta_TSC
This means we perform a 64-bit divide at the tick (once per tick, i.e.
HZ times a second). A shift is used to retain accuracy in fixed-point
arithmetic.
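As an illustration of the fixed-point scheme, here is a minimal
user-space sketch (the compute_multi() helper, the main() driver, and
the sample numbers are hypothetical; the kernel code does the 64-bit
divide with do_div(), and the MULTI_SHIFT/MAX_MULTI/MIN_MULTI constants
are the ones the patch introduces):

#include <stdint.h>
#include <stdio.h>

#define MULTI_SHIFT 15                        /* fixed-point fraction bits */
#define MAX_MULTI (2LL << MULTI_SHIFT)        /* clamp: at most double speed */
#define MIN_MULTI (1LL << (MULTI_SHIFT - 1))  /* clamp: at least half speed */

/* Ratio of gtod time to TSC time over the last tick, in fixed point. */
static int64_t compute_multi(int64_t delta_gtod, int64_t delta_tsc)
{
        int64_t multi;

        if (delta_tsc <= 0)             /* no forward TSC progress: no scaling */
                return 1LL << MULTI_SHIFT;

        multi = (delta_gtod << MULTI_SHIFT) / delta_tsc;
        if (multi > MAX_MULTI)
                multi = MAX_MULTI;
        else if (multi < MIN_MULTI)
                multi = MIN_MULTI;
        return multi;
}

int main(void)
{
        /* TSC ran 0.1% fast: 1001000 raw ns vs 1000000 gtod ns this tick. */
        int64_t multi = compute_multi(1000000, 1001000);
        int64_t delta = 1001000;        /* next raw TSC delta to be scaled */

        delta = (delta * multi) >> MULTI_SHIFT; /* ~1000000 after scaling */
        printf("multi = %lld, corrected delta = %lld\n",
               (long long)multi, (long long)delta);
        return 0;
}

With these numbers the ratio comes out just under 1 (32735/32768), so
scaling the next raw TSC delta by it pulls the CPU clock back toward
the gtod rate.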
Other methods that failed due to dynamic HZ are:
(not used) multi += (gtod - tsc) / delta_gtod
(not used) multi += (gtod - (last_tsc + delta_tsc)) / delta_gtod
as well as other variants.
This code still allows for a slight drift between the TSC and the gtod,
but it keeps the damage to a minimum.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_clock.c')
-rw-r--r--   kernel/sched_clock.c | 40 +++++++++++++++++++++++++++++++++++++++---
1 file changed, 37 insertions(+), 3 deletions(-)
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 28ff6bf5e02b..8affbfd0cdb0 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -3,6 +3,9 @@
  *
  * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *
+ * Updates and enhancements:
+ *   Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
+ *
  * Based on code by:
  *   Ingo Molnar <mingo@redhat.com>
  *   Guillaume Chazarain <guichaz@gmail.com>
@@ -32,6 +35,11 @@
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 
+#define MULTI_SHIFT 15
+/* Max is double, Min is 1/2 */
+#define MAX_MULTI (2LL << MULTI_SHIFT)
+#define MIN_MULTI (1LL << (MULTI_SHIFT-1))
+
 struct sched_clock_data {
         /*
          * Raw spinlock - this is a special case: this might be called
@@ -45,6 +53,7 @@ struct sched_clock_data {
         u64             tick_raw;
         u64             tick_gtod;
         u64             clock;
+        s64             multi;
 #ifdef CONFIG_NO_HZ
         int             check_max;
 #endif
@@ -79,6 +88,7 @@ void sched_clock_init(void)
                 scd->tick_raw = 0;
                 scd->tick_gtod = ktime_now;
                 scd->clock = ktime_now;
+                scd->multi = 1 << MULTI_SHIFT;
 #ifdef CONFIG_NO_HZ
                 scd->check_max = 1;
 #endif
@@ -134,8 +144,13 @@ static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *tim
 
         WARN_ON_ONCE(!irqs_disabled());
 
-        min_clock = scd->tick_gtod +
-                (delta_jiffies ? delta_jiffies - 1 : 0) * TICK_NSEC;
+        /*
+         * At schedule tick the clock can be just under the gtod. We don't
+         * want to push it too prematurely.
+         */
+        min_clock = scd->tick_gtod + (delta_jiffies * TICK_NSEC);
+        if (min_clock > TICK_NSEC)
+                min_clock -= TICK_NSEC / 2;
 
         if (unlikely(delta < 0)) {
                 clock++;
@@ -149,6 +164,9 @@ static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *tim
          */
         max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC;
 
+        delta *= scd->multi;
+        delta >>= MULTI_SHIFT;
+
         if (unlikely(clock + delta > max_clock) && check_max(scd)) {
                 if (clock < max_clock)
                         clock = max_clock;
@@ -230,6 +248,7 @@ void sched_clock_tick(void)
 {
         struct sched_clock_data *scd = this_scd();
         unsigned long now_jiffies = jiffies;
+        s64 mult, delta_gtod, delta_raw;
         u64 now, now_gtod;
 
         if (unlikely(!sched_clock_running))
@@ -247,9 +266,23 @@
          * already observe 1 new jiffy; adding a new tick_gtod to that would
          * increase the clock 2 jiffies.
          */
-        scd->tick_jiffies = now_jiffies;
+        delta_gtod = now_gtod - scd->tick_gtod;
+        delta_raw = now - scd->tick_raw;
+
+        if ((long)delta_raw > 0) {
+                mult = delta_gtod << MULTI_SHIFT;
+                do_div(mult, delta_raw);
+                scd->multi = mult;
+                if (scd->multi > MAX_MULTI)
+                        scd->multi = MAX_MULTI;
+                else if (scd->multi < MIN_MULTI)
+                        scd->multi = MIN_MULTI;
+        } else
+                scd->multi = 1 << MULTI_SHIFT;
+
         scd->tick_raw = now;
         scd->tick_gtod = now_gtod;
+        scd->tick_jiffies = now_jiffies;
         __raw_spin_unlock(&scd->lock);
 }
 
@@ -279,6 +312,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
         __raw_spin_lock(&scd->lock);
         scd->prev_raw = now;
         scd->clock += delta_ns;
+        scd->multi = 1 << MULTI_SHIFT;
         __raw_spin_unlock(&scd->lock);
 
         touch_softlockup_watchdog();