author    Steven Rostedt <rostedt@goodmis.org>    2008-07-09 00:15:33 -0400
committer Ingo Molnar <mingo@elte.hu>             2008-07-11 09:53:28 -0400
commit    c300ba252829e9325e08f0af60687add94445b25 (patch)
tree      93c583012a3f26617e95aea516c02716ce254f5d /kernel/sched_clock.c
parent    a83bc47c33ab182f1e48977fd5a04024d713c75e (diff)
sched_clock: add multiplier for TSC to gtod drift
The sched_clock code currently tries to keep the clocks of all CPUs somewhat in sync. At every clock tick it records the gtod clock and uses that and jiffies and the TSC to calculate a CPU clock that tries to stay in sync with all the other CPUs.

ftrace depends heavily on this timer and it detects when this timer "jumps". One problem is that the TSC and the gtod also drift. When the TSC is 0.1% faster or slower than the gtod it is very noticeable in ftrace. To help compensate for this, I've added a multiplier that tries to keep the CPU clock updating at the same rate as the gtod. I've tried various ways to get it to stay in sync and this ended up being the most reliable.

At every scheduler tick we calculate the new multiplier:

    multi = delta_gtod / delta_TSC

This means we perform a 64-bit divide at the tick (once per tick). A shift is used to handle the accuracy.

Other methods that failed due to dynamic HZ are:

    (not used)  multi += (gtod - tsc) / delta_gtod
    (not used)  multi += (gtod - (last_tsc + delta_tsc)) / delta_gtod

as well as other variants.

This code still allows for a slight drift between TSC and gtod, but it keeps the damage down to a minimum.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
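For readers who want to see the fixed-point scheme in isolation, here is a minimal userspace sketch. MULTI_SHIFT, MAX_MULTI and MIN_MULTI mirror the patch below, but the helpers compute_multi() and apply_multi() and the sample numbers are illustrative assumptions, not kernel code; a plain 64-bit division stands in for the kernel's do_div().

    #include <stdint.h>
    #include <stdio.h>

    #define MULTI_SHIFT 15
    /* Max is double, Min is 1/2 -- the same bounds the patch uses. */
    #define MAX_MULTI   (2LL << MULTI_SHIFT)
    #define MIN_MULTI   (1LL << (MULTI_SHIFT - 1))

    /*
     * Recompute the fixed-point ratio delta_gtod/delta_raw once per tick.
     * A plain 64-bit division stands in for the kernel's do_div().
     */
    static int64_t compute_multi(int64_t delta_gtod, int64_t delta_raw)
    {
            int64_t multi;

            if (delta_raw <= 0)
                    return 1 << MULTI_SHIFT;  /* identity: no TSC progress */

            multi = (delta_gtod << MULTI_SHIFT) / delta_raw;
            if (multi > MAX_MULTI)
                    multi = MAX_MULTI;
            else if (multi < MIN_MULTI)
                    multi = MIN_MULTI;
            return multi;
    }

    /* Scale a raw TSC delta by the multiplier, as __update_sched_clock() does. */
    static int64_t apply_multi(int64_t delta, int64_t multi)
    {
            return (delta * multi) >> MULTI_SHIFT;
    }

    int main(void)
    {
            /* TSC ran 0.1% fast over a 1 ms tick: 1001000 raw ns vs 1000000 gtod ns. */
            int64_t multi = compute_multi(1000000, 1001000);

            printf("multi = %lld/32768, next 1001000 ns raw delta -> %lld ns\n",
                   (long long)multi, (long long)apply_multi(1001000, multi));
            return 0;
    }

In this toy run the TSC's 0.1% surplus yields a multiplier of 32735/32768 (just under 1.0), so the next raw delta of 1001000 ns scales back to about 999991 ns; the clamp to [MIN_MULTI, MAX_MULTI] bounds any single correction between 0.5x and 2x.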
Diffstat (limited to 'kernel/sched_clock.c')
-rw-r--r--    kernel/sched_clock.c    40
1 file changed, 37 insertions(+), 3 deletions(-)
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 28ff6bf5e02b..8affbfd0cdb0 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -3,6 +3,9 @@
  *
  * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *
+ * Updates and enhancements:
+ *   Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
+ *
  * Based on code by:
  *   Ingo Molnar <mingo@redhat.com>
  *   Guillaume Chazarain <guichaz@gmail.com>
@@ -32,6 +35,11 @@
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 
+#define MULTI_SHIFT 15
+/* Max is double, Min is 1/2 */
+#define MAX_MULTI (2LL << MULTI_SHIFT)
+#define MIN_MULTI (1LL << (MULTI_SHIFT-1))
+
 struct sched_clock_data {
 	/*
 	 * Raw spinlock - this is a special case: this might be called
@@ -45,6 +53,7 @@ struct sched_clock_data {
 	u64			tick_raw;
 	u64			tick_gtod;
 	u64			clock;
+	s64			multi;
 #ifdef CONFIG_NO_HZ
 	int			check_max;
 #endif
@@ -79,6 +88,7 @@ void sched_clock_init(void)
 		scd->tick_raw = 0;
 		scd->tick_gtod = ktime_now;
 		scd->clock = ktime_now;
+		scd->multi = 1 << MULTI_SHIFT;
 #ifdef CONFIG_NO_HZ
 		scd->check_max = 1;
 #endif
@@ -134,8 +144,13 @@ static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *tim
 
 	WARN_ON_ONCE(!irqs_disabled());
 
-	min_clock = scd->tick_gtod +
-		(delta_jiffies ? delta_jiffies - 1 : 0) * TICK_NSEC;
+	/*
+	 * At schedule tick the clock can be just under the gtod. We don't
+	 * want to push it too prematurely.
+	 */
+	min_clock = scd->tick_gtod + (delta_jiffies * TICK_NSEC);
+	if (min_clock > TICK_NSEC)
+		min_clock -= TICK_NSEC / 2;
 
 	if (unlikely(delta < 0)) {
 		clock++;
@@ -149,6 +164,9 @@ static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *tim
 	 */
 	max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC;
 
+	delta *= scd->multi;
+	delta >>= MULTI_SHIFT;
+
 	if (unlikely(clock + delta > max_clock) && check_max(scd)) {
 		if (clock < max_clock)
 			clock = max_clock;
@@ -230,6 +248,7 @@ void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd = this_scd();
 	unsigned long now_jiffies = jiffies;
+	s64 mult, delta_gtod, delta_raw;
 	u64 now, now_gtod;
 
 	if (unlikely(!sched_clock_running))
@@ -247,9 +266,23 @@ void sched_clock_tick(void)
 	 * already observe 1 new jiffy; adding a new tick_gtod to that would
 	 * increase the clock 2 jiffies.
 	 */
-	scd->tick_jiffies = now_jiffies;
+	delta_gtod = now_gtod - scd->tick_gtod;
+	delta_raw = now - scd->tick_raw;
+
+	if ((long)delta_raw > 0) {
+		mult = delta_gtod << MULTI_SHIFT;
+		do_div(mult, delta_raw);
+		scd->multi = mult;
+		if (scd->multi > MAX_MULTI)
+			scd->multi = MAX_MULTI;
+		else if (scd->multi < MIN_MULTI)
+			scd->multi = MIN_MULTI;
+	} else
+		scd->multi = 1 << MULTI_SHIFT;
+
 	scd->tick_raw = now;
 	scd->tick_gtod = now_gtod;
+	scd->tick_jiffies = now_jiffies;
 	__raw_spin_unlock(&scd->lock);
 }
 
@@ -279,6 +312,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 	__raw_spin_lock(&scd->lock);
 	scd->prev_raw = now;
 	scd->clock += delta_ns;
+	scd->multi = 1 << MULTI_SHIFT;
 	__raw_spin_unlock(&scd->lock);
 
 	touch_softlockup_watchdog();
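A note on the min_clock hunk in __update_sched_clock() above: the old bound backed off a whole tick via (delta_jiffies ? delta_jiffies - 1 : 0), while the new code starts from the full delta_jiffies * TICK_NSEC and pulls back half a tick, so a clock sitting just under the gtod at the schedule tick is not pushed forward prematurely. A minimal standalone sketch of just that bound; the helper name min_clock_bound() and TICK_NSEC = 1000000 ns (i.e. HZ=1000) are illustrative assumptions:

    #include <stdint.h>
    #include <stdio.h>

    #define TICK_NSEC 1000000ULL  /* assumes HZ=1000; illustrative only */

    /* Lower bound that keeps the per-CPU clock from lagging gtod too far. */
    static uint64_t min_clock_bound(uint64_t tick_gtod, unsigned long delta_jiffies)
    {
            uint64_t min_clock = tick_gtod + (delta_jiffies * TICK_NSEC);

            /* Back off half a tick so the clock isn't pushed prematurely. */
            if (min_clock > TICK_NSEC)
                    min_clock -= TICK_NSEC / 2;
            return min_clock;
    }

    int main(void)
    {
            /* One tick after a gtod of 5 ms: the bound is 5.5 ms, not 6 ms. */
            printf("%llu\n", (unsigned long long)min_clock_bound(5000000ULL, 1));
            return 0;
    }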