diff options
author | Steven Rostedt <rostedt@goodmis.org> | 2008-07-07 14:16:52 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-11 09:53:26 -0400 |
commit | af52a90a14cdaa54ecbfb6e6982abb13466a4b56 (patch) | |
tree | 2488d6d7943b167987c33f984f7109c3dc4ae783 | |
parent | f7cce27f5605b9e137b829a47949cb2d3c7e1cab (diff) |
sched_clock: stop maximum check on NO HZ
Working with ftrace I would get large jumps of 11 milliseconds or more with
the clock tracer. This killed the latency timings of ftrace and also
caused the irqoff self tests to fail.
What was happening is that with NO_HZ, idle would stop the jiffy counter, and
until the jiffy counter was updated the sched_clock would use a stale delta
jiffies, together with the gtod, to compute the maximum it checks against.
The jiffies would stop and the last sched_tick would record the last gtod.
On wakeup, the sched clock update would compare gtod + delta jiffies
(where delta jiffies would be zero) with the TSC. The TSC would have
correctly (with a stable TSC) moved forward several jiffies. But because the
jiffies had not been updated yet, the clock would be prevented from moving
forward because it would appear that the TSC jumped too far ahead.
The clock would then virtually stop, until the jiffies are updated. Then
the next sched clock update would see that the clock was very much behind
since the delta jiffies is now correct. This would then jump the clock
forward by several jiffies.
This caused ftrace to report several milliseconds of interrupts off
latency at every resume from NO_HZ idle.
This patch adds hooks into the nohz code to disable the checking of the
maximum clock update when nohz is in effect. It resumes the max check
when nohz has updated the jiffies again.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/sched.h | 17 | ||||
-rw-r--r-- | kernel/sched_clock.c | 39 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 2 |
3 files changed, 56 insertions, 2 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f847ca8d..33a8f42041fa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1573,13 +1573,28 @@ static inline void sched_clock_idle_sleep_event(void) | |||
1573 | static inline void sched_clock_idle_wakeup_event(u64 delta_ns) | 1573 | static inline void sched_clock_idle_wakeup_event(u64 delta_ns) |
1574 | { | 1574 | { |
1575 | } | 1575 | } |
1576 | #else | 1576 | |
1577 | #ifdef CONFIG_NO_HZ | ||
1578 | static inline void sched_clock_tick_stop(int cpu) | ||
1579 | { | ||
1580 | } | ||
1581 | |||
1582 | static inline void sched_clock_tick_start(int cpu) | ||
1583 | { | ||
1584 | } | ||
1585 | #endif | ||
1586 | |||
1587 | #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ | ||
1577 | extern void sched_clock_init(void); | 1588 | extern void sched_clock_init(void); |
1578 | extern u64 sched_clock_cpu(int cpu); | 1589 | extern u64 sched_clock_cpu(int cpu); |
1579 | extern void sched_clock_tick(void); | 1590 | extern void sched_clock_tick(void); |
1580 | extern void sched_clock_idle_sleep_event(void); | 1591 | extern void sched_clock_idle_sleep_event(void); |
1581 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); | 1592 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); |
1593 | #ifdef CONFIG_NO_HZ | ||
1594 | extern void sched_clock_tick_stop(int cpu); | ||
1595 | extern void sched_clock_tick_start(int cpu); | ||
1582 | #endif | 1596 | #endif |
1597 | #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ | ||
1583 | 1598 | ||
1584 | /* | 1599 | /* |
1585 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | 1600 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu |
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 42b81fa38cbd..97159e225a77 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
@@ -45,6 +45,9 @@ struct sched_clock_data { | |||
45 | u64 tick_raw; | 45 | u64 tick_raw; |
46 | u64 tick_gtod; | 46 | u64 tick_gtod; |
47 | u64 clock; | 47 | u64 clock; |
48 | #ifdef CONFIG_NO_HZ | ||
49 | int check_max; | ||
50 | #endif | ||
48 | }; | 51 | }; |
49 | 52 | ||
50 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); | 53 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); |
@@ -76,11 +79,45 @@ void sched_clock_init(void) | |||
76 | scd->tick_raw = 0; | 79 | scd->tick_raw = 0; |
77 | scd->tick_gtod = ktime_now; | 80 | scd->tick_gtod = ktime_now; |
78 | scd->clock = ktime_now; | 81 | scd->clock = ktime_now; |
82 | #ifdef CONFIG_NO_HZ | ||
83 | scd->check_max = 1; | ||
84 | #endif | ||
79 | } | 85 | } |
80 | 86 | ||
81 | sched_clock_running = 1; | 87 | sched_clock_running = 1; |
82 | } | 88 | } |
83 | 89 | ||
90 | #ifdef CONFIG_NO_HZ | ||
91 | /* | ||
92 | * The dynamic ticks makes the delta jiffies inaccurate. This | ||
93 | * prevents us from checking the maximum time update. | ||
94 | * Disable the maximum check during stopped ticks. | ||
95 | */ | ||
96 | void sched_clock_tick_stop(int cpu) | ||
97 | { | ||
98 | struct sched_clock_data *scd = cpu_sdc(cpu); | ||
99 | |||
100 | scd->check_max = 0; | ||
101 | } | ||
102 | |||
103 | void sched_clock_tick_start(int cpu) | ||
104 | { | ||
105 | struct sched_clock_data *scd = cpu_sdc(cpu); | ||
106 | |||
107 | scd->check_max = 1; | ||
108 | } | ||
109 | |||
110 | static int check_max(struct sched_clock_data *scd) | ||
111 | { | ||
112 | return scd->check_max; | ||
113 | } | ||
114 | #else | ||
115 | static int check_max(struct sched_clock_data *scd) | ||
116 | { | ||
117 | return 1; | ||
118 | } | ||
119 | #endif /* CONFIG_NO_HZ */ | ||
120 | |||
84 | /* | 121 | /* |
85 | * update the percpu scd from the raw @now value | 122 | * update the percpu scd from the raw @now value |
86 | * | 123 | * |
@@ -112,7 +149,7 @@ static void __update_sched_clock(struct sched_clock_data *scd, u64 now) | |||
112 | */ | 149 | */ |
113 | max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC; | 150 | max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC; |
114 | 151 | ||
115 | if (unlikely(clock + delta > max_clock)) { | 152 | if (unlikely(clock + delta > max_clock) && check_max(scd)) { |
116 | if (clock < max_clock) | 153 | if (clock < max_clock) |
117 | clock = max_clock; | 154 | clock = max_clock; |
118 | else | 155 | else |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b854a895591e..d63008b09a4c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -276,6 +276,7 @@ void tick_nohz_stop_sched_tick(void) | |||
276 | ts->tick_stopped = 1; | 276 | ts->tick_stopped = 1; |
277 | ts->idle_jiffies = last_jiffies; | 277 | ts->idle_jiffies = last_jiffies; |
278 | rcu_enter_nohz(); | 278 | rcu_enter_nohz(); |
279 | sched_clock_tick_stop(cpu); | ||
279 | } | 280 | } |
280 | 281 | ||
281 | /* | 282 | /* |
@@ -375,6 +376,7 @@ void tick_nohz_restart_sched_tick(void) | |||
375 | select_nohz_load_balancer(0); | 376 | select_nohz_load_balancer(0); |
376 | now = ktime_get(); | 377 | now = ktime_get(); |
377 | tick_do_update_jiffies64(now); | 378 | tick_do_update_jiffies64(now); |
379 | sched_clock_tick_start(cpu); | ||
378 | cpu_clear(cpu, nohz_cpu_mask); | 380 | cpu_clear(cpu, nohz_cpu_mask); |
379 | 381 | ||
380 | /* | 382 | /* |