aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Thomas Gleixner <tglx@linutronix.de> 2008-07-18 11:27:28 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2008-07-18 12:10:28 -0400
commit: b8f8c3cf0a4ac0632ec3f0e15e9dc0c29de917af (patch)
tree: 183825db00f4e9252603a51a1be6f8874a963dbc
parent: 857f3fd7a496ddf4329345af65a4a2b16dd25fe8 (diff)
nohz: prevent tick stop outside of the idle loop
Jack Ren and Eric Miao tracked down the following long standing
problem in the NOHZ code:

    scheduler switch to idle task
    enable interrupts

Window starts here

    ----> interrupt happens (does not set NEED_RESCHED)
          irq_exit() stops the tick

    ----> interrupt happens (does set NEED_RESCHED)

    return from schedule()

    cpu_idle(): preempt_disable();

Window ends here

The interrupts can happen at any point inside the race window. The
first interrupt stops the tick, the second one causes the scheduler to
rerun and switch away from idle again and we end up with the tick
disabled.

The fact that it needs two interrupts where the first one does not set
NEED_RESCHED and the second one does made the bug obscure and extremely
hard to reproduce and analyse. Kudos to Jack and Eric.

Solution: Limit the NOHZ functionality to the idle loop to make sure
that we can not run into such a situation ever again.

cpu_idle()
{
    preempt_disable();

    while(1) {
        tick_nohz_stop_sched_tick(1);   <- tell NOHZ code that we
                                           are in the idle loop

        while (!need_resched())
            halt();

        tick_nohz_restart_sched_tick(); <- disables NOHZ mode
        preempt_enable_no_resched();
        schedule();
        preempt_disable();
    }
}

In hindsight we should have done this forever, but ...

/me grabs a large brown paperbag.

Debugged-by: Jack Ren <jack.ren@marvell.com>,
Debugged-by: eric miao <eric.y.miao@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--  arch/arm/kernel/process.c               |  2
-rw-r--r--  arch/avr32/kernel/process.c             |  2
-rw-r--r--  arch/blackfin/kernel/process.c          |  2
-rw-r--r--  arch/mips/kernel/process.c              |  2
-rw-r--r--  arch/powerpc/kernel/idle.c              |  2
-rw-r--r--  arch/powerpc/platforms/iseries/setup.c  |  4
-rw-r--r--  arch/sh/kernel/process_32.c             |  2
-rw-r--r--  arch/sparc64/kernel/process.c           |  2
-rw-r--r--  arch/um/kernel/process.c                |  2
-rw-r--r--  arch/x86/kernel/process_32.c            |  2
-rw-r--r--  arch/x86/kernel/process_64.c            |  2
-rw-r--r--  include/linux/tick.h                    |  5
-rw-r--r--  kernel/softirq.c                        |  2
-rw-r--r--  kernel/time/tick-sched.c                | 12
14 files changed, 26 insertions, 17 deletions
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 46bf2ede6128..84f5a4c778fb 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -164,7 +164,7 @@ void cpu_idle(void)
164 if (!idle) 164 if (!idle)
165 idle = default_idle; 165 idle = default_idle;
166 leds_event(led_idle_start); 166 leds_event(led_idle_start);
167 tick_nohz_stop_sched_tick(); 167 tick_nohz_stop_sched_tick(1);
168 while (!need_resched()) 168 while (!need_resched())
169 idle(); 169 idle();
170 leds_event(led_idle_end); 170 leds_event(led_idle_end);
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 6cf9df176274..ff820a9e743a 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -31,7 +31,7 @@ void cpu_idle(void)
31{ 31{
32 /* endless idle loop with no priority at all */ 32 /* endless idle loop with no priority at all */
33 while (1) { 33 while (1) {
34 tick_nohz_stop_sched_tick(); 34 tick_nohz_stop_sched_tick(1);
35 while (!need_resched()) 35 while (!need_resched())
36 cpu_idle_sleep(); 36 cpu_idle_sleep();
37 tick_nohz_restart_sched_tick(); 37 tick_nohz_restart_sched_tick();
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 53c2cd255441..77800dd83e57 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -105,7 +105,7 @@ void cpu_idle(void)
105#endif 105#endif
106 if (!idle) 106 if (!idle)
107 idle = default_idle; 107 idle = default_idle;
108 tick_nohz_stop_sched_tick(); 108 tick_nohz_stop_sched_tick(1);
109 while (!need_resched()) 109 while (!need_resched())
110 idle(); 110 idle();
111 tick_nohz_restart_sched_tick(); 111 tick_nohz_restart_sched_tick();
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 2c09a442e5e5..bdead3aad253 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -53,7 +53,7 @@ void __noreturn cpu_idle(void)
53{ 53{
54 /* endless idle loop with no priority at all */ 54 /* endless idle loop with no priority at all */
55 while (1) { 55 while (1) {
56 tick_nohz_stop_sched_tick(); 56 tick_nohz_stop_sched_tick(1);
57 while (!need_resched()) { 57 while (!need_resched()) {
58#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG 58#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
59 extern void smtc_idle_loop_hook(void); 59 extern void smtc_idle_loop_hook(void);
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index c3cf0e8f3ac1..d308a9f70f1b 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -60,7 +60,7 @@ void cpu_idle(void)
60 60
61 set_thread_flag(TIF_POLLING_NRFLAG); 61 set_thread_flag(TIF_POLLING_NRFLAG);
62 while (1) { 62 while (1) {
63 tick_nohz_stop_sched_tick(); 63 tick_nohz_stop_sched_tick(1);
64 while (!need_resched() && !cpu_should_die()) { 64 while (!need_resched() && !cpu_should_die()) {
65 ppc64_runlatch_off(); 65 ppc64_runlatch_off();
66 66
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index b72120751bbe..70b688c1aefb 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -561,7 +561,7 @@ static void yield_shared_processor(void)
561static void iseries_shared_idle(void) 561static void iseries_shared_idle(void)
562{ 562{
563 while (1) { 563 while (1) {
564 tick_nohz_stop_sched_tick(); 564 tick_nohz_stop_sched_tick(1);
565 while (!need_resched() && !hvlpevent_is_pending()) { 565 while (!need_resched() && !hvlpevent_is_pending()) {
566 local_irq_disable(); 566 local_irq_disable();
567 ppc64_runlatch_off(); 567 ppc64_runlatch_off();
@@ -591,7 +591,7 @@ static void iseries_dedicated_idle(void)
591 set_thread_flag(TIF_POLLING_NRFLAG); 591 set_thread_flag(TIF_POLLING_NRFLAG);
592 592
593 while (1) { 593 while (1) {
594 tick_nohz_stop_sched_tick(); 594 tick_nohz_stop_sched_tick(1);
595 if (!need_resched()) { 595 if (!need_resched()) {
596 while (!need_resched()) { 596 while (!need_resched()) {
597 ppc64_runlatch_off(); 597 ppc64_runlatch_off();
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index b98e37a1f54c..921892c351da 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -86,7 +86,7 @@ void cpu_idle(void)
86 if (!idle) 86 if (!idle)
87 idle = default_idle; 87 idle = default_idle;
88 88
89 tick_nohz_stop_sched_tick(); 89 tick_nohz_stop_sched_tick(1);
90 while (!need_resched()) 90 while (!need_resched())
91 idle(); 91 idle();
92 tick_nohz_restart_sched_tick(); 92 tick_nohz_restart_sched_tick();
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 2084f81a76e1..0798928ba361 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -97,7 +97,7 @@ void cpu_idle(void)
97 set_thread_flag(TIF_POLLING_NRFLAG); 97 set_thread_flag(TIF_POLLING_NRFLAG);
98 98
99 while(1) { 99 while(1) {
100 tick_nohz_stop_sched_tick(); 100 tick_nohz_stop_sched_tick(1);
101 101
102 while (!need_resched() && !cpu_is_offline(cpu)) 102 while (!need_resched() && !cpu_is_offline(cpu))
103 sparc64_yield(cpu); 103 sparc64_yield(cpu);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 83603cfbde81..a1c6d07cac3e 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -243,7 +243,7 @@ void default_idle(void)
243 if (need_resched()) 243 if (need_resched())
244 schedule(); 244 schedule();
245 245
246 tick_nohz_stop_sched_tick(); 246 tick_nohz_stop_sched_tick(1);
247 nsecs = disable_timer(); 247 nsecs = disable_timer();
248 idle_sleep(nsecs); 248 idle_sleep(nsecs);
249 tick_nohz_restart_sched_tick(); 249 tick_nohz_restart_sched_tick();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f8476dfbb60d..1f5fa1cf16dd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -166,7 +166,7 @@ void cpu_idle(void)
166 166
167 /* endless idle loop with no priority at all */ 167 /* endless idle loop with no priority at all */
168 while (1) { 168 while (1) {
169 tick_nohz_stop_sched_tick(); 169 tick_nohz_stop_sched_tick(1);
170 while (!need_resched()) { 170 while (!need_resched()) {
171 void (*idle)(void); 171 void (*idle)(void);
172 172
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e2319f39988b..c0a5c2a687e6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -148,7 +148,7 @@ void cpu_idle(void)
148 current_thread_info()->status |= TS_POLLING; 148 current_thread_info()->status |= TS_POLLING;
149 /* endless idle loop with no priority at all */ 149 /* endless idle loop with no priority at all */
150 while (1) { 150 while (1) {
151 tick_nohz_stop_sched_tick(); 151 tick_nohz_stop_sched_tick(1);
152 while (!need_resched()) { 152 while (!need_resched()) {
153 void (*idle)(void); 153 void (*idle)(void);
154 154
diff --git a/include/linux/tick.h b/include/linux/tick.h
index a881c652f7e9..d3c02695dc5d 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -49,6 +49,7 @@ struct tick_sched {
49 unsigned long check_clocks; 49 unsigned long check_clocks;
50 enum tick_nohz_mode nohz_mode; 50 enum tick_nohz_mode nohz_mode;
51 ktime_t idle_tick; 51 ktime_t idle_tick;
52 int inidle;
52 int tick_stopped; 53 int tick_stopped;
53 unsigned long idle_jiffies; 54 unsigned long idle_jiffies;
54 unsigned long idle_calls; 55 unsigned long idle_calls;
@@ -105,14 +106,14 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
105#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ 106#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
106 107
107# ifdef CONFIG_NO_HZ 108# ifdef CONFIG_NO_HZ
108extern void tick_nohz_stop_sched_tick(void); 109extern void tick_nohz_stop_sched_tick(int inidle);
109extern void tick_nohz_restart_sched_tick(void); 110extern void tick_nohz_restart_sched_tick(void);
110extern void tick_nohz_update_jiffies(void); 111extern void tick_nohz_update_jiffies(void);
111extern ktime_t tick_nohz_get_sleep_length(void); 112extern ktime_t tick_nohz_get_sleep_length(void);
112extern void tick_nohz_stop_idle(int cpu); 113extern void tick_nohz_stop_idle(int cpu);
113extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); 114extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
114# else 115# else
115static inline void tick_nohz_stop_sched_tick(void) { } 116static inline void tick_nohz_stop_sched_tick(int inidle) { }
116static inline void tick_nohz_restart_sched_tick(void) { } 117static inline void tick_nohz_restart_sched_tick(void) { }
117static inline void tick_nohz_update_jiffies(void) { } 118static inline void tick_nohz_update_jiffies(void) { }
118static inline ktime_t tick_nohz_get_sleep_length(void) 119static inline ktime_t tick_nohz_get_sleep_length(void)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 36e061740047..05f248039d77 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -312,7 +312,7 @@ void irq_exit(void)
312#ifdef CONFIG_NO_HZ 312#ifdef CONFIG_NO_HZ
313 /* Make sure that timer wheel updates are propagated */ 313 /* Make sure that timer wheel updates are propagated */
314 if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) 314 if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
315 tick_nohz_stop_sched_tick(); 315 tick_nohz_stop_sched_tick(0);
316 rcu_irq_exit(); 316 rcu_irq_exit();
317#endif 317#endif
318 preempt_enable_no_resched(); 318 preempt_enable_no_resched();
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 86baa4f0dfe4..ee962d11107b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -195,7 +195,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
195 * Called either from the idle loop or from irq_exit() when an idle period was 195 * Called either from the idle loop or from irq_exit() when an idle period was
196 * just interrupted by an interrupt which did not cause a reschedule. 196 * just interrupted by an interrupt which did not cause a reschedule.
197 */ 197 */
198void tick_nohz_stop_sched_tick(void) 198void tick_nohz_stop_sched_tick(int inidle)
199{ 199{
200 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; 200 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
201 struct tick_sched *ts; 201 struct tick_sched *ts;
@@ -224,6 +224,11 @@ void tick_nohz_stop_sched_tick(void)
224 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) 224 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
225 goto end; 225 goto end;
226 226
227 if (!inidle && !ts->inidle)
228 goto end;
229
230 ts->inidle = 1;
231
227 if (need_resched()) 232 if (need_resched())
228 goto end; 233 goto end;
229 234
@@ -372,11 +377,14 @@ void tick_nohz_restart_sched_tick(void)
372 local_irq_disable(); 377 local_irq_disable();
373 tick_nohz_stop_idle(cpu); 378 tick_nohz_stop_idle(cpu);
374 379
375 if (!ts->tick_stopped) { 380 if (!ts->inidle || !ts->tick_stopped) {
381 ts->inidle = 0;
376 local_irq_enable(); 382 local_irq_enable();
377 return; 383 return;
378 } 384 }
379 385
386 ts->inidle = 0;
387
380 rcu_exit_nohz(); 388 rcu_exit_nohz();
381 389
382 /* Update jiffies first */ 390 /* Update jiffies first */