about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Thomas Gleixner <tglx@linutronix.de> 2008-03-22 04:20:24 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2008-03-26 03:28:55 -0400
commit: 06d8308c61e54346585b2691c13ee3f90cb6fb2f (patch)
tree: d522e285525964706e7ec112bca23d407a540a8e
parent: 898a19de1502649877091b398229026b4142c0e2 (diff)
NOHZ: reevaluate idle sleep length after add_timer_on()

add_timer_on() can add a timer on a CPU which is currently in a long
idle sleep, but the timer wheel is not reevaluated by the nohz code on
that CPU. So a timer can be delayed for quite a long time. This
triggered a false positive in the clocksource watchdog code.

To avoid this we need to wake up the idle CPU and enforce the
reevaluation of the timer wheel for the next timer event.

Add a function, which checks a given CPU for idle state, marks the
idle task with NEED_RESCHED and sends a reschedule IPI to notify the
other CPU of the change in the timer wheel. Call this function from
add_timer_on().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: stable@kernel.org
---
 include/linux/sched.h |  6 ++++++
 kernel/sched.c        | 43 +++++++++++++++++++++++++++++++++++++++++++
 kernel/timer.c        | 10 +++++++++-
 3 files changed, 58 insertions(+), 1 deletion(-)

-rw-r--r-- include/linux/sched.h 6
-rw-r--r-- kernel/sched.c 43
-rw-r--r-- kernel/timer.c 10
3 files changed, 58 insertions, 1 deletion
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fed07d03364e..6a1e7afb099b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1541,6 +1541,12 @@ static inline void idle_task_exit(void) {}
1541 1541
1542extern void sched_idle_next(void); 1542extern void sched_idle_next(void);
1543 1543
1544#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
1545extern void wake_up_idle_cpu(int cpu);
1546#else
1547static inline void wake_up_idle_cpu(int cpu) { }
1548#endif
1549
1544#ifdef CONFIG_SCHED_DEBUG 1550#ifdef CONFIG_SCHED_DEBUG
1545extern unsigned int sysctl_sched_latency; 1551extern unsigned int sysctl_sched_latency;
1546extern unsigned int sysctl_sched_min_granularity; 1552extern unsigned int sysctl_sched_min_granularity;
diff --git a/kernel/sched.c b/kernel/sched.c
index 28c73f07efb2..8dcdec6fe0fe 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1052,6 +1052,49 @@ static void resched_cpu(int cpu)
1052 resched_task(cpu_curr(cpu)); 1052 resched_task(cpu_curr(cpu));
1053 spin_unlock_irqrestore(&rq->lock, flags); 1053 spin_unlock_irqrestore(&rq->lock, flags);
1054} 1054}
1055
1056#ifdef CONFIG_NO_HZ
1057/*
1058 * When add_timer_on() enqueues a timer into the timer wheel of an
1059 * idle CPU then this timer might expire before the next timer event
1060 * which is scheduled to wake up that CPU. In case of a completely
1061 * idle system the next event might even be infinite time into the
1062 * future. wake_up_idle_cpu() ensures that the CPU is woken up and
1063 * leaves the inner idle loop so the newly added timer is taken into
1064 * account when the CPU goes back to idle and evaluates the timer
1065 * wheel for the next timer event.
1066 */
1067void wake_up_idle_cpu(int cpu)
1068{
1069 struct rq *rq = cpu_rq(cpu);
1070
1071 if (cpu == smp_processor_id())
1072 return;
1073
1074 /*
1075 * This is safe, as this function is called with the timer
1076 * wheel base lock of (cpu) held. When the CPU is on the way
1077 * to idle and has not yet set rq->curr to idle then it will
1078 * be serialized on the timer wheel base lock and take the new
1079 * timer into account automatically.
1080 */
1081 if (rq->curr != rq->idle)
1082 return;
1083
1084 /*
1085 * We can set TIF_NEED_RESCHED on the idle task of the other CPU
1086 * lockless. The worst case is that the other CPU runs the
1087 * idle task through an additional NOOP schedule().
1088 */
1089 set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);
1090
1091 /* NEED_RESCHED must be visible before we test polling */
1092 smp_mb();
1093 if (!tsk_is_polling(rq->idle))
1094 smp_send_reschedule(cpu);
1095}
1096#endif
1097
1055#else 1098#else
1056static void __resched_task(struct task_struct *p, int tif_bit) 1099static void __resched_task(struct task_struct *p, int tif_bit)
1057{ 1100{
diff --git a/kernel/timer.c b/kernel/timer.c
index 99b00a25f88b..b024106daa70 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -451,10 +451,18 @@ void add_timer_on(struct timer_list *timer, int cpu)
451 spin_lock_irqsave(&base->lock, flags); 451 spin_lock_irqsave(&base->lock, flags);
452 timer_set_base(timer, base); 452 timer_set_base(timer, base);
453 internal_add_timer(base, timer); 453 internal_add_timer(base, timer);
454 /*
455 * Check whether the other CPU is idle and needs to be
456 * triggered to reevaluate the timer wheel when nohz is
457 * active. We are protected against the other CPU fiddling
458 * with the timer by holding the timer base lock. This also
459 * makes sure that a CPU on the way to idle can not evaluate
460 * the timer wheel.
461 */
462 wake_up_idle_cpu(cpu);
454 spin_unlock_irqrestore(&base->lock, flags); 463 spin_unlock_irqrestore(&base->lock, flags);
455} 464}
456 465
457
458/** 466/**
459 * mod_timer - modify a timer's timeout 467 * mod_timer - modify a timer's timeout
460 * @timer: the timer to be modified 468 * @timer: the timer to be modified