aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Paul E. McKenney <paul.mckenney@linaro.org> 2012-05-10 19:41:44 -0400
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2012-06-06 23:43:28 -0400
commit aa9b16306e3243229580ff889cc59fd66bf77973 (patch)
tree f01812ee99804cd7d7533a1d3cba1e9d439e6f63 /kernel
parent 5955f7eecd77d6b440db278b266cfecdb72ecd00 (diff)
rcu: Precompute RCU_FAST_NO_HZ timer offsets
When a CPU is entering dyntick-idle mode, tick_nohz_stop_sched_tick() calls rcu_needs_cpu() to see if RCU needs that CPU, and, if not, computes the next wakeup time based on the timer wheels. Only later, when actually entering the idle loop, rcu_prepare_for_idle() will be invoked. In some cases, rcu_prepare_for_idle() will post timers to wake the CPU back up. But all for naught: The next wakeup time for the CPU has already been computed, and posting a timer afterwards does not force that wakeup time to be recomputed. This means that rcu_prepare_for_idle()'s timers have no effect. This is not a problem on a busy system because something else will wake up the CPU soon enough. However, on lightly loaded systems, the CPU might stay asleep for a considerable length of time. If that CPU has a callback that the rest of the system is waiting on, the system might run very slowly or (in theory) even hang. This commit avoids this problem by having rcu_needs_cpu() give tick_nohz_stop_sched_tick() an estimate of when RCU will need the CPU to wake back up, which tick_nohz_stop_sched_tick() takes into account when programming the CPU's wakeup time. An alternative approach is for rcu_prepare_for_idle() to use hrtimers instead of normal timers, but timers are much more efficient than are hrtimers for frequently and repeatedly posting and cancelling a given timer, which is exactly what RCU_FAST_NO_HZ does. Reported-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr> Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com> Tested-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/rcutree_plugin.h 66
-rw-r--r-- kernel/time/tick-sched.c 7
2 files changed, 49 insertions, 24 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 6bd9637d5d83..5271a020887e 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
1886 * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs 1886 * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs
1887 * any flavor of RCU. 1887 * any flavor of RCU.
1888 */ 1888 */
1889int rcu_needs_cpu(int cpu) 1889int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
1890{ 1890{
1891 *delta_jiffies = ULONG_MAX;
1891 return rcu_cpu_has_callbacks(cpu); 1892 return rcu_cpu_has_callbacks(cpu);
1892} 1893}
1893 1894
@@ -1963,28 +1964,6 @@ static void rcu_idle_count_callbacks_posted(void)
1963#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ 1964#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1964 1965
1965/* 1966/*
1966 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
1967 * callbacks on this CPU, (2) this CPU has not yet attempted to enter
1968 * dyntick-idle mode, or (3) this CPU is in the process of attempting to
1969 * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
1970 * to enter dyntick-idle mode, we refuse to try to enter it. After all,
1971 * it is better to incur scheduling-clock interrupts than to spin
1972 * continuously for the same time duration!
1973 */
1974int rcu_needs_cpu(int cpu)
1975{
1976 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1977
1978 /* Flag a new idle sojourn to the idle-entry state machine. */
1979 rdtp->idle_first_pass = 1;
1980 /* If no callbacks, RCU doesn't need the CPU. */
1981 if (!rcu_cpu_has_callbacks(cpu))
1982 return 0;
1983 /* Otherwise, RCU needs the CPU only if it recently tried and failed. */
1984 return rdtp->dyntick_holdoff == jiffies;
1985}
1986
1987/*
1988 * Does the specified flavor of RCU have non-lazy callbacks pending on 1967 * Does the specified flavor of RCU have non-lazy callbacks pending on
1989 * the specified CPU? Both RCU flavor and CPU are specified by the 1968 * the specified CPU? Both RCU flavor and CPU are specified by the
1990 * rcu_data structure. 1969 * rcu_data structure.
@@ -2027,6 +2006,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
2027} 2006}
2028 2007
2029/* 2008/*
2009 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
2010 * callbacks on this CPU, (2) this CPU has not yet attempted to enter
2011 * dyntick-idle mode, or (3) this CPU is in the process of attempting to
2012 * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
2013 * to enter dyntick-idle mode, we refuse to try to enter it. After all,
2014 * it is better to incur scheduling-clock interrupts than to spin
2015 * continuously for the same time duration!
2016 *
2017 * The delta_jiffies argument is used to store the time when RCU is
2018 * going to need the CPU again if it still has callbacks. The reason
2019 * for this is that rcu_prepare_for_idle() might need to post a timer,
2020 * but if so, it will do so after tick_nohz_stop_sched_tick() has set
2021 * the wakeup time for this CPU. This means that RCU's timer can be
2022 * delayed until the wakeup time, which defeats the purpose of posting
2023 * a timer.
2024 */
2025int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
2026{
2027 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
2028
2029 /* Flag a new idle sojourn to the idle-entry state machine. */
2030 rdtp->idle_first_pass = 1;
2031 /* If no callbacks, RCU doesn't need the CPU. */
2032 if (!rcu_cpu_has_callbacks(cpu)) {
2033 *delta_jiffies = ULONG_MAX;
2034 return 0;
2035 }
2036 if (rdtp->dyntick_holdoff == jiffies) {
2037 /* RCU recently tried and failed, so don't try again. */
2038 *delta_jiffies = 1;
2039 return 1;
2040 }
2041 /* Set up for the possibility that RCU will post a timer. */
2042 if (rcu_cpu_has_nonlazy_callbacks(cpu))
2043 *delta_jiffies = RCU_IDLE_GP_DELAY;
2044 else
2045 *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY;
2046 return 0;
2047}
2048
2049/*
2030 * Handler for smp_call_function_single(). The only point of this 2050 * Handler for smp_call_function_single(). The only point of this
2031 * handler is to wake the CPU up, so the handler does only tracing. 2051 * handler is to wake the CPU up, so the handler does only tracing.
2032 */ 2052 */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6a3a5b9ff561..52f5ebbd443b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
274static void tick_nohz_stop_sched_tick(struct tick_sched *ts) 274static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
275{ 275{
276 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; 276 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
277 unsigned long rcu_delta_jiffies;
277 ktime_t last_update, expires, now; 278 ktime_t last_update, expires, now;
278 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 279 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
279 u64 time_delta; 280 u64 time_delta;
@@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
322 time_delta = timekeeping_max_deferment(); 323 time_delta = timekeeping_max_deferment();
323 } while (read_seqretry(&xtime_lock, seq)); 324 } while (read_seqretry(&xtime_lock, seq));
324 325
325 if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || 326 if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
326 arch_needs_cpu(cpu)) { 327 arch_needs_cpu(cpu)) {
327 next_jiffies = last_jiffies + 1; 328 next_jiffies = last_jiffies + 1;
328 delta_jiffies = 1; 329 delta_jiffies = 1;
@@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
330 /* Get the next timer wheel timer */ 331 /* Get the next timer wheel timer */
331 next_jiffies = get_next_timer_interrupt(last_jiffies); 332 next_jiffies = get_next_timer_interrupt(last_jiffies);
332 delta_jiffies = next_jiffies - last_jiffies; 333 delta_jiffies = next_jiffies - last_jiffies;
334 if (rcu_delta_jiffies < delta_jiffies) {
335 next_jiffies = last_jiffies + rcu_delta_jiffies;
336 delta_jiffies = rcu_delta_jiffies;
337 }
333 } 338 }
334 /* 339 /*
335 * Do not stop the tick, if we are only one off 340 * Do not stop the tick, if we are only one off