diff options
author | Paul E. McKenney <paul.mckenney@linaro.org> | 2012-05-10 19:41:44 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-06-06 23:43:28 -0400 |
commit | aa9b16306e3243229580ff889cc59fd66bf77973 (patch) | |
tree | f01812ee99804cd7d7533a1d3cba1e9d439e6f63 /kernel | |
parent | 5955f7eecd77d6b440db278b266cfecdb72ecd00 (diff) |
rcu: Precompute RCU_FAST_NO_HZ timer offsets
When a CPU is entering dyntick-idle mode, tick_nohz_stop_sched_tick()
calls rcu_needs_cpu() to see if RCU needs that CPU, and, if not, computes the
next wakeup time based on the timer wheels. Only later, when actually
entering the idle loop, rcu_prepare_for_idle() will be invoked. In some
cases, rcu_prepare_for_idle() will post timers to wake the CPU back up.
But all for naught: The next wakeup time for the CPU has already been
computed, and posting a timer afterwards does not force that wakeup
time to be recomputed. This means that rcu_prepare_for_idle()'s timers have
no effect.
This is not a problem on a busy system because something else will wake
up the CPU soon enough. However, on lightly loaded systems, the CPU
might stay asleep for a considerable length of time. If that CPU has
a callback that the rest of the system is waiting on, the system might
run very slowly or (in theory) even hang.
This commit avoids this problem by having rcu_needs_cpu() give
tick_nohz_stop_sched_tick() an estimate of when RCU will need the CPU
to wake back up, which tick_nohz_stop_sched_tick() takes into account
when programming the CPU's wakeup time. An alternative approach is
for rcu_prepare_for_idle() to use hrtimers instead of normal timers,
but timers are much more efficient than are hrtimers for frequently
and repeatedly posting and cancelling a given timer, which is exactly
what RCU_FAST_NO_HZ does.
Reported-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcutree_plugin.h | 66 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 7 |
2 files changed, 49 insertions, 24 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 6bd9637d5d83..5271a020887e 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) | |||
1886 | * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs | 1886 | * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs |
1887 | * any flavor of RCU. | 1887 | * any flavor of RCU. |
1888 | */ | 1888 | */ |
1889 | int rcu_needs_cpu(int cpu) | 1889 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) |
1890 | { | 1890 | { |
1891 | *delta_jiffies = ULONG_MAX; | ||
1891 | return rcu_cpu_has_callbacks(cpu); | 1892 | return rcu_cpu_has_callbacks(cpu); |
1892 | } | 1893 | } |
1893 | 1894 | ||
@@ -1963,28 +1964,6 @@ static void rcu_idle_count_callbacks_posted(void) | |||
1963 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ | 1964 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ |
1964 | 1965 | ||
1965 | /* | 1966 | /* |
1966 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | ||
1967 | * callbacks on this CPU, (2) this CPU has not yet attempted to enter | ||
1968 | * dyntick-idle mode, or (3) this CPU is in the process of attempting to | ||
1969 | * enter dyntick-idle mode. Otherwise, if we have recently tried and failed | ||
1970 | * to enter dyntick-idle mode, we refuse to try to enter it. After all, | ||
1971 | * it is better to incur scheduling-clock interrupts than to spin | ||
1972 | * continuously for the same time duration! | ||
1973 | */ | ||
1974 | int rcu_needs_cpu(int cpu) | ||
1975 | { | ||
1976 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
1977 | |||
1978 | /* Flag a new idle sojourn to the idle-entry state machine. */ | ||
1979 | rdtp->idle_first_pass = 1; | ||
1980 | /* If no callbacks, RCU doesn't need the CPU. */ | ||
1981 | if (!rcu_cpu_has_callbacks(cpu)) | ||
1982 | return 0; | ||
1983 | /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ | ||
1984 | return rdtp->dyntick_holdoff == jiffies; | ||
1985 | } | ||
1986 | |||
1987 | /* | ||
1988 | * Does the specified flavor of RCU have non-lazy callbacks pending on | 1967 | * Does the specified flavor of RCU have non-lazy callbacks pending on |
1989 | * the specified CPU? Both RCU flavor and CPU are specified by the | 1968 | * the specified CPU? Both RCU flavor and CPU are specified by the |
1990 | * rcu_data structure. | 1969 | * rcu_data structure. |
@@ -2027,6 +2006,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu) | |||
2027 | } | 2006 | } |
2028 | 2007 | ||
2029 | /* | 2008 | /* |
2009 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | ||
2010 | * callbacks on this CPU, (2) this CPU has not yet attempted to enter | ||
2011 | * dyntick-idle mode, or (3) this CPU is in the process of attempting to | ||
2012 | * enter dyntick-idle mode. Otherwise, if we have recently tried and failed | ||
2013 | * to enter dyntick-idle mode, we refuse to try to enter it. After all, | ||
2014 | * it is better to incur scheduling-clock interrupts than to spin | ||
2015 | * continuously for the same time duration! | ||
2016 | * | ||
2017 | * The delta_jiffies argument is used to store the time when RCU is | ||
2018 | * going to need the CPU again if it still has callbacks. The reason | ||
2019 | * for this is that rcu_prepare_for_idle() might need to post a timer, | ||
2020 | * but if so, it will do so after tick_nohz_stop_sched_tick() has set | ||
2021 | * the wakeup time for this CPU. This means that RCU's timer can be | ||
2022 | * delayed until the wakeup time, which defeats the purpose of posting | ||
2023 | * a timer. | ||
2024 | */ | ||
2025 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) | ||
2026 | { | ||
2027 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
2028 | |||
2029 | /* Flag a new idle sojourn to the idle-entry state machine. */ | ||
2030 | rdtp->idle_first_pass = 1; | ||
2031 | /* If no callbacks, RCU doesn't need the CPU. */ | ||
2032 | if (!rcu_cpu_has_callbacks(cpu)) { | ||
2033 | *delta_jiffies = ULONG_MAX; | ||
2034 | return 0; | ||
2035 | } | ||
2036 | if (rdtp->dyntick_holdoff == jiffies) { | ||
2037 | /* RCU recently tried and failed, so don't try again. */ | ||
2038 | *delta_jiffies = 1; | ||
2039 | return 1; | ||
2040 | } | ||
2041 | /* Set up for the possibility that RCU will post a timer. */ | ||
2042 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) | ||
2043 | *delta_jiffies = RCU_IDLE_GP_DELAY; | ||
2044 | else | ||
2045 | *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; | ||
2046 | return 0; | ||
2047 | } | ||
2048 | |||
2049 | /* | ||
2030 | * Handler for smp_call_function_single(). The only point of this | 2050 | * Handler for smp_call_function_single(). The only point of this |
2031 | * handler is to wake the CPU up, so the handler does only tracing. | 2051 | * handler is to wake the CPU up, so the handler does only tracing. |
2032 | */ | 2052 | */ |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6a3a5b9ff561..52f5ebbd443b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); | |||
274 | static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | 274 | static void tick_nohz_stop_sched_tick(struct tick_sched *ts) |
275 | { | 275 | { |
276 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; | 276 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; |
277 | unsigned long rcu_delta_jiffies; | ||
277 | ktime_t last_update, expires, now; | 278 | ktime_t last_update, expires, now; |
278 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 279 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
279 | u64 time_delta; | 280 | u64 time_delta; |
@@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | |||
322 | time_delta = timekeeping_max_deferment(); | 323 | time_delta = timekeeping_max_deferment(); |
323 | } while (read_seqretry(&xtime_lock, seq)); | 324 | } while (read_seqretry(&xtime_lock, seq)); |
324 | 325 | ||
325 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || | 326 | if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || |
326 | arch_needs_cpu(cpu)) { | 327 | arch_needs_cpu(cpu)) { |
327 | next_jiffies = last_jiffies + 1; | 328 | next_jiffies = last_jiffies + 1; |
328 | delta_jiffies = 1; | 329 | delta_jiffies = 1; |
@@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | |||
330 | /* Get the next timer wheel timer */ | 331 | /* Get the next timer wheel timer */ |
331 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 332 | next_jiffies = get_next_timer_interrupt(last_jiffies); |
332 | delta_jiffies = next_jiffies - last_jiffies; | 333 | delta_jiffies = next_jiffies - last_jiffies; |
334 | if (rcu_delta_jiffies < delta_jiffies) { | ||
335 | next_jiffies = last_jiffies + rcu_delta_jiffies; | ||
336 | delta_jiffies = rcu_delta_jiffies; | ||
337 | } | ||
333 | } | 338 | } |
334 | /* | 339 | /* |
335 | * Do not stop the tick, if we are only one off | 340 | * Do not stop the tick, if we are only one off |