aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paul.mckenney@linaro.org>2011-11-28 15:28:34 -0500
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2011-12-11 13:32:07 -0500
commit7cb92499000e3c86dae653077b1465458a039ef6 (patch)
treeebc982a5cc562b4fe0cb8f20541f45a5506a0b5f
parent3842a0832a1d6eb0b31421f8810a813135967512 (diff)
rcu: Permit dyntick-idle with callbacks pending
The current implementation of RCU_FAST_NO_HZ prevents CPUs from entering dyntick-idle state if they have RCU callbacks pending. Unfortunately, this has the side-effect of often preventing them from entering this state, especially if at least one other CPU is not in dyntick-idle state. However, the resulting per-tick wakeup is wasteful in many cases: if the CPU has already fully responded to the current RCU grace period, there will be nothing for it to do until this grace period ends, which will frequently take several jiffies. This commit therefore permits a CPU that has done everything that the current grace period has asked of it (rcu_pending() == 0) to enter dyntick-idle mode, even if it still has RCU callbacks pending. However, such a CPU posts a timer to wake it up several jiffies later (6 jiffies, based on experience with grace-period lengths). This wakeup is required to handle situations that can result in all CPUs being in dyntick-idle mode, thus failing to ever complete the current grace period. If a CPU wakes up before the timer goes off, then it cancels that timer, thus avoiding spurious wakeups. Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r--include/trace/events/rcu.h3
-rw-r--r--kernel/rcutree.c3
-rw-r--r--kernel/rcutree.h2
-rw-r--r--kernel/rcutree_plugin.h75
4 files changed, 78 insertions, 5 deletions
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 8dd6fcb94946..c75418c3ccb8 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -288,9 +288,10 @@ TRACE_EVENT(rcu_dyntick,
288 * "No callbacks": Nothing to do, no callbacks on this CPU. 288 * "No callbacks": Nothing to do, no callbacks on this CPU.
289 * "In holdoff": Nothing to do, holding off after unsuccessful attempt. 289 * "In holdoff": Nothing to do, holding off after unsuccessful attempt.
290 * "Begin holdoff": Attempt failed, don't retry until next jiffy. 290 * "Begin holdoff": Attempt failed, don't retry until next jiffy.
291 * "Dyntick with callbacks": Entering dyntick-idle despite callbacks.
291 * "More callbacks": Still more callbacks, try again to clear them out. 292 * "More callbacks": Still more callbacks, try again to clear them out.
292 * "Callbacks drained": All callbacks processed, off to dyntick idle! 293 * "Callbacks drained": All callbacks processed, off to dyntick idle!
293 * "CPU awakened at GP end": 294 * "Timer": Timer fired to cause CPU to continue processing callbacks.
294 */ 295 */
295TRACE_EVENT(rcu_prep_idle, 296TRACE_EVENT(rcu_prep_idle,
296 297
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 69bb37287cc8..bf085d7f6a3f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -448,6 +448,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
448 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 448 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
449 smp_mb__after_atomic_inc(); /* See above. */ 449 smp_mb__after_atomic_inc(); /* See above. */
450 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 450 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
451 rcu_cleanup_after_idle(smp_processor_id());
451 trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); 452 trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
452 if (!is_idle_task(current)) { 453 if (!is_idle_task(current)) {
453 struct task_struct *idle = idle_task(smp_processor_id()); 454 struct task_struct *idle = idle_task(smp_processor_id());
@@ -2057,6 +2058,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2057 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; 2058 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING;
2058 atomic_set(&rdp->dynticks->dynticks, 2059 atomic_set(&rdp->dynticks->dynticks,
2059 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); 2060 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
2061 rcu_prepare_for_idle_init(cpu);
2060 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2062 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2061 2063
2062 /* 2064 /*
@@ -2138,6 +2140,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2138 rcu_send_cbs_to_online(&rcu_bh_state); 2140 rcu_send_cbs_to_online(&rcu_bh_state);
2139 rcu_send_cbs_to_online(&rcu_sched_state); 2141 rcu_send_cbs_to_online(&rcu_sched_state);
2140 rcu_preempt_send_cbs_to_online(); 2142 rcu_preempt_send_cbs_to_online();
2143 rcu_cleanup_after_idle(cpu);
2141 break; 2144 break;
2142 case CPU_DEAD: 2145 case CPU_DEAD:
2143 case CPU_DEAD_FROZEN: 2146 case CPU_DEAD_FROZEN:
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 70d8a557090f..9bcfbc9d16c6 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -467,6 +467,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg);
467#endif /* #ifdef CONFIG_RCU_BOOST */ 467#endif /* #ifdef CONFIG_RCU_BOOST */
468static void rcu_cpu_kthread_setrt(int cpu, int to_rt); 468static void rcu_cpu_kthread_setrt(int cpu, int to_rt);
469static void __cpuinit rcu_prepare_kthreads(int cpu); 469static void __cpuinit rcu_prepare_kthreads(int cpu);
470static void rcu_prepare_for_idle_init(int cpu);
471static void rcu_cleanup_after_idle(int cpu);
470static void rcu_prepare_for_idle(int cpu); 472static void rcu_prepare_for_idle(int cpu);
471 473
472#endif /* #ifndef RCU_TREE_NONCORE */ 474#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 42ca5a400ae3..dbcea6b93aea 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1947,9 +1947,8 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1947 * 1 if so. This function is part of the RCU implementation; it is -not- 1947 * 1 if so. This function is part of the RCU implementation; it is -not-
1948 * an exported member of the RCU API. 1948 * an exported member of the RCU API.
1949 * 1949 *
1950 * Because we have preemptible RCU, just check whether this CPU needs 1950 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1951 * any flavor of RCU. Do not chew up lots of CPU cycles with preemption 1951 * any flavor of RCU.
1952 * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
1953 */ 1952 */
1954int rcu_needs_cpu(int cpu) 1953int rcu_needs_cpu(int cpu)
1955{ 1954{
@@ -1957,6 +1956,21 @@ int rcu_needs_cpu(int cpu)
1957} 1956}
1958 1957
1959/* 1958/*
1959 * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
1960 */
1961static void rcu_prepare_for_idle_init(int cpu)
1962{
1963}
1964
1965/*
1966 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
1967 * after it.
1968 */
1969static void rcu_cleanup_after_idle(int cpu)
1970{
1971}
1972
1973/*
1960 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y, 1974 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y,
1961 * is nothing. 1975 * is nothing.
1962 */ 1976 */
@@ -1966,9 +1980,12 @@ static void rcu_prepare_for_idle(int cpu)
1966 1980
1967#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1981#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1968 1982
1969#define RCU_NEEDS_CPU_FLUSHES 5 1983#define RCU_NEEDS_CPU_FLUSHES 5 /* Allow for callback self-repost. */
1984#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
1970static DEFINE_PER_CPU(int, rcu_dyntick_drain); 1985static DEFINE_PER_CPU(int, rcu_dyntick_drain);
1971static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); 1986static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1987static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer);
1988static ktime_t rcu_idle_gp_wait;
1972 1989
1973/* 1990/*
1974 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no 1991 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
@@ -1989,6 +2006,47 @@ int rcu_needs_cpu(int cpu)
1989} 2006}
1990 2007
1991/* 2008/*
2009 * Timer handler used to force CPU to start pushing its remaining RCU
2010 * callbacks in the case where it entered dyntick-idle mode with callbacks
2011 pending. The handler doesn't really need to do anything because the
2012 * real work is done upon re-entry to idle, or by the next scheduling-clock
2013 * interrupt should idle not be re-entered.
2014 */
2015static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp)
2016{
2017 trace_rcu_prep_idle("Timer");
2018 return HRTIMER_NORESTART;
2019}
2020
2021/*
2022 * Initialize the timer used to pull CPUs out of dyntick-idle mode.
2023 */
2024static void rcu_prepare_for_idle_init(int cpu)
2025{
2026 static int firsttime = 1;
2027 struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu);
2028
2029 hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2030 hrtp->function = rcu_idle_gp_timer_func;
2031 if (firsttime) {
2032 unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY);
2033
2034 rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000);
2035 firsttime = 0;
2036 }
2037}
2038
2039/*
2040 * Clean up for exit from idle. Because we are exiting from idle, there
2041 * is no longer any point to rcu_idle_gp_timer, so cancel it. This will
2042 * do nothing if this timer is not active, so just cancel it unconditionally.
2043 */
2044static void rcu_cleanup_after_idle(int cpu)
2045{
2046 hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu));
2047}
2048
2049/*
1992 * Check to see if any RCU-related work can be done by the current CPU, 2050 * Check to see if any RCU-related work can be done by the current CPU,
1993 * and if so, schedule a softirq to get it done. This function is part 2051 * and if so, schedule a softirq to get it done. This function is part
1994 * of the RCU implementation; it is -not- an exported member of the RCU API. 2052 * of the RCU implementation; it is -not- an exported member of the RCU API.
@@ -2040,6 +2098,15 @@ static void rcu_prepare_for_idle(int cpu)
2040 /* First time through, initialize the counter. */ 2098 /* First time through, initialize the counter. */
2041 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; 2099 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
2042 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { 2100 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
2101 /* Can we go dyntick-idle despite still having callbacks? */
2102 if (!rcu_pending(cpu)) {
2103 trace_rcu_prep_idle("Dyntick with callbacks");
2104 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
2105 hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
2106 rcu_idle_gp_wait, HRTIMER_MODE_REL);
2107 return; /* Nothing more to do immediately. */
2108 }
2109
2043 /* We have hit the limit, so time to give up. */ 2110 /* We have hit the limit, so time to give up. */
2044 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; 2111 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
2045 local_irq_restore(flags); 2112 local_irq_restore(flags);