aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2013-06-21 15:34:33 -0400
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2013-08-18 21:58:31 -0400
commit: 2333210b26cf7aaf48d71343029afb860103d9f9 (patch)
tree: d3395575543b67a7ca6a91158d65c3ee760500bf /kernel
parent: b44379af1cf40050794832c38ea6a64e07eb5087 (diff)
nohz_full: Add rcu_dyntick data for scalable detection of all-idle state
This commit adds fields to the rcu_dynticks structure that are used to detect idle CPUs. These new fields differ from the existing ones in that the existing ones consider a CPU executing in user mode to be idle, where the new ones consider CPUs executing in user mode to be busy. The handling of these new fields is otherwise quite similar to that for the existing fields. This commit also adds the initialization required for these fields. So, why is usermode execution treated differently, with RCU considering it a quiescent state equivalent to idle, while in contrast the new full-system idle state detection considers usermode execution to be non-idle? It turns out that although one of RCU's quiescent states is usermode execution, it is not a full-system idle state. This is because the purpose of the full-system idle state is not RCU, but rather determining when accurate timekeeping can safely be disabled. Whenever accurate timekeeping is required in a CONFIG_NO_HZ_FULL kernel, at least one CPU must keep the scheduling-clock tick going. If even one CPU is executing in user mode, accurate timekeeping is required, particularly for architectures where gettimeofday() and friends do not enter the kernel. Only when all CPUs are really and truly idle can accurate timekeeping be disabled, allowing all CPUs to turn off the scheduling clock interrupt, thus greatly improving energy efficiency. This naturally raises the question "Why is this code in RCU rather than in timekeeping?", and the answer is that RCU has the data and infrastructure to efficiently make this determination. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Acked-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org> Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/rcutree.c 5
-rw-r--r-- kernel/rcutree.h 9
-rw-r--r-- kernel/rcutree_plugin.h 19
3 files changed, 33 insertions, 0 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 8807019138c6..4f27b85d8c86 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -224,6 +224,10 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
224DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 224DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
225 .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, 225 .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
226 .dynticks = ATOMIC_INIT(1), 226 .dynticks = ATOMIC_INIT(1),
227#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
228 .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
229 .dynticks_idle = ATOMIC_INIT(1),
230#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
227}; 231};
228 232
229static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ 233static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
@@ -2904,6 +2908,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2904 rdp->blimit = blimit; 2908 rdp->blimit = blimit;
2905 init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ 2909 init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
2906 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 2910 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
2911 rcu_sysidle_init_percpu_data(rdp->dynticks);
2907 atomic_set(&rdp->dynticks->dynticks, 2912 atomic_set(&rdp->dynticks->dynticks,
2908 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); 2913 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
2909 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2914 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index cbdeac6cea9e..52d1be108e75 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -88,6 +88,14 @@ struct rcu_dynticks {
88 /* Process level is worth LLONG_MAX/2. */ 88 /* Process level is worth LLONG_MAX/2. */
89 int dynticks_nmi_nesting; /* Track NMI nesting level. */ 89 int dynticks_nmi_nesting; /* Track NMI nesting level. */
90 atomic_t dynticks; /* Even value for idle, else odd. */ 90 atomic_t dynticks; /* Even value for idle, else odd. */
91#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
92 long long dynticks_idle_nesting;
93 /* irq/process nesting level from idle. */
94 atomic_t dynticks_idle; /* Even value for idle, else odd. */
95 /* "Idle" excludes userspace execution. */
96 unsigned long dynticks_idle_jiffies;
97 /* End of last non-NMI non-idle period. */
98#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
91#ifdef CONFIG_RCU_FAST_NO_HZ 99#ifdef CONFIG_RCU_FAST_NO_HZ
92 bool all_lazy; /* Are all CPU's CBs lazy? */ 100 bool all_lazy; /* Are all CPU's CBs lazy? */
93 unsigned long nonlazy_posted; 101 unsigned long nonlazy_posted;
@@ -545,6 +553,7 @@ static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
545static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); 553static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
546static void rcu_kick_nohz_cpu(int cpu); 554static void rcu_kick_nohz_cpu(int cpu);
547static bool init_nocb_callback_list(struct rcu_data *rdp); 555static bool init_nocb_callback_list(struct rcu_data *rdp);
556static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
548 557
549#endif /* #ifndef RCU_TREE_NONCORE */ 558#endif /* #ifndef RCU_TREE_NONCORE */
550 559
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index dff86f53ee09..e5baccbd8038 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2373,3 +2373,22 @@ static void rcu_kick_nohz_cpu(int cpu)
2373 smp_send_reschedule(cpu); 2373 smp_send_reschedule(cpu);
2374#endif /* #ifdef CONFIG_NO_HZ_FULL */ 2374#endif /* #ifdef CONFIG_NO_HZ_FULL */
2375} 2375}
2376
2377
2378#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
2379
2380/*
2381 * Initialize dynticks sysidle state for CPUs coming online.
2382 */
2383static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
2384{
2385 rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
2386}
2387
2388#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
2389
2390static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
2391{
2392}
2393
2394#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */