author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>   2009-08-22 16:56:52 -0400
committer  Ingo Molnar <mingo@elte.hu>                     2009-08-23 04:32:40 -0400
commit     f41d911f8c49a5d65c86504c19e8204bb605c4fd (patch)
tree       59bcd3048652ef290b3e19d2904409afd5c90eb3
parent     a157229cabd6dd8cfa82525fc9bf730c94cc9ac2 (diff)
rcu: Merge preemptable-RCU functionality into hierarchical RCU
Create a kernel/rcutree_plugin.h file that contains definitions for
preemptable RCU (or, under the #else branch of the #ifdef, empty
definitions for the classic non-preemptable semantics).  These
definitions fit into plugins defined in kernel/rcutree.c for this
purpose.

This variant of preemptable RCU uses a new algorithm whose read-side
expense is roughly that of classic hierarchical RCU under
CONFIG_PREEMPT.  This new algorithm's update-side expense is similar
to that of classic hierarchical RCU, and, in absence of read-side
preemption or blocking, is exactly that of classic hierarchical RCU.
Perhaps more important, this new algorithm has a much simpler
implementation, saving well over 1,000 lines of code compared to
mainline's implementation of preemptable RCU, which will hopefully be
retired in favor of this new algorithm.

The simplifications are obtained by maintaining per-task nesting state
for running tasks, and by using a simple lock-protected algorithm to
handle accounting when tasks block within RCU read-side critical
sections, making use of lessons learned while creating numerous
user-level RCU implementations over the past 18 months.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: josht@linux.vnet.ibm.com
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
LKML-Reference: <12509746134003-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
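
To make the per-task read-side bookkeeping described above concrete, here is a
minimal user-space sketch; it is not the kernel code.  Only the two field names
mirror the new task_struct members added by this patch; the task structure,
the sketch_* helpers, and the main() harness are purely illustrative.

    #include <stdio.h>

    struct task {
            int  rcu_read_lock_nesting;     /* read-side critical-section depth */
            char rcu_read_unlock_special;   /* e.g. "blocked while reading" */
    };

    static struct task current_task;        /* stand-in for the kernel's "current" */

    static void sketch_rcu_read_lock(void)
    {
            /* Fast path: bump a task-local counter, no locks or atomics. */
            current_task.rcu_read_lock_nesting++;
    }

    static void sketch_rcu_read_unlock(void)
    {
            /* Only the outermost unlock with "special" state takes a slow path;
             * in the kernel that path dequeues the task from its rcu_node list
             * and may report a quiescent state.  Here it just clears the flag. */
            if (--current_task.rcu_read_lock_nesting == 0 &&
                current_task.rcu_read_unlock_special) {
                    printf("outermost unlock with special state: clean up here\n");
                    current_task.rcu_read_unlock_special = 0;
            }
    }

    int main(void)
    {
            sketch_rcu_read_lock();
            sketch_rcu_read_lock();                    /* nesting is cheap */
            current_task.rcu_read_unlock_special = 1;  /* pretend the reader blocked */
            sketch_rcu_read_unlock();                  /* inner unlock: nothing special */
            sketch_rcu_read_unlock();                  /* outermost unlock: slow path runs */
            return 0;
    }

The point of the sketch is that the common case touches only task-local state;
the lock-protected accounting mentioned above is needed only when a reader
actually blocks inside a critical section.
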
-rw-r--r--  include/linux/init_task.h   |  15
-rw-r--r--  include/linux/rcupdate.h    |   2
-rw-r--r--  include/linux/rcupreempt.h  |   4
-rw-r--r--  include/linux/rcutree.h     |  16
-rw-r--r--  include/linux/sched.h       |  37
-rw-r--r--  init/Kconfig                |  22
-rw-r--r--  kernel/Makefile             |   1
-rw-r--r--  kernel/exit.c               |   1
-rw-r--r--  kernel/fork.c               |   5
-rw-r--r--  kernel/rcutree.c            | 135
-rw-r--r--  kernel/rcutree.h            |   9
-rw-r--r--  kernel/rcutree_plugin.h     | 447
-rw-r--r--  kernel/rcutree_trace.c      |  20
-rw-r--r--  lib/Kconfig.debug           |   2
14 files changed, 661 insertions, 55 deletions
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 7fc01b13be43..971a968831bf 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -94,6 +94,20 @@ extern struct group_info init_groups;
94# define CAP_INIT_BSET CAP_INIT_EFF_SET 94# define CAP_INIT_BSET CAP_INIT_EFF_SET
95#endif 95#endif
96 96
97#ifdef CONFIG_PREEMPT_RCU
98#define INIT_TASK_RCU_PREEMPT(tsk) \
99 .rcu_read_lock_nesting = 0, \
100 .rcu_flipctr_idx = 0,
101#elif defined(CONFIG_TREE_PREEMPT_RCU)
102#define INIT_TASK_RCU_PREEMPT(tsk) \
103 .rcu_read_lock_nesting = 0, \
104 .rcu_read_unlock_special = 0, \
105 .rcu_blocked_cpu = -1, \
106 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),
107#else
108#define INIT_TASK_RCU_PREEMPT(tsk)
109#endif
110
97extern struct cred init_cred; 111extern struct cred init_cred;
98 112
99#ifdef CONFIG_PERF_COUNTERS 113#ifdef CONFIG_PERF_COUNTERS
@@ -173,6 +187,7 @@ extern struct cred init_cred;
173 INIT_LOCKDEP \ 187 INIT_LOCKDEP \
174 INIT_FTRACE_GRAPH \ 188 INIT_FTRACE_GRAPH \
175 INIT_TRACE_RECURSION \ 189 INIT_TRACE_RECURSION \
190 INIT_TASK_RCU_PREEMPT(tsk) \
176} 191}
177 192
178 193
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9d85ee19492a..26892f5e7bd8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -66,7 +66,7 @@ extern void rcu_scheduler_starting(void);
66extern int rcu_needs_cpu(int cpu); 66extern int rcu_needs_cpu(int cpu);
67extern int rcu_scheduler_active; 67extern int rcu_scheduler_active;
68 68
69#if defined(CONFIG_TREE_RCU) 69#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
70#include <linux/rcutree.h> 70#include <linux/rcutree.h>
71#elif defined(CONFIG_PREEMPT_RCU) 71#elif defined(CONFIG_PREEMPT_RCU)
72#include <linux/rcupreempt.h> 72#include <linux/rcupreempt.h>
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index aff4772fb49e..a42ab88e9210 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -98,6 +98,10 @@ static inline long rcu_batches_completed_bh(void)
98 return rcu_batches_completed(); 98 return rcu_batches_completed();
99} 99}
100 100
101static inline void exit_rcu(void)
102{
103}
104
101#ifdef CONFIG_RCU_TRACE 105#ifdef CONFIG_RCU_TRACE
102struct rcupreempt_trace; 106struct rcupreempt_trace;
103extern long *rcupreempt_flipctr(int cpu); 107extern long *rcupreempt_flipctr(int cpu);
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c739d90f5e68..a89307717825 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,14 +35,30 @@ extern void rcu_bh_qs(int cpu);
35 35
36extern int rcu_needs_cpu(int cpu); 36extern int rcu_needs_cpu(int cpu);
37 37
38#ifdef CONFIG_TREE_PREEMPT_RCU
39
40extern void __rcu_read_lock(void);
41extern void __rcu_read_unlock(void);
42extern void exit_rcu(void);
43
44#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
45
38static inline void __rcu_read_lock(void) 46static inline void __rcu_read_lock(void)
39{ 47{
40 preempt_disable(); 48 preempt_disable();
41} 49}
50
42static inline void __rcu_read_unlock(void) 51static inline void __rcu_read_unlock(void)
43{ 52{
44 preempt_enable(); 53 preempt_enable();
45} 54}
55
56static inline void exit_rcu(void)
57{
58}
59
60#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
61
46static inline void __rcu_read_lock_bh(void) 62static inline void __rcu_read_lock_bh(void)
47{ 63{
48 local_bh_disable(); 64 local_bh_disable();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3ab08e4bb6b8..d7f98f637a2a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1210,6 +1210,13 @@ struct task_struct {
1210 int rcu_flipctr_idx; 1210 int rcu_flipctr_idx;
1211#endif /* #ifdef CONFIG_PREEMPT_RCU */ 1211#endif /* #ifdef CONFIG_PREEMPT_RCU */
1212 1212
1213#ifdef CONFIG_TREE_PREEMPT_RCU
1214 int rcu_read_lock_nesting;
1215 char rcu_read_unlock_special;
1216 int rcu_blocked_cpu;
1217 struct list_head rcu_node_entry;
1218#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1219
1213#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 1220#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1214 struct sched_info sched_info; 1221 struct sched_info sched_info;
1215#endif 1222#endif
@@ -1723,6 +1730,36 @@ extern cputime_t task_gtime(struct task_struct *p);
1723#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) 1730#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
1724#define used_math() tsk_used_math(current) 1731#define used_math() tsk_used_math(current)
1725 1732
1733#ifdef CONFIG_TREE_PREEMPT_RCU
1734
1735#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
1736#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
1737#define RCU_READ_UNLOCK_GOT_QS (1 << 2) /* CPU has responded to RCU core. */
1738
1739static inline void rcu_copy_process(struct task_struct *p)
1740{
1741 p->rcu_read_lock_nesting = 0;
1742 p->rcu_read_unlock_special = 0;
1743 p->rcu_blocked_cpu = -1;
1744 INIT_LIST_HEAD(&p->rcu_node_entry);
1745}
1746
1747#elif defined(CONFIG_PREEMPT_RCU)
1748
1749static inline void rcu_copy_process(struct task_struct *p)
1750{
1751 p->rcu_read_lock_nesting = 0;
1752 p->rcu_flipctr_idx = 0;
1753}
1754
1755#else
1756
1757static inline void rcu_copy_process(struct task_struct *p)
1758{
1759}
1760
1761#endif
1762
1726#ifdef CONFIG_SMP 1763#ifdef CONFIG_SMP
1727extern int set_cpus_allowed_ptr(struct task_struct *p, 1764extern int set_cpus_allowed_ptr(struct task_struct *p,
1728 const struct cpumask *new_mask); 1765 const struct cpumask *new_mask);
diff --git a/init/Kconfig b/init/Kconfig
index 25373cf32672..f88da2d1c1fb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -335,11 +335,20 @@ config PREEMPT_RCU
335 now-naive assumptions about each RCU read-side critical section 335 now-naive assumptions about each RCU read-side critical section
336 remaining on a given CPU through its execution. 336 remaining on a given CPU through its execution.
337 337
338config TREE_PREEMPT_RCU
339 bool "Preemptable tree-based hierarchical RCU"
340 depends on PREEMPT
341 help
342 This option selects the RCU implementation that is
343 designed for very large SMP systems with hundreds or
344 thousands of CPUs, but for which real-time response
345 is also required.
346
338endchoice 347endchoice
339 348
340config RCU_TRACE 349config RCU_TRACE
341 bool "Enable tracing for RCU" 350 bool "Enable tracing for RCU"
342 depends on TREE_RCU || PREEMPT_RCU 351 depends on TREE_RCU || PREEMPT_RCU || TREE_PREEMPT_RCU
343 help 352 help
344 This option provides tracing in RCU which presents stats 353 This option provides tracing in RCU which presents stats
345 in debugfs for debugging RCU implementation. 354 in debugfs for debugging RCU implementation.
@@ -351,7 +360,7 @@ config RCU_FANOUT
351 int "Tree-based hierarchical RCU fanout value" 360 int "Tree-based hierarchical RCU fanout value"
352 range 2 64 if 64BIT 361 range 2 64 if 64BIT
353 range 2 32 if !64BIT 362 range 2 32 if !64BIT
354 depends on TREE_RCU 363 depends on TREE_RCU || TREE_PREEMPT_RCU
355 default 64 if 64BIT 364 default 64 if 64BIT
356 default 32 if !64BIT 365 default 32 if !64BIT
357 help 366 help
@@ -366,7 +375,7 @@ config RCU_FANOUT
366 375
367config RCU_FANOUT_EXACT 376config RCU_FANOUT_EXACT
368 bool "Disable tree-based hierarchical RCU auto-balancing" 377 bool "Disable tree-based hierarchical RCU auto-balancing"
369 depends on TREE_RCU 378 depends on TREE_RCU || TREE_PREEMPT_RCU
370 default n 379 default n
371 help 380 help
372 This option forces use of the exact RCU_FANOUT value specified, 381 This option forces use of the exact RCU_FANOUT value specified,
@@ -379,11 +388,12 @@ config RCU_FANOUT_EXACT
379 Say N if unsure. 388 Say N if unsure.
380 389
381config TREE_RCU_TRACE 390config TREE_RCU_TRACE
382 def_bool RCU_TRACE && TREE_RCU 391 def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU )
383 select DEBUG_FS 392 select DEBUG_FS
384 help 393 help
385 This option provides tracing for the TREE_RCU implementation, 394 This option provides tracing for the TREE_RCU and
386 permitting Makefile to trivially select kernel/rcutree_trace.c. 395 TREE_PREEMPT_RCU implementations, permitting Makefile to
396 trivially select kernel/rcutree_trace.c.
387 397
388config PREEMPT_RCU_TRACE 398config PREEMPT_RCU_TRACE
389 def_bool RCU_TRACE && PREEMPT_RCU 399 def_bool RCU_TRACE && PREEMPT_RCU
diff --git a/kernel/Makefile b/kernel/Makefile
index 2419c9d43918..1a38b4789dda 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
81obj-$(CONFIG_SECCOMP) += seccomp.o 81obj-$(CONFIG_SECCOMP) += seccomp.o
82obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 82obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
83obj-$(CONFIG_TREE_RCU) += rcutree.o 83obj-$(CONFIG_TREE_RCU) += rcutree.o
84obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
84obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o 85obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
85obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o 86obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
86obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o 87obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 869dc221733e..263f95ed7201 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1010,6 +1010,7 @@ NORET_TYPE void do_exit(long code)
1010 __free_pipe_info(tsk->splice_pipe); 1010 __free_pipe_info(tsk->splice_pipe);
1011 1011
1012 preempt_disable(); 1012 preempt_disable();
1013 exit_rcu();
1013 /* causes final put_task_struct in finish_task_switch(). */ 1014 /* causes final put_task_struct in finish_task_switch(). */
1014 tsk->state = TASK_DEAD; 1015 tsk->state = TASK_DEAD;
1015 schedule(); 1016 schedule();
diff --git a/kernel/fork.c b/kernel/fork.c
index 021e1138556e..642e8b5edf00 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1022,10 +1022,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1022 copy_flags(clone_flags, p); 1022 copy_flags(clone_flags, p);
1023 INIT_LIST_HEAD(&p->children); 1023 INIT_LIST_HEAD(&p->children);
1024 INIT_LIST_HEAD(&p->sibling); 1024 INIT_LIST_HEAD(&p->sibling);
1025#ifdef CONFIG_PREEMPT_RCU 1025 rcu_copy_process(p);
1026 p->rcu_read_lock_nesting = 0;
1027 p->rcu_flipctr_idx = 0;
1028#endif /* #ifdef CONFIG_PREEMPT_RCU */
1029 p->vfork_done = NULL; 1026 p->vfork_done = NULL;
1030 spin_lock_init(&p->alloc_lock); 1027 spin_lock_init(&p->alloc_lock);
1031 1028
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4ce3adcfa94d..cc0255714075 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -80,6 +80,21 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
80struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 80struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
81DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 81DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
82 82
83extern long rcu_batches_completed_sched(void);
84static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp,
85 struct rcu_node *rnp, unsigned long flags);
86static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags);
87static void __rcu_process_callbacks(struct rcu_state *rsp,
88 struct rcu_data *rdp);
89static void __call_rcu(struct rcu_head *head,
90 void (*func)(struct rcu_head *rcu),
91 struct rcu_state *rsp);
92static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp);
93static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp,
94 int preemptable);
95
96#include "rcutree_plugin.h"
97
83/* 98/*
84 * Note a quiescent state. Because we do not need to know 99 * Note a quiescent state. Because we do not need to know
85 * how many quiescent states passed, just if there was at least 100 * how many quiescent states passed, just if there was at least
@@ -87,16 +102,27 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
87 */ 102 */
88void rcu_sched_qs(int cpu) 103void rcu_sched_qs(int cpu)
89{ 104{
90 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 105 unsigned long flags;
106 struct rcu_data *rdp;
107
108 local_irq_save(flags);
109 rdp = &per_cpu(rcu_sched_data, cpu);
91 rdp->passed_quiesc = 1; 110 rdp->passed_quiesc = 1;
92 rdp->passed_quiesc_completed = rdp->completed; 111 rdp->passed_quiesc_completed = rdp->completed;
112 rcu_preempt_qs(cpu);
113 local_irq_restore(flags);
93} 114}
94 115
95void rcu_bh_qs(int cpu) 116void rcu_bh_qs(int cpu)
96{ 117{
97 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 118 unsigned long flags;
119 struct rcu_data *rdp;
120
121 local_irq_save(flags);
122 rdp = &per_cpu(rcu_bh_data, cpu);
98 rdp->passed_quiesc = 1; 123 rdp->passed_quiesc = 1;
99 rdp->passed_quiesc_completed = rdp->completed; 124 rdp->passed_quiesc_completed = rdp->completed;
125 local_irq_restore(flags);
100} 126}
101 127
102#ifdef CONFIG_NO_HZ 128#ifdef CONFIG_NO_HZ
@@ -123,16 +149,6 @@ long rcu_batches_completed_sched(void)
123EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); 149EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
124 150
125/* 151/*
126 * Return the number of RCU batches processed thus far for debug & stats.
127 * @@@ placeholder, maps to rcu_batches_completed_sched().
128 */
129long rcu_batches_completed(void)
130{
131 return rcu_batches_completed_sched();
132}
133EXPORT_SYMBOL_GPL(rcu_batches_completed);
134
135/*
136 * Return the number of RCU BH batches processed thus far for debug & stats. 152 * Return the number of RCU BH batches processed thus far for debug & stats.
137 */ 153 */
138long rcu_batches_completed_bh(void) 154long rcu_batches_completed_bh(void)
@@ -193,6 +209,10 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
193 return 1; 209 return 1;
194 } 210 }
195 211
212 /* If preemptable RCU, no point in sending reschedule IPI. */
213 if (rdp->preemptable)
214 return 0;
215
196 /* The CPU is online, so send it a reschedule IPI. */ 216 /* The CPU is online, so send it a reschedule IPI. */
197 if (rdp->cpu != smp_processor_id()) 217 if (rdp->cpu != smp_processor_id())
198 smp_send_reschedule(rdp->cpu); 218 smp_send_reschedule(rdp->cpu);
@@ -473,6 +493,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
473 493
474 printk(KERN_ERR "INFO: RCU detected CPU stalls:"); 494 printk(KERN_ERR "INFO: RCU detected CPU stalls:");
475 for (; rnp_cur < rnp_end; rnp_cur++) { 495 for (; rnp_cur < rnp_end; rnp_cur++) {
496 rcu_print_task_stall(rnp);
476 if (rnp_cur->qsmask == 0) 497 if (rnp_cur->qsmask == 0)
477 continue; 498 continue;
478 for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++) 499 for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++)
@@ -686,6 +707,19 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
686} 707}
687 708
688/* 709/*
710 * Clean up after the prior grace period and let rcu_start_gp() start up
711 * the next grace period if one is needed. Note that the caller must
712 * hold rnp->lock, as required by rcu_start_gp(), which will release it.
713 */
714static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)
715 __releases(rnp->lock)
716{
717 rsp->completed = rsp->gpnum;
718 rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
719 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
720}
721
722/*
689 * Similar to cpu_quiet(), for which it is a helper function. Allows 723 * Similar to cpu_quiet(), for which it is a helper function. Allows
690 * a group of CPUs to be quieted at one go, though all the CPUs in the 724 * a group of CPUs to be quieted at one go, though all the CPUs in the
691 * group must be represented by the same leaf rcu_node structure. 725 * group must be represented by the same leaf rcu_node structure.
@@ -706,7 +740,7 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
706 return; 740 return;
707 } 741 }
708 rnp->qsmask &= ~mask; 742 rnp->qsmask &= ~mask;
709 if (rnp->qsmask != 0) { 743 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
710 744
711 /* Other bits still set at this level, so done. */ 745 /* Other bits still set at this level, so done. */
712 spin_unlock_irqrestore(&rnp->lock, flags); 746 spin_unlock_irqrestore(&rnp->lock, flags);
@@ -726,14 +760,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
726 760
727 /* 761 /*
728 * Get here if we are the last CPU to pass through a quiescent 762 * Get here if we are the last CPU to pass through a quiescent
729 * state for this grace period. Clean up and let rcu_start_gp() 763 * state for this grace period. Invoke cpu_quiet_msk_finish()
730 * start up the next grace period if one is needed. Note that 764 * to clean up and start the next grace period if one is needed.
731 * we still hold rnp->lock, as required by rcu_start_gp(), which
732 * will release it.
733 */ 765 */
734 rsp->completed = rsp->gpnum; 766 cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */
735 rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
736 rcu_start_gp(rsp, flags); /* releases rnp->lock. */
737} 767}
738 768
739/* 769/*
@@ -840,11 +870,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
840 spin_lock(&rnp->lock); /* irqs already disabled. */ 870 spin_lock(&rnp->lock); /* irqs already disabled. */
841 rnp->qsmaskinit &= ~mask; 871 rnp->qsmaskinit &= ~mask;
842 if (rnp->qsmaskinit != 0) { 872 if (rnp->qsmaskinit != 0) {
843 spin_unlock(&rnp->lock); /* irqs already disabled. */ 873 spin_unlock(&rnp->lock); /* irqs remain disabled. */
844 break; 874 break;
845 } 875 }
846 mask = rnp->grpmask; 876 mask = rnp->grpmask;
847 spin_unlock(&rnp->lock); /* irqs already disabled. */ 877 spin_unlock(&rnp->lock); /* irqs remain disabled. */
848 rnp = rnp->parent; 878 rnp = rnp->parent;
849 } while (rnp != NULL); 879 } while (rnp != NULL);
850 lastcomp = rsp->completed; 880 lastcomp = rsp->completed;
@@ -1007,6 +1037,7 @@ void rcu_check_callbacks(int cpu, int user)
1007 1037
1008 rcu_bh_qs(cpu); 1038 rcu_bh_qs(cpu);
1009 } 1039 }
1040 rcu_preempt_check_callbacks(cpu);
1010 raise_softirq(RCU_SOFTIRQ); 1041 raise_softirq(RCU_SOFTIRQ);
1011} 1042}
1012 1043
@@ -1188,6 +1219,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
1188 __rcu_process_callbacks(&rcu_sched_state, 1219 __rcu_process_callbacks(&rcu_sched_state,
1189 &__get_cpu_var(rcu_sched_data)); 1220 &__get_cpu_var(rcu_sched_data));
1190 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1221 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1222 rcu_preempt_process_callbacks();
1191 1223
1192 /* 1224 /*
1193 * Memory references from any later RCU read-side critical sections 1225 * Memory references from any later RCU read-side critical sections
@@ -1252,17 +1284,6 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1252EXPORT_SYMBOL_GPL(call_rcu_sched); 1284EXPORT_SYMBOL_GPL(call_rcu_sched);
1253 1285
1254/* 1286/*
1255 * @@@ Queue an RCU callback for invocation after a grace period.
1256 * @@@ Placeholder pending rcutree_plugin.h.
1257 */
1258void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1259{
1260 call_rcu_sched(head, func);
1261}
1262EXPORT_SYMBOL_GPL(call_rcu);
1263
1264
1265/*
1266 * Queue an RCU for invocation after a quicker grace period. 1287 * Queue an RCU for invocation after a quicker grace period.
1267 */ 1288 */
1268void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 1289void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
@@ -1335,7 +1356,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1335static int rcu_pending(int cpu) 1356static int rcu_pending(int cpu)
1336{ 1357{
1337 return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || 1358 return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
1338 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)); 1359 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
1360 rcu_preempt_pending(cpu);
1339} 1361}
1340 1362
1341/* 1363/*
@@ -1348,7 +1370,8 @@ int rcu_needs_cpu(int cpu)
1348{ 1370{
1349 /* RCU callbacks either ready or pending? */ 1371 /* RCU callbacks either ready or pending? */
1350 return per_cpu(rcu_sched_data, cpu).nxtlist || 1372 return per_cpu(rcu_sched_data, cpu).nxtlist ||
1351 per_cpu(rcu_bh_data, cpu).nxtlist; 1373 per_cpu(rcu_bh_data, cpu).nxtlist ||
1374 rcu_preempt_needs_cpu(cpu);
1352} 1375}
1353 1376
1354/* 1377/*
@@ -1383,7 +1406,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1383 * that this CPU cannot possibly have any RCU callbacks in flight yet. 1406 * that this CPU cannot possibly have any RCU callbacks in flight yet.
1384 */ 1407 */
1385static void __cpuinit 1408static void __cpuinit
1386rcu_init_percpu_data(int cpu, struct rcu_state *rsp) 1409rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
1387{ 1410{
1388 unsigned long flags; 1411 unsigned long flags;
1389 long lastcomp; 1412 long lastcomp;
@@ -1399,6 +1422,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
1399 rdp->passed_quiesc = 0; /* We could be racing with new GP, */ 1422 rdp->passed_quiesc = 0; /* We could be racing with new GP, */
1400 rdp->qs_pending = 1; /* so set up to respond to current GP. */ 1423 rdp->qs_pending = 1; /* so set up to respond to current GP. */
1401 rdp->beenonline = 1; /* We have now been online. */ 1424 rdp->beenonline = 1; /* We have now been online. */
1425 rdp->preemptable = preemptable;
1402 rdp->passed_quiesc_completed = lastcomp - 1; 1426 rdp->passed_quiesc_completed = lastcomp - 1;
1403 rdp->blimit = blimit; 1427 rdp->blimit = blimit;
1404 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1428 spin_unlock(&rnp->lock); /* irqs remain disabled. */
@@ -1441,12 +1465,13 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
1441 1465
1442static void __cpuinit rcu_online_cpu(int cpu) 1466static void __cpuinit rcu_online_cpu(int cpu)
1443{ 1467{
1444 rcu_init_percpu_data(cpu, &rcu_sched_state); 1468 rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
1445 rcu_init_percpu_data(cpu, &rcu_bh_state); 1469 rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
1470 rcu_preempt_init_percpu_data(cpu);
1446} 1471}
1447 1472
1448/* 1473/*
1449 * Handle CPU online/offline notifcation events. 1474 * Handle CPU online/offline notification events.
1450 */ 1475 */
1451int __cpuinit rcu_cpu_notify(struct notifier_block *self, 1476int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1452 unsigned long action, void *hcpu) 1477 unsigned long action, void *hcpu)
@@ -1521,6 +1546,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
1521 rnp = rsp->level[i]; 1546 rnp = rsp->level[i];
1522 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { 1547 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
1523 spin_lock_init(&rnp->lock); 1548 spin_lock_init(&rnp->lock);
1549 rnp->gpnum = 0;
1524 rnp->qsmask = 0; 1550 rnp->qsmask = 0;
1525 rnp->qsmaskinit = 0; 1551 rnp->qsmaskinit = 0;
1526 rnp->grplo = j * cpustride; 1552 rnp->grplo = j * cpustride;
@@ -1538,13 +1564,16 @@ static void __init rcu_init_one(struct rcu_state *rsp)
1538 j / rsp->levelspread[i - 1]; 1564 j / rsp->levelspread[i - 1];
1539 } 1565 }
1540 rnp->level = i; 1566 rnp->level = i;
1567 INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
1568 INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
1541 } 1569 }
1542 } 1570 }
1543} 1571}
1544 1572
1545/* 1573/*
1546 * Helper macro for __rcu_init(). To be used nowhere else! 1574 * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used
1547 * Assigns leaf node pointers into each CPU's rcu_data structure. 1575 * nowhere else! Assigns leaf node pointers into each CPU's rcu_data
1576 * structure.
1548 */ 1577 */
1549#define RCU_INIT_FLAVOR(rsp, rcu_data) \ 1578#define RCU_INIT_FLAVOR(rsp, rcu_data) \
1550do { \ 1579do { \
@@ -1560,18 +1589,38 @@ do { \
1560 } \ 1589 } \
1561} while (0) 1590} while (0)
1562 1591
1592#ifdef CONFIG_TREE_PREEMPT_RCU
1593
1594void __init __rcu_init_preempt(void)
1595{
1596 int i; /* All used by RCU_INIT_FLAVOR(). */
1597 int j;
1598 struct rcu_node *rnp;
1599
1600 RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
1601}
1602
1603#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1604
1605void __init __rcu_init_preempt(void)
1606{
1607}
1608
1609#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1610
1563void __init __rcu_init(void) 1611void __init __rcu_init(void)
1564{ 1612{
1565 int i; /* All used by RCU_DATA_PTR_INIT(). */ 1613 int i; /* All used by RCU_INIT_FLAVOR(). */
1566 int j; 1614 int j;
1567 struct rcu_node *rnp; 1615 struct rcu_node *rnp;
1568 1616
1569 printk(KERN_INFO "Hierarchical RCU implementation.\n"); 1617 rcu_bootup_announce();
1570#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 1618#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
1571 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); 1619 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
1572#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 1620#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
1573 RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); 1621 RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
1574 RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); 1622 RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
1623 __rcu_init_preempt();
1575 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 1624 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
1576} 1625}
1577 1626
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 0024e5ddcc68..ca560364d8cd 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -80,6 +80,7 @@ struct rcu_dynticks {
80 */ 80 */
81struct rcu_node { 81struct rcu_node {
82 spinlock_t lock; 82 spinlock_t lock;
83 long gpnum; /* Current grace period for this node. */
83 unsigned long qsmask; /* CPUs or groups that need to switch in */ 84 unsigned long qsmask; /* CPUs or groups that need to switch in */
84 /* order for current grace period to proceed.*/ 85 /* order for current grace period to proceed.*/
85 unsigned long qsmaskinit; 86 unsigned long qsmaskinit;
@@ -90,6 +91,8 @@ struct rcu_node {
90 u8 grpnum; /* CPU/group number for next level up. */ 91 u8 grpnum; /* CPU/group number for next level up. */
91 u8 level; /* root is at level 0. */ 92 u8 level; /* root is at level 0. */
92 struct rcu_node *parent; 93 struct rcu_node *parent;
94 struct list_head blocked_tasks[2];
95 /* Tasks blocked in RCU read-side critsect. */
93} ____cacheline_internodealigned_in_smp; 96} ____cacheline_internodealigned_in_smp;
94 97
95/* Index values for nxttail array in struct rcu_data. */ 98/* Index values for nxttail array in struct rcu_data. */
@@ -111,6 +114,7 @@ struct rcu_data {
111 bool passed_quiesc; /* User-mode/idle loop etc. */ 114 bool passed_quiesc; /* User-mode/idle loop etc. */
112 bool qs_pending; /* Core waits for quiesc state. */ 115 bool qs_pending; /* Core waits for quiesc state. */
113 bool beenonline; /* CPU online at least once. */ 116 bool beenonline; /* CPU online at least once. */
117 bool preemptable; /* Preemptable RCU? */
114 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 118 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
115 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 119 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
116 120
@@ -244,5 +248,10 @@ DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
244extern struct rcu_state rcu_bh_state; 248extern struct rcu_state rcu_bh_state;
245DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); 249DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
246 250
251#ifdef CONFIG_TREE_PREEMPT_RCU
252extern struct rcu_state rcu_preempt_state;
253DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
254#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
255
247#endif /* #ifdef RCU_TREE_NONCORE */ 256#endif /* #ifdef RCU_TREE_NONCORE */
248 257
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
new file mode 100644
index 000000000000..cd2ab67400c6
--- /dev/null
+++ b/kernel/rcutree_plugin.h
@@ -0,0 +1,447 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions that provide either classic
4 * or preemptable semantics.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 * Copyright Red Hat, 2009
21 * Copyright IBM Corporation, 2009
22 *
23 * Author: Ingo Molnar <mingo@elte.hu>
24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
25 */
26
27
28#ifdef CONFIG_TREE_PREEMPT_RCU
29
30struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
31DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
32
33/*
34 * Tell them what RCU they are running.
35 */
36static inline void rcu_bootup_announce(void)
37{
38 printk(KERN_INFO
39 "Experimental preemptable hierarchical RCU implementation.\n");
40}
41
42/*
43 * Return the number of RCU-preempt batches processed thus far
44 * for debug and statistics.
45 */
46long rcu_batches_completed_preempt(void)
47{
48 return rcu_preempt_state.completed;
49}
50EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
51
52/*
53 * Return the number of RCU batches processed thus far for debug & stats.
54 */
55long rcu_batches_completed(void)
56{
57 return rcu_batches_completed_preempt();
58}
59EXPORT_SYMBOL_GPL(rcu_batches_completed);
60
61/*
62 * Record a preemptable-RCU quiescent state for the specified CPU. Note
63 * that this just means that the task currently running on the CPU is
64 * not in a quiescent state. There might be any number of tasks blocked
65 * while in an RCU read-side critical section.
66 */
67static void rcu_preempt_qs_record(int cpu)
68{
69 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
70 rdp->passed_quiesc = 1;
71 rdp->passed_quiesc_completed = rdp->completed;
72}
73
74/*
75 * We have entered the scheduler or are between softirqs in ksoftirqd.
76 * If we are in an RCU read-side critical section, we need to reflect
77 * that in the state of the rcu_node structure corresponding to this CPU.
78 * Caller must disable hardirqs.
79 */
80static void rcu_preempt_qs(int cpu)
81{
82 struct task_struct *t = current;
83 int phase;
84 struct rcu_data *rdp;
85 struct rcu_node *rnp;
86
87 if (t->rcu_read_lock_nesting &&
88 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
89
90 /* Possibly blocking in an RCU read-side critical section. */
91 rdp = rcu_preempt_state.rda[cpu];
92 rnp = rdp->mynode;
93 spin_lock(&rnp->lock);
94 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
95 t->rcu_blocked_cpu = cpu;
96
97 /*
98 * If this CPU has already checked in, then this task
99 * will hold up the next grace period rather than the
100 * current grace period. Queue the task accordingly.
101 * If the task is queued for the current grace period
102 * (i.e., this CPU has not yet passed through a quiescent
103 * state for the current grace period), then as long
104 * as that task remains queued, the current grace period
105 * cannot end.
106 */
107 phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
108 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
109 smp_mb(); /* Ensure later ctxt swtch seen after above. */
110 spin_unlock(&rnp->lock);
111 }
112
113 /*
114 * Either we were not in an RCU read-side critical section to
115 * begin with, or we have now recorded that critical section
116 * globally. Either way, we can now note a quiescent state
117 * for this CPU. Again, if we were in an RCU read-side critical
118 * section, and if that critical section was blocking the current
119 * grace period, then the fact that the task has been enqueued
120 * means that we continue to block the current grace period.
121 */
122 rcu_preempt_qs_record(cpu);
123 t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS |
124 RCU_READ_UNLOCK_GOT_QS);
125}
126
127/*
128 * Tree-preemptable RCU implementation for rcu_read_lock().
129 * Just increment ->rcu_read_lock_nesting, shared state will be updated
130 * if we block.
131 */
132void __rcu_read_lock(void)
133{
134 ACCESS_ONCE(current->rcu_read_lock_nesting)++;
135 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
136}
137EXPORT_SYMBOL_GPL(__rcu_read_lock);
138
139static void rcu_read_unlock_special(struct task_struct *t)
140{
141 int empty;
142 unsigned long flags;
143 unsigned long mask;
144 struct rcu_node *rnp;
145 int special;
146
147 /* NMI handlers cannot block and cannot safely manipulate state. */
148 if (in_nmi())
149 return;
150
151 local_irq_save(flags);
152
153 /*
154 * If RCU core is waiting for this CPU to exit critical section,
155 * let it know that we have done so.
156 */
157 special = t->rcu_read_unlock_special;
158 if (special & RCU_READ_UNLOCK_NEED_QS) {
159 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
160 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS;
161 }
162
163 /* Hardware IRQ handlers cannot block. */
164 if (in_irq()) {
165 local_irq_restore(flags);
166 return;
167 }
168
169 /* Clean up if blocked during RCU read-side critical section. */
170 if (special & RCU_READ_UNLOCK_BLOCKED) {
171 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
172
173 /* Remove this task from the list it blocked on. */
174 rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode;
175 spin_lock(&rnp->lock);
176 empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
177 list_del_init(&t->rcu_node_entry);
178 t->rcu_blocked_cpu = -1;
179
180 /*
181 * If this was the last task on the current list, and if
182 * we aren't waiting on any CPUs, report the quiescent state.
183 * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
184 * drop rnp->lock and restore irq.
185 */
186 if (!empty && rnp->qsmask == 0 &&
187 list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
188 t->rcu_read_unlock_special &=
189 ~(RCU_READ_UNLOCK_NEED_QS |
190 RCU_READ_UNLOCK_GOT_QS);
191 if (rnp->parent == NULL) {
192 /* Only one rcu_node in the tree. */
193 cpu_quiet_msk_finish(&rcu_preempt_state, flags);
194 return;
195 }
196 /* Report up the rest of the hierarchy. */
197 mask = rnp->grpmask;
198 spin_unlock_irqrestore(&rnp->lock, flags);
199 rnp = rnp->parent;
200 spin_lock_irqsave(&rnp->lock, flags);
201 cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags);
202 return;
203 }
204 spin_unlock(&rnp->lock);
205 }
206 local_irq_restore(flags);
207}
208
209/*
210 * Tree-preemptable RCU implementation for rcu_read_unlock().
211 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
212 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
213 * invoke rcu_read_unlock_special() to clean up after a context switch
214 * in an RCU read-side critical section and other special cases.
215 */
216void __rcu_read_unlock(void)
217{
218 struct task_struct *t = current;
219
220 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
221 if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
222 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
223 rcu_read_unlock_special(t);
224}
225EXPORT_SYMBOL_GPL(__rcu_read_unlock);
226
227#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
228
229/*
230 * Scan the current list of tasks blocked within RCU read-side critical
231 * sections, printing out the tid of each.
232 */
233static void rcu_print_task_stall(struct rcu_node *rnp)
234{
235 unsigned long flags;
236 struct list_head *lp;
237 int phase = rnp->gpnum & 0x1;
238 struct task_struct *t;
239
240 if (!list_empty(&rnp->blocked_tasks[phase])) {
241 spin_lock_irqsave(&rnp->lock, flags);
242 phase = rnp->gpnum & 0x1; /* re-read under lock. */
243 lp = &rnp->blocked_tasks[phase];
244 list_for_each_entry(t, lp, rcu_node_entry)
245 printk(" P%d", t->pid);
246 spin_unlock_irqrestore(&rnp->lock, flags);
247 }
248}
249
250#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
251
252/*
253 * Check for preempted RCU readers for the specified rcu_node structure.
254 * If the caller needs a reliable answer, it must hold the rcu_node's
255 * >lock.
256 */
257static int rcu_preempted_readers(struct rcu_node *rnp)
258{
259 return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
260}
261
262/*
263 * Check for a quiescent state from the current CPU. When a task blocks,
264 * the task is recorded in the corresponding CPU's rcu_node structure,
265 * which is checked elsewhere.
266 *
267 * Caller must disable hard irqs.
268 */
269static void rcu_preempt_check_callbacks(int cpu)
270{
271 struct task_struct *t = current;
272
273 if (t->rcu_read_lock_nesting == 0) {
274 t->rcu_read_unlock_special &=
275 ~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS);
276 rcu_preempt_qs_record(cpu);
277 return;
278 }
279 if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
280 if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) {
281 rcu_preempt_qs_record(cpu);
282 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
283 } else if (!(t->rcu_read_unlock_special &
284 RCU_READ_UNLOCK_NEED_QS)) {
285 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
286 }
287 }
288}
289
290/*
291 * Process callbacks for preemptable RCU.
292 */
293static void rcu_preempt_process_callbacks(void)
294{
295 __rcu_process_callbacks(&rcu_preempt_state,
296 &__get_cpu_var(rcu_preempt_data));
297}
298
299/*
300 * Queue a preemptable-RCU callback for invocation after a grace period.
301 */
302void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
303{
304 __call_rcu(head, func, &rcu_preempt_state);
305}
306EXPORT_SYMBOL_GPL(call_rcu);
307
308/*
309 * Check to see if there is any immediate preemptable-RCU-related work
310 * to be done.
311 */
312static int rcu_preempt_pending(int cpu)
313{
314 return __rcu_pending(&rcu_preempt_state,
315 &per_cpu(rcu_preempt_data, cpu));
316}
317
318/*
319 * Does preemptable RCU need the CPU to stay out of dynticks mode?
320 */
321static int rcu_preempt_needs_cpu(int cpu)
322{
323 return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
324}
325
326/*
327 * Initialize preemptable RCU's per-CPU data.
328 */
329static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
330{
331 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
332}
333
334/*
335 * Check for a task exiting while in a preemptable-RCU read-side
336 * critical section, clean up if so. No need to issue warnings,
337 * as debug_check_no_locks_held() already does this if lockdep
338 * is enabled.
339 */
340void exit_rcu(void)
341{
342 struct task_struct *t = current;
343
344 if (t->rcu_read_lock_nesting == 0)
345 return;
346 t->rcu_read_lock_nesting = 1;
347 rcu_read_unlock();
348}
349
350#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
351
352/*
353 * Tell them what RCU they are running.
354 */
355static inline void rcu_bootup_announce(void)
356{
357 printk(KERN_INFO "Hierarchical RCU implementation.\n");
358}
359
360/*
361 * Return the number of RCU batches processed thus far for debug & stats.
362 */
363long rcu_batches_completed(void)
364{
365 return rcu_batches_completed_sched();
366}
367EXPORT_SYMBOL_GPL(rcu_batches_completed);
368
369/*
370 * Because preemptable RCU does not exist, we never have to check for
371 * CPUs being in quiescent states.
372 */
373static void rcu_preempt_qs(int cpu)
374{
375}
376
377#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
378
379/*
380 * Because preemptable RCU does not exist, we never have to check for
381 * tasks blocked within RCU read-side critical sections.
382 */
383static void rcu_print_task_stall(struct rcu_node *rnp)
384{
385}
386
387#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
388
389/*
390 * Because preemptable RCU does not exist, there are never any preempted
391 * RCU readers.
392 */
393static int rcu_preempted_readers(struct rcu_node *rnp)
394{
395 return 0;
396}
397
398/*
399 * Because preemptable RCU does not exist, it never has any callbacks
400 * to check.
401 */
402void rcu_preempt_check_callbacks(int cpu)
403{
404}
405
406/*
407 * Because preemptable RCU does not exist, it never has any callbacks
408 * to process.
409 */
410void rcu_preempt_process_callbacks(void)
411{
412}
413
414/*
415 * In classic RCU, call_rcu() is just call_rcu_sched().
416 */
417void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
418{
419 call_rcu_sched(head, func);
420}
421EXPORT_SYMBOL_GPL(call_rcu);
422
423/*
424 * Because preemptable RCU does not exist, it never has any work to do.
425 */
426static int rcu_preempt_pending(int cpu)
427{
428 return 0;
429}
430
431/*
432 * Because preemptable RCU does not exist, it never needs any CPU.
433 */
434static int rcu_preempt_needs_cpu(int cpu)
435{
436 return 0;
437}
438
439/*
440 * Because preemptable RCU does not exist, there is no per-CPU
441 * data to initialize.
442 */
443static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
444{
445}
446
447#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 31af3a0fb6d5..0ea1bff69727 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -77,6 +77,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
77 77
78static int show_rcudata(struct seq_file *m, void *unused) 78static int show_rcudata(struct seq_file *m, void *unused)
79{ 79{
80#ifdef CONFIG_TREE_PREEMPT_RCU
81 seq_puts(m, "rcu_preempt:\n");
82 PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m);
83#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
80 seq_puts(m, "rcu_sched:\n"); 84 seq_puts(m, "rcu_sched:\n");
81 PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); 85 PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m);
82 seq_puts(m, "rcu_bh:\n"); 86 seq_puts(m, "rcu_bh:\n");
@@ -125,6 +129,10 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
125 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); 129 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
126#endif /* #ifdef CONFIG_NO_HZ */ 130#endif /* #ifdef CONFIG_NO_HZ */
127 seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); 131 seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n");
132#ifdef CONFIG_TREE_PREEMPT_RCU
133 seq_puts(m, "\"rcu_preempt:\"\n");
134 PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m);
135#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
128 seq_puts(m, "\"rcu_sched:\"\n"); 136 seq_puts(m, "\"rcu_sched:\"\n");
129 PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m); 137 PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m);
130 seq_puts(m, "\"rcu_bh:\"\n"); 138 seq_puts(m, "\"rcu_bh:\"\n");
@@ -172,6 +180,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
172 180
173static int show_rcuhier(struct seq_file *m, void *unused) 181static int show_rcuhier(struct seq_file *m, void *unused)
174{ 182{
183#ifdef CONFIG_TREE_PREEMPT_RCU
184 seq_puts(m, "rcu_preempt:\n");
185 print_one_rcu_state(m, &rcu_preempt_state);
186#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
175 seq_puts(m, "rcu_sched:\n"); 187 seq_puts(m, "rcu_sched:\n");
176 print_one_rcu_state(m, &rcu_sched_state); 188 print_one_rcu_state(m, &rcu_sched_state);
177 seq_puts(m, "rcu_bh:\n"); 189 seq_puts(m, "rcu_bh:\n");
@@ -194,6 +206,10 @@ static struct file_operations rcuhier_fops = {
194 206
195static int show_rcugp(struct seq_file *m, void *unused) 207static int show_rcugp(struct seq_file *m, void *unused)
196{ 208{
209#ifdef CONFIG_TREE_PREEMPT_RCU
210 seq_printf(m, "rcu_preempt: completed=%ld gpnum=%ld\n",
211 rcu_preempt_state.completed, rcu_preempt_state.gpnum);
212#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
197 seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n", 213 seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n",
198 rcu_sched_state.completed, rcu_sched_state.gpnum); 214 rcu_sched_state.completed, rcu_sched_state.gpnum);
199 seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n", 215 seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n",
@@ -244,6 +260,10 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
244 260
245static int show_rcu_pending(struct seq_file *m, void *unused) 261static int show_rcu_pending(struct seq_file *m, void *unused)
246{ 262{
263#ifdef CONFIG_TREE_PREEMPT_RCU
264 seq_puts(m, "rcu_preempt:\n");
265 print_rcu_pendings(m, &rcu_preempt_state);
266#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
247 seq_puts(m, "rcu_sched:\n"); 267 seq_puts(m, "rcu_sched:\n");
248 print_rcu_pendings(m, &rcu_sched_state); 268 print_rcu_pendings(m, &rcu_sched_state);
249 seq_puts(m, "rcu_bh:\n"); 269 seq_puts(m, "rcu_bh:\n");
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 12327b2bb785..f87fb0c8f924 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -725,7 +725,7 @@ config RCU_TORTURE_TEST_RUNNABLE
725 725
726config RCU_CPU_STALL_DETECTOR 726config RCU_CPU_STALL_DETECTOR
727 bool "Check for stalled CPUs delaying RCU grace periods" 727 bool "Check for stalled CPUs delaying RCU grace periods"
728 depends on CLASSIC_RCU || TREE_RCU 728 depends on CLASSIC_RCU || TREE_RCU || TREE_PREEMPT_RCU
729 default n 729 default n
730 help 730 help
731 This option causes RCU to printk information on which 731 This option causes RCU to printk information on which